aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c13
-rw-r--r--net/8021q/vlan_dev.c8
-rw-r--r--net/9p/client.c14
-rw-r--r--net/9p/trans_rdma.c11
-rw-r--r--net/9p/trans_virtio.c5
-rw-r--r--net/Kconfig4
-rw-r--r--net/appletalk/atalk_proc.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.c32
-rw-r--r--net/batman-adv/gateway_client.c27
-rw-r--r--net/batman-adv/gateway_client.h1
-rw-r--r--net/batman-adv/icmp_socket.c1
-rw-r--r--net/batman-adv/main.c58
-rw-r--r--net/batman-adv/main.h5
-rw-r--r--net/batman-adv/routing.c20
-rw-r--r--net/batman-adv/send.c1
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/sysfs.c4
-rw-r--r--net/batman-adv/translation-table.c5
-rw-r--r--net/batman-adv/unicast.c2
-rw-r--r--net/batman-adv/vis.c2
-rw-r--r--net/bluetooth/hci_conn.c62
-rw-r--r--net/bluetooth/hci_core.c14
-rw-r--r--net/bluetooth/hci_event.c29
-rw-r--r--net/bluetooth/hci_sysfs.c2
-rw-r--r--net/bluetooth/hidp/core.c55
-rw-r--r--net/bluetooth/l2cap_core.c3
-rw-r--r--net/bluetooth/rfcomm/tty.c271
-rw-r--r--net/bluetooth/sco.c85
-rw-r--r--net/bridge/br_device.c12
-rw-r--r--net/bridge/br_if.c6
-rw-r--r--net/bridge/br_mdb.c6
-rw-r--r--net/bridge/br_multicast.c17
-rw-r--r--net/bridge/br_notify.c5
-rw-r--r--net/bridge/br_private.h22
-rw-r--r--net/bridge/netfilter/ebtable_broute.c2
-rw-r--r--net/bridge/netfilter/ebtable_filter.c2
-rw-r--r--net/bridge/netfilter/ebtable_nat.c2
-rw-r--r--net/caif/cfctrl.c3
-rw-r--r--net/can/gw.c35
-rw-r--r--net/ceph/messenger.c2
-rw-r--r--net/core/datagram.c72
-rw-r--r--net/core/dev.c371
-rw-r--r--net/core/fib_rules.c25
-rw-r--r--net/core/flow_dissector.c8
-rw-r--r--net/core/iovec.c24
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/net-sysfs.c165
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netprio_cgroup.c72
-rw-r--r--net/core/pktgen.c61
-rw-r--r--net/core/rtnetlink.c29
-rw-r--r--net/core/scm.c4
-rw-r--r--net/core/skbuff.c19
-rw-r--r--net/core/sock.c166
-rw-r--r--net/core/stream.c2
-rw-r--r--net/core/sysctl_net_core.c30
-rw-r--r--net/dccp/proto.c4
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/ieee802154/6lowpan.c286
-rw-r--r--net/ieee802154/6lowpan.h20
-rw-r--r--net/ieee802154/wpan-class.c23
-rw-r--r--net/ipv4/Kconfig16
-rw-r--r--net/ipv4/af_inet.c12
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c17
-rw-r--r--net/ipv4/fib_rules.c25
-rw-r--r--net/ipv4/igmp.c80
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_input.c8
-rw-r--r--net/ipv4/ip_tunnel.c71
-rw-r--r--net/ipv4/ip_tunnel_core.c10
-rw-r--r--net/ipv4/ip_vti.c528
-rw-r--r--net/ipv4/ipip.c3
-rw-r--r--net/ipv4/ipmr.c18
-rw-r--r--net/ipv4/netfilter/Kconfig13
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/arptable_filter.c2
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c21
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c476
-rw-r--r--net/ipv4/netfilter/iptable_filter.c2
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c2
-rw-r--r--net/ipv4/netfilter/iptable_nat.c2
-rw-r--r--net/ipv4/netfilter/iptable_raw.c2
-rw-r--r--net/ipv4/netfilter/iptable_security.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c7
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c7
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c24
-rw-r--r--net/ipv4/syncookies.c29
-rw-r--r--net/ipv4/sysctl_net_ipv4.c17
-rw-r--r--net/ipv4/tcp.c46
-rw-r--r--net/ipv4/tcp_fastopen.c13
-rw-r--r--net/ipv4/tcp_input.c204
-rw-r--r--net/ipv4/tcp_ipv4.c32
-rw-r--r--net/ipv4/tcp_memcontrol.c22
-rw-r--r--net/ipv4/tcp_metrics.c42
-rw-r--r--net/ipv4/tcp_minisocks.c8
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv4/tcp_probe.c87
-rw-r--r--net/ipv4/udp.c18
-rw-r--r--net/ipv6/addrconf.c165
-rw-r--r--net/ipv6/addrconf_core.c50
-rw-r--r--net/ipv6/addrlabel.c48
-rw-r--r--net/ipv6/af_inet6.c21
-rw-r--r--net/ipv6/ah6.c2
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/exthdrs.c6
-rw-r--r--net/ipv6/fib6_rules.c37
-rw-r--r--net/ipv6/icmp.c12
-rw-r--r--net/ipv6/ip6_fib.c18
-rw-r--r--net/ipv6/ip6_gre.c14
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_offload.c4
-rw-r--r--net/ipv6/ip6_output.c25
-rw-r--r--net/ipv6/ip6_tunnel.c46
-rw-r--r--net/ipv6/ip6mr.c14
-rw-r--r--net/ipv6/ipcomp6.c2
-rw-r--r--net/ipv6/mcast.c289
-rw-r--r--net/ipv6/ndisc.c63
-rw-r--r--net/ipv6/netfilter/Kconfig13
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c2
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c20
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c499
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c2
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c2
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c2
-rw-r--r--net/ipv6/netfilter/ip6table_security.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c7
-rw-r--r--net/ipv6/output_core.c48
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/raw.c9
-rw-r--r--net/ipv6/route.c105
-rw-r--r--net/ipv6/sit.c15
-rw-r--r--net/ipv6/syncookies.c25
-rw-r--r--net/ipv6/tcp_ipv6.c15
-rw-r--r--net/ipv6/udp_offload.c105
-rw-r--r--net/ipx/ipx_proc.c2
-rw-r--r--net/irda/irttp.c50
-rw-r--r--net/key/af_key.c14
-rw-r--r--net/llc/af_llc.c6
-rw-r--r--net/llc/llc_conn.c6
-rw-r--r--net/llc/llc_proc.c2
-rw-r--r--net/llc/llc_sap.c4
-rw-r--r--net/mac80211/cfg.c247
-rw-r--r--net/mac80211/chan.c58
-rw-r--r--net/mac80211/debugfs_sta.c9
-rw-r--r--net/mac80211/driver-ops.h13
-rw-r--r--net/mac80211/ht.c53
-rw-r--r--net/mac80211/ibss.c356
-rw-r--r--net/mac80211/ieee80211_i.h70
-rw-r--r--net/mac80211/iface.c30
-rw-r--r--net/mac80211/key.c154
-rw-r--r--net/mac80211/led.c19
-rw-r--r--net/mac80211/led.h4
-rw-r--r--net/mac80211/main.c18
-rw-r--r--net/mac80211/mesh.c10
-rw-r--r--net/mac80211/mesh_plink.c2
-rw-r--r--net/mac80211/mlme.c118
-rw-r--r--net/mac80211/rate.c69
-rw-r--r--net/mac80211/rate.h22
-rw-r--r--net/mac80211/rc80211_minstrel.c33
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c17
-rw-r--r--net/mac80211/rc80211_pid_algo.c1
-rw-r--r--net/mac80211/rx.c504
-rw-r--r--net/mac80211/scan.c72
-rw-r--r--net/mac80211/status.c90
-rw-r--r--net/mac80211/trace.h26
-rw-r--r--net/mac80211/tx.c122
-rw-r--r--net/mac80211/util.c218
-rw-r--r--net/netfilter/Kconfig26
-rw-r--r--net/netfilter/Makefile6
-rw-r--r--net/netfilter/core.c7
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c23
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c89
-rw-r--r--net/netfilter/nf_conntrack_labels.c4
-rw-r--r--net/netfilter/nf_conntrack_netlink.c384
-rw-r--r--net/netfilter/nf_conntrack_proto.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c36
-rw-r--r--net/netfilter/nf_conntrack_seqadj.c238
-rw-r--r--net/netfilter/nf_nat_core.c22
-rw-r--r--net/netfilter/nf_nat_helper.c230
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c8
-rw-r--r--net/netfilter/nf_nat_sip.c3
-rw-r--r--net/netfilter/nf_synproxy_core.c432
-rw-r--r--net/netfilter/nf_tproxy_core.c62
-rw-r--r--net/netfilter/nfnetlink_queue_core.c11
-rw-r--r--net/netfilter/nfnetlink_queue_ct.c23
-rw-r--r--net/netfilter/xt_TCPMSS.c2
-rw-r--r--net/netfilter/xt_TPROXY.c169
-rw-r--r--net/netfilter/xt_addrtype.c2
-rw-r--r--net/netfilter/xt_socket.c66
-rw-r--r--net/netlink/af_netlink.c131
-rw-r--r--net/netlink/af_netlink.h3
-rw-r--r--net/nfc/core.c22
-rw-r--r--net/nfc/hci/core.c2
-rw-r--r--net/nfc/netlink.c95
-rw-r--r--net/nfc/nfc.h5
-rw-r--r--net/openvswitch/Kconfig14
-rw-r--r--net/openvswitch/Makefile9
-rw-r--r--net/openvswitch/actions.c45
-rw-r--r--net/openvswitch/datapath.c176
-rw-r--r--net/openvswitch/datapath.h6
-rw-r--r--net/openvswitch/flow.c1487
-rw-r--r--net/openvswitch/flow.h89
-rw-r--r--net/openvswitch/vport-gre.c7
-rw-r--r--net/openvswitch/vport-netdev.c20
-rw-r--r--net/openvswitch/vport-vxlan.c204
-rw-r--r--net/openvswitch/vport.c6
-rw-r--r--net/openvswitch/vport.h1
-rw-r--r--net/packet/af_packet.c65
-rw-r--r--net/phonet/socket.c2
-rw-r--r--net/rfkill/core.c90
-rw-r--r--net/rfkill/rfkill-regulator.c8
-rw-r--r--net/sched/Kconfig14
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/cls_cgroup.c39
-rw-r--r--net/sched/sch_api.c53
-rw-r--r--net/sched/sch_choke.c3
-rw-r--r--net/sched/sch_fq.c793
-rw-r--r--net/sched/sch_generic.c20
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c2
-rw-r--r--net/sched/sch_netem.c5
-rw-r--r--net/sctp/associola.c8
-rw-r--r--net/sctp/auth.c8
-rw-r--r--net/sctp/bind_addr.c8
-rw-r--r--net/sctp/chunk.c12
-rw-r--r--net/sctp/command.c8
-rw-r--r--net/sctp/debug.c8
-rw-r--r--net/sctp/endpointola.c8
-rw-r--r--net/sctp/input.c18
-rw-r--r--net/sctp/inqueue.c8
-rw-r--r--net/sctp/ipv6.c10
-rw-r--r--net/sctp/objcnt.c8
-rw-r--r--net/sctp/output.c8
-rw-r--r--net/sctp/outqueue.c8
-rw-r--r--net/sctp/primitive.c8
-rw-r--r--net/sctp/probe.c27
-rw-r--r--net/sctp/proc.c12
-rw-r--r--net/sctp/protocol.c10
-rw-r--r--net/sctp/sm_make_chunk.c133
-rw-r--r--net/sctp/sm_sideeffect.c8
-rw-r--r--net/sctp/sm_statefuns.c8
-rw-r--r--net/sctp/sm_statetable.c8
-rw-r--r--net/sctp/socket.c13
-rw-r--r--net/sctp/ssnmap.c8
-rw-r--r--net/sctp/sysctl.c8
-rw-r--r--net/sctp/transport.c8
-rw-r--r--net/sctp/tsnmap.c8
-rw-r--r--net/sctp/ulpevent.c8
-rw-r--r--net/sctp/ulpqueue.c8
-rw-r--r--net/socket.c65
-rw-r--r--net/sunrpc/auth.c68
-rw-r--r--net/sunrpc/auth_generic.c82
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c442
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c26
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c41
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.h5
-rw-r--r--net/sunrpc/auth_null.c6
-rw-r--r--net/sunrpc/auth_unix.c6
-rw-r--r--net/sunrpc/clnt.c157
-rw-r--r--net/sunrpc/rpc_pipe.c193
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/svcsock.c2
-rw-r--r--net/sunrpc/xprtsock.c15
-rw-r--r--net/unix/af_unix.c70
-rw-r--r--net/vmw_vsock/af_vsock.c3
-rw-r--r--net/vmw_vsock/af_vsock.h175
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/vmw_vsock/vmci_transport.h4
-rw-r--r--net/vmw_vsock/vsock_addr.c3
-rw-r--r--net/vmw_vsock/vsock_addr.h30
-rw-r--r--net/wireless/core.c9
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/mesh.c5
-rw-r--r--net/wireless/mlme.c4
-rw-r--r--net/wireless/nl80211.c560
-rw-r--r--net/wireless/nl80211.h4
-rw-r--r--net/wireless/rdev-ops.h17
-rw-r--r--net/wireless/scan.c35
-rw-r--r--net/wireless/sysfs.c25
-rw-r--r--net/wireless/trace.h53
-rw-r--r--net/wireless/util.c14
-rw-r--r--net/x25/x25_facilities.c4
-rw-r--r--net/xfrm/xfrm_policy.c12
-rw-r--r--net/xfrm/xfrm_state.c15
295 files changed, 11283 insertions, 4800 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 2fb2d88e8c2e..61fc573f1142 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -210,6 +210,7 @@ out_vid_del:
210static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) 210static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
211{ 211{
212 struct net_device *new_dev; 212 struct net_device *new_dev;
213 struct vlan_dev_priv *vlan;
213 struct net *net = dev_net(real_dev); 214 struct net *net = dev_net(real_dev);
214 struct vlan_net *vn = net_generic(net, vlan_net_id); 215 struct vlan_net *vn = net_generic(net, vlan_net_id);
215 char name[IFNAMSIZ]; 216 char name[IFNAMSIZ];
@@ -260,11 +261,12 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
260 new_dev->mtu = real_dev->mtu; 261 new_dev->mtu = real_dev->mtu;
261 new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT); 262 new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
262 263
263 vlan_dev_priv(new_dev)->vlan_proto = htons(ETH_P_8021Q); 264 vlan = vlan_dev_priv(new_dev);
264 vlan_dev_priv(new_dev)->vlan_id = vlan_id; 265 vlan->vlan_proto = htons(ETH_P_8021Q);
265 vlan_dev_priv(new_dev)->real_dev = real_dev; 266 vlan->vlan_id = vlan_id;
266 vlan_dev_priv(new_dev)->dent = NULL; 267 vlan->real_dev = real_dev;
267 vlan_dev_priv(new_dev)->flags = VLAN_FLAG_REORDER_HDR; 268 vlan->dent = NULL;
269 vlan->flags = VLAN_FLAG_REORDER_HDR;
268 270
269 new_dev->rtnl_link_ops = &vlan_link_ops; 271 new_dev->rtnl_link_ops = &vlan_link_ops;
270 err = register_vlan_dev(new_dev); 272 err = register_vlan_dev(new_dev);
@@ -459,6 +461,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
459 461
460 case NETDEV_NOTIFY_PEERS: 462 case NETDEV_NOTIFY_PEERS:
461 case NETDEV_BONDING_FAILOVER: 463 case NETDEV_BONDING_FAILOVER:
464 case NETDEV_RESEND_IGMP:
462 /* Propagate to vlan devices */ 465 /* Propagate to vlan devices */
463 vlan_group_for_each_dev(grp, i, vlandev) 466 vlan_group_for_each_dev(grp, i, vlandev)
464 call_netdevice_notifiers(event, vlandev); 467 call_netdevice_notifiers(event, vlandev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 1cd3d2a406f5..09bf1c38805b 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -107,10 +107,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
107 u16 vlan_tci = 0; 107 u16 vlan_tci = 0;
108 int rc; 108 int rc;
109 109
110 if (!(vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR)) { 110 if (!(vlan->flags & VLAN_FLAG_REORDER_HDR)) {
111 vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); 111 vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
112 112
113 vlan_tci = vlan_dev_priv(dev)->vlan_id; 113 vlan_tci = vlan->vlan_id;
114 vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); 114 vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
115 vhdr->h_vlan_TCI = htons(vlan_tci); 115 vhdr->h_vlan_TCI = htons(vlan_tci);
116 116
@@ -133,7 +133,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
133 saddr = dev->dev_addr; 133 saddr = dev->dev_addr;
134 134
135 /* Now make the underlying real hard header */ 135 /* Now make the underlying real hard header */
136 dev = vlan_dev_priv(dev)->real_dev; 136 dev = vlan->real_dev;
137 rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen); 137 rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
138 if (rc > 0) 138 if (rc > 0)
139 rc += vhdrlen; 139 rc += vhdrlen;
@@ -582,7 +582,7 @@ static int vlan_dev_init(struct net_device *dev)
582 dev->dev_id = real_dev->dev_id; 582 dev->dev_id = real_dev->dev_id;
583 583
584 if (is_zero_ether_addr(dev->dev_addr)) 584 if (is_zero_ether_addr(dev->dev_addr))
585 memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len); 585 eth_hw_addr_inherit(dev, real_dev);
586 if (is_zero_ether_addr(dev->broadcast)) 586 if (is_zero_ether_addr(dev->broadcast))
587 memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); 587 memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
588 588
diff --git a/net/9p/client.c b/net/9p/client.c
index 8b93cae2d11d..ee8fd6bd4035 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -658,17 +658,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
658 658
659 /* 659 /*
660 * if we haven't received a response for oldreq, 660 * if we haven't received a response for oldreq,
661 * remove it from the list, and notify the transport 661 * remove it from the list
662 * layer that the reply will never arrive.
663 */ 662 */
664 spin_lock(&c->lock);
665 if (oldreq->status == REQ_STATUS_FLSH) { 663 if (oldreq->status == REQ_STATUS_FLSH) {
664 spin_lock(&c->lock);
666 list_del(&oldreq->req_list); 665 list_del(&oldreq->req_list);
667 spin_unlock(&c->lock); 666 spin_unlock(&c->lock);
668 if (c->trans_mod->cancelled)
669 c->trans_mod->cancelled(c, req);
670 } else {
671 spin_unlock(&c->lock);
672 } 667 }
673 668
674 p9_free_req(c, req); 669 p9_free_req(c, req);
@@ -992,6 +987,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
992{ 987{
993 int err; 988 int err;
994 struct p9_client *clnt; 989 struct p9_client *clnt;
990 char *client_id;
995 991
996 err = 0; 992 err = 0;
997 clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); 993 clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
@@ -1000,6 +996,10 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
1000 996
1001 clnt->trans_mod = NULL; 997 clnt->trans_mod = NULL;
1002 clnt->trans = NULL; 998 clnt->trans = NULL;
999
1000 client_id = utsname()->nodename;
1001 memcpy(clnt->name, client_id, strlen(client_id) + 1);
1002
1003 spin_lock_init(&clnt->lock); 1003 spin_lock_init(&clnt->lock);
1004 INIT_LIST_HEAD(&clnt->fidlist); 1004 INIT_LIST_HEAD(&clnt->fidlist);
1005 1005
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 928f2bb9bf8d..8f68df5d2973 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -588,17 +588,6 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
588 return 1; 588 return 1;
589} 589}
590 590
591/* A request has been fully flushed without a reply.
592 * That means we have posted one buffer in excess.
593 */
594static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
595{
596 struct p9_trans_rdma *rdma = client->trans;
597
598 atomic_inc(&rdma->excess_rc);
599 return 0;
600}
601
602/** 591/**
603 * trans_create_rdma - Transport method for creating atransport instance 592 * trans_create_rdma - Transport method for creating atransport instance
604 * @client: client instance 593 * @client: client instance
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e1c26b101830..990afab2be1b 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -577,6 +577,10 @@ static int p9_virtio_probe(struct virtio_device *vdev)
577 mutex_lock(&virtio_9p_lock); 577 mutex_lock(&virtio_9p_lock);
578 list_add_tail(&chan->chan_list, &virtio_chan_list); 578 list_add_tail(&chan->chan_list, &virtio_chan_list);
579 mutex_unlock(&virtio_9p_lock); 579 mutex_unlock(&virtio_9p_lock);
580
581 /* Let udev rules use the new mount_tag attribute. */
582 kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
583
580 return 0; 584 return 0;
581 585
582out_free_tag: 586out_free_tag:
@@ -654,6 +658,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
654 list_del(&chan->chan_list); 658 list_del(&chan->chan_list);
655 mutex_unlock(&virtio_9p_lock); 659 mutex_unlock(&virtio_9p_lock);
656 sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); 660 sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
661 kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
657 kfree(chan->tag); 662 kfree(chan->tag);
658 kfree(chan->vc_wq); 663 kfree(chan->vc_wq);
659 kfree(chan); 664 kfree(chan);
diff --git a/net/Kconfig b/net/Kconfig
index 2b406608a1a4..b50dacc072f0 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -228,7 +228,7 @@ config RPS
228 228
229config RFS_ACCEL 229config RFS_ACCEL
230 boolean 230 boolean
231 depends on RPS && GENERIC_HARDIRQS 231 depends on RPS
232 select CPU_RMAP 232 select CPU_RMAP
233 default y 233 default y
234 234
@@ -281,7 +281,7 @@ menu "Network testing"
281 281
282config NET_PKTGEN 282config NET_PKTGEN
283 tristate "Packet Generator (USE WITH CAUTION)" 283 tristate "Packet Generator (USE WITH CAUTION)"
284 depends on PROC_FS 284 depends on INET && PROC_FS
285 ---help--- 285 ---help---
286 This module will inject preconfigured packets, at a configurable 286 This module will inject preconfigured packets, at a configurable
287 rate, out of a given interface. It is used for network interface 287 rate, out of a given interface. It is used for network interface
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index c30f3a0717fb..af46bc49e1e9 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -178,7 +178,7 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v)
178 at = at_sk(s); 178 at = at_sk(s);
179 179
180 seq_printf(seq, "%02X %04X:%02X:%02X %04X:%02X:%02X %08X:%08X " 180 seq_printf(seq, "%02X %04X:%02X:%02X %04X:%02X:%02X %08X:%08X "
181 "%02X %d\n", 181 "%02X %u\n",
182 s->sk_type, ntohs(at->src_net), at->src_node, at->src_port, 182 s->sk_type, ntohs(at->src_net), at->src_node, at->src_port,
183 ntohs(at->dest_net), at->dest_node, at->dest_port, 183 ntohs(at->dest_net), at->dest_node, at->dest_port,
184 sk_wmem_alloc_get(s), 184 sk_wmem_alloc_get(s),
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 62da5278014a..0a8a80cd4bf1 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -28,6 +28,22 @@
28#include "bat_algo.h" 28#include "bat_algo.h"
29#include "network-coding.h" 29#include "network-coding.h"
30 30
31
32/**
33 * batadv_dup_status - duplicate status
34 * @BATADV_NO_DUP: the packet is a duplicate
35 * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
36 * neighbor)
37 * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
38 * @BATADV_PROTECTED: originator is currently protected (after reboot)
39 */
40enum batadv_dup_status {
41 BATADV_NO_DUP = 0,
42 BATADV_ORIG_DUP,
43 BATADV_NEIGH_DUP,
44 BATADV_PROTECTED,
45};
46
31/** 47/**
32 * batadv_ring_buffer_set - update the ring buffer with the given value 48 * batadv_ring_buffer_set - update the ring buffer with the given value
33 * @lq_recv: pointer to the ring buffer 49 * @lq_recv: pointer to the ring buffer
@@ -71,21 +87,6 @@ static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
71 return (uint8_t)(sum / count); 87 return (uint8_t)(sum / count);
72} 88}
73 89
74/*
75 * batadv_dup_status - duplicate status
76 * @BATADV_NO_DUP: the packet is a duplicate
77 * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
78 * neighbor)
79 * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
80 * @BATADV_PROTECTED: originator is currently protected (after reboot)
81 */
82enum batadv_dup_status {
83 BATADV_NO_DUP = 0,
84 BATADV_ORIG_DUP,
85 BATADV_NEIGH_DUP,
86 BATADV_PROTECTED,
87};
88
89static struct batadv_neigh_node * 90static struct batadv_neigh_node *
90batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, 91batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
91 const uint8_t *neigh_addr, 92 const uint8_t *neigh_addr,
@@ -478,6 +479,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
478 kfree(forw_packet_aggr); 479 kfree(forw_packet_aggr);
479 goto out; 480 goto out;
480 } 481 }
482 forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
481 skb_reserve(forw_packet_aggr->skb, ETH_HLEN); 483 skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
482 484
483 skb_buff = skb_put(forw_packet_aggr->skb, packet_len); 485 skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 7614af31daff..1ce4b8763ef2 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -190,6 +190,33 @@ next:
190 return curr_gw; 190 return curr_gw;
191} 191}
192 192
193/**
194 * batadv_gw_check_client_stop - check if client mode has been switched off
195 * @bat_priv: the bat priv with all the soft interface information
196 *
197 * This function assumes the caller has checked that the gw state *is actually
198 * changing*. This function is not supposed to be called when there is no state
199 * change.
200 */
201void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
202{
203 struct batadv_gw_node *curr_gw;
204
205 if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
206 return;
207
208 curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
209 if (!curr_gw)
210 return;
211
212 /* if batman-adv is switching the gw client mode off and a gateway was
213 * already selected, send a DEL uevent
214 */
215 batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL);
216
217 batadv_gw_node_free_ref(curr_gw);
218}
219
193void batadv_gw_election(struct batadv_priv *bat_priv) 220void batadv_gw_election(struct batadv_priv *bat_priv)
194{ 221{
195 struct batadv_gw_node *curr_gw = NULL, *next_gw = NULL; 222 struct batadv_gw_node *curr_gw = NULL, *next_gw = NULL;
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 1037d75da51f..ceef4ebe8bcd 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -20,6 +20,7 @@
20#ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ 20#ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
21#define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ 21#define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
22 22
23void batadv_gw_check_client_stop(struct batadv_priv *bat_priv);
23void batadv_gw_deselect(struct batadv_priv *bat_priv); 24void batadv_gw_deselect(struct batadv_priv *bat_priv);
24void batadv_gw_election(struct batadv_priv *bat_priv); 25void batadv_gw_election(struct batadv_priv *bat_priv);
25struct batadv_orig_node * 26struct batadv_orig_node *
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index b27508b8085c..5a99bb4b6b82 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -183,6 +183,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
183 goto out; 183 goto out;
184 } 184 }
185 185
186 skb->priority = TC_PRIO_CONTROL;
186 skb_reserve(skb, ETH_HLEN); 187 skb_reserve(skb, ETH_HLEN);
187 icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); 188 icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len);
188 189
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 08125f3f6064..c72d1bcdcf49 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -19,6 +19,10 @@
19 19
20#include <linux/crc32c.h> 20#include <linux/crc32c.h>
21#include <linux/highmem.h> 21#include <linux/highmem.h>
22#include <linux/if_vlan.h>
23#include <net/ip.h>
24#include <net/ipv6.h>
25#include <net/dsfield.h>
22#include "main.h" 26#include "main.h"
23#include "sysfs.h" 27#include "sysfs.h"
24#include "debugfs.h" 28#include "debugfs.h"
@@ -249,6 +253,60 @@ out:
249 return primary_if; 253 return primary_if;
250} 254}
251 255
256/**
257 * batadv_skb_set_priority - sets skb priority according to packet content
258 * @skb: the packet to be sent
259 * @offset: offset to the packet content
260 *
261 * This function sets a value between 256 and 263 (802.1d priority), which
262 * can be interpreted by the cfg80211 or other drivers.
263 */
264void batadv_skb_set_priority(struct sk_buff *skb, int offset)
265{
266 struct iphdr ip_hdr_tmp, *ip_hdr;
267 struct ipv6hdr ip6_hdr_tmp, *ip6_hdr;
268 struct ethhdr ethhdr_tmp, *ethhdr;
269 struct vlan_ethhdr *vhdr, vhdr_tmp;
270 u32 prio;
271
272 /* already set, do nothing */
273 if (skb->priority >= 256 && skb->priority <= 263)
274 return;
275
276 ethhdr = skb_header_pointer(skb, offset, sizeof(*ethhdr), &ethhdr_tmp);
277 if (!ethhdr)
278 return;
279
280 switch (ethhdr->h_proto) {
281 case htons(ETH_P_8021Q):
282 vhdr = skb_header_pointer(skb, offset + sizeof(*vhdr),
283 sizeof(*vhdr), &vhdr_tmp);
284 if (!vhdr)
285 return;
286 prio = ntohs(vhdr->h_vlan_TCI) & VLAN_PRIO_MASK;
287 prio = prio >> VLAN_PRIO_SHIFT;
288 break;
289 case htons(ETH_P_IP):
290 ip_hdr = skb_header_pointer(skb, offset + sizeof(*ethhdr),
291 sizeof(*ip_hdr), &ip_hdr_tmp);
292 if (!ip_hdr)
293 return;
294 prio = (ipv4_get_dsfield(ip_hdr) & 0xfc) >> 5;
295 break;
296 case htons(ETH_P_IPV6):
297 ip6_hdr = skb_header_pointer(skb, offset + sizeof(*ethhdr),
298 sizeof(*ip6_hdr), &ip6_hdr_tmp);
299 if (!ip6_hdr)
300 return;
301 prio = (ipv6_get_dsfield(ip6_hdr) & 0xfc) >> 5;
302 break;
303 default:
304 return;
305 }
306
307 skb->priority = prio + 256;
308}
309
252static int batadv_recv_unhandled_packet(struct sk_buff *skb, 310static int batadv_recv_unhandled_packet(struct sk_buff *skb,
253 struct batadv_hard_iface *recv_if) 311 struct batadv_hard_iface *recv_if)
254{ 312{
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 5e9aebb7d56b..24675523930f 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
26#define BATADV_DRIVER_DEVICE "batman-adv" 26#define BATADV_DRIVER_DEVICE "batman-adv"
27 27
28#ifndef BATADV_SOURCE_VERSION 28#ifndef BATADV_SOURCE_VERSION
29#define BATADV_SOURCE_VERSION "2013.3.0" 29#define BATADV_SOURCE_VERSION "2013.4.0"
30#endif 30#endif
31 31
32/* B.A.T.M.A.N. parameters */ 32/* B.A.T.M.A.N. parameters */
@@ -184,6 +184,7 @@ void batadv_mesh_free(struct net_device *soft_iface);
184int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); 184int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
185struct batadv_hard_iface * 185struct batadv_hard_iface *
186batadv_seq_print_text_primary_if_get(struct seq_file *seq); 186batadv_seq_print_text_primary_if_get(struct seq_file *seq);
187void batadv_skb_set_priority(struct sk_buff *skb, int offset);
187int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, 188int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
188 struct packet_type *ptype, 189 struct packet_type *ptype,
189 struct net_device *orig_dev); 190 struct net_device *orig_dev);
@@ -253,7 +254,7 @@ static inline void batadv_dbg(int type __always_unused,
253 254
254/* returns 1 if they are the same ethernet addr 255/* returns 1 if they are the same ethernet addr
255 * 256 *
256 * note: can't use compare_ether_addr() as it requires aligned memory 257 * note: can't use ether_addr_equal() as it requires aligned memory
257 */ 258 */
258static inline int batadv_compare_eth(const void *data1, const void *data2) 259static inline int batadv_compare_eth(const void *data1, const void *data2)
259{ 260{
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 2f0bd3ffe6e8..0439395d7ba5 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -775,7 +775,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
775 struct batadv_neigh_node *neigh_node = NULL; 775 struct batadv_neigh_node *neigh_node = NULL;
776 struct batadv_unicast_packet *unicast_packet; 776 struct batadv_unicast_packet *unicast_packet;
777 struct ethhdr *ethhdr = eth_hdr(skb); 777 struct ethhdr *ethhdr = eth_hdr(skb);
778 int res, ret = NET_RX_DROP; 778 int res, hdr_len, ret = NET_RX_DROP;
779 struct sk_buff *new_skb; 779 struct sk_buff *new_skb;
780 780
781 unicast_packet = (struct batadv_unicast_packet *)skb->data; 781 unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -835,6 +835,22 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
835 /* decrement ttl */ 835 /* decrement ttl */
836 unicast_packet->header.ttl--; 836 unicast_packet->header.ttl--;
837 837
838 switch (unicast_packet->header.packet_type) {
839 case BATADV_UNICAST_4ADDR:
840 hdr_len = sizeof(struct batadv_unicast_4addr_packet);
841 break;
842 case BATADV_UNICAST:
843 hdr_len = sizeof(struct batadv_unicast_packet);
844 break;
845 default:
846 /* other packet types not supported - yet */
847 hdr_len = -1;
848 break;
849 }
850
851 if (hdr_len > 0)
852 batadv_skb_set_priority(skb, hdr_len);
853
838 res = batadv_send_skb_to_orig(skb, orig_node, recv_if); 854 res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
839 855
840 /* translate transmit result into receive result */ 856 /* translate transmit result into receive result */
@@ -1193,6 +1209,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
1193 if (batadv_bla_check_bcast_duplist(bat_priv, skb)) 1209 if (batadv_bla_check_bcast_duplist(bat_priv, skb))
1194 goto out; 1210 goto out;
1195 1211
1212 batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet));
1213
1196 /* rebroadcast packet */ 1214 /* rebroadcast packet */
1197 batadv_add_bcast_packet_to_list(bat_priv, skb, 1); 1215 batadv_add_bcast_packet_to_list(bat_priv, skb, 1);
1198 1216
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index e9ff8d801201..0266edd0fa7f 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -67,7 +67,6 @@ int batadv_send_skb_packet(struct sk_buff *skb,
67 ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); 67 ethhdr->h_proto = __constant_htons(ETH_P_BATMAN);
68 68
69 skb_set_network_header(skb, ETH_HLEN); 69 skb_set_network_header(skb, ETH_HLEN);
70 skb->priority = TC_PRIO_CONTROL;
71 skb->protocol = __constant_htons(ETH_P_BATMAN); 70 skb->protocol = __constant_htons(ETH_P_BATMAN);
72 71
73 skb->dev = hard_iface->net_dev; 72 skb->dev = hard_iface->net_dev;
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0f04e1c302b4..4493913f0d5c 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -229,6 +229,8 @@ static int batadv_interface_tx(struct sk_buff *skb,
229 */ 229 */
230 } 230 }
231 231
232 batadv_skb_set_priority(skb, 0);
233
232 /* ethernet packet should be broadcasted */ 234 /* ethernet packet should be broadcasted */
233 if (do_bcast) { 235 if (do_bcast) {
234 primary_if = batadv_primary_if_get_selected(bat_priv); 236 primary_if = batadv_primary_if_get_selected(bat_priv);
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 929e304dacb2..4114b961bc2c 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -385,6 +385,10 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
385 curr_gw_mode_str, buff); 385 curr_gw_mode_str, buff);
386 386
387 batadv_gw_deselect(bat_priv); 387 batadv_gw_deselect(bat_priv);
388 /* always call batadv_gw_check_client_stop() before changing the gateway
389 * state
390 */
391 batadv_gw_check_client_stop(bat_priv);
388 atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp); 392 atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp);
389 return count; 393 return count;
390} 394}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 429aeef3d8b2..34510f38708f 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1626,6 +1626,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
1626 if (!skb) 1626 if (!skb)
1627 goto out; 1627 goto out;
1628 1628
1629 skb->priority = TC_PRIO_CONTROL;
1629 skb_reserve(skb, ETH_HLEN); 1630 skb_reserve(skb, ETH_HLEN);
1630 tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len); 1631 tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len);
1631 tt_response->ttvn = ttvn; 1632 tt_response->ttvn = ttvn;
@@ -1691,6 +1692,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
1691 if (!skb) 1692 if (!skb)
1692 goto out; 1693 goto out;
1693 1694
1695 skb->priority = TC_PRIO_CONTROL;
1694 skb_reserve(skb, ETH_HLEN); 1696 skb_reserve(skb, ETH_HLEN);
1695 1697
1696 tt_req_len = sizeof(*tt_request); 1698 tt_req_len = sizeof(*tt_request);
@@ -1788,6 +1790,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
1788 if (!skb) 1790 if (!skb)
1789 goto unlock; 1791 goto unlock;
1790 1792
1793 skb->priority = TC_PRIO_CONTROL;
1791 skb_reserve(skb, ETH_HLEN); 1794 skb_reserve(skb, ETH_HLEN);
1792 packet_pos = skb_put(skb, len); 1795 packet_pos = skb_put(skb, len);
1793 tt_response = (struct batadv_tt_query_packet *)packet_pos; 1796 tt_response = (struct batadv_tt_query_packet *)packet_pos;
@@ -1906,6 +1909,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
1906 if (!skb) 1909 if (!skb)
1907 goto unlock; 1910 goto unlock;
1908 1911
1912 skb->priority = TC_PRIO_CONTROL;
1909 skb_reserve(skb, ETH_HLEN); 1913 skb_reserve(skb, ETH_HLEN);
1910 packet_pos = skb_put(skb, len); 1914 packet_pos = skb_put(skb, len);
1911 tt_response = (struct batadv_tt_query_packet *)packet_pos; 1915 tt_response = (struct batadv_tt_query_packet *)packet_pos;
@@ -2240,6 +2244,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
2240 if (!skb) 2244 if (!skb)
2241 goto out; 2245 goto out;
2242 2246
2247 skb->priority = TC_PRIO_CONTROL;
2243 skb_reserve(skb, ETH_HLEN); 2248 skb_reserve(skb, ETH_HLEN);
2244 2249
2245 roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); 2250 roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len);
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 857e1b8349ee..48b31d33ce6b 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -242,6 +242,8 @@ int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv,
242 frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); 242 frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
243 if (!frag_skb) 243 if (!frag_skb)
244 goto dropped; 244 goto dropped;
245
246 skb->priority = TC_PRIO_CONTROL;
245 skb_reserve(frag_skb, ucf_hdr_len); 247 skb_reserve(frag_skb, ucf_hdr_len);
246 248
247 unicast_packet = (struct batadv_unicast_packet *)skb->data; 249 unicast_packet = (struct batadv_unicast_packet *)skb->data;
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 4983340f1943..d8ea31a58457 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -397,6 +397,7 @@ batadv_add_packet(struct batadv_priv *bat_priv,
397 kfree(info); 397 kfree(info);
398 return NULL; 398 return NULL;
399 } 399 }
400 info->skb_packet->priority = TC_PRIO_CONTROL;
400 skb_reserve(info->skb_packet, ETH_HLEN); 401 skb_reserve(info->skb_packet, ETH_HLEN);
401 packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); 402 packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len);
402 403
@@ -861,6 +862,7 @@ int batadv_vis_init(struct batadv_priv *bat_priv)
861 if (!bat_priv->vis.my_info->skb_packet) 862 if (!bat_priv->vis.my_info->skb_packet)
862 goto free_info; 863 goto free_info;
863 864
865 bat_priv->vis.my_info->skb_packet->priority = TC_PRIO_CONTROL;
864 skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); 866 skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN);
865 tmp_skb = bat_priv->vis.my_info->skb_packet; 867 tmp_skb = bat_priv->vis.my_info->skb_packet;
866 packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); 868 packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet));
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6c7f36379722..f0817121ec5e 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -31,6 +31,24 @@
31#include <net/bluetooth/a2mp.h> 31#include <net/bluetooth/a2mp.h>
32#include <net/bluetooth/smp.h> 32#include <net/bluetooth/smp.h>
33 33
34struct sco_param {
35 u16 pkt_type;
36 u16 max_latency;
37};
38
39static const struct sco_param sco_param_cvsd[] = {
40 { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000a }, /* S3 */
41 { EDR_ESCO_MASK & ~ESCO_2EV3, 0x0007 }, /* S2 */
42 { EDR_ESCO_MASK | ESCO_EV3, 0x0007 }, /* S1 */
43 { EDR_ESCO_MASK | ESCO_HV3, 0xffff }, /* D1 */
44 { EDR_ESCO_MASK | ESCO_HV1, 0xffff }, /* D0 */
45};
46
47static const struct sco_param sco_param_wideband[] = {
48 { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000d }, /* T2 */
49 { EDR_ESCO_MASK | ESCO_EV3, 0x0008 }, /* T1 */
50};
51
34static void hci_le_create_connection(struct hci_conn *conn) 52static void hci_le_create_connection(struct hci_conn *conn)
35{ 53{
36 struct hci_dev *hdev = conn->hdev; 54 struct hci_dev *hdev = conn->hdev;
@@ -172,10 +190,11 @@ static void hci_add_sco(struct hci_conn *conn, __u16 handle)
172 hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp); 190 hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp);
173} 191}
174 192
175void hci_setup_sync(struct hci_conn *conn, __u16 handle) 193bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
176{ 194{
177 struct hci_dev *hdev = conn->hdev; 195 struct hci_dev *hdev = conn->hdev;
178 struct hci_cp_setup_sync_conn cp; 196 struct hci_cp_setup_sync_conn cp;
197 const struct sco_param *param;
179 198
180 BT_DBG("hcon %p", conn); 199 BT_DBG("hcon %p", conn);
181 200
@@ -185,15 +204,35 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
185 conn->attempt++; 204 conn->attempt++;
186 205
187 cp.handle = cpu_to_le16(handle); 206 cp.handle = cpu_to_le16(handle);
188 cp.pkt_type = cpu_to_le16(conn->pkt_type);
189 207
190 cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40); 208 cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
191 cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40); 209 cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
192 cp.max_latency = __constant_cpu_to_le16(0xffff); 210 cp.voice_setting = cpu_to_le16(conn->setting);
193 cp.voice_setting = cpu_to_le16(hdev->voice_setting); 211
194 cp.retrans_effort = 0xff; 212 switch (conn->setting & SCO_AIRMODE_MASK) {
213 case SCO_AIRMODE_TRANSP:
214 if (conn->attempt > ARRAY_SIZE(sco_param_wideband))
215 return false;
216 cp.retrans_effort = 0x02;
217 param = &sco_param_wideband[conn->attempt - 1];
218 break;
219 case SCO_AIRMODE_CVSD:
220 if (conn->attempt > ARRAY_SIZE(sco_param_cvsd))
221 return false;
222 cp.retrans_effort = 0x01;
223 param = &sco_param_cvsd[conn->attempt - 1];
224 break;
225 default:
226 return false;
227 }
195 228
196 hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); 229 cp.pkt_type = __cpu_to_le16(param->pkt_type);
230 cp.max_latency = __cpu_to_le16(param->max_latency);
231
232 if (hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0)
233 return false;
234
235 return true;
197} 236}
198 237
199void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, 238void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max,
@@ -560,13 +599,13 @@ static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
560 return acl; 599 return acl;
561} 600}
562 601
563static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, 602struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
564 bdaddr_t *dst, u8 sec_level, u8 auth_type) 603 __u16 setting)
565{ 604{
566 struct hci_conn *acl; 605 struct hci_conn *acl;
567 struct hci_conn *sco; 606 struct hci_conn *sco;
568 607
569 acl = hci_connect_acl(hdev, dst, sec_level, auth_type); 608 acl = hci_connect_acl(hdev, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING);
570 if (IS_ERR(acl)) 609 if (IS_ERR(acl))
571 return acl; 610 return acl;
572 611
@@ -584,6 +623,8 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
584 623
585 hci_conn_hold(sco); 624 hci_conn_hold(sco);
586 625
626 sco->setting = setting;
627
587 if (acl->state == BT_CONNECTED && 628 if (acl->state == BT_CONNECTED &&
588 (sco->state == BT_OPEN || sco->state == BT_CLOSED)) { 629 (sco->state == BT_OPEN || sco->state == BT_CLOSED)) {
589 set_bit(HCI_CONN_POWER_SAVE, &acl->flags); 630 set_bit(HCI_CONN_POWER_SAVE, &acl->flags);
@@ -612,9 +653,6 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
612 return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type); 653 return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type);
613 case ACL_LINK: 654 case ACL_LINK:
614 return hci_connect_acl(hdev, dst, sec_level, auth_type); 655 return hci_connect_acl(hdev, dst, sec_level, auth_type);
615 case SCO_LINK:
616 case ESCO_LINK:
617 return hci_connect_sco(hdev, type, dst, sec_level, auth_type);
618 } 656 }
619 657
620 return ERR_PTR(-EINVAL); 658 return ERR_PTR(-EINVAL);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index cc27297da5a9..634debab4d54 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -454,6 +454,18 @@ static void hci_setup_event_mask(struct hci_request *req)
454 events[4] |= 0x04; /* Read Remote Extended Features Complete */ 454 events[4] |= 0x04; /* Read Remote Extended Features Complete */
455 events[5] |= 0x08; /* Synchronous Connection Complete */ 455 events[5] |= 0x08; /* Synchronous Connection Complete */
456 events[5] |= 0x10; /* Synchronous Connection Changed */ 456 events[5] |= 0x10; /* Synchronous Connection Changed */
457 } else {
458 /* Use a different default for LE-only devices */
459 memset(events, 0, sizeof(events));
460 events[0] |= 0x10; /* Disconnection Complete */
461 events[0] |= 0x80; /* Encryption Change */
462 events[1] |= 0x08; /* Read Remote Version Information Complete */
463 events[1] |= 0x20; /* Command Complete */
464 events[1] |= 0x40; /* Command Status */
465 events[1] |= 0x80; /* Hardware Error */
466 events[2] |= 0x04; /* Number of Completed Packets */
467 events[3] |= 0x02; /* Data Buffer Overflow */
468 events[5] |= 0x80; /* Encryption Key Refresh Complete */
457 } 469 }
458 470
459 if (lmp_inq_rssi_capable(hdev)) 471 if (lmp_inq_rssi_capable(hdev))
@@ -608,7 +620,7 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
608 * as supported send it. If not supported assume that the controller 620 * as supported send it. If not supported assume that the controller
609 * does not have actual support for stored link keys which makes this 621 * does not have actual support for stored link keys which makes this
610 * command redundant anyway. 622 * command redundant anyway.
611 */ 623 */
612 if (hdev->commands[6] & 0x80) { 624 if (hdev->commands[6] & 0x80) {
613 struct hci_cp_delete_stored_link_key cp; 625 struct hci_cp_delete_stored_link_key cp;
614 626
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0437200d92f4..94aab73f89d4 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2904,15 +2904,16 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
2904 hci_conn_add_sysfs(conn); 2904 hci_conn_add_sysfs(conn);
2905 break; 2905 break;
2906 2906
2907 case 0x0d: /* Connection Rejected due to Limited Resources */
2907 case 0x11: /* Unsupported Feature or Parameter Value */ 2908 case 0x11: /* Unsupported Feature or Parameter Value */
2908 case 0x1c: /* SCO interval rejected */ 2909 case 0x1c: /* SCO interval rejected */
2909 case 0x1a: /* Unsupported Remote Feature */ 2910 case 0x1a: /* Unsupported Remote Feature */
2910 case 0x1f: /* Unspecified error */ 2911 case 0x1f: /* Unspecified error */
2911 if (conn->out && conn->attempt < 2) { 2912 if (conn->out) {
2912 conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | 2913 conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
2913 (hdev->esco_type & EDR_ESCO_MASK); 2914 (hdev->esco_type & EDR_ESCO_MASK);
2914 hci_setup_sync(conn, conn->link->handle); 2915 if (hci_setup_sync(conn, conn->link->handle))
2915 goto unlock; 2916 goto unlock;
2916 } 2917 }
2917 /* fall through */ 2918 /* fall through */
2918 2919
@@ -3024,17 +3025,20 @@ unlock:
3024static u8 hci_get_auth_req(struct hci_conn *conn) 3025static u8 hci_get_auth_req(struct hci_conn *conn)
3025{ 3026{
3026 /* If remote requests dedicated bonding follow that lead */ 3027 /* If remote requests dedicated bonding follow that lead */
3027 if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) { 3028 if (conn->remote_auth == HCI_AT_DEDICATED_BONDING ||
3029 conn->remote_auth == HCI_AT_DEDICATED_BONDING_MITM) {
3028 /* If both remote and local IO capabilities allow MITM 3030 /* If both remote and local IO capabilities allow MITM
3029 * protection then require it, otherwise don't */ 3031 * protection then require it, otherwise don't */
3030 if (conn->remote_cap == 0x03 || conn->io_capability == 0x03) 3032 if (conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT ||
3031 return 0x02; 3033 conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)
3034 return HCI_AT_DEDICATED_BONDING;
3032 else 3035 else
3033 return 0x03; 3036 return HCI_AT_DEDICATED_BONDING_MITM;
3034 } 3037 }
3035 3038
3036 /* If remote requests no-bonding follow that lead */ 3039 /* If remote requests no-bonding follow that lead */
3037 if (conn->remote_auth == 0x00 || conn->remote_auth == 0x01) 3040 if (conn->remote_auth == HCI_AT_NO_BONDING ||
3041 conn->remote_auth == HCI_AT_NO_BONDING_MITM)
3038 return conn->remote_auth | (conn->auth_type & 0x01); 3042 return conn->remote_auth | (conn->auth_type & 0x01);
3039 3043
3040 return conn->auth_type; 3044 return conn->auth_type;
@@ -3066,7 +3070,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
3066 /* Change the IO capability from KeyboardDisplay 3070 /* Change the IO capability from KeyboardDisplay
3067 * to DisplayYesNo as it is not supported by BT spec. */ 3071 * to DisplayYesNo as it is not supported by BT spec. */
3068 cp.capability = (conn->io_capability == 0x04) ? 3072 cp.capability = (conn->io_capability == 0x04) ?
3069 0x01 : conn->io_capability; 3073 HCI_IO_DISPLAY_YESNO : conn->io_capability;
3070 conn->auth_type = hci_get_auth_req(conn); 3074 conn->auth_type = hci_get_auth_req(conn);
3071 cp.authentication = conn->auth_type; 3075 cp.authentication = conn->auth_type;
3072 3076
@@ -3140,7 +3144,8 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
3140 * request. The only exception is when we're dedicated bonding 3144 * request. The only exception is when we're dedicated bonding
3141 * initiators (connect_cfm_cb set) since then we always have the MITM 3145 * initiators (connect_cfm_cb set) since then we always have the MITM
3142 * bit set. */ 3146 * bit set. */
3143 if (!conn->connect_cfm_cb && loc_mitm && conn->remote_cap == 0x03) { 3147 if (!conn->connect_cfm_cb && loc_mitm &&
3148 conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) {
3144 BT_DBG("Rejecting request: remote device can't provide MITM"); 3149 BT_DBG("Rejecting request: remote device can't provide MITM");
3145 hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY, 3150 hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY,
3146 sizeof(ev->bdaddr), &ev->bdaddr); 3151 sizeof(ev->bdaddr), &ev->bdaddr);
@@ -3148,8 +3153,8 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
3148 } 3153 }
3149 3154
3150 /* If no side requires MITM protection; auto-accept */ 3155 /* If no side requires MITM protection; auto-accept */
3151 if ((!loc_mitm || conn->remote_cap == 0x03) && 3156 if ((!loc_mitm || conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) &&
3152 (!rem_mitm || conn->io_capability == 0x03)) { 3157 (!rem_mitm || conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)) {
3153 3158
3154 /* If we're not the initiators request authorization to 3159 /* If we're not the initiators request authorization to
3155 * proceed from user space (mgmt_user_confirm with 3160 * proceed from user space (mgmt_user_confirm with
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 7ad6ecf36f20..edf623a29043 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -590,7 +590,7 @@ int __init bt_sysfs_init(void)
590 590
591 bt_class = class_create(THIS_MODULE, "bluetooth"); 591 bt_class = class_create(THIS_MODULE, "bluetooth");
592 592
593 return PTR_RET(bt_class); 593 return PTR_ERR_OR_ZERO(bt_class);
594} 594}
595 595
596void bt_sysfs_cleanup(void) 596void bt_sysfs_cleanup(void)
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 0c699cdc3696..bdc35a7a7fee 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -225,17 +225,47 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)
225 225
226static int hidp_send_report(struct hidp_session *session, struct hid_report *report) 226static int hidp_send_report(struct hidp_session *session, struct hid_report *report)
227{ 227{
228 unsigned char buf[32], hdr; 228 unsigned char hdr;
229 int rsize; 229 u8 *buf;
230 int rsize, ret;
230 231
231 rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0); 232 buf = hid_alloc_report_buf(report, GFP_ATOMIC);
232 if (rsize > sizeof(buf)) 233 if (!buf)
233 return -EIO; 234 return -EIO;
234 235
235 hid_output_report(report, buf); 236 hid_output_report(report, buf);
236 hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; 237 hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
237 238
238 return hidp_send_intr_message(session, hdr, buf, rsize); 239 rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0);
240 ret = hidp_send_intr_message(session, hdr, buf, rsize);
241
242 kfree(buf);
243 return ret;
244}
245
246static int hidp_hidinput_event(struct input_dev *dev, unsigned int type,
247 unsigned int code, int value)
248{
249 struct hid_device *hid = input_get_drvdata(dev);
250 struct hidp_session *session = hid->driver_data;
251 struct hid_field *field;
252 int offset;
253
254 BT_DBG("session %p type %d code %d value %d",
255 session, type, code, value);
256
257 if (type != EV_LED)
258 return -1;
259
260 offset = hidinput_find_field(hid, type, code, &field);
261 if (offset == -1) {
262 hid_warn(dev, "event field not found\n");
263 return -1;
264 }
265
266 hid_set_field(field, offset, value);
267
268 return hidp_send_report(session, field->report);
239} 269}
240 270
241static int hidp_get_raw_report(struct hid_device *hid, 271static int hidp_get_raw_report(struct hid_device *hid,
@@ -678,20 +708,6 @@ static int hidp_parse(struct hid_device *hid)
678 708
679static int hidp_start(struct hid_device *hid) 709static int hidp_start(struct hid_device *hid)
680{ 710{
681 struct hidp_session *session = hid->driver_data;
682 struct hid_report *report;
683
684 if (hid->quirks & HID_QUIRK_NO_INIT_REPORTS)
685 return 0;
686
687 list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].
688 report_list, list)
689 hidp_send_report(session, report);
690
691 list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].
692 report_list, list)
693 hidp_send_report(session, report);
694
695 return 0; 711 return 0;
696} 712}
697 713
@@ -711,6 +727,7 @@ static struct hid_ll_driver hidp_hid_driver = {
711 .stop = hidp_stop, 727 .stop = hidp_stop,
712 .open = hidp_open, 728 .open = hidp_open,
713 .close = hidp_close, 729 .close = hidp_close,
730 .hidinput_input_event = hidp_hidinput_event,
714}; 731};
715 732
716/* This function sets up the hid device. It does not add it 733/* This function sets up the hid device. It does not add it
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 8c3499bec893..b3bb7bca8e60 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1415,8 +1415,9 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
1415 sk->sk_state_change(sk); 1415 sk->sk_state_change(sk);
1416 release_sock(sk); 1416 release_sock(sk);
1417 1417
1418 } else if (chan->state == BT_CONNECT) 1418 } else if (chan->state == BT_CONNECT) {
1419 l2cap_do_start(chan); 1419 l2cap_do_start(chan);
1420 }
1420 1421
1421 l2cap_chan_unlock(chan); 1422 l2cap_chan_unlock(chan);
1422 } 1423 }
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index b6e44ad6cca6..6d126faf145f 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -58,7 +58,6 @@ struct rfcomm_dev {
58 uint modem_status; 58 uint modem_status;
59 59
60 struct rfcomm_dlc *dlc; 60 struct rfcomm_dlc *dlc;
61 wait_queue_head_t wait;
62 61
63 struct device *tty_dev; 62 struct device *tty_dev;
64 63
@@ -76,13 +75,6 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig);
76 75
77/* ---- Device functions ---- */ 76/* ---- Device functions ---- */
78 77
79/*
80 * The reason this isn't actually a race, as you no doubt have a little voice
81 * screaming at you in your head, is that the refcount should never actually
82 * reach zero unless the device has already been taken off the list, in
83 * rfcomm_dev_del(). And if that's not true, we'll hit the BUG() in
84 * rfcomm_dev_destruct() anyway.
85 */
86static void rfcomm_dev_destruct(struct tty_port *port) 78static void rfcomm_dev_destruct(struct tty_port *port)
87{ 79{
88 struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port); 80 struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
@@ -90,10 +82,9 @@ static void rfcomm_dev_destruct(struct tty_port *port)
90 82
91 BT_DBG("dev %p dlc %p", dev, dlc); 83 BT_DBG("dev %p dlc %p", dev, dlc);
92 84
93 /* Refcount should only hit zero when called from rfcomm_dev_del() 85 spin_lock(&rfcomm_dev_lock);
94 which will have taken us off the list. Everything else are 86 list_del(&dev->list);
95 refcounting bugs. */ 87 spin_unlock(&rfcomm_dev_lock);
96 BUG_ON(!list_empty(&dev->list));
97 88
98 rfcomm_dlc_lock(dlc); 89 rfcomm_dlc_lock(dlc);
99 /* Detach DLC if it's owned by this dev */ 90 /* Detach DLC if it's owned by this dev */
@@ -112,8 +103,39 @@ static void rfcomm_dev_destruct(struct tty_port *port)
112 module_put(THIS_MODULE); 103 module_put(THIS_MODULE);
113} 104}
114 105
106/* device-specific initialization: open the dlc */
107static int rfcomm_dev_activate(struct tty_port *port, struct tty_struct *tty)
108{
109 struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
110
111 return rfcomm_dlc_open(dev->dlc, &dev->src, &dev->dst, dev->channel);
112}
113
114/* we block the open until the dlc->state becomes BT_CONNECTED */
115static int rfcomm_dev_carrier_raised(struct tty_port *port)
116{
117 struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
118
119 return (dev->dlc->state == BT_CONNECTED);
120}
121
122/* device-specific cleanup: close the dlc */
123static void rfcomm_dev_shutdown(struct tty_port *port)
124{
125 struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
126
127 if (dev->tty_dev->parent)
128 device_move(dev->tty_dev, NULL, DPM_ORDER_DEV_LAST);
129
130 /* close the dlc */
131 rfcomm_dlc_close(dev->dlc, 0);
132}
133
115static const struct tty_port_operations rfcomm_port_ops = { 134static const struct tty_port_operations rfcomm_port_ops = {
116 .destruct = rfcomm_dev_destruct, 135 .destruct = rfcomm_dev_destruct,
136 .activate = rfcomm_dev_activate,
137 .shutdown = rfcomm_dev_shutdown,
138 .carrier_raised = rfcomm_dev_carrier_raised,
117}; 139};
118 140
119static struct rfcomm_dev *__rfcomm_dev_get(int id) 141static struct rfcomm_dev *__rfcomm_dev_get(int id)
@@ -236,7 +258,6 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
236 258
237 tty_port_init(&dev->port); 259 tty_port_init(&dev->port);
238 dev->port.ops = &rfcomm_port_ops; 260 dev->port.ops = &rfcomm_port_ops;
239 init_waitqueue_head(&dev->wait);
240 261
241 skb_queue_head_init(&dev->pending); 262 skb_queue_head_init(&dev->pending);
242 263
@@ -282,7 +303,9 @@ out:
282 dev->id, NULL); 303 dev->id, NULL);
283 if (IS_ERR(dev->tty_dev)) { 304 if (IS_ERR(dev->tty_dev)) {
284 err = PTR_ERR(dev->tty_dev); 305 err = PTR_ERR(dev->tty_dev);
306 spin_lock(&rfcomm_dev_lock);
285 list_del(&dev->list); 307 list_del(&dev->list);
308 spin_unlock(&rfcomm_dev_lock);
286 goto free; 309 goto free;
287 } 310 }
288 311
@@ -301,27 +324,6 @@ free:
301 return err; 324 return err;
302} 325}
303 326
304static void rfcomm_dev_del(struct rfcomm_dev *dev)
305{
306 unsigned long flags;
307 BT_DBG("dev %p", dev);
308
309 BUG_ON(test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags));
310
311 spin_lock_irqsave(&dev->port.lock, flags);
312 if (dev->port.count > 0) {
313 spin_unlock_irqrestore(&dev->port.lock, flags);
314 return;
315 }
316 spin_unlock_irqrestore(&dev->port.lock, flags);
317
318 spin_lock(&rfcomm_dev_lock);
319 list_del_init(&dev->list);
320 spin_unlock(&rfcomm_dev_lock);
321
322 tty_port_put(&dev->port);
323}
324
325/* ---- Send buffer ---- */ 327/* ---- Send buffer ---- */
326static inline unsigned int rfcomm_room(struct rfcomm_dlc *dlc) 328static inline unsigned int rfcomm_room(struct rfcomm_dlc *dlc)
327{ 329{
@@ -333,10 +335,9 @@ static inline unsigned int rfcomm_room(struct rfcomm_dlc *dlc)
333static void rfcomm_wfree(struct sk_buff *skb) 335static void rfcomm_wfree(struct sk_buff *skb)
334{ 336{
335 struct rfcomm_dev *dev = (void *) skb->sk; 337 struct rfcomm_dev *dev = (void *) skb->sk;
336 struct tty_struct *tty = dev->port.tty;
337 atomic_sub(skb->truesize, &dev->wmem_alloc); 338 atomic_sub(skb->truesize, &dev->wmem_alloc);
338 if (test_bit(RFCOMM_TTY_ATTACHED, &dev->flags) && tty) 339 if (test_bit(RFCOMM_TTY_ATTACHED, &dev->flags))
339 tty_wakeup(tty); 340 tty_port_tty_wakeup(&dev->port);
340 tty_port_put(&dev->port); 341 tty_port_put(&dev->port);
341} 342}
342 343
@@ -410,6 +411,7 @@ static int rfcomm_release_dev(void __user *arg)
410{ 411{
411 struct rfcomm_dev_req req; 412 struct rfcomm_dev_req req;
412 struct rfcomm_dev *dev; 413 struct rfcomm_dev *dev;
414 struct tty_struct *tty;
413 415
414 if (copy_from_user(&req, arg, sizeof(req))) 416 if (copy_from_user(&req, arg, sizeof(req)))
415 return -EFAULT; 417 return -EFAULT;
@@ -429,11 +431,15 @@ static int rfcomm_release_dev(void __user *arg)
429 rfcomm_dlc_close(dev->dlc, 0); 431 rfcomm_dlc_close(dev->dlc, 0);
430 432
431 /* Shut down TTY synchronously before freeing rfcomm_dev */ 433 /* Shut down TTY synchronously before freeing rfcomm_dev */
432 if (dev->port.tty) 434 tty = tty_port_tty_get(&dev->port);
433 tty_vhangup(dev->port.tty); 435 if (tty) {
436 tty_vhangup(tty);
437 tty_kref_put(tty);
438 }
439
440 if (!test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags))
441 tty_port_put(&dev->port);
434 442
435 if (!test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags))
436 rfcomm_dev_del(dev);
437 tty_port_put(&dev->port); 443 tty_port_put(&dev->port);
438 return 0; 444 return 0;
439} 445}
@@ -563,16 +569,21 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
563static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) 569static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
564{ 570{
565 struct rfcomm_dev *dev = dlc->owner; 571 struct rfcomm_dev *dev = dlc->owner;
572 struct tty_struct *tty;
566 if (!dev) 573 if (!dev)
567 return; 574 return;
568 575
569 BT_DBG("dlc %p dev %p err %d", dlc, dev, err); 576 BT_DBG("dlc %p dev %p err %d", dlc, dev, err);
570 577
571 dev->err = err; 578 dev->err = err;
572 wake_up_interruptible(&dev->wait); 579 if (dlc->state == BT_CONNECTED) {
580 device_move(dev->tty_dev, rfcomm_get_device(dev),
581 DPM_ORDER_DEV_AFTER_PARENT);
573 582
574 if (dlc->state == BT_CLOSED) { 583 wake_up_interruptible(&dev->port.open_wait);
575 if (!dev->port.tty) { 584 } else if (dlc->state == BT_CLOSED) {
585 tty = tty_port_tty_get(&dev->port);
586 if (!tty) {
576 if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { 587 if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
577 /* Drop DLC lock here to avoid deadlock 588 /* Drop DLC lock here to avoid deadlock
578 * 1. rfcomm_dev_get will take rfcomm_dev_lock 589 * 1. rfcomm_dev_get will take rfcomm_dev_lock
@@ -580,6 +591,9 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
580 * rfcomm_dev_lock -> dlc lock 591 * rfcomm_dev_lock -> dlc lock
581 * 2. tty_port_put will deadlock if it's 592 * 2. tty_port_put will deadlock if it's
582 * the last reference 593 * the last reference
594 *
595 * FIXME: when we release the lock anything
596 * could happen to dev, even its destruction
583 */ 597 */
584 rfcomm_dlc_unlock(dlc); 598 rfcomm_dlc_unlock(dlc);
585 if (rfcomm_dev_get(dev->id) == NULL) { 599 if (rfcomm_dev_get(dev->id) == NULL) {
@@ -587,12 +601,17 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
587 return; 601 return;
588 } 602 }
589 603
590 rfcomm_dev_del(dev); 604 if (!test_and_set_bit(RFCOMM_TTY_RELEASED,
605 &dev->flags))
606 tty_port_put(&dev->port);
607
591 tty_port_put(&dev->port); 608 tty_port_put(&dev->port);
592 rfcomm_dlc_lock(dlc); 609 rfcomm_dlc_lock(dlc);
593 } 610 }
594 } else 611 } else {
595 tty_hangup(dev->port.tty); 612 tty_hangup(tty);
613 tty_kref_put(tty);
614 }
596 } 615 }
597} 616}
598 617
@@ -604,10 +623,8 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
604 623
605 BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig); 624 BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig);
606 625
607 if ((dev->modem_status & TIOCM_CD) && !(v24_sig & RFCOMM_V24_DV)) { 626 if ((dev->modem_status & TIOCM_CD) && !(v24_sig & RFCOMM_V24_DV))
608 if (dev->port.tty && !C_CLOCAL(dev->port.tty)) 627 tty_port_tty_hangup(&dev->port, true);
609 tty_hangup(dev->port.tty);
610 }
611 628
612 dev->modem_status = 629 dev->modem_status =
613 ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) | 630 ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) |
@@ -638,124 +655,92 @@ static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev)
638 tty_flip_buffer_push(&dev->port); 655 tty_flip_buffer_push(&dev->port);
639} 656}
640 657
641static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) 658/* do the reverse of install, clearing the tty fields and releasing the
659 * reference to tty_port
660 */
661static void rfcomm_tty_cleanup(struct tty_struct *tty)
642{ 662{
643 DECLARE_WAITQUEUE(wait, current); 663 struct rfcomm_dev *dev = tty->driver_data;
644 struct rfcomm_dev *dev;
645 struct rfcomm_dlc *dlc;
646 unsigned long flags;
647 int err, id;
648 664
649 id = tty->index; 665 clear_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
650 666
651 BT_DBG("tty %p id %d", tty, id); 667 rfcomm_dlc_lock(dev->dlc);
668 tty->driver_data = NULL;
669 rfcomm_dlc_unlock(dev->dlc);
652 670
653 /* We don't leak this refcount. For reasons which are not entirely 671 /*
654 clear, the TTY layer will call our ->close() method even if the 672 * purge the dlc->tx_queue to avoid circular dependencies
655 open fails. We decrease the refcount there, and decreasing it 673 * between dev and dlc
656 here too would cause breakage. */ 674 */
657 dev = rfcomm_dev_get(id); 675 skb_queue_purge(&dev->dlc->tx_queue);
658 if (!dev)
659 return -ENODEV;
660 676
661 BT_DBG("dev %p dst %pMR channel %d opened %d", dev, &dev->dst, 677 tty_port_put(&dev->port);
662 dev->channel, dev->port.count); 678}
663 679
664 spin_lock_irqsave(&dev->port.lock, flags); 680/* we acquire the tty_port reference since it's here the tty is first used
665 if (++dev->port.count > 1) { 681 * by setting the termios. We also populate the driver_data field and install
666 spin_unlock_irqrestore(&dev->port.lock, flags); 682 * the tty port
667 return 0; 683 */
668 } 684static int rfcomm_tty_install(struct tty_driver *driver, struct tty_struct *tty)
669 spin_unlock_irqrestore(&dev->port.lock, flags); 685{
686 struct rfcomm_dev *dev;
687 struct rfcomm_dlc *dlc;
688 int err;
689
690 dev = rfcomm_dev_get(tty->index);
691 if (!dev)
692 return -ENODEV;
670 693
671 dlc = dev->dlc; 694 dlc = dev->dlc;
672 695
673 /* Attach TTY and open DLC */ 696 /* Attach TTY and open DLC */
674
675 rfcomm_dlc_lock(dlc); 697 rfcomm_dlc_lock(dlc);
676 tty->driver_data = dev; 698 tty->driver_data = dev;
677 dev->port.tty = tty;
678 rfcomm_dlc_unlock(dlc); 699 rfcomm_dlc_unlock(dlc);
679 set_bit(RFCOMM_TTY_ATTACHED, &dev->flags); 700 set_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
680 701
681 err = rfcomm_dlc_open(dlc, &dev->src, &dev->dst, dev->channel); 702 /* install the tty_port */
682 if (err < 0) 703 err = tty_port_install(&dev->port, driver, tty);
683 return err; 704 if (err)
684 705 rfcomm_tty_cleanup(tty);
685 /* Wait for DLC to connect */
686 add_wait_queue(&dev->wait, &wait);
687 while (1) {
688 set_current_state(TASK_INTERRUPTIBLE);
689 706
690 if (dlc->state == BT_CLOSED) { 707 return err;
691 err = -dev->err; 708}
692 break;
693 }
694 709
695 if (dlc->state == BT_CONNECTED) 710static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
696 break; 711{
712 struct rfcomm_dev *dev = tty->driver_data;
713 int err;
697 714
698 if (signal_pending(current)) { 715 BT_DBG("tty %p id %d", tty, tty->index);
699 err = -EINTR;
700 break;
701 }
702 716
703 tty_unlock(tty); 717 BT_DBG("dev %p dst %pMR channel %d opened %d", dev, &dev->dst,
704 schedule(); 718 dev->channel, dev->port.count);
705 tty_lock(tty);
706 }
707 set_current_state(TASK_RUNNING);
708 remove_wait_queue(&dev->wait, &wait);
709 719
710 if (err == 0) 720 err = tty_port_open(&dev->port, tty, filp);
711 device_move(dev->tty_dev, rfcomm_get_device(dev), 721 if (err)
712 DPM_ORDER_DEV_AFTER_PARENT); 722 return err;
713 723
724 /*
725 * FIXME: rfcomm should use proper flow control for
726 * received data. This hack will be unnecessary and can
727 * be removed when that's implemented
728 */
714 rfcomm_tty_copy_pending(dev); 729 rfcomm_tty_copy_pending(dev);
715 730
716 rfcomm_dlc_unthrottle(dev->dlc); 731 rfcomm_dlc_unthrottle(dev->dlc);
717 732
718 return err; 733 return 0;
719} 734}
720 735
721static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp) 736static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp)
722{ 737{
723 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; 738 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
724 unsigned long flags;
725
726 if (!dev)
727 return;
728 739
729 BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, 740 BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc,
730 dev->port.count); 741 dev->port.count);
731 742
732 spin_lock_irqsave(&dev->port.lock, flags); 743 tty_port_close(&dev->port, tty, filp);
733 if (!--dev->port.count) {
734 spin_unlock_irqrestore(&dev->port.lock, flags);
735 if (dev->tty_dev->parent)
736 device_move(dev->tty_dev, NULL, DPM_ORDER_DEV_LAST);
737
738 /* Close DLC and dettach TTY */
739 rfcomm_dlc_close(dev->dlc, 0);
740
741 clear_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
742
743 rfcomm_dlc_lock(dev->dlc);
744 tty->driver_data = NULL;
745 dev->port.tty = NULL;
746 rfcomm_dlc_unlock(dev->dlc);
747
748 if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) {
749 spin_lock(&rfcomm_dev_lock);
750 list_del_init(&dev->list);
751 spin_unlock(&rfcomm_dev_lock);
752
753 tty_port_put(&dev->port);
754 }
755 } else
756 spin_unlock_irqrestore(&dev->port.lock, flags);
757
758 tty_port_put(&dev->port);
759} 744}
760 745
761static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) 746static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
@@ -1055,17 +1040,11 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
1055 1040
1056 BT_DBG("tty %p dev %p", tty, dev); 1041 BT_DBG("tty %p dev %p", tty, dev);
1057 1042
1058 if (!dev) 1043 tty_port_hangup(&dev->port);
1059 return;
1060
1061 rfcomm_tty_flush_buffer(tty);
1062 1044
1063 if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { 1045 if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags) &&
1064 if (rfcomm_dev_get(dev->id) == NULL) 1046 !test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags))
1065 return;
1066 rfcomm_dev_del(dev);
1067 tty_port_put(&dev->port); 1047 tty_port_put(&dev->port);
1068 }
1069} 1048}
1070 1049
1071static int rfcomm_tty_tiocmget(struct tty_struct *tty) 1050static int rfcomm_tty_tiocmget(struct tty_struct *tty)
@@ -1128,6 +1107,8 @@ static const struct tty_operations rfcomm_ops = {
1128 .wait_until_sent = rfcomm_tty_wait_until_sent, 1107 .wait_until_sent = rfcomm_tty_wait_until_sent,
1129 .tiocmget = rfcomm_tty_tiocmget, 1108 .tiocmget = rfcomm_tty_tiocmget,
1130 .tiocmset = rfcomm_tty_tiocmset, 1109 .tiocmset = rfcomm_tty_tiocmset,
1110 .install = rfcomm_tty_install,
1111 .cleanup = rfcomm_tty_cleanup,
1131}; 1112};
1132 1113
1133int __init rfcomm_init_ttys(void) 1114int __init rfcomm_init_ttys(void)
@@ -1146,7 +1127,7 @@ int __init rfcomm_init_ttys(void)
1146 rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL; 1127 rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL;
1147 rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; 1128 rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
1148 rfcomm_tty_driver->init_termios = tty_std_termios; 1129 rfcomm_tty_driver->init_termios = tty_std_termios;
1149 rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; 1130 rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL;
1150 rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON; 1131 rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON;
1151 tty_set_operations(rfcomm_tty_driver, &rfcomm_ops); 1132 tty_set_operations(rfcomm_tty_driver, &rfcomm_ops);
1152 1133
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index e7bd4eea575c..96bd388d93a4 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -176,8 +176,13 @@ static int sco_connect(struct sock *sk)
176 else 176 else
177 type = SCO_LINK; 177 type = SCO_LINK;
178 178
179 hcon = hci_connect(hdev, type, dst, BDADDR_BREDR, BT_SECURITY_LOW, 179 if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT &&
180 HCI_AT_NO_BONDING); 180 (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) {
181 err = -EOPNOTSUPP;
182 goto done;
183 }
184
185 hcon = hci_connect_sco(hdev, type, dst, sco_pi(sk)->setting);
181 if (IS_ERR(hcon)) { 186 if (IS_ERR(hcon)) {
182 err = PTR_ERR(hcon); 187 err = PTR_ERR(hcon);
183 goto done; 188 goto done;
@@ -417,6 +422,8 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro
417 sk->sk_protocol = proto; 422 sk->sk_protocol = proto;
418 sk->sk_state = BT_OPEN; 423 sk->sk_state = BT_OPEN;
419 424
425 sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
426
420 setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk); 427 setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk);
421 428
422 bt_sock_link(&sco_sk_list, sk); 429 bt_sock_link(&sco_sk_list, sk);
@@ -652,7 +659,7 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
652 return err; 659 return err;
653} 660}
654 661
655static void sco_conn_defer_accept(struct hci_conn *conn, int mask) 662static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
656{ 663{
657 struct hci_dev *hdev = conn->hdev; 664 struct hci_dev *hdev = conn->hdev;
658 665
@@ -664,11 +671,7 @@ static void sco_conn_defer_accept(struct hci_conn *conn, int mask)
664 struct hci_cp_accept_conn_req cp; 671 struct hci_cp_accept_conn_req cp;
665 672
666 bacpy(&cp.bdaddr, &conn->dst); 673 bacpy(&cp.bdaddr, &conn->dst);
667 674 cp.role = 0x00; /* Ignored */
668 if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
669 cp.role = 0x00; /* Become master */
670 else
671 cp.role = 0x01; /* Remain slave */
672 675
673 hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); 676 hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
674 } else { 677 } else {
@@ -679,9 +682,21 @@ static void sco_conn_defer_accept(struct hci_conn *conn, int mask)
679 682
680 cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40); 683 cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
681 cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40); 684 cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
682 cp.max_latency = __constant_cpu_to_le16(0xffff); 685 cp.content_format = cpu_to_le16(setting);
683 cp.content_format = cpu_to_le16(hdev->voice_setting); 686
684 cp.retrans_effort = 0xff; 687 switch (setting & SCO_AIRMODE_MASK) {
688 case SCO_AIRMODE_TRANSP:
689 if (conn->pkt_type & ESCO_2EV3)
690 cp.max_latency = __constant_cpu_to_le16(0x0008);
691 else
692 cp.max_latency = __constant_cpu_to_le16(0x000D);
693 cp.retrans_effort = 0x02;
694 break;
695 case SCO_AIRMODE_CVSD:
696 cp.max_latency = __constant_cpu_to_le16(0xffff);
697 cp.retrans_effort = 0xff;
698 break;
699 }
685 700
686 hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, 701 hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
687 sizeof(cp), &cp); 702 sizeof(cp), &cp);
@@ -698,7 +713,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
698 713
699 if (sk->sk_state == BT_CONNECT2 && 714 if (sk->sk_state == BT_CONNECT2 &&
700 test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { 715 test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
701 sco_conn_defer_accept(pi->conn->hcon, 0); 716 sco_conn_defer_accept(pi->conn->hcon, pi->setting);
702 sk->sk_state = BT_CONFIG; 717 sk->sk_state = BT_CONFIG;
703 msg->msg_namelen = 0; 718 msg->msg_namelen = 0;
704 719
@@ -714,7 +729,8 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
714static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) 729static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
715{ 730{
716 struct sock *sk = sock->sk; 731 struct sock *sk = sock->sk;
717 int err = 0; 732 int len, err = 0;
733 struct bt_voice voice;
718 u32 opt; 734 u32 opt;
719 735
720 BT_DBG("sk %p", sk); 736 BT_DBG("sk %p", sk);
@@ -740,6 +756,31 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char
740 clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); 756 clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
741 break; 757 break;
742 758
759 case BT_VOICE:
760 if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
761 sk->sk_state != BT_CONNECT2) {
762 err = -EINVAL;
763 break;
764 }
765
766 voice.setting = sco_pi(sk)->setting;
767
768 len = min_t(unsigned int, sizeof(voice), optlen);
769 if (copy_from_user((char *) &voice, optval, len)) {
770 err = -EFAULT;
771 break;
772 }
773
774 /* Explicitly check for these values */
775 if (voice.setting != BT_VOICE_TRANSPARENT &&
776 voice.setting != BT_VOICE_CVSD_16BIT) {
777 err = -EINVAL;
778 break;
779 }
780
781 sco_pi(sk)->setting = voice.setting;
782 break;
783
743 default: 784 default:
744 err = -ENOPROTOOPT; 785 err = -ENOPROTOOPT;
745 break; 786 break;
@@ -765,7 +806,9 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
765 806
766 switch (optname) { 807 switch (optname) {
767 case SCO_OPTIONS: 808 case SCO_OPTIONS:
768 if (sk->sk_state != BT_CONNECTED) { 809 if (sk->sk_state != BT_CONNECTED &&
810 !(sk->sk_state == BT_CONNECT2 &&
811 test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))) {
769 err = -ENOTCONN; 812 err = -ENOTCONN;
770 break; 813 break;
771 } 814 }
@@ -781,7 +824,9 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
781 break; 824 break;
782 825
783 case SCO_CONNINFO: 826 case SCO_CONNINFO:
784 if (sk->sk_state != BT_CONNECTED) { 827 if (sk->sk_state != BT_CONNECTED &&
828 !(sk->sk_state == BT_CONNECT2 &&
829 test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))) {
785 err = -ENOTCONN; 830 err = -ENOTCONN;
786 break; 831 break;
787 } 832 }
@@ -809,6 +854,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char
809{ 854{
810 struct sock *sk = sock->sk; 855 struct sock *sk = sock->sk;
811 int len, err = 0; 856 int len, err = 0;
857 struct bt_voice voice;
812 858
813 BT_DBG("sk %p", sk); 859 BT_DBG("sk %p", sk);
814 860
@@ -834,6 +880,15 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char
834 880
835 break; 881 break;
836 882
883 case BT_VOICE:
884 voice.setting = sco_pi(sk)->setting;
885
886 len = min_t(unsigned int, len, sizeof(voice));
887 if (copy_to_user(optval, (char *)&voice, len))
888 err = -EFAULT;
889
890 break;
891
837 default: 892 default:
838 err = -ENOPROTOOPT; 893 err = -ENOPROTOOPT;
839 break; 894 break;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 89659d4ed1f9..ca04163635da 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -245,22 +245,22 @@ fail:
245int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) 245int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
246{ 246{
247 struct netpoll *np; 247 struct netpoll *np;
248 int err = 0; 248 int err;
249
250 if (!p->br->dev->npinfo)
251 return 0;
249 252
250 np = kzalloc(sizeof(*p->np), gfp); 253 np = kzalloc(sizeof(*p->np), gfp);
251 err = -ENOMEM;
252 if (!np) 254 if (!np)
253 goto out; 255 return -ENOMEM;
254 256
255 err = __netpoll_setup(np, p->dev, gfp); 257 err = __netpoll_setup(np, p->dev, gfp);
256 if (err) { 258 if (err) {
257 kfree(np); 259 kfree(np);
258 goto out; 260 return err;
259 } 261 }
260 262
261 p->np = np; 263 p->np = np;
262
263out:
264 return err; 264 return err;
265} 265}
266 266
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 5623be6b9ecd..c41d5fbb91d0 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -363,7 +363,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
363 if (err) 363 if (err)
364 goto err2; 364 goto err2;
365 365
366 if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL)))) 366 err = br_netpoll_enable(p, GFP_KERNEL);
367 if (err)
367 goto err3; 368 goto err3;
368 369
369 err = netdev_master_upper_dev_link(dev, br->dev); 370 err = netdev_master_upper_dev_link(dev, br->dev);
@@ -382,6 +383,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
382 383
383 netdev_update_features(br->dev); 384 netdev_update_features(br->dev);
384 385
386 if (br->dev->needed_headroom < dev->needed_headroom)
387 br->dev->needed_headroom = dev->needed_headroom;
388
385 spin_lock_bh(&br->lock); 389 spin_lock_bh(&br->lock);
386 changed_addr = br_stp_recalculate_bridge_id(br); 390 changed_addr = br_stp_recalculate_bridge_id(br);
387 391
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 6319c4333c39..85a09bb5ca51 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -9,6 +9,7 @@
9#include <net/netlink.h> 9#include <net/netlink.h>
10#if IS_ENABLED(CONFIG_IPV6) 10#if IS_ENABLED(CONFIG_IPV6)
11#include <net/ipv6.h> 11#include <net/ipv6.h>
12#include <net/addrconf.h>
12#endif 13#endif
13 14
14#include "br_private.h" 15#include "br_private.h"
@@ -61,7 +62,8 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
61 62
62 for (i = 0; i < mdb->max; i++) { 63 for (i = 0; i < mdb->max; i++) {
63 struct net_bridge_mdb_entry *mp; 64 struct net_bridge_mdb_entry *mp;
64 struct net_bridge_port_group *p, **pp; 65 struct net_bridge_port_group *p;
66 struct net_bridge_port_group __rcu **pp;
65 struct net_bridge_port *port; 67 struct net_bridge_port *port;
66 68
67 hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) { 69 hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
@@ -253,7 +255,7 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
253 return false; 255 return false;
254#if IS_ENABLED(CONFIG_IPV6) 256#if IS_ENABLED(CONFIG_IPV6)
255 } else if (entry->addr.proto == htons(ETH_P_IPV6)) { 257 } else if (entry->addr.proto == htons(ETH_P_IPV6)) {
256 if (!ipv6_is_transient_multicast(&entry->addr.u.ip6)) 258 if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6))
257 return false; 259 return false;
258#endif 260#endif
259 } else 261 } else
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index bbcb43582496..d1c578630678 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -29,6 +29,7 @@
29#include <net/ipv6.h> 29#include <net/ipv6.h>
30#include <net/mld.h> 30#include <net/mld.h>
31#include <net/ip6_checksum.h> 31#include <net/ip6_checksum.h>
32#include <net/addrconf.h>
32#endif 33#endif
33 34
34#include "br_private.h" 35#include "br_private.h"
@@ -724,7 +725,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
724{ 725{
725 struct br_ip br_group; 726 struct br_ip br_group;
726 727
727 if (!ipv6_is_transient_multicast(group)) 728 if (ipv6_addr_is_ll_all_nodes(group))
728 return 0; 729 return 0;
729 730
730 br_group.u.ip6 = *group; 731 br_group.u.ip6 = *group;
@@ -1255,7 +1256,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1255 if (!mld2q->mld2q_nsrcs) 1256 if (!mld2q->mld2q_nsrcs)
1256 group = &mld2q->mld2q_mca; 1257 group = &mld2q->mld2q_mca;
1257 1258
1258 max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL); 1259 max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL);
1259 } 1260 }
1260 1261
1261 br_multicast_query_received(br, port, &br->ip6_querier, 1262 br_multicast_query_received(br, port, &br->ip6_querier,
@@ -1410,7 +1411,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
1410 &br->ip6_query; 1411 &br->ip6_query;
1411 1412
1412 1413
1413 if (!ipv6_is_transient_multicast(group)) 1414 if (ipv6_addr_is_ll_all_nodes(group))
1414 return; 1415 return;
1415 1416
1416 br_group.u.ip6 = *group; 1417 br_group.u.ip6 = *group;
@@ -1547,8 +1548,14 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1547 * - MLD has always Router Alert hop-by-hop option 1548 * - MLD has always Router Alert hop-by-hop option
1548 * - But we do not support jumbrograms. 1549 * - But we do not support jumbrograms.
1549 */ 1550 */
1550 if (ip6h->version != 6 || 1551 if (ip6h->version != 6)
1551 ip6h->nexthdr != IPPROTO_HOPOPTS || 1552 return 0;
1553
1554 /* Prevent flooding this packet if there is no listener present */
1555 if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr))
1556 BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
1557
1558 if (ip6h->nexthdr != IPPROTO_HOPOPTS ||
1552 ip6h->payload_len == 0) 1559 ip6h->payload_len == 0)
1553 return 0; 1560 return 0;
1554 1561
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 3a3f371b2841..2998dd1769a0 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -102,6 +102,11 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
102 case NETDEV_PRE_TYPE_CHANGE: 102 case NETDEV_PRE_TYPE_CHANGE:
103 /* Forbid underlaying device to change its type. */ 103 /* Forbid underlaying device to change its type. */
104 return NOTIFY_BAD; 104 return NOTIFY_BAD;
105
106 case NETDEV_RESEND_IGMP:
107 /* Propagate to master device */
108 call_netdevice_notifiers(event, br->dev);
109 break;
105 } 110 }
106 111
107 /* Events that may cause spanning tree to refresh */ 112 /* Events that may cause spanning tree to refresh */
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 263ba9034468..598cb0b333c6 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -352,11 +352,6 @@ extern void br_dev_delete(struct net_device *dev, struct list_head *list);
352extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, 352extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
353 struct net_device *dev); 353 struct net_device *dev);
354#ifdef CONFIG_NET_POLL_CONTROLLER 354#ifdef CONFIG_NET_POLL_CONTROLLER
355static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
356{
357 return br->dev->npinfo;
358}
359
360static inline void br_netpoll_send_skb(const struct net_bridge_port *p, 355static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
361 struct sk_buff *skb) 356 struct sk_buff *skb)
362{ 357{
@@ -369,11 +364,6 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
369extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); 364extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
370extern void br_netpoll_disable(struct net_bridge_port *p); 365extern void br_netpoll_disable(struct net_bridge_port *p);
371#else 366#else
372static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
373{
374 return NULL;
375}
376
377static inline void br_netpoll_send_skb(const struct net_bridge_port *p, 367static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
378 struct sk_buff *skb) 368 struct sk_buff *skb)
379{ 369{
@@ -494,7 +484,7 @@ extern void br_multicast_free_pg(struct rcu_head *head);
494extern struct net_bridge_port_group *br_multicast_new_port_group( 484extern struct net_bridge_port_group *br_multicast_new_port_group(
495 struct net_bridge_port *port, 485 struct net_bridge_port *port,
496 struct br_ip *group, 486 struct br_ip *group,
497 struct net_bridge_port_group *next, 487 struct net_bridge_port_group __rcu *next,
498 unsigned char state); 488 unsigned char state);
499extern void br_mdb_init(void); 489extern void br_mdb_init(void);
500extern void br_mdb_uninit(void); 490extern void br_mdb_uninit(void);
@@ -504,16 +494,6 @@ extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
504#define mlock_dereference(X, br) \ 494#define mlock_dereference(X, br) \
505 rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) 495 rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
506 496
507#if IS_ENABLED(CONFIG_IPV6)
508#include <net/addrconf.h>
509static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
510{
511 if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
512 return 1;
513 return 0;
514}
515#endif
516
517static inline bool br_multicast_is_router(struct net_bridge *br) 497static inline bool br_multicast_is_router(struct net_bridge *br)
518{ 498{
519 return br->multicast_router == 2 || 499 return br->multicast_router == 2 ||
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 70f656ce0f4a..dbd1c783431b 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -64,7 +64,7 @@ static int ebt_broute(struct sk_buff *skb)
64static int __net_init broute_net_init(struct net *net) 64static int __net_init broute_net_init(struct net *net)
65{ 65{
66 net->xt.broute_table = ebt_register_table(net, &broute_table); 66 net->xt.broute_table = ebt_register_table(net, &broute_table);
67 return PTR_RET(net->xt.broute_table); 67 return PTR_ERR_OR_ZERO(net->xt.broute_table);
68} 68}
69 69
70static void __net_exit broute_net_exit(struct net *net) 70static void __net_exit broute_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 3c2e9dced9e0..94b2b700cff8 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -100,7 +100,7 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
100static int __net_init frame_filter_net_init(struct net *net) 100static int __net_init frame_filter_net_init(struct net *net)
101{ 101{
102 net->xt.frame_filter = ebt_register_table(net, &frame_filter); 102 net->xt.frame_filter = ebt_register_table(net, &frame_filter);
103 return PTR_RET(net->xt.frame_filter); 103 return PTR_ERR_OR_ZERO(net->xt.frame_filter);
104} 104}
105 105
106static void __net_exit frame_filter_net_exit(struct net *net) 106static void __net_exit frame_filter_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 10871bc77908..322555acdd40 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -100,7 +100,7 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
100static int __net_init frame_nat_net_init(struct net *net) 100static int __net_init frame_nat_net_init(struct net *net)
101{ 101{
102 net->xt.frame_nat = ebt_register_table(net, &frame_nat); 102 net->xt.frame_nat = ebt_register_table(net, &frame_nat);
103 return PTR_RET(net->xt.frame_nat); 103 return PTR_ERR_OR_ZERO(net->xt.frame_nat);
104} 104}
105 105
106static void __net_exit frame_nat_net_exit(struct net *net) 106static void __net_exit frame_nat_net_exit(struct net *net)
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 2bd4b58f4372..0f455227da83 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -293,9 +293,10 @@ int cfctrl_linkup_request(struct cflayer *layer,
293 293
294 count = cfctrl_cancel_req(&cfctrl->serv.layer, 294 count = cfctrl_cancel_req(&cfctrl->serv.layer,
295 user_layer); 295 user_layer);
296 if (count != 1) 296 if (count != 1) {
297 pr_err("Could not remove request (%d)", count); 297 pr_err("Could not remove request (%d)", count);
298 return -ENODEV; 298 return -ENODEV;
299 }
299 } 300 }
300 return 0; 301 return 0;
301} 302}
diff --git a/net/can/gw.c b/net/can/gw.c
index 2f291f961a17..3f9b0f3a2818 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -146,6 +146,7 @@ struct cgw_job {
146 /* tbc */ 146 /* tbc */
147 }; 147 };
148 u8 gwtype; 148 u8 gwtype;
149 u8 limit_hops;
149 u16 flags; 150 u16 flags;
150}; 151};
151 152
@@ -402,6 +403,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
402 403
403 /* put the incremented hop counter in the cloned skb */ 404 /* put the incremented hop counter in the cloned skb */
404 cgw_hops(nskb) = cgw_hops(skb) + 1; 405 cgw_hops(nskb) = cgw_hops(skb) + 1;
406
407 /* first processing of this CAN frame -> adjust to private hop limit */
408 if (gwj->limit_hops && cgw_hops(nskb) == 1)
409 cgw_hops(nskb) = max_hops - gwj->limit_hops + 1;
410
405 nskb->dev = gwj->dst.dev; 411 nskb->dev = gwj->dst.dev;
406 412
407 /* pointer to modifiable CAN frame */ 413 /* pointer to modifiable CAN frame */
@@ -509,6 +515,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
509 515
510 /* check non default settings of attributes */ 516 /* check non default settings of attributes */
511 517
518 if (gwj->limit_hops) {
519 if (nla_put_u8(skb, CGW_LIM_HOPS, gwj->limit_hops) < 0)
520 goto cancel;
521 }
522
512 if (gwj->mod.modtype.and) { 523 if (gwj->mod.modtype.and) {
513 memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); 524 memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
514 mb.modtype = gwj->mod.modtype.and; 525 mb.modtype = gwj->mod.modtype.and;
@@ -606,11 +617,12 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = {
606 [CGW_SRC_IF] = { .type = NLA_U32 }, 617 [CGW_SRC_IF] = { .type = NLA_U32 },
607 [CGW_DST_IF] = { .type = NLA_U32 }, 618 [CGW_DST_IF] = { .type = NLA_U32 },
608 [CGW_FILTER] = { .len = sizeof(struct can_filter) }, 619 [CGW_FILTER] = { .len = sizeof(struct can_filter) },
620 [CGW_LIM_HOPS] = { .type = NLA_U8 },
609}; 621};
610 622
611/* check for common and gwtype specific attributes */ 623/* check for common and gwtype specific attributes */
612static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, 624static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
613 u8 gwtype, void *gwtypeattr) 625 u8 gwtype, void *gwtypeattr, u8 *limhops)
614{ 626{
615 struct nlattr *tb[CGW_MAX+1]; 627 struct nlattr *tb[CGW_MAX+1];
616 struct cgw_frame_mod mb; 628 struct cgw_frame_mod mb;
@@ -625,6 +637,13 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
625 if (err < 0) 637 if (err < 0)
626 return err; 638 return err;
627 639
640 if (tb[CGW_LIM_HOPS]) {
641 *limhops = nla_get_u8(tb[CGW_LIM_HOPS]);
642
643 if (*limhops < 1 || *limhops > max_hops)
644 return -EINVAL;
645 }
646
628 /* check for AND/OR/XOR/SET modifications */ 647 /* check for AND/OR/XOR/SET modifications */
629 648
630 if (tb[CGW_MOD_AND]) { 649 if (tb[CGW_MOD_AND]) {
@@ -782,6 +801,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
782{ 801{
783 struct rtcanmsg *r; 802 struct rtcanmsg *r;
784 struct cgw_job *gwj; 803 struct cgw_job *gwj;
804 u8 limhops = 0;
785 int err = 0; 805 int err = 0;
786 806
787 if (!capable(CAP_NET_ADMIN)) 807 if (!capable(CAP_NET_ADMIN))
@@ -808,7 +828,8 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
808 gwj->flags = r->flags; 828 gwj->flags = r->flags;
809 gwj->gwtype = r->gwtype; 829 gwj->gwtype = r->gwtype;
810 830
811 err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw); 831 err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw,
832 &limhops);
812 if (err < 0) 833 if (err < 0)
813 goto out; 834 goto out;
814 835
@@ -836,6 +857,8 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
836 if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops) 857 if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops)
837 goto put_src_dst_out; 858 goto put_src_dst_out;
838 859
860 gwj->limit_hops = limhops;
861
839 ASSERT_RTNL(); 862 ASSERT_RTNL();
840 863
841 err = cgw_register_filter(gwj); 864 err = cgw_register_filter(gwj);
@@ -867,13 +890,14 @@ static void cgw_remove_all_jobs(void)
867 } 890 }
868} 891}
869 892
870static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) 893static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
871{ 894{
872 struct cgw_job *gwj = NULL; 895 struct cgw_job *gwj = NULL;
873 struct hlist_node *nx; 896 struct hlist_node *nx;
874 struct rtcanmsg *r; 897 struct rtcanmsg *r;
875 struct cf_mod mod; 898 struct cf_mod mod;
876 struct can_can_gw ccgw; 899 struct can_can_gw ccgw;
900 u8 limhops = 0;
877 int err = 0; 901 int err = 0;
878 902
879 if (!capable(CAP_NET_ADMIN)) 903 if (!capable(CAP_NET_ADMIN))
@@ -890,7 +914,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
890 if (r->gwtype != CGW_TYPE_CAN_CAN) 914 if (r->gwtype != CGW_TYPE_CAN_CAN)
891 return -EINVAL; 915 return -EINVAL;
892 916
893 err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw); 917 err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops);
894 if (err < 0) 918 if (err < 0)
895 return err; 919 return err;
896 920
@@ -910,6 +934,9 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
910 if (gwj->flags != r->flags) 934 if (gwj->flags != r->flags)
911 continue; 935 continue;
912 936
937 if (gwj->limit_hops != limhops)
938 continue;
939
913 if (memcmp(&gwj->mod, &mod, sizeof(mod))) 940 if (memcmp(&gwj->mod, &mod, sizeof(mod)))
914 continue; 941 continue;
915 942
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index dd9b5857ef5c..4a5df7b1cc9f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -409,7 +409,7 @@ static void ceph_sock_write_space(struct sock *sk)
409 * and net/core/stream.c:sk_stream_write_space(). 409 * and net/core/stream.c:sk_stream_write_space().
410 */ 410 */
411 if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) { 411 if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) {
412 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { 412 if (sk_stream_is_writeable(sk)) {
413 dout("%s %p queueing write work\n", __func__, con); 413 dout("%s %p queueing write work\n", __func__, con);
414 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 414 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
415 queue_con(con); 415 queue_con(con);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 8ab48cd89559..af814e764206 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -48,6 +48,7 @@
48#include <linux/highmem.h> 48#include <linux/highmem.h>
49#include <linux/spinlock.h> 49#include <linux/spinlock.h>
50#include <linux/slab.h> 50#include <linux/slab.h>
51#include <linux/pagemap.h>
51 52
52#include <net/protocol.h> 53#include <net/protocol.h>
53#include <linux/skbuff.h> 54#include <linux/skbuff.h>
@@ -573,6 +574,77 @@ fault:
573} 574}
574EXPORT_SYMBOL(skb_copy_datagram_from_iovec); 575EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
575 576
577/**
578 * zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec
579 * @skb: buffer to copy
580 * @from: io vector to copy to
581 * @offset: offset in the io vector to start copying from
582 * @count: amount of vectors to copy to buffer from
583 *
584 * The function will first copy up to headlen, and then pin the userspace
585 * pages and build frags through them.
586 *
587 * Returns 0, -EFAULT or -EMSGSIZE.
588 * Note: the iovec is not modified during the copy
589 */
590int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
591 int offset, size_t count)
592{
593 int len = iov_length(from, count) - offset;
594 int copy = min_t(int, skb_headlen(skb), len);
595 int size;
596 int i = 0;
597
598 /* copy up to skb headlen */
599 if (skb_copy_datagram_from_iovec(skb, 0, from, offset, copy))
600 return -EFAULT;
601
602 if (len == copy)
603 return 0;
604
605 offset += copy;
606 while (count--) {
607 struct page *page[MAX_SKB_FRAGS];
608 int num_pages;
609 unsigned long base;
610 unsigned long truesize;
611
612 /* Skip over from offset and copied */
613 if (offset >= from->iov_len) {
614 offset -= from->iov_len;
615 ++from;
616 continue;
617 }
618 len = from->iov_len - offset;
619 base = (unsigned long)from->iov_base + offset;
620 size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
621 if (i + size > MAX_SKB_FRAGS)
622 return -EMSGSIZE;
623 num_pages = get_user_pages_fast(base, size, 0, &page[i]);
624 if (num_pages != size) {
625 release_pages(&page[i], num_pages, 0);
626 return -EFAULT;
627 }
628 truesize = size * PAGE_SIZE;
629 skb->data_len += len;
630 skb->len += len;
631 skb->truesize += truesize;
632 atomic_add(truesize, &skb->sk->sk_wmem_alloc);
633 while (len) {
634 int off = base & ~PAGE_MASK;
635 int size = min_t(int, len, PAGE_SIZE - off);
636 skb_fill_page_desc(skb, i, page[i], off, size);
637 base += size;
638 len -= size;
639 i++;
640 }
641 offset = 0;
642 ++from;
643 }
644 return 0;
645}
646EXPORT_SYMBOL(zerocopy_sg_from_iovec);
647
576static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, 648static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
577 u8 __user *to, int len, 649 u8 __user *to, int len,
578 __wsum *csump) 650 __wsum *csump)
diff --git a/net/core/dev.c b/net/core/dev.c
index 26755dd40daa..5c713f2239cc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -174,7 +174,7 @@ static DEFINE_SPINLOCK(napi_hash_lock);
174static unsigned int napi_gen_id; 174static unsigned int napi_gen_id;
175static DEFINE_HASHTABLE(napi_hash, 8); 175static DEFINE_HASHTABLE(napi_hash, 8);
176 176
177seqcount_t devnet_rename_seq; 177static seqcount_t devnet_rename_seq;
178 178
179static inline void dev_base_seq_inc(struct net *net) 179static inline void dev_base_seq_inc(struct net *net)
180{ 180{
@@ -1691,13 +1691,13 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1691 kfree_skb(skb); 1691 kfree_skb(skb);
1692 return NET_RX_DROP; 1692 return NET_RX_DROP;
1693 } 1693 }
1694 skb_scrub_packet(skb);
1695 skb->protocol = eth_type_trans(skb, dev); 1694 skb->protocol = eth_type_trans(skb, dev);
1696 1695
1697 /* eth_type_trans() can set pkt_type. 1696 /* eth_type_trans() can set pkt_type.
1698 * clear pkt_type _after_ calling eth_type_trans() 1697 * call skb_scrub_packet() after it to clear pkt_type _after_ calling
1698 * eth_type_trans().
1699 */ 1699 */
1700 skb->pkt_type = PACKET_HOST; 1700 skb_scrub_packet(skb, true);
1701 1701
1702 return netif_rx(skb); 1702 return netif_rx(skb);
1703} 1703}
@@ -4367,57 +4367,48 @@ softnet_break:
4367 goto out; 4367 goto out;
4368} 4368}
4369 4369
4370struct netdev_upper { 4370struct netdev_adjacent {
4371 struct net_device *dev; 4371 struct net_device *dev;
4372
4373 /* upper master flag, there can only be one master device per list */
4372 bool master; 4374 bool master;
4375
4376 /* indicates that this dev is our first-level lower/upper device */
4377 bool neighbour;
4378
4379 /* counter for the number of times this device was added to us */
4380 u16 ref_nr;
4381
4373 struct list_head list; 4382 struct list_head list;
4374 struct rcu_head rcu; 4383 struct rcu_head rcu;
4375 struct list_head search_list;
4376}; 4384};
4377 4385
4378static void __append_search_uppers(struct list_head *search_list, 4386static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4379 struct net_device *dev) 4387 struct net_device *adj_dev,
4388 bool upper)
4380{ 4389{
4381 struct netdev_upper *upper; 4390 struct netdev_adjacent *adj;
4391 struct list_head *dev_list;
4382 4392
4383 list_for_each_entry(upper, &dev->upper_dev_list, list) { 4393 dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
4384 /* check if this upper is not already in search list */ 4394
4385 if (list_empty(&upper->search_list)) 4395 list_for_each_entry(adj, dev_list, list) {
4386 list_add_tail(&upper->search_list, search_list); 4396 if (adj->dev == adj_dev)
4397 return adj;
4387 } 4398 }
4399 return NULL;
4388} 4400}
4389 4401
4390static bool __netdev_search_upper_dev(struct net_device *dev, 4402static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
4391 struct net_device *upper_dev) 4403 struct net_device *udev)
4392{ 4404{
4393 LIST_HEAD(search_list); 4405 return __netdev_find_adj(dev, udev, true);
4394 struct netdev_upper *upper;
4395 struct netdev_upper *tmp;
4396 bool ret = false;
4397
4398 __append_search_uppers(&search_list, dev);
4399 list_for_each_entry(upper, &search_list, search_list) {
4400 if (upper->dev == upper_dev) {
4401 ret = true;
4402 break;
4403 }
4404 __append_search_uppers(&search_list, upper->dev);
4405 }
4406 list_for_each_entry_safe(upper, tmp, &search_list, search_list)
4407 INIT_LIST_HEAD(&upper->search_list);
4408 return ret;
4409} 4406}
4410 4407
4411static struct netdev_upper *__netdev_find_upper(struct net_device *dev, 4408static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
4412 struct net_device *upper_dev) 4409 struct net_device *ldev)
4413{ 4410{
4414 struct netdev_upper *upper; 4411 return __netdev_find_adj(dev, ldev, false);
4415
4416 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4417 if (upper->dev == upper_dev)
4418 return upper;
4419 }
4420 return NULL;
4421} 4412}
4422 4413
4423/** 4414/**
@@ -4462,7 +4453,7 @@ EXPORT_SYMBOL(netdev_has_any_upper_dev);
4462 */ 4453 */
4463struct net_device *netdev_master_upper_dev_get(struct net_device *dev) 4454struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4464{ 4455{
4465 struct netdev_upper *upper; 4456 struct netdev_adjacent *upper;
4466 4457
4467 ASSERT_RTNL(); 4458 ASSERT_RTNL();
4468 4459
@@ -4470,13 +4461,38 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4470 return NULL; 4461 return NULL;
4471 4462
4472 upper = list_first_entry(&dev->upper_dev_list, 4463 upper = list_first_entry(&dev->upper_dev_list,
4473 struct netdev_upper, list); 4464 struct netdev_adjacent, list);
4474 if (likely(upper->master)) 4465 if (likely(upper->master))
4475 return upper->dev; 4466 return upper->dev;
4476 return NULL; 4467 return NULL;
4477} 4468}
4478EXPORT_SYMBOL(netdev_master_upper_dev_get); 4469EXPORT_SYMBOL(netdev_master_upper_dev_get);
4479 4470
4471/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
4472 * @dev: device
4473 * @iter: list_head ** of the current position
4474 *
4475 * Gets the next device from the dev's upper list, starting from iter
4476 * position. The caller must hold RCU read lock.
4477 */
4478struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
4479 struct list_head **iter)
4480{
4481 struct netdev_adjacent *upper;
4482
4483 WARN_ON_ONCE(!rcu_read_lock_held());
4484
4485 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4486
4487 if (&upper->list == &dev->upper_dev_list)
4488 return NULL;
4489
4490 *iter = &upper->list;
4491
4492 return upper->dev;
4493}
4494EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
4495
4480/** 4496/**
4481 * netdev_master_upper_dev_get_rcu - Get master upper device 4497 * netdev_master_upper_dev_get_rcu - Get master upper device
4482 * @dev: device 4498 * @dev: device
@@ -4486,20 +4502,158 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get);
4486 */ 4502 */
4487struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) 4503struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4488{ 4504{
4489 struct netdev_upper *upper; 4505 struct netdev_adjacent *upper;
4490 4506
4491 upper = list_first_or_null_rcu(&dev->upper_dev_list, 4507 upper = list_first_or_null_rcu(&dev->upper_dev_list,
4492 struct netdev_upper, list); 4508 struct netdev_adjacent, list);
4493 if (upper && likely(upper->master)) 4509 if (upper && likely(upper->master))
4494 return upper->dev; 4510 return upper->dev;
4495 return NULL; 4511 return NULL;
4496} 4512}
4497EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); 4513EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4498 4514
4515static int __netdev_adjacent_dev_insert(struct net_device *dev,
4516 struct net_device *adj_dev,
4517 bool neighbour, bool master,
4518 bool upper)
4519{
4520 struct netdev_adjacent *adj;
4521
4522 adj = __netdev_find_adj(dev, adj_dev, upper);
4523
4524 if (adj) {
4525 BUG_ON(neighbour);
4526 adj->ref_nr++;
4527 return 0;
4528 }
4529
4530 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
4531 if (!adj)
4532 return -ENOMEM;
4533
4534 adj->dev = adj_dev;
4535 adj->master = master;
4536 adj->neighbour = neighbour;
4537 adj->ref_nr = 1;
4538
4539 dev_hold(adj_dev);
4540 pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
4541 adj_dev->name, upper ? "upper" : "lower", dev->name,
4542 adj_dev->name);
4543
4544 if (!upper) {
4545 list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
4546 return 0;
4547 }
4548
4549 /* Ensure that master upper link is always the first item in list. */
4550 if (master)
4551 list_add_rcu(&adj->list, &dev->upper_dev_list);
4552 else
4553 list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
4554
4555 return 0;
4556}
4557
4558static inline int __netdev_upper_dev_insert(struct net_device *dev,
4559 struct net_device *udev,
4560 bool master, bool neighbour)
4561{
4562 return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
4563 true);
4564}
4565
4566static inline int __netdev_lower_dev_insert(struct net_device *dev,
4567 struct net_device *ldev,
4568 bool neighbour)
4569{
4570 return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
4571 false);
4572}
4573
4574void __netdev_adjacent_dev_remove(struct net_device *dev,
4575 struct net_device *adj_dev, bool upper)
4576{
4577 struct netdev_adjacent *adj;
4578
4579 if (upper)
4580 adj = __netdev_find_upper(dev, adj_dev);
4581 else
4582 adj = __netdev_find_lower(dev, adj_dev);
4583
4584 if (!adj)
4585 BUG();
4586
4587 if (adj->ref_nr > 1) {
4588 adj->ref_nr--;
4589 return;
4590 }
4591
4592 list_del_rcu(&adj->list);
4593 pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
4594 adj_dev->name, upper ? "upper" : "lower", dev->name,
4595 adj_dev->name);
4596 dev_put(adj_dev);
4597 kfree_rcu(adj, rcu);
4598}
4599
4600static inline void __netdev_upper_dev_remove(struct net_device *dev,
4601 struct net_device *udev)
4602{
4603 return __netdev_adjacent_dev_remove(dev, udev, true);
4604}
4605
4606static inline void __netdev_lower_dev_remove(struct net_device *dev,
4607 struct net_device *ldev)
4608{
4609 return __netdev_adjacent_dev_remove(dev, ldev, false);
4610}
4611
4612int __netdev_adjacent_dev_insert_link(struct net_device *dev,
4613 struct net_device *upper_dev,
4614 bool master, bool neighbour)
4615{
4616 int ret;
4617
4618 ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
4619 if (ret)
4620 return ret;
4621
4622 ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
4623 if (ret) {
4624 __netdev_upper_dev_remove(dev, upper_dev);
4625 return ret;
4626 }
4627
4628 return 0;
4629}
4630
4631static inline int __netdev_adjacent_dev_link(struct net_device *dev,
4632 struct net_device *udev)
4633{
4634 return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
4635}
4636
4637static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4638 struct net_device *udev,
4639 bool master)
4640{
4641 return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
4642}
4643
4644void __netdev_adjacent_dev_unlink(struct net_device *dev,
4645 struct net_device *upper_dev)
4646{
4647 __netdev_upper_dev_remove(dev, upper_dev);
4648 __netdev_lower_dev_remove(upper_dev, dev);
4649}
4650
4651
4499static int __netdev_upper_dev_link(struct net_device *dev, 4652static int __netdev_upper_dev_link(struct net_device *dev,
4500 struct net_device *upper_dev, bool master) 4653 struct net_device *upper_dev, bool master)
4501{ 4654{
4502 struct netdev_upper *upper; 4655 struct netdev_adjacent *i, *j, *to_i, *to_j;
4656 int ret = 0;
4503 4657
4504 ASSERT_RTNL(); 4658 ASSERT_RTNL();
4505 4659
@@ -4507,7 +4661,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
4507 return -EBUSY; 4661 return -EBUSY;
4508 4662
4509 /* To prevent loops, check if dev is not upper device to upper_dev. */ 4663 /* To prevent loops, check if dev is not upper device to upper_dev. */
4510 if (__netdev_search_upper_dev(upper_dev, dev)) 4664 if (__netdev_find_upper(upper_dev, dev))
4511 return -EBUSY; 4665 return -EBUSY;
4512 4666
4513 if (__netdev_find_upper(dev, upper_dev)) 4667 if (__netdev_find_upper(dev, upper_dev))
@@ -4516,22 +4670,76 @@ static int __netdev_upper_dev_link(struct net_device *dev,
4516 if (master && netdev_master_upper_dev_get(dev)) 4670 if (master && netdev_master_upper_dev_get(dev))
4517 return -EBUSY; 4671 return -EBUSY;
4518 4672
4519 upper = kmalloc(sizeof(*upper), GFP_KERNEL); 4673 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
4520 if (!upper) 4674 if (ret)
4521 return -ENOMEM; 4675 return ret;
4522 4676
4523 upper->dev = upper_dev; 4677 /* Now that we linked these devs, make all the upper_dev's
4524 upper->master = master; 4678 * upper_dev_list visible to every dev's lower_dev_list and vice
4525 INIT_LIST_HEAD(&upper->search_list); 4679 * versa, and don't forget the devices itself. All of these
4680 * links are non-neighbours.
4681 */
4682 list_for_each_entry(i, &dev->lower_dev_list, list) {
4683 list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
4684 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
4685 if (ret)
4686 goto rollback_mesh;
4687 }
4688 }
4689
4690 /* add dev to every upper_dev's upper device */
4691 list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
4692 ret = __netdev_adjacent_dev_link(dev, i->dev);
4693 if (ret)
4694 goto rollback_upper_mesh;
4695 }
4696
4697 /* add upper_dev to every dev's lower device */
4698 list_for_each_entry(i, &dev->lower_dev_list, list) {
4699 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
4700 if (ret)
4701 goto rollback_lower_mesh;
4702 }
4526 4703
4527 /* Ensure that master upper link is always the first item in list. */
4528 if (master)
4529 list_add_rcu(&upper->list, &dev->upper_dev_list);
4530 else
4531 list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
4532 dev_hold(upper_dev);
4533 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 4704 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4534 return 0; 4705 return 0;
4706
4707rollback_lower_mesh:
4708 to_i = i;
4709 list_for_each_entry(i, &dev->lower_dev_list, list) {
4710 if (i == to_i)
4711 break;
4712 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4713 }
4714
4715 i = NULL;
4716
4717rollback_upper_mesh:
4718 to_i = i;
4719 list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
4720 if (i == to_i)
4721 break;
4722 __netdev_adjacent_dev_unlink(dev, i->dev);
4723 }
4724
4725 i = j = NULL;
4726
4727rollback_mesh:
4728 to_i = i;
4729 to_j = j;
4730 list_for_each_entry(i, &dev->lower_dev_list, list) {
4731 list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
4732 if (i == to_i && j == to_j)
4733 break;
4734 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4735 }
4736 if (i == to_i)
4737 break;
4738 }
4739
4740 __netdev_adjacent_dev_unlink(dev, upper_dev);
4741
4742 return ret;
4535} 4743}
4536 4744
4537/** 4745/**
@@ -4580,16 +4788,28 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
4580void netdev_upper_dev_unlink(struct net_device *dev, 4788void netdev_upper_dev_unlink(struct net_device *dev,
4581 struct net_device *upper_dev) 4789 struct net_device *upper_dev)
4582{ 4790{
4583 struct netdev_upper *upper; 4791 struct netdev_adjacent *i, *j;
4584
4585 ASSERT_RTNL(); 4792 ASSERT_RTNL();
4586 4793
4587 upper = __netdev_find_upper(dev, upper_dev); 4794 __netdev_adjacent_dev_unlink(dev, upper_dev);
4588 if (!upper) 4795
4589 return; 4796 /* Here is the tricky part. We must remove all dev's lower
4590 list_del_rcu(&upper->list); 4797 * devices from all upper_dev's upper devices and vice
4591 dev_put(upper_dev); 4798 * versa, to maintain the graph relationship.
4592 kfree_rcu(upper, rcu); 4799 */
4800 list_for_each_entry(i, &dev->lower_dev_list, list)
4801 list_for_each_entry(j, &upper_dev->upper_dev_list, list)
4802 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4803
4804 /* remove also the devices itself from lower/upper device
4805 * list
4806 */
4807 list_for_each_entry(i, &dev->lower_dev_list, list)
4808 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4809
4810 list_for_each_entry(i, &upper_dev->upper_dev_list, list)
4811 __netdev_adjacent_dev_unlink(dev, i->dev);
4812
4593 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 4813 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4594} 4814}
4595EXPORT_SYMBOL(netdev_upper_dev_unlink); 4815EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -4989,6 +5209,24 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier)
4989EXPORT_SYMBOL(dev_change_carrier); 5209EXPORT_SYMBOL(dev_change_carrier);
4990 5210
4991/** 5211/**
5212 * dev_get_phys_port_id - Get device physical port ID
5213 * @dev: device
5214 * @ppid: port ID
5215 *
5216 * Get device physical port ID
5217 */
5218int dev_get_phys_port_id(struct net_device *dev,
5219 struct netdev_phys_port_id *ppid)
5220{
5221 const struct net_device_ops *ops = dev->netdev_ops;
5222
5223 if (!ops->ndo_get_phys_port_id)
5224 return -EOPNOTSUPP;
5225 return ops->ndo_get_phys_port_id(dev, ppid);
5226}
5227EXPORT_SYMBOL(dev_get_phys_port_id);
5228
5229/**
4992 * dev_new_index - allocate an ifindex 5230 * dev_new_index - allocate an ifindex
4993 * @net: the applicable net namespace 5231 * @net: the applicable net namespace
4994 * 5232 *
@@ -5832,6 +6070,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
5832 INIT_LIST_HEAD(&dev->unreg_list); 6070 INIT_LIST_HEAD(&dev->unreg_list);
5833 INIT_LIST_HEAD(&dev->link_watch_list); 6071 INIT_LIST_HEAD(&dev->link_watch_list);
5834 INIT_LIST_HEAD(&dev->upper_dev_list); 6072 INIT_LIST_HEAD(&dev->upper_dev_list);
6073 INIT_LIST_HEAD(&dev->lower_dev_list);
5835 dev->priv_flags = IFF_XMIT_DST_RELEASE; 6074 dev->priv_flags = IFF_XMIT_DST_RELEASE;
5836 setup(dev); 6075 setup(dev);
5837 6076
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 21735440c44a..2e654138433c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,9 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
33 r->flags = flags; 33 r->flags = flags;
34 r->fr_net = hold_net(ops->fro_net); 34 r->fr_net = hold_net(ops->fro_net);
35 35
36 r->suppress_prefixlen = -1;
37 r->suppress_ifgroup = -1;
38
36 /* The lock is not required here, the list in unreacheable 39 /* The lock is not required here, the list in unreacheable
37 * at the moment this function is called */ 40 * at the moment this function is called */
38 list_add_tail(&r->list, &ops->rules_list); 41 list_add_tail(&r->list, &ops->rules_list);
@@ -226,6 +229,9 @@ jumped:
226 else 229 else
227 err = ops->action(rule, fl, flags, arg); 230 err = ops->action(rule, fl, flags, arg);
228 231
232 if (!err && ops->suppress && ops->suppress(rule, arg))
233 continue;
234
229 if (err != -EAGAIN) { 235 if (err != -EAGAIN) {
230 if ((arg->flags & FIB_LOOKUP_NOREF) || 236 if ((arg->flags & FIB_LOOKUP_NOREF) ||
231 likely(atomic_inc_not_zero(&rule->refcnt))) { 237 likely(atomic_inc_not_zero(&rule->refcnt))) {
@@ -337,6 +343,15 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
337 rule->action = frh->action; 343 rule->action = frh->action;
338 rule->flags = frh->flags; 344 rule->flags = frh->flags;
339 rule->table = frh_get_table(frh, tb); 345 rule->table = frh_get_table(frh, tb);
346 if (tb[FRA_SUPPRESS_PREFIXLEN])
347 rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
348 else
349 rule->suppress_prefixlen = -1;
350
351 if (tb[FRA_SUPPRESS_IFGROUP])
352 rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
353 else
354 rule->suppress_ifgroup = -1;
340 355
341 if (!tb[FRA_PRIORITY] && ops->default_pref) 356 if (!tb[FRA_PRIORITY] && ops->default_pref)
342 rule->pref = ops->default_pref(ops); 357 rule->pref = ops->default_pref(ops);
@@ -523,6 +538,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
523 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */ 538 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
524 + nla_total_size(4) /* FRA_PRIORITY */ 539 + nla_total_size(4) /* FRA_PRIORITY */
525 + nla_total_size(4) /* FRA_TABLE */ 540 + nla_total_size(4) /* FRA_TABLE */
541 + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
542 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
526 + nla_total_size(4) /* FRA_FWMARK */ 543 + nla_total_size(4) /* FRA_FWMARK */
527 + nla_total_size(4); /* FRA_FWMASK */ 544 + nla_total_size(4); /* FRA_FWMASK */
528 545
@@ -548,6 +565,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
548 frh->table = rule->table; 565 frh->table = rule->table;
549 if (nla_put_u32(skb, FRA_TABLE, rule->table)) 566 if (nla_put_u32(skb, FRA_TABLE, rule->table))
550 goto nla_put_failure; 567 goto nla_put_failure;
568 if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
569 goto nla_put_failure;
551 frh->res1 = 0; 570 frh->res1 = 0;
552 frh->res2 = 0; 571 frh->res2 = 0;
553 frh->action = rule->action; 572 frh->action = rule->action;
@@ -580,6 +599,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
580 (rule->target && 599 (rule->target &&
581 nla_put_u32(skb, FRA_GOTO, rule->target))) 600 nla_put_u32(skb, FRA_GOTO, rule->target)))
582 goto nla_put_failure; 601 goto nla_put_failure;
602
603 if (rule->suppress_ifgroup != -1) {
604 if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup))
605 goto nla_put_failure;
606 }
607
583 if (ops->fill(rule, skb, frh) < 0) 608 if (ops->fill(rule, skb, frh) < 0)
584 goto nla_put_failure; 609 goto nla_put_failure;
585 610
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d12e3a9a5356..1929af87b260 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -140,7 +140,11 @@ ipv6:
140 break; 140 break;
141 } 141 }
142 case IPPROTO_IPIP: 142 case IPPROTO_IPIP:
143 goto again; 143 proto = htons(ETH_P_IP);
144 goto ip;
145 case IPPROTO_IPV6:
146 proto = htons(ETH_P_IPV6);
147 goto ipv6;
144 default: 148 default:
145 break; 149 break;
146 } 150 }
@@ -348,7 +352,7 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
348 352
349 if (queue_index != new_index && sk && 353 if (queue_index != new_index && sk &&
350 rcu_access_pointer(sk->sk_dst_cache)) 354 rcu_access_pointer(sk->sk_dst_cache))
351 sk_tx_queue_set(sk, queue_index); 355 sk_tx_queue_set(sk, new_index);
352 356
353 queue_index = new_index; 357 queue_index = new_index;
354 } 358 }
diff --git a/net/core/iovec.c b/net/core/iovec.c
index de178e462682..b77eeecc0011 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -212,3 +212,27 @@ out_fault:
212 goto out; 212 goto out;
213} 213}
214EXPORT_SYMBOL(csum_partial_copy_fromiovecend); 214EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
215
216unsigned long iov_pages(const struct iovec *iov, int offset,
217 unsigned long nr_segs)
218{
219 unsigned long seg, base;
220 int pages = 0, len, size;
221
222 while (nr_segs && (offset >= iov->iov_len)) {
223 offset -= iov->iov_len;
224 ++iov;
225 --nr_segs;
226 }
227
228 for (seg = 0; seg < nr_segs; seg++) {
229 base = (unsigned long)iov[seg].iov_base + offset;
230 len = iov[seg].iov_len - offset;
231 size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
232 pages += size;
233 offset = 0;
234 }
235
236 return pages;
237}
238EXPORT_SYMBOL(iov_pages);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 60533db8b72d..6072610a8672 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2759,13 +2759,11 @@ errout:
2759 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); 2759 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2760} 2760}
2761 2761
2762#ifdef CONFIG_ARPD
2763void neigh_app_ns(struct neighbour *n) 2762void neigh_app_ns(struct neighbour *n)
2764{ 2763{
2765 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); 2764 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2766} 2765}
2767EXPORT_SYMBOL(neigh_app_ns); 2766EXPORT_SYMBOL(neigh_app_ns);
2768#endif /* CONFIG_ARPD */
2769 2767
2770#ifdef CONFIG_SYSCTL 2768#ifdef CONFIG_SYSCTL
2771static int zero; 2769static int zero;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 981fed397d1d..d954b56b4e47 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -60,12 +60,19 @@ static ssize_t format_##field(const struct net_device *net, char *buf) \
60{ \ 60{ \
61 return sprintf(buf, format_string, net->field); \ 61 return sprintf(buf, format_string, net->field); \
62} \ 62} \
63static ssize_t show_##field(struct device *dev, \ 63static ssize_t field##_show(struct device *dev, \
64 struct device_attribute *attr, char *buf) \ 64 struct device_attribute *attr, char *buf) \
65{ \ 65{ \
66 return netdev_show(dev, attr, buf, format_##field); \ 66 return netdev_show(dev, attr, buf, format_##field); \
67} 67} \
68
69#define NETDEVICE_SHOW_RO(field, format_string) \
70NETDEVICE_SHOW(field, format_string); \
71static DEVICE_ATTR_RO(field)
68 72
73#define NETDEVICE_SHOW_RW(field, format_string) \
74NETDEVICE_SHOW(field, format_string); \
75static DEVICE_ATTR_RW(field)
69 76
70/* use same locking and permission rules as SIF* ioctl's */ 77/* use same locking and permission rules as SIF* ioctl's */
71static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, 78static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
@@ -96,16 +103,16 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
96 return ret; 103 return ret;
97} 104}
98 105
99NETDEVICE_SHOW(dev_id, fmt_hex); 106NETDEVICE_SHOW_RO(dev_id, fmt_hex);
100NETDEVICE_SHOW(addr_assign_type, fmt_dec); 107NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
101NETDEVICE_SHOW(addr_len, fmt_dec); 108NETDEVICE_SHOW_RO(addr_len, fmt_dec);
102NETDEVICE_SHOW(iflink, fmt_dec); 109NETDEVICE_SHOW_RO(iflink, fmt_dec);
103NETDEVICE_SHOW(ifindex, fmt_dec); 110NETDEVICE_SHOW_RO(ifindex, fmt_dec);
104NETDEVICE_SHOW(type, fmt_dec); 111NETDEVICE_SHOW_RO(type, fmt_dec);
105NETDEVICE_SHOW(link_mode, fmt_dec); 112NETDEVICE_SHOW_RO(link_mode, fmt_dec);
106 113
107/* use same locking rules as GIFHWADDR ioctl's */ 114/* use same locking rules as GIFHWADDR ioctl's */
108static ssize_t show_address(struct device *dev, struct device_attribute *attr, 115static ssize_t address_show(struct device *dev, struct device_attribute *attr,
109 char *buf) 116 char *buf)
110{ 117{
111 struct net_device *net = to_net_dev(dev); 118 struct net_device *net = to_net_dev(dev);
@@ -117,15 +124,17 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr,
117 read_unlock(&dev_base_lock); 124 read_unlock(&dev_base_lock);
118 return ret; 125 return ret;
119} 126}
127static DEVICE_ATTR_RO(address);
120 128
121static ssize_t show_broadcast(struct device *dev, 129static ssize_t broadcast_show(struct device *dev,
122 struct device_attribute *attr, char *buf) 130 struct device_attribute *attr, char *buf)
123{ 131{
124 struct net_device *net = to_net_dev(dev); 132 struct net_device *net = to_net_dev(dev);
125 if (dev_isalive(net)) 133 if (dev_isalive(net))
126 return sysfs_format_mac(buf, net->broadcast, net->addr_len); 134 return sysfs_format_mac(buf, net->broadcast, net->addr_len);
127 return -EINVAL; 135 return -EINVAL;
128} 136}
137static DEVICE_ATTR_RO(broadcast);
129 138
130static int change_carrier(struct net_device *net, unsigned long new_carrier) 139static int change_carrier(struct net_device *net, unsigned long new_carrier)
131{ 140{
@@ -134,13 +143,13 @@ static int change_carrier(struct net_device *net, unsigned long new_carrier)
134 return dev_change_carrier(net, (bool) new_carrier); 143 return dev_change_carrier(net, (bool) new_carrier);
135} 144}
136 145
137static ssize_t store_carrier(struct device *dev, struct device_attribute *attr, 146static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
138 const char *buf, size_t len) 147 const char *buf, size_t len)
139{ 148{
140 return netdev_store(dev, attr, buf, len, change_carrier); 149 return netdev_store(dev, attr, buf, len, change_carrier);
141} 150}
142 151
143static ssize_t show_carrier(struct device *dev, 152static ssize_t carrier_show(struct device *dev,
144 struct device_attribute *attr, char *buf) 153 struct device_attribute *attr, char *buf)
145{ 154{
146 struct net_device *netdev = to_net_dev(dev); 155 struct net_device *netdev = to_net_dev(dev);
@@ -149,8 +158,9 @@ static ssize_t show_carrier(struct device *dev,
149 } 158 }
150 return -EINVAL; 159 return -EINVAL;
151} 160}
161static DEVICE_ATTR_RW(carrier);
152 162
153static ssize_t show_speed(struct device *dev, 163static ssize_t speed_show(struct device *dev,
154 struct device_attribute *attr, char *buf) 164 struct device_attribute *attr, char *buf)
155{ 165{
156 struct net_device *netdev = to_net_dev(dev); 166 struct net_device *netdev = to_net_dev(dev);
@@ -167,8 +177,9 @@ static ssize_t show_speed(struct device *dev,
167 rtnl_unlock(); 177 rtnl_unlock();
168 return ret; 178 return ret;
169} 179}
180static DEVICE_ATTR_RO(speed);
170 181
171static ssize_t show_duplex(struct device *dev, 182static ssize_t duplex_show(struct device *dev,
172 struct device_attribute *attr, char *buf) 183 struct device_attribute *attr, char *buf)
173{ 184{
174 struct net_device *netdev = to_net_dev(dev); 185 struct net_device *netdev = to_net_dev(dev);
@@ -198,8 +209,9 @@ static ssize_t show_duplex(struct device *dev,
198 rtnl_unlock(); 209 rtnl_unlock();
199 return ret; 210 return ret;
200} 211}
212static DEVICE_ATTR_RO(duplex);
201 213
202static ssize_t show_dormant(struct device *dev, 214static ssize_t dormant_show(struct device *dev,
203 struct device_attribute *attr, char *buf) 215 struct device_attribute *attr, char *buf)
204{ 216{
205 struct net_device *netdev = to_net_dev(dev); 217 struct net_device *netdev = to_net_dev(dev);
@@ -209,6 +221,7 @@ static ssize_t show_dormant(struct device *dev,
209 221
210 return -EINVAL; 222 return -EINVAL;
211} 223}
224static DEVICE_ATTR_RO(dormant);
212 225
213static const char *const operstates[] = { 226static const char *const operstates[] = {
214 "unknown", 227 "unknown",
@@ -220,7 +233,7 @@ static const char *const operstates[] = {
220 "up" 233 "up"
221}; 234};
222 235
223static ssize_t show_operstate(struct device *dev, 236static ssize_t operstate_show(struct device *dev,
224 struct device_attribute *attr, char *buf) 237 struct device_attribute *attr, char *buf)
225{ 238{
226 const struct net_device *netdev = to_net_dev(dev); 239 const struct net_device *netdev = to_net_dev(dev);
@@ -237,35 +250,33 @@ static ssize_t show_operstate(struct device *dev,
237 250
238 return sprintf(buf, "%s\n", operstates[operstate]); 251 return sprintf(buf, "%s\n", operstates[operstate]);
239} 252}
253static DEVICE_ATTR_RO(operstate);
240 254
241/* read-write attributes */ 255/* read-write attributes */
242NETDEVICE_SHOW(mtu, fmt_dec);
243 256
244static int change_mtu(struct net_device *net, unsigned long new_mtu) 257static int change_mtu(struct net_device *net, unsigned long new_mtu)
245{ 258{
246 return dev_set_mtu(net, (int) new_mtu); 259 return dev_set_mtu(net, (int) new_mtu);
247} 260}
248 261
249static ssize_t store_mtu(struct device *dev, struct device_attribute *attr, 262static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
250 const char *buf, size_t len) 263 const char *buf, size_t len)
251{ 264{
252 return netdev_store(dev, attr, buf, len, change_mtu); 265 return netdev_store(dev, attr, buf, len, change_mtu);
253} 266}
254 267NETDEVICE_SHOW_RW(mtu, fmt_dec);
255NETDEVICE_SHOW(flags, fmt_hex);
256 268
257static int change_flags(struct net_device *net, unsigned long new_flags) 269static int change_flags(struct net_device *net, unsigned long new_flags)
258{ 270{
259 return dev_change_flags(net, (unsigned int) new_flags); 271 return dev_change_flags(net, (unsigned int) new_flags);
260} 272}
261 273
262static ssize_t store_flags(struct device *dev, struct device_attribute *attr, 274static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
263 const char *buf, size_t len) 275 const char *buf, size_t len)
264{ 276{
265 return netdev_store(dev, attr, buf, len, change_flags); 277 return netdev_store(dev, attr, buf, len, change_flags);
266} 278}
267 279NETDEVICE_SHOW_RW(flags, fmt_hex);
268NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
269 280
270static int change_tx_queue_len(struct net_device *net, unsigned long new_len) 281static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
271{ 282{
@@ -273,7 +284,7 @@ static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
273 return 0; 284 return 0;
274} 285}
275 286
276static ssize_t store_tx_queue_len(struct device *dev, 287static ssize_t tx_queue_len_store(struct device *dev,
277 struct device_attribute *attr, 288 struct device_attribute *attr,
278 const char *buf, size_t len) 289 const char *buf, size_t len)
279{ 290{
@@ -282,8 +293,9 @@ static ssize_t store_tx_queue_len(struct device *dev,
282 293
283 return netdev_store(dev, attr, buf, len, change_tx_queue_len); 294 return netdev_store(dev, attr, buf, len, change_tx_queue_len);
284} 295}
296NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong);
285 297
286static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, 298static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
287 const char *buf, size_t len) 299 const char *buf, size_t len)
288{ 300{
289 struct net_device *netdev = to_net_dev(dev); 301 struct net_device *netdev = to_net_dev(dev);
@@ -306,7 +318,7 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
306 return ret < 0 ? ret : len; 318 return ret < 0 ? ret : len;
307} 319}
308 320
309static ssize_t show_ifalias(struct device *dev, 321static ssize_t ifalias_show(struct device *dev,
310 struct device_attribute *attr, char *buf) 322 struct device_attribute *attr, char *buf)
311{ 323{
312 const struct net_device *netdev = to_net_dev(dev); 324 const struct net_device *netdev = to_net_dev(dev);
@@ -319,8 +331,7 @@ static ssize_t show_ifalias(struct device *dev,
319 rtnl_unlock(); 331 rtnl_unlock();
320 return ret; 332 return ret;
321} 333}
322 334static DEVICE_ATTR_RW(ifalias);
323NETDEVICE_SHOW(group, fmt_dec);
324 335
325static int change_group(struct net_device *net, unsigned long new_group) 336static int change_group(struct net_device *net, unsigned long new_group)
326{ 337{
@@ -328,35 +339,60 @@ static int change_group(struct net_device *net, unsigned long new_group)
328 return 0; 339 return 0;
329} 340}
330 341
331static ssize_t store_group(struct device *dev, struct device_attribute *attr, 342static ssize_t group_store(struct device *dev, struct device_attribute *attr,
332 const char *buf, size_t len) 343 const char *buf, size_t len)
333{ 344{
334 return netdev_store(dev, attr, buf, len, change_group); 345 return netdev_store(dev, attr, buf, len, change_group);
335} 346}
347NETDEVICE_SHOW(group, fmt_dec);
348static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
349
350static ssize_t phys_port_id_show(struct device *dev,
351 struct device_attribute *attr, char *buf)
352{
353 struct net_device *netdev = to_net_dev(dev);
354 ssize_t ret = -EINVAL;
336 355
337static struct device_attribute net_class_attributes[] = { 356 if (!rtnl_trylock())
338 __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), 357 return restart_syscall();
339 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), 358
340 __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), 359 if (dev_isalive(netdev)) {
341 __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), 360 struct netdev_phys_port_id ppid;
342 __ATTR(iflink, S_IRUGO, show_iflink, NULL), 361
343 __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), 362 ret = dev_get_phys_port_id(netdev, &ppid);
344 __ATTR(type, S_IRUGO, show_type, NULL), 363 if (!ret)
345 __ATTR(link_mode, S_IRUGO, show_link_mode, NULL), 364 ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
346 __ATTR(address, S_IRUGO, show_address, NULL), 365 }
347 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), 366 rtnl_unlock();
348 __ATTR(carrier, S_IRUGO | S_IWUSR, show_carrier, store_carrier), 367
349 __ATTR(speed, S_IRUGO, show_speed, NULL), 368 return ret;
350 __ATTR(duplex, S_IRUGO, show_duplex, NULL), 369}
351 __ATTR(dormant, S_IRUGO, show_dormant, NULL), 370static DEVICE_ATTR_RO(phys_port_id);
352 __ATTR(operstate, S_IRUGO, show_operstate, NULL), 371
353 __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu), 372static struct attribute *net_class_attrs[] = {
354 __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), 373 &dev_attr_netdev_group.attr,
355 __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, 374 &dev_attr_type.attr,
356 store_tx_queue_len), 375 &dev_attr_dev_id.attr,
357 __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group), 376 &dev_attr_iflink.attr,
358 {} 377 &dev_attr_ifindex.attr,
378 &dev_attr_addr_assign_type.attr,
379 &dev_attr_addr_len.attr,
380 &dev_attr_link_mode.attr,
381 &dev_attr_address.attr,
382 &dev_attr_broadcast.attr,
383 &dev_attr_speed.attr,
384 &dev_attr_duplex.attr,
385 &dev_attr_dormant.attr,
386 &dev_attr_operstate.attr,
387 &dev_attr_ifalias.attr,
388 &dev_attr_carrier.attr,
389 &dev_attr_mtu.attr,
390 &dev_attr_flags.attr,
391 &dev_attr_tx_queue_len.attr,
392 &dev_attr_phys_port_id.attr,
393 NULL,
359}; 394};
395ATTRIBUTE_GROUPS(net_class);
360 396
361/* Show a given an attribute in the statistics group */ 397/* Show a given an attribute in the statistics group */
362static ssize_t netstat_show(const struct device *d, 398static ssize_t netstat_show(const struct device *d,
@@ -382,13 +418,13 @@ static ssize_t netstat_show(const struct device *d,
382 418
383/* generate a read-only statistics attribute */ 419/* generate a read-only statistics attribute */
384#define NETSTAT_ENTRY(name) \ 420#define NETSTAT_ENTRY(name) \
385static ssize_t show_##name(struct device *d, \ 421static ssize_t name##_show(struct device *d, \
386 struct device_attribute *attr, char *buf) \ 422 struct device_attribute *attr, char *buf) \
387{ \ 423{ \
388 return netstat_show(d, attr, buf, \ 424 return netstat_show(d, attr, buf, \
389 offsetof(struct rtnl_link_stats64, name)); \ 425 offsetof(struct rtnl_link_stats64, name)); \
390} \ 426} \
391static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) 427static DEVICE_ATTR_RO(name)
392 428
393NETSTAT_ENTRY(rx_packets); 429NETSTAT_ENTRY(rx_packets);
394NETSTAT_ENTRY(tx_packets); 430NETSTAT_ENTRY(tx_packets);
@@ -457,6 +493,9 @@ static struct attribute_group wireless_group = {
457 .attrs = wireless_attrs, 493 .attrs = wireless_attrs,
458}; 494};
459#endif 495#endif
496
497#else /* CONFIG_SYSFS */
498#define net_class_groups NULL
460#endif /* CONFIG_SYSFS */ 499#endif /* CONFIG_SYSFS */
461 500
462#ifdef CONFIG_RPS 501#ifdef CONFIG_RPS
@@ -1157,6 +1196,13 @@ static void remove_queue_kobjects(struct net_device *net)
1157#endif 1196#endif
1158} 1197}
1159 1198
1199static bool net_current_may_mount(void)
1200{
1201 struct net *net = current->nsproxy->net_ns;
1202
1203 return ns_capable(net->user_ns, CAP_SYS_ADMIN);
1204}
1205
1160static void *net_grab_current_ns(void) 1206static void *net_grab_current_ns(void)
1161{ 1207{
1162 struct net *ns = current->nsproxy->net_ns; 1208 struct net *ns = current->nsproxy->net_ns;
@@ -1179,6 +1225,7 @@ static const void *net_netlink_ns(struct sock *sk)
1179 1225
1180struct kobj_ns_type_operations net_ns_type_operations = { 1226struct kobj_ns_type_operations net_ns_type_operations = {
1181 .type = KOBJ_NS_TYPE_NET, 1227 .type = KOBJ_NS_TYPE_NET,
1228 .current_may_mount = net_current_may_mount,
1182 .grab_current_ns = net_grab_current_ns, 1229 .grab_current_ns = net_grab_current_ns,
1183 .netlink_ns = net_netlink_ns, 1230 .netlink_ns = net_netlink_ns,
1184 .initial_ns = net_initial_ns, 1231 .initial_ns = net_initial_ns,
@@ -1229,9 +1276,7 @@ static const void *net_namespace(struct device *d)
1229static struct class net_class = { 1276static struct class net_class = {
1230 .name = "net", 1277 .name = "net",
1231 .dev_release = netdev_release, 1278 .dev_release = netdev_release,
1232#ifdef CONFIG_SYSFS 1279 .dev_groups = net_class_groups,
1233 .dev_attrs = net_class_attributes,
1234#endif /* CONFIG_SYSFS */
1235 .dev_uevent = netdev_uevent, 1280 .dev_uevent = netdev_uevent,
1236 .ns_type = &net_ns_type_operations, 1281 .ns_type = &net_ns_type_operations,
1237 .namespace = net_namespace, 1282 .namespace = net_namespace,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f97652036754..81d3a9a08453 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -651,7 +651,7 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
651 struct net *net = ns; 651 struct net *net = ns;
652 652
653 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || 653 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
654 !nsown_capable(CAP_SYS_ADMIN)) 654 !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
655 return -EPERM; 655 return -EPERM;
656 656
657 put_net(nsproxy->net_ns); 657 put_net(nsproxy->net_ns);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index e533259dce3c..d9cd627e6a16 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -29,12 +29,6 @@
29 29
30#define PRIOMAP_MIN_SZ 128 30#define PRIOMAP_MIN_SZ 128
31 31
32static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
33{
34 return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
35 struct cgroup_netprio_state, css);
36}
37
38/* 32/*
39 * Extend @dev->priomap so that it's large enough to accomodate 33 * Extend @dev->priomap so that it's large enough to accomodate
40 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful 34 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful
@@ -87,67 +81,70 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx)
87 81
88/** 82/**
89 * netprio_prio - return the effective netprio of a cgroup-net_device pair 83 * netprio_prio - return the effective netprio of a cgroup-net_device pair
90 * @cgrp: cgroup part of the target pair 84 * @css: css part of the target pair
91 * @dev: net_device part of the target pair 85 * @dev: net_device part of the target pair
92 * 86 *
93 * Should be called under RCU read or rtnl lock. 87 * Should be called under RCU read or rtnl lock.
94 */ 88 */
95static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev) 89static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev)
96{ 90{
97 struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); 91 struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
92 int id = css->cgroup->id;
98 93
99 if (map && cgrp->id < map->priomap_len) 94 if (map && id < map->priomap_len)
100 return map->priomap[cgrp->id]; 95 return map->priomap[id];
101 return 0; 96 return 0;
102} 97}
103 98
104/** 99/**
105 * netprio_set_prio - set netprio on a cgroup-net_device pair 100 * netprio_set_prio - set netprio on a cgroup-net_device pair
106 * @cgrp: cgroup part of the target pair 101 * @css: css part of the target pair
107 * @dev: net_device part of the target pair 102 * @dev: net_device part of the target pair
108 * @prio: prio to set 103 * @prio: prio to set
109 * 104 *
110 * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl 105 * Set netprio to @prio on @css-@dev pair. Should be called under rtnl
111 * lock and may fail under memory pressure for non-zero @prio. 106 * lock and may fail under memory pressure for non-zero @prio.
112 */ 107 */
113static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev, 108static int netprio_set_prio(struct cgroup_subsys_state *css,
114 u32 prio) 109 struct net_device *dev, u32 prio)
115{ 110{
116 struct netprio_map *map; 111 struct netprio_map *map;
112 int id = css->cgroup->id;
117 int ret; 113 int ret;
118 114
119 /* avoid extending priomap for zero writes */ 115 /* avoid extending priomap for zero writes */
120 map = rtnl_dereference(dev->priomap); 116 map = rtnl_dereference(dev->priomap);
121 if (!prio && (!map || map->priomap_len <= cgrp->id)) 117 if (!prio && (!map || map->priomap_len <= id))
122 return 0; 118 return 0;
123 119
124 ret = extend_netdev_table(dev, cgrp->id); 120 ret = extend_netdev_table(dev, id);
125 if (ret) 121 if (ret)
126 return ret; 122 return ret;
127 123
128 map = rtnl_dereference(dev->priomap); 124 map = rtnl_dereference(dev->priomap);
129 map->priomap[cgrp->id] = prio; 125 map->priomap[id] = prio;
130 return 0; 126 return 0;
131} 127}
132 128
133static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) 129static struct cgroup_subsys_state *
130cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
134{ 131{
135 struct cgroup_netprio_state *cs; 132 struct cgroup_subsys_state *css;
136 133
137 cs = kzalloc(sizeof(*cs), GFP_KERNEL); 134 css = kzalloc(sizeof(*css), GFP_KERNEL);
138 if (!cs) 135 if (!css)
139 return ERR_PTR(-ENOMEM); 136 return ERR_PTR(-ENOMEM);
140 137
141 return &cs->css; 138 return css;
142} 139}
143 140
144static int cgrp_css_online(struct cgroup *cgrp) 141static int cgrp_css_online(struct cgroup_subsys_state *css)
145{ 142{
146 struct cgroup *parent = cgrp->parent; 143 struct cgroup_subsys_state *parent_css = css_parent(css);
147 struct net_device *dev; 144 struct net_device *dev;
148 int ret = 0; 145 int ret = 0;
149 146
150 if (!parent) 147 if (!parent_css)
151 return 0; 148 return 0;
152 149
153 rtnl_lock(); 150 rtnl_lock();
@@ -156,9 +153,9 @@ static int cgrp_css_online(struct cgroup *cgrp)
156 * onlining, there is no need to clear them on offline. 153 * onlining, there is no need to clear them on offline.
157 */ 154 */
158 for_each_netdev(&init_net, dev) { 155 for_each_netdev(&init_net, dev) {
159 u32 prio = netprio_prio(parent, dev); 156 u32 prio = netprio_prio(parent_css, dev);
160 157
161 ret = netprio_set_prio(cgrp, dev, prio); 158 ret = netprio_set_prio(css, dev, prio);
162 if (ret) 159 if (ret)
163 break; 160 break;
164 } 161 }
@@ -166,29 +163,29 @@ static int cgrp_css_online(struct cgroup *cgrp)
166 return ret; 163 return ret;
167} 164}
168 165
169static void cgrp_css_free(struct cgroup *cgrp) 166static void cgrp_css_free(struct cgroup_subsys_state *css)
170{ 167{
171 kfree(cgrp_netprio_state(cgrp)); 168 kfree(css);
172} 169}
173 170
174static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) 171static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
175{ 172{
176 return cgrp->id; 173 return css->cgroup->id;
177} 174}
178 175
179static int read_priomap(struct cgroup *cont, struct cftype *cft, 176static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
180 struct cgroup_map_cb *cb) 177 struct cgroup_map_cb *cb)
181{ 178{
182 struct net_device *dev; 179 struct net_device *dev;
183 180
184 rcu_read_lock(); 181 rcu_read_lock();
185 for_each_netdev_rcu(&init_net, dev) 182 for_each_netdev_rcu(&init_net, dev)
186 cb->fill(cb, dev->name, netprio_prio(cont, dev)); 183 cb->fill(cb, dev->name, netprio_prio(css, dev));
187 rcu_read_unlock(); 184 rcu_read_unlock();
188 return 0; 185 return 0;
189} 186}
190 187
191static int write_priomap(struct cgroup *cgrp, struct cftype *cft, 188static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
192 const char *buffer) 189 const char *buffer)
193{ 190{
194 char devname[IFNAMSIZ + 1]; 191 char devname[IFNAMSIZ + 1];
@@ -205,7 +202,7 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
205 202
206 rtnl_lock(); 203 rtnl_lock();
207 204
208 ret = netprio_set_prio(cgrp, dev, prio); 205 ret = netprio_set_prio(css, dev, prio);
209 206
210 rtnl_unlock(); 207 rtnl_unlock();
211 dev_put(dev); 208 dev_put(dev);
@@ -221,12 +218,13 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
221 return 0; 218 return 0;
222} 219}
223 220
224static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) 221static void net_prio_attach(struct cgroup_subsys_state *css,
222 struct cgroup_taskset *tset)
225{ 223{
226 struct task_struct *p; 224 struct task_struct *p;
227 void *v; 225 void *v;
228 226
229 cgroup_taskset_for_each(p, cgrp, tset) { 227 cgroup_taskset_for_each(p, css, tset) {
230 task_lock(p); 228 task_lock(p);
231 v = (void *)(unsigned long)task_netprioidx(p); 229 v = (void *)(unsigned long)task_netprioidx(p);
232 iterate_fd(p->files, 0, update_netprio, v); 230 iterate_fd(p->files, 0, update_netprio, v);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9640972ec50e..261357a66300 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -160,6 +160,8 @@
160#include <net/net_namespace.h> 160#include <net/net_namespace.h>
161#include <net/checksum.h> 161#include <net/checksum.h>
162#include <net/ipv6.h> 162#include <net/ipv6.h>
163#include <net/udp.h>
164#include <net/ip6_checksum.h>
163#include <net/addrconf.h> 165#include <net/addrconf.h>
164#ifdef CONFIG_XFRM 166#ifdef CONFIG_XFRM
165#include <net/xfrm.h> 167#include <net/xfrm.h>
@@ -198,6 +200,7 @@
198#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ 200#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
199#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ 201#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
200#define F_NODE (1<<15) /* Node memory alloc*/ 202#define F_NODE (1<<15) /* Node memory alloc*/
203#define F_UDPCSUM (1<<16) /* Include UDP checksum */
201 204
202/* Thread control flag bits */ 205/* Thread control flag bits */
203#define T_STOP (1<<0) /* Stop run */ 206#define T_STOP (1<<0) /* Stop run */
@@ -631,6 +634,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
631 if (pkt_dev->flags & F_UDPDST_RND) 634 if (pkt_dev->flags & F_UDPDST_RND)
632 seq_printf(seq, "UDPDST_RND "); 635 seq_printf(seq, "UDPDST_RND ");
633 636
637 if (pkt_dev->flags & F_UDPCSUM)
638 seq_printf(seq, "UDPCSUM ");
639
634 if (pkt_dev->flags & F_MPLS_RND) 640 if (pkt_dev->flags & F_MPLS_RND)
635 seq_printf(seq, "MPLS_RND "); 641 seq_printf(seq, "MPLS_RND ");
636 642
@@ -1228,6 +1234,12 @@ static ssize_t pktgen_if_write(struct file *file,
1228 else if (strcmp(f, "!NODE_ALLOC") == 0) 1234 else if (strcmp(f, "!NODE_ALLOC") == 0)
1229 pkt_dev->flags &= ~F_NODE; 1235 pkt_dev->flags &= ~F_NODE;
1230 1236
1237 else if (strcmp(f, "UDPCSUM") == 0)
1238 pkt_dev->flags |= F_UDPCSUM;
1239
1240 else if (strcmp(f, "!UDPCSUM") == 0)
1241 pkt_dev->flags &= ~F_UDPCSUM;
1242
1231 else { 1243 else {
1232 sprintf(pg_result, 1244 sprintf(pg_result,
1233 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", 1245 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
@@ -2733,7 +2745,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2733 udph->source = htons(pkt_dev->cur_udp_src); 2745 udph->source = htons(pkt_dev->cur_udp_src);
2734 udph->dest = htons(pkt_dev->cur_udp_dst); 2746 udph->dest = htons(pkt_dev->cur_udp_dst);
2735 udph->len = htons(datalen + 8); /* DATA + udphdr */ 2747 udph->len = htons(datalen + 8); /* DATA + udphdr */
2736 udph->check = 0; /* No checksum */ 2748 udph->check = 0;
2737 2749
2738 iph->ihl = 5; 2750 iph->ihl = 5;
2739 iph->version = 4; 2751 iph->version = 4;
@@ -2747,11 +2759,28 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2747 iph->frag_off = 0; 2759 iph->frag_off = 0;
2748 iplen = 20 + 8 + datalen; 2760 iplen = 20 + 8 + datalen;
2749 iph->tot_len = htons(iplen); 2761 iph->tot_len = htons(iplen);
2750 iph->check = 0; 2762 ip_send_check(iph);
2751 iph->check = ip_fast_csum((void *)iph, iph->ihl);
2752 skb->protocol = protocol; 2763 skb->protocol = protocol;
2753 skb->dev = odev; 2764 skb->dev = odev;
2754 skb->pkt_type = PACKET_HOST; 2765 skb->pkt_type = PACKET_HOST;
2766
2767 if (!(pkt_dev->flags & F_UDPCSUM)) {
2768 skb->ip_summed = CHECKSUM_NONE;
2769 } else if (odev->features & NETIF_F_V4_CSUM) {
2770 skb->ip_summed = CHECKSUM_PARTIAL;
2771 skb->csum = 0;
2772 udp4_hwcsum(skb, udph->source, udph->dest);
2773 } else {
2774 __wsum csum = udp_csum(skb);
2775
2776 /* add protocol-dependent pseudo-header */
2777 udph->check = csum_tcpudp_magic(udph->source, udph->dest,
2778 datalen + 8, IPPROTO_UDP, csum);
2779
2780 if (udph->check == 0)
2781 udph->check = CSUM_MANGLED_0;
2782 }
2783
2755 pktgen_finalize_skb(pkt_dev, skb, datalen); 2784 pktgen_finalize_skb(pkt_dev, skb, datalen);
2756 2785
2757#ifdef CONFIG_XFRM 2786#ifdef CONFIG_XFRM
@@ -2768,7 +2797,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2768 struct sk_buff *skb = NULL; 2797 struct sk_buff *skb = NULL;
2769 __u8 *eth; 2798 __u8 *eth;
2770 struct udphdr *udph; 2799 struct udphdr *udph;
2771 int datalen; 2800 int datalen, udplen;
2772 struct ipv6hdr *iph; 2801 struct ipv6hdr *iph;
2773 __be16 protocol = htons(ETH_P_IPV6); 2802 __be16 protocol = htons(ETH_P_IPV6);
2774 __be32 *mpls; 2803 __be32 *mpls;
@@ -2844,10 +2873,11 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2844 net_info_ratelimited("increased datalen to %d\n", datalen); 2873 net_info_ratelimited("increased datalen to %d\n", datalen);
2845 } 2874 }
2846 2875
2876 udplen = datalen + sizeof(struct udphdr);
2847 udph->source = htons(pkt_dev->cur_udp_src); 2877 udph->source = htons(pkt_dev->cur_udp_src);
2848 udph->dest = htons(pkt_dev->cur_udp_dst); 2878 udph->dest = htons(pkt_dev->cur_udp_dst);
2849 udph->len = htons(datalen + sizeof(struct udphdr)); 2879 udph->len = htons(udplen);
2850 udph->check = 0; /* No checksum */ 2880 udph->check = 0;
2851 2881
2852 *(__be32 *) iph = htonl(0x60000000); /* Version + flow */ 2882 *(__be32 *) iph = htonl(0x60000000); /* Version + flow */
2853 2883
@@ -2858,7 +2888,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2858 2888
2859 iph->hop_limit = 32; 2889 iph->hop_limit = 32;
2860 2890
2861 iph->payload_len = htons(sizeof(struct udphdr) + datalen); 2891 iph->payload_len = htons(udplen);
2862 iph->nexthdr = IPPROTO_UDP; 2892 iph->nexthdr = IPPROTO_UDP;
2863 2893
2864 iph->daddr = pkt_dev->cur_in6_daddr; 2894 iph->daddr = pkt_dev->cur_in6_daddr;
@@ -2868,6 +2898,23 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2868 skb->dev = odev; 2898 skb->dev = odev;
2869 skb->pkt_type = PACKET_HOST; 2899 skb->pkt_type = PACKET_HOST;
2870 2900
2901 if (!(pkt_dev->flags & F_UDPCSUM)) {
2902 skb->ip_summed = CHECKSUM_NONE;
2903 } else if (odev->features & NETIF_F_V6_CSUM) {
2904 skb->ip_summed = CHECKSUM_PARTIAL;
2905 skb->csum_start = skb_transport_header(skb) - skb->head;
2906 skb->csum_offset = offsetof(struct udphdr, check);
2907 udph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, 0);
2908 } else {
2909 __wsum csum = udp_csum(skb);
2910
2911 /* add protocol-dependent pseudo-header */
2912 udph->check = csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, csum);
2913
2914 if (udph->check == 0)
2915 udph->check = CSUM_MANGLED_0;
2916 }
2917
2871 pktgen_finalize_skb(pkt_dev, skb, datalen); 2918 pktgen_finalize_skb(pkt_dev, skb, datalen);
2872 2919
2873 return skb; 2920 return skb;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ca198c1d1d30..2a0e21de3060 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -767,7 +767,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
767 + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ 767 + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
768 + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ 768 + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
769 + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ 769 + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
770 + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ 770 + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
771 + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */
771} 772}
772 773
773static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) 774static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -846,6 +847,24 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
846 return 0; 847 return 0;
847} 848}
848 849
850static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
851{
852 int err;
853 struct netdev_phys_port_id ppid;
854
855 err = dev_get_phys_port_id(dev, &ppid);
856 if (err) {
857 if (err == -EOPNOTSUPP)
858 return 0;
859 return err;
860 }
861
862 if (nla_put(skb, IFLA_PHYS_PORT_ID, ppid.id_len, ppid.id))
863 return -EMSGSIZE;
864
865 return 0;
866}
867
849static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, 868static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
850 int type, u32 pid, u32 seq, u32 change, 869 int type, u32 pid, u32 seq, u32 change,
851 unsigned int flags, u32 ext_filter_mask) 870 unsigned int flags, u32 ext_filter_mask)
@@ -913,6 +932,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
913 goto nla_put_failure; 932 goto nla_put_failure;
914 } 933 }
915 934
935 if (rtnl_phys_port_id_fill(skb, dev))
936 goto nla_put_failure;
937
916 attr = nla_reserve(skb, IFLA_STATS, 938 attr = nla_reserve(skb, IFLA_STATS,
917 sizeof(struct rtnl_link_stats)); 939 sizeof(struct rtnl_link_stats));
918 if (attr == NULL) 940 if (attr == NULL)
@@ -1113,6 +1135,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1113 [IFLA_PROMISCUITY] = { .type = NLA_U32 }, 1135 [IFLA_PROMISCUITY] = { .type = NLA_U32 },
1114 [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, 1136 [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
1115 [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, 1137 [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
1138 [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
1116}; 1139};
1117EXPORT_SYMBOL(ifla_policy); 1140EXPORT_SYMBOL(ifla_policy);
1118 1141
@@ -1844,10 +1867,10 @@ replay:
1844 else 1867 else
1845 err = register_netdevice(dev); 1868 err = register_netdevice(dev);
1846 1869
1847 if (err < 0 && !IS_ERR(dev)) 1870 if (err < 0) {
1848 free_netdev(dev); 1871 free_netdev(dev);
1849 if (err < 0)
1850 goto out; 1872 goto out;
1873 }
1851 1874
1852 err = rtnl_configure_link(dev, ifm); 1875 err = rtnl_configure_link(dev, ifm);
1853 if (err < 0) 1876 if (err < 0)
diff --git a/net/core/scm.c b/net/core/scm.c
index b4da80b1cc07..b442e7e25e60 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -56,9 +56,9 @@ static __inline__ int scm_check_creds(struct ucred *creds)
56 if ((creds->pid == task_tgid_vnr(current) || 56 if ((creds->pid == task_tgid_vnr(current) ||
57 ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && 57 ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) &&
58 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || 58 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
59 uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && 59 uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) &&
60 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || 60 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
61 gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) { 61 gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) {
62 return 0; 62 return 0;
63 } 63 }
64 return -EPERM; 64 return -EPERM;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2c3d0f53d198..d81cff119f73 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3500,17 +3500,22 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
3500EXPORT_SYMBOL(skb_try_coalesce); 3500EXPORT_SYMBOL(skb_try_coalesce);
3501 3501
3502/** 3502/**
3503 * skb_scrub_packet - scrub an skb before sending it to another netns 3503 * skb_scrub_packet - scrub an skb
3504 * 3504 *
3505 * @skb: buffer to clean 3505 * @skb: buffer to clean
3506 * 3506 * @xnet: packet is crossing netns
3507 * skb_scrub_packet can be used to clean an skb before injecting it in 3507 *
3508 * another namespace. We have to clear all information in the skb that 3508 * skb_scrub_packet can be used after encapsulating or decapsulting a packet
3509 * could impact namespace isolation. 3509 * into/from a tunnel. Some information have to be cleared during these
3510 * operations.
3511 * skb_scrub_packet can also be used to clean a skb before injecting it in
3512 * another namespace (@xnet == true). We have to clear all information in the
3513 * skb that could impact namespace isolation.
3510 */ 3514 */
3511void skb_scrub_packet(struct sk_buff *skb) 3515void skb_scrub_packet(struct sk_buff *skb, bool xnet)
3512{ 3516{
3513 skb_orphan(skb); 3517 if (xnet)
3518 skb_orphan(skb);
3514 skb->tstamp.tv64 = 0; 3519 skb->tstamp.tv64 = 0;
3515 skb->pkt_type = PACKET_HOST; 3520 skb->pkt_type = PACKET_HOST;
3516 skb->skb_iif = 0; 3521 skb->skb_iif = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 2c097c5a35dd..5b6beba494a3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -93,6 +93,7 @@
93 93
94#include <linux/capability.h> 94#include <linux/capability.h>
95#include <linux/errno.h> 95#include <linux/errno.h>
96#include <linux/errqueue.h>
96#include <linux/types.h> 97#include <linux/types.h>
97#include <linux/socket.h> 98#include <linux/socket.h>
98#include <linux/in.h> 99#include <linux/in.h>
@@ -1575,6 +1576,25 @@ void sock_wfree(struct sk_buff *skb)
1575} 1576}
1576EXPORT_SYMBOL(sock_wfree); 1577EXPORT_SYMBOL(sock_wfree);
1577 1578
1579void skb_orphan_partial(struct sk_buff *skb)
1580{
1581 /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
1582 * so we do not completely orphan skb, but transfert all
1583 * accounted bytes but one, to avoid unexpected reorders.
1584 */
1585 if (skb->destructor == sock_wfree
1586#ifdef CONFIG_INET
1587 || skb->destructor == tcp_wfree
1588#endif
1589 ) {
1590 atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
1591 skb->truesize = 1;
1592 } else {
1593 skb_orphan(skb);
1594 }
1595}
1596EXPORT_SYMBOL(skb_orphan_partial);
1597
1578/* 1598/*
1579 * Read buffer destructor automatically called from kfree_skb. 1599 * Read buffer destructor automatically called from kfree_skb.
1580 */ 1600 */
@@ -1721,24 +1741,23 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
1721 1741
1722struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, 1742struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1723 unsigned long data_len, int noblock, 1743 unsigned long data_len, int noblock,
1724 int *errcode) 1744 int *errcode, int max_page_order)
1725{ 1745{
1726 struct sk_buff *skb; 1746 struct sk_buff *skb = NULL;
1747 unsigned long chunk;
1727 gfp_t gfp_mask; 1748 gfp_t gfp_mask;
1728 long timeo; 1749 long timeo;
1729 int err; 1750 int err;
1730 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; 1751 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1752 struct page *page;
1753 int i;
1731 1754
1732 err = -EMSGSIZE; 1755 err = -EMSGSIZE;
1733 if (npages > MAX_SKB_FRAGS) 1756 if (npages > MAX_SKB_FRAGS)
1734 goto failure; 1757 goto failure;
1735 1758
1736 gfp_mask = sk->sk_allocation;
1737 if (gfp_mask & __GFP_WAIT)
1738 gfp_mask |= __GFP_REPEAT;
1739
1740 timeo = sock_sndtimeo(sk, noblock); 1759 timeo = sock_sndtimeo(sk, noblock);
1741 while (1) { 1760 while (!skb) {
1742 err = sock_error(sk); 1761 err = sock_error(sk);
1743 if (err != 0) 1762 if (err != 0)
1744 goto failure; 1763 goto failure;
@@ -1747,50 +1766,52 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1747 if (sk->sk_shutdown & SEND_SHUTDOWN) 1766 if (sk->sk_shutdown & SEND_SHUTDOWN)
1748 goto failure; 1767 goto failure;
1749 1768
1750 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 1769 if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
1751 skb = alloc_skb(header_len, gfp_mask); 1770 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1752 if (skb) { 1771 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1753 int i; 1772 err = -EAGAIN;
1754 1773 if (!timeo)
1755 /* No pages, we're done... */ 1774 goto failure;
1756 if (!data_len) 1775 if (signal_pending(current))
1757 break; 1776 goto interrupted;
1758 1777 timeo = sock_wait_for_wmem(sk, timeo);
1759 skb->truesize += data_len; 1778 continue;
1760 skb_shinfo(skb)->nr_frags = npages; 1779 }
1761 for (i = 0; i < npages; i++) {
1762 struct page *page;
1763
1764 page = alloc_pages(sk->sk_allocation, 0);
1765 if (!page) {
1766 err = -ENOBUFS;
1767 skb_shinfo(skb)->nr_frags = i;
1768 kfree_skb(skb);
1769 goto failure;
1770 }
1771
1772 __skb_fill_page_desc(skb, i,
1773 page, 0,
1774 (data_len >= PAGE_SIZE ?
1775 PAGE_SIZE :
1776 data_len));
1777 data_len -= PAGE_SIZE;
1778 }
1779 1780
1780 /* Full success... */ 1781 err = -ENOBUFS;
1781 break; 1782 gfp_mask = sk->sk_allocation;
1782 } 1783 if (gfp_mask & __GFP_WAIT)
1783 err = -ENOBUFS; 1784 gfp_mask |= __GFP_REPEAT;
1785
1786 skb = alloc_skb(header_len, gfp_mask);
1787 if (!skb)
1784 goto failure; 1788 goto failure;
1789
1790 skb->truesize += data_len;
1791
1792 for (i = 0; npages > 0; i++) {
1793 int order = max_page_order;
1794
1795 while (order) {
1796 if (npages >= 1 << order) {
1797 page = alloc_pages(sk->sk_allocation |
1798 __GFP_COMP | __GFP_NOWARN,
1799 order);
1800 if (page)
1801 goto fill_page;
1802 }
1803 order--;
1804 }
1805 page = alloc_page(sk->sk_allocation);
1806 if (!page)
1807 goto failure;
1808fill_page:
1809 chunk = min_t(unsigned long, data_len,
1810 PAGE_SIZE << order);
1811 skb_fill_page_desc(skb, i, page, 0, chunk);
1812 data_len -= chunk;
1813 npages -= 1 << order;
1785 } 1814 }
1786 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1787 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1788 err = -EAGAIN;
1789 if (!timeo)
1790 goto failure;
1791 if (signal_pending(current))
1792 goto interrupted;
1793 timeo = sock_wait_for_wmem(sk, timeo);
1794 } 1815 }
1795 1816
1796 skb_set_owner_w(skb, sk); 1817 skb_set_owner_w(skb, sk);
@@ -1799,6 +1820,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1799interrupted: 1820interrupted:
1800 err = sock_intr_errno(timeo); 1821 err = sock_intr_errno(timeo);
1801failure: 1822failure:
1823 kfree_skb(skb);
1802 *errcode = err; 1824 *errcode = err;
1803 return NULL; 1825 return NULL;
1804} 1826}
@@ -1807,7 +1829,7 @@ EXPORT_SYMBOL(sock_alloc_send_pskb);
1807struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 1829struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1808 int noblock, int *errcode) 1830 int noblock, int *errcode)
1809{ 1831{
1810 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); 1832 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
1811} 1833}
1812EXPORT_SYMBOL(sock_alloc_send_skb); 1834EXPORT_SYMBOL(sock_alloc_send_skb);
1813 1835
@@ -2425,6 +2447,52 @@ void sock_enable_timestamp(struct sock *sk, int flag)
2425 } 2447 }
2426} 2448}
2427 2449
2450int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
2451 int level, int type)
2452{
2453 struct sock_exterr_skb *serr;
2454 struct sk_buff *skb, *skb2;
2455 int copied, err;
2456
2457 err = -EAGAIN;
2458 skb = skb_dequeue(&sk->sk_error_queue);
2459 if (skb == NULL)
2460 goto out;
2461
2462 copied = skb->len;
2463 if (copied > len) {
2464 msg->msg_flags |= MSG_TRUNC;
2465 copied = len;
2466 }
2467 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2468 if (err)
2469 goto out_free_skb;
2470
2471 sock_recv_timestamp(msg, sk, skb);
2472
2473 serr = SKB_EXT_ERR(skb);
2474 put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
2475
2476 msg->msg_flags |= MSG_ERRQUEUE;
2477 err = copied;
2478
2479 /* Reset and regenerate socket error */
2480 spin_lock_bh(&sk->sk_error_queue.lock);
2481 sk->sk_err = 0;
2482 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
2483 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
2484 spin_unlock_bh(&sk->sk_error_queue.lock);
2485 sk->sk_error_report(sk);
2486 } else
2487 spin_unlock_bh(&sk->sk_error_queue.lock);
2488
2489out_free_skb:
2490 kfree_skb(skb);
2491out:
2492 return err;
2493}
2494EXPORT_SYMBOL(sock_recv_errqueue);
2495
2428/* 2496/*
2429 * Get a socket option on an socket. 2497 * Get a socket option on an socket.
2430 * 2498 *
diff --git a/net/core/stream.c b/net/core/stream.c
index f5df85dcd20b..512f0a24269b 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -30,7 +30,7 @@ void sk_stream_write_space(struct sock *sk)
30 struct socket *sock = sk->sk_socket; 30 struct socket *sock = sk->sk_socket;
31 struct socket_wq *wq; 31 struct socket_wq *wq;
32 32
33 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { 33 if (sk_stream_is_writeable(sk) && sock) {
34 clear_bit(SOCK_NOSPACE, &sock->flags); 34 clear_bit(SOCK_NOSPACE, &sock->flags);
35 35
36 rcu_read_lock(); 36 rcu_read_lock();
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 31107abd2783..cca444190907 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -20,6 +20,7 @@
20#include <net/sock.h> 20#include <net/sock.h>
21#include <net/net_ratelimit.h> 21#include <net/net_ratelimit.h>
22#include <net/busy_poll.h> 22#include <net/busy_poll.h>
23#include <net/pkt_sched.h>
23 24
24static int zero = 0; 25static int zero = 0;
25static int one = 1; 26static int one = 1;
@@ -193,6 +194,26 @@ static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
193} 194}
194#endif /* CONFIG_NET_FLOW_LIMIT */ 195#endif /* CONFIG_NET_FLOW_LIMIT */
195 196
197#ifdef CONFIG_NET_SCHED
198static int set_default_qdisc(struct ctl_table *table, int write,
199 void __user *buffer, size_t *lenp, loff_t *ppos)
200{
201 char id[IFNAMSIZ];
202 struct ctl_table tbl = {
203 .data = id,
204 .maxlen = IFNAMSIZ,
205 };
206 int ret;
207
208 qdisc_get_default(id, IFNAMSIZ);
209
210 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
211 if (write && ret == 0)
212 ret = qdisc_set_default(id);
213 return ret;
214}
215#endif
216
196static struct ctl_table net_core_table[] = { 217static struct ctl_table net_core_table[] = {
197#ifdef CONFIG_NET 218#ifdef CONFIG_NET
198 { 219 {
@@ -315,7 +336,14 @@ static struct ctl_table net_core_table[] = {
315 .mode = 0644, 336 .mode = 0644,
316 .proc_handler = proc_dointvec 337 .proc_handler = proc_dointvec
317 }, 338 },
318# 339#endif
340#ifdef CONFIG_NET_SCHED
341 {
342 .procname = "default_qdisc",
343 .mode = 0644,
344 .maxlen = IFNAMSIZ,
345 .proc_handler = set_default_qdisc
346 },
319#endif 347#endif
320#endif /* CONFIG_NET */ 348#endif /* CONFIG_NET */
321 { 349 {
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 6c7c78b83940..ba64750f0387 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -336,7 +336,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
336 mask |= POLLIN | POLLRDNORM; 336 mask |= POLLIN | POLLRDNORM;
337 337
338 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 338 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
339 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { 339 if (sk_stream_is_writeable(sk)) {
340 mask |= POLLOUT | POLLWRNORM; 340 mask |= POLLOUT | POLLWRNORM;
341 } else { /* send SIGIO later */ 341 } else { /* send SIGIO later */
342 set_bit(SOCK_ASYNC_NOSPACE, 342 set_bit(SOCK_ASYNC_NOSPACE,
@@ -347,7 +347,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
347 * wspace test but before the flags are set, 347 * wspace test but before the flags are set,
348 * IO signal will be lost. 348 * IO signal will be lost.
349 */ 349 */
350 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) 350 if (sk_stream_is_writeable(sk))
351 mask |= POLLOUT | POLLWRNORM; 351 mask |= POLLOUT | POLLWRNORM;
352 } 352 }
353 } 353 }
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6ebd8fbd9285..29d684ebca6a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -347,7 +347,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
347 347
348 slave_dev->features = master->vlan_features; 348 slave_dev->features = master->vlan_features;
349 SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); 349 SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops);
350 memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); 350 eth_hw_addr_inherit(slave_dev, master);
351 slave_dev->tx_queue_len = 0; 351 slave_dev->tx_queue_len = 0;
352 352
353 switch (ds->dst->tag_protocol) { 353 switch (ds->dst->tag_protocol) {
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 3b9d5f20bd1c..c85e71e0c7ff 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -67,39 +67,6 @@ static const u8 lowpan_ttl_values[] = {0, 1, 64, 255};
67 67
68static LIST_HEAD(lowpan_devices); 68static LIST_HEAD(lowpan_devices);
69 69
70/*
71 * Uncompression of linklocal:
72 * 0 -> 16 bytes from packet
73 * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet
74 * 2 -> 2 bytes from prefix - zeroes + 2 from packet
75 * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr
76 *
77 * NOTE: => the uncompress function does change 0xf to 0x10
78 * NOTE: 0x00 => no-autoconfig => unspecified
79 */
80static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20};
81
82/*
83 * Uncompression of ctx-based:
84 * 0 -> 0 bits from packet [unspecified / reserved]
85 * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet
86 * 2 -> 8 bytes from prefix - zeroes + 2 from packet
87 * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr
88 */
89static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80};
90
91/*
92 * Uncompression of ctx-base
93 * 0 -> 0 bits from packet
94 * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet
95 * 2 -> 2 bytes from prefix - zeroes + 3 from packet
96 * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr
97 */
98static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21};
99
100/* Link local prefix */
101static const u8 lowpan_llprefix[] = {0xfe, 0x80};
102
103/* private device info */ 70/* private device info */
104struct lowpan_dev_info { 71struct lowpan_dev_info {
105 struct net_device *real_dev; /* real WPAN device ptr */ 72 struct net_device *real_dev; /* real WPAN device ptr */
@@ -191,55 +158,177 @@ lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr,
191 return rol8(val, shift); 158 return rol8(val, shift);
192} 159}
193 160
194static void 161/*
195lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr) 162 * Uncompress address function for source and
163 * destination address(non-multicast).
164 *
165 * address_mode is sam value or dam value.
166 */
167static int
168lowpan_uncompress_addr(struct sk_buff *skb,
169 struct in6_addr *ipaddr,
170 const u8 address_mode,
171 const struct ieee802154_addr *lladdr)
196{ 172{
197 memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ADDR_LEN); 173 bool fail;
198 /* second bit-flip (Universe/Local) is done according RFC2464 */ 174
199 ipaddr->s6_addr[8] ^= 0x02; 175 switch (address_mode) {
176 case LOWPAN_IPHC_ADDR_00:
177 /* for global link addresses */
178 fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
179 break;
180 case LOWPAN_IPHC_ADDR_01:
181 /* fe:80::XXXX:XXXX:XXXX:XXXX */
182 ipaddr->s6_addr[0] = 0xFE;
183 ipaddr->s6_addr[1] = 0x80;
184 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8);
185 break;
186 case LOWPAN_IPHC_ADDR_02:
187 /* fe:80::ff:fe00:XXXX */
188 ipaddr->s6_addr[0] = 0xFE;
189 ipaddr->s6_addr[1] = 0x80;
190 ipaddr->s6_addr[11] = 0xFF;
191 ipaddr->s6_addr[12] = 0xFE;
192 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2);
193 break;
194 case LOWPAN_IPHC_ADDR_03:
195 fail = false;
196 switch (lladdr->addr_type) {
197 case IEEE802154_ADDR_LONG:
198 /* fe:80::XXXX:XXXX:XXXX:XXXX
199 * \_________________/
200 * hwaddr
201 */
202 ipaddr->s6_addr[0] = 0xFE;
203 ipaddr->s6_addr[1] = 0x80;
204 memcpy(&ipaddr->s6_addr[8], lladdr->hwaddr,
205 IEEE802154_ADDR_LEN);
206 /* second bit-flip (Universe/Local)
207 * is done according RFC2464
208 */
209 ipaddr->s6_addr[8] ^= 0x02;
210 break;
211 case IEEE802154_ADDR_SHORT:
212 /* fe:80::ff:fe00:XXXX
213 * \__/
214 * short_addr
215 *
216 * Universe/Local bit is zero.
217 */
218 ipaddr->s6_addr[0] = 0xFE;
219 ipaddr->s6_addr[1] = 0x80;
220 ipaddr->s6_addr[11] = 0xFF;
221 ipaddr->s6_addr[12] = 0xFE;
222 ipaddr->s6_addr16[7] = htons(lladdr->short_addr);
223 break;
224 default:
225 pr_debug("Invalid addr_type set\n");
226 return -EINVAL;
227 }
228 break;
229 default:
230 pr_debug("Invalid address mode value: 0x%x\n", address_mode);
231 return -EINVAL;
232 }
233
234 if (fail) {
235 pr_debug("Failed to fetch skb data\n");
236 return -EIO;
237 }
238
239 lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 addr is:\n",
240 ipaddr->s6_addr, 16);
241
242 return 0;
200} 243}
201 244
202/* 245/* Uncompress address function for source context
203 * Uncompress addresses based on a prefix and a postfix with zeroes in 246 * based address(non-multicast).
204 * between. If the postfix is zero in length it will use the link address
205 * to configure the IP address (autoconf style).
206 * pref_post_count takes a byte where the first nibble specify prefix count
207 * and the second postfix count (NOTE: 15/0xf => 16 bytes copy).
208 */ 247 */
209static int 248static int
210lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr, 249lowpan_uncompress_context_based_src_addr(struct sk_buff *skb,
211 u8 const *prefix, u8 pref_post_count, unsigned char *lladdr) 250 struct in6_addr *ipaddr,
251 const u8 sam)
212{ 252{
213 u8 prefcount = pref_post_count >> 4; 253 switch (sam) {
214 u8 postcount = pref_post_count & 0x0f; 254 case LOWPAN_IPHC_ADDR_00:
215 255 /* unspec address ::
216 /* full nibble 15 => 16 */ 256 * Do nothing, address is already ::
217 prefcount = (prefcount == 15 ? 16 : prefcount); 257 */
218 postcount = (postcount == 15 ? 16 : postcount); 258 break;
219 259 case LOWPAN_IPHC_ADDR_01:
220 if (lladdr) 260 /* TODO */
221 lowpan_raw_dump_inline(__func__, "linklocal address", 261 case LOWPAN_IPHC_ADDR_02:
222 lladdr, IEEE802154_ADDR_LEN); 262 /* TODO */
223 if (prefcount > 0) 263 case LOWPAN_IPHC_ADDR_03:
224 memcpy(ipaddr, prefix, prefcount); 264 /* TODO */
225 265 netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam);
226 if (prefcount + postcount < 16) 266 return -EINVAL;
227 memset(&ipaddr->s6_addr[prefcount], 0, 267 default:
228 16 - (prefcount + postcount)); 268 pr_debug("Invalid sam value: 0x%x\n", sam);
229 269 return -EINVAL;
230 if (postcount > 0) { 270 }
231 memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount); 271
232 skb_pull(skb, postcount); 272 lowpan_raw_dump_inline(NULL,
233 } else if (prefcount > 0) { 273 "Reconstructed context based ipv6 src addr is:\n",
234 if (lladdr == NULL) 274 ipaddr->s6_addr, 16);
235 return -EINVAL; 275
276 return 0;
277}
236 278
237 /* no IID based configuration if no prefix and no data */ 279/* Uncompress function for multicast destination address,
238 lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr); 280 * when M bit is set.
281 */
282static int
283lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
284 struct in6_addr *ipaddr,
285 const u8 dam)
286{
287 bool fail;
288
289 switch (dam) {
290 case LOWPAN_IPHC_DAM_00:
291 /* 00: 128 bits. The full address
292 * is carried in-line.
293 */
294 fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
295 break;
296 case LOWPAN_IPHC_DAM_01:
297 /* 01: 48 bits. The address takes
298 * the form ffXX::00XX:XXXX:XXXX.
299 */
300 ipaddr->s6_addr[0] = 0xFF;
301 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);
302 fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5);
303 break;
304 case LOWPAN_IPHC_DAM_10:
305 /* 10: 32 bits. The address takes
306 * the form ffXX::00XX:XXXX.
307 */
308 ipaddr->s6_addr[0] = 0xFF;
309 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);
310 fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[13], 3);
311 break;
312 case LOWPAN_IPHC_DAM_11:
313 /* 11: 8 bits. The address takes
314 * the form ff02::00XX.
315 */
316 ipaddr->s6_addr[0] = 0xFF;
317 ipaddr->s6_addr[1] = 0x02;
318 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1);
319 break;
320 default:
321 pr_debug("DAM value has a wrong value: 0x%x\n", dam);
322 return -EINVAL;
323 }
324
325 if (fail) {
326 pr_debug("Failed to fetch skb data\n");
327 return -EIO;
239 } 328 }
240 329
241 pr_debug("uncompressing %d + %d => ", prefcount, postcount); 330 lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is:\n",
242 lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16); 331 ipaddr->s6_addr, 16);
243 332
244 return 0; 333 return 0;
245} 334}
@@ -702,6 +791,12 @@ lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)
702 skb_reserve(frame->skb, sizeof(struct ipv6hdr)); 791 skb_reserve(frame->skb, sizeof(struct ipv6hdr));
703 skb_put(frame->skb, frame->length); 792 skb_put(frame->skb, frame->length);
704 793
794 /* copy the first control block to keep a
795 * trace of the link-layer addresses in case
796 * of a link-local compressed address
797 */
798 memcpy(frame->skb->cb, skb->cb, sizeof(skb->cb));
799
705 init_timer(&frame->timer); 800 init_timer(&frame->timer);
706 /* time out is the same as for ipv6 - 60 sec */ 801 /* time out is the same as for ipv6 - 60 sec */
707 frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT; 802 frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT;
@@ -723,9 +818,9 @@ frame_err:
723static int 818static int
724lowpan_process_data(struct sk_buff *skb) 819lowpan_process_data(struct sk_buff *skb)
725{ 820{
726 struct ipv6hdr hdr; 821 struct ipv6hdr hdr = {};
727 u8 tmp, iphc0, iphc1, num_context = 0; 822 u8 tmp, iphc0, iphc1, num_context = 0;
728 u8 *_saddr, *_daddr; 823 const struct ieee802154_addr *_saddr, *_daddr;
729 int err; 824 int err;
730 825
731 lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, 826 lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
@@ -828,8 +923,8 @@ lowpan_process_data(struct sk_buff *skb)
828 if (lowpan_fetch_skb_u8(skb, &iphc1)) 923 if (lowpan_fetch_skb_u8(skb, &iphc1))
829 goto drop; 924 goto drop;
830 925
831 _saddr = mac_cb(skb)->sa.hwaddr; 926 _saddr = &mac_cb(skb)->sa;
832 _daddr = mac_cb(skb)->da.hwaddr; 927 _daddr = &mac_cb(skb)->da;
833 928
834 pr_debug("iphc0 = %02x, iphc1 = %02x\n", iphc0, iphc1); 929 pr_debug("iphc0 = %02x, iphc1 = %02x\n", iphc0, iphc1);
835 930
@@ -868,8 +963,6 @@ lowpan_process_data(struct sk_buff *skb)
868 963
869 hdr.priority = ((tmp >> 2) & 0x0f); 964 hdr.priority = ((tmp >> 2) & 0x0f);
870 hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); 965 hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30);
871 hdr.flow_lbl[1] = 0;
872 hdr.flow_lbl[2] = 0;
873 break; 966 break;
874 /* 967 /*
875 * Flow Label carried in-line 968 * Flow Label carried in-line
@@ -885,10 +978,6 @@ lowpan_process_data(struct sk_buff *skb)
885 break; 978 break;
886 /* Traffic Class and Flow Label are elided */ 979 /* Traffic Class and Flow Label are elided */
887 case 3: /* 11b */ 980 case 3: /* 11b */
888 hdr.priority = 0;
889 hdr.flow_lbl[0] = 0;
890 hdr.flow_lbl[1] = 0;
891 hdr.flow_lbl[2] = 0;
892 break; 981 break;
893 default: 982 default:
894 break; 983 break;
@@ -915,10 +1004,18 @@ lowpan_process_data(struct sk_buff *skb)
915 /* Extract SAM to the tmp variable */ 1004 /* Extract SAM to the tmp variable */
916 tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; 1005 tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03;
917 1006
918 /* Source address uncompression */ 1007 if (iphc1 & LOWPAN_IPHC_SAC) {
919 pr_debug("source address stateless compression\n"); 1008 /* Source address context based uncompression */
920 err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix, 1009 pr_debug("SAC bit is set. Handle context based source address.\n");
921 lowpan_unc_llconf[tmp], skb->data); 1010 err = lowpan_uncompress_context_based_src_addr(
1011 skb, &hdr.saddr, tmp);
1012 } else {
1013 /* Source address uncompression */
1014 pr_debug("source address stateless compression\n");
1015 err = lowpan_uncompress_addr(skb, &hdr.saddr, tmp, _saddr);
1016 }
1017
1018 /* Check on error of previous branch */
922 if (err) 1019 if (err)
923 goto drop; 1020 goto drop;
924 1021
@@ -931,23 +1028,14 @@ lowpan_process_data(struct sk_buff *skb)
931 pr_debug("dest: context-based mcast compression\n"); 1028 pr_debug("dest: context-based mcast compression\n");
932 /* TODO: implement this */ 1029 /* TODO: implement this */
933 } else { 1030 } else {
934 u8 prefix[] = {0xff, 0x02}; 1031 err = lowpan_uncompress_multicast_daddr(
935 1032 skb, &hdr.daddr, tmp);
936 pr_debug("dest: non context-based mcast compression\n");
937 if (0 < tmp && tmp < 3) {
938 if (lowpan_fetch_skb_u8(skb, &prefix[1]))
939 goto drop;
940 }
941
942 err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix,
943 lowpan_unc_mxconf[tmp], NULL);
944 if (err) 1033 if (err)
945 goto drop; 1034 goto drop;
946 } 1035 }
947 } else { 1036 } else {
948 pr_debug("dest: stateless compression\n"); 1037 pr_debug("dest: stateless compression\n");
949 err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix, 1038 err = lowpan_uncompress_addr(skb, &hdr.daddr, tmp, _daddr);
950 lowpan_unc_llconf[tmp], skb->data);
951 if (err) 1039 if (err)
952 goto drop; 1040 goto drop;
953 } 1041 }
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index 4b8f917658b5..2869c0526dad 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -193,10 +193,12 @@
193/* Values of fields within the IPHC encoding second byte */ 193/* Values of fields within the IPHC encoding second byte */
194#define LOWPAN_IPHC_CID 0x80 194#define LOWPAN_IPHC_CID 0x80
195 195
196#define LOWPAN_IPHC_ADDR_00 0x00
197#define LOWPAN_IPHC_ADDR_01 0x01
198#define LOWPAN_IPHC_ADDR_02 0x02
199#define LOWPAN_IPHC_ADDR_03 0x03
200
196#define LOWPAN_IPHC_SAC 0x40 201#define LOWPAN_IPHC_SAC 0x40
197#define LOWPAN_IPHC_SAM_00 0x00
198#define LOWPAN_IPHC_SAM_01 0x10
199#define LOWPAN_IPHC_SAM_10 0x20
200#define LOWPAN_IPHC_SAM 0x30 202#define LOWPAN_IPHC_SAM 0x30
201 203
202#define LOWPAN_IPHC_SAM_BIT 4 204#define LOWPAN_IPHC_SAM_BIT 4
@@ -230,4 +232,16 @@
230 dest = 16 bit inline */ 232 dest = 16 bit inline */
231#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ 233#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */
232 234
235static inline bool lowpan_fetch_skb(struct sk_buff *skb,
236 void *data, const unsigned int len)
237{
238 if (unlikely(!pskb_may_pull(skb, len)))
239 return true;
240
241 skb_copy_from_linear_data(skb, data, len);
242 skb_pull(skb, len);
243
244 return false;
245}
246
233#endif /* __6LOWPAN_H__ */ 247#endif /* __6LOWPAN_H__ */
diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c
index 13571eae6bae..ef56ab5b35fe 100644
--- a/net/ieee802154/wpan-class.c
+++ b/net/ieee802154/wpan-class.c
@@ -36,7 +36,8 @@ static ssize_t name ## _show(struct device *dev, \
36 ret = snprintf(buf, PAGE_SIZE, format_string "\n", args); \ 36 ret = snprintf(buf, PAGE_SIZE, format_string "\n", args); \
37 mutex_unlock(&phy->pib_lock); \ 37 mutex_unlock(&phy->pib_lock); \
38 return ret; \ 38 return ret; \
39} 39} \
40static DEVICE_ATTR_RO(name);
40 41
41#define MASTER_SHOW(field, format_string) \ 42#define MASTER_SHOW(field, format_string) \
42 MASTER_SHOW_COMPLEX(field, format_string, phy->field) 43 MASTER_SHOW_COMPLEX(field, format_string, phy->field)
@@ -66,15 +67,17 @@ static ssize_t channels_supported_show(struct device *dev,
66 mutex_unlock(&phy->pib_lock); 67 mutex_unlock(&phy->pib_lock);
67 return len; 68 return len;
68} 69}
69 70static DEVICE_ATTR_RO(channels_supported);
70static struct device_attribute pmib_attrs[] = { 71
71 __ATTR_RO(current_channel), 72static struct attribute *pmib_attrs[] = {
72 __ATTR_RO(current_page), 73 &dev_attr_current_channel.attr,
73 __ATTR_RO(channels_supported), 74 &dev_attr_current_page.attr,
74 __ATTR_RO(transmit_power), 75 &dev_attr_channels_supported.attr,
75 __ATTR_RO(cca_mode), 76 &dev_attr_transmit_power.attr,
76 {}, 77 &dev_attr_cca_mode.attr,
78 NULL,
77}; 79};
80ATTRIBUTE_GROUPS(pmib);
78 81
79static void wpan_phy_release(struct device *d) 82static void wpan_phy_release(struct device *d)
80{ 83{
@@ -85,7 +88,7 @@ static void wpan_phy_release(struct device *d)
85static struct class wpan_phy_class = { 88static struct class wpan_phy_class = {
86 .name = "ieee802154", 89 .name = "ieee802154",
87 .dev_release = wpan_phy_release, 90 .dev_release = wpan_phy_release,
88 .dev_attrs = pmib_attrs, 91 .dev_groups = pmib_groups,
89}; 92};
90 93
91static DEFINE_MUTEX(wpan_phy_mutex); 94static DEFINE_MUTEX(wpan_phy_mutex);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 37cf1a6ea3ad..05c57f0fcabe 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -259,22 +259,6 @@ config IP_PIMSM_V2
259 gated-5). This routing protocol is not used widely, so say N unless 259 gated-5). This routing protocol is not used widely, so say N unless
260 you want to play with it. 260 you want to play with it.
261 261
262config ARPD
263 bool "IP: ARP daemon support"
264 ---help---
265 The kernel maintains an internal cache which maps IP addresses to
266 hardware addresses on the local network, so that Ethernet
267 frames are sent to the proper address on the physical networking
268 layer. Normally, kernel uses the ARP protocol to resolve these
269 mappings.
270
271 Saying Y here adds support to have an user space daemon to do this
272 resolution instead. This is useful for implementing an alternate
273 address resolution protocol (e.g. NHRP on mGRE tunnels) and also for
274 testing purposes.
275
276 If unsure, say N.
277
278config SYN_COOKIES 262config SYN_COOKIES
279 bool "IP: TCP syncookie support" 263 bool "IP: TCP syncookie support"
280 ---help--- 264 ---help---
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b4d0be2b7ce9..7a1874b7b8fd 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1532,18 +1532,6 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
1532} 1532}
1533EXPORT_SYMBOL_GPL(snmp_mib_init); 1533EXPORT_SYMBOL_GPL(snmp_mib_init);
1534 1534
1535void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ])
1536{
1537 int i;
1538
1539 BUG_ON(ptr == NULL);
1540 for (i = 0; i < SNMP_ARRAY_SZ; i++) {
1541 free_percpu(ptr[i]);
1542 ptr[i] = NULL;
1543 }
1544}
1545EXPORT_SYMBOL_GPL(snmp_mib_free);
1546
1547#ifdef CONFIG_IP_MULTICAST 1535#ifdef CONFIG_IP_MULTICAST
1548static const struct net_protocol igmp_protocol = { 1536static const struct net_protocol igmp_protocol = {
1549 .handler = igmp_rcv, 1537 .handler = igmp_rcv,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 4429b013f269..7808093cede6 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -368,9 +368,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
368 } else { 368 } else {
369 probes -= neigh->parms->app_probes; 369 probes -= neigh->parms->app_probes;
370 if (probes < 0) { 370 if (probes < 0) {
371#ifdef CONFIG_ARPD
372 neigh_app_ns(neigh); 371 neigh_app_ns(neigh);
373#endif
374 return; 372 return;
375 } 373 }
376 } 374 }
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 34ca6d5a3a4b..a1b5bcbd04ae 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -73,6 +73,8 @@ static struct ipv4_devconf ipv4_devconf = {
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
76 }, 78 },
77}; 79};
78 80
@@ -83,6 +85,8 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
83 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 85 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 86 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, 87 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
86 }, 90 },
87}; 91};
88 92
@@ -1126,10 +1130,7 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1126 if (len < (int) sizeof(ifr)) 1130 if (len < (int) sizeof(ifr))
1127 break; 1131 break;
1128 memset(&ifr, 0, sizeof(struct ifreq)); 1132 memset(&ifr, 0, sizeof(struct ifreq));
1129 if (ifa->ifa_label) 1133 strcpy(ifr.ifr_name, ifa->ifa_label);
1130 strcpy(ifr.ifr_name, ifa->ifa_label);
1131 else
1132 strcpy(ifr.ifr_name, dev->name);
1133 1134
1134 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; 1135 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1135 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = 1136 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
@@ -2097,11 +2098,15 @@ static struct devinet_sysctl_table {
2097 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 2098 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2098 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 2099 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2099 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), 2100 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2101 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2102 "force_igmp_version"),
2103 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2104 "igmpv2_unsolicited_report_interval"),
2105 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2106 "igmpv3_unsolicited_report_interval"),
2100 2107
2101 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 2108 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2102 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 2109 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2103 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2104 "force_igmp_version"),
2105 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 2110 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2106 "promote_secondaries"), 2111 "promote_secondaries"),
2107 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, 2112 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 26aa65d1fce4..523be38e37de 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -101,6 +101,30 @@ errout:
101 return err; 101 return err;
102} 102}
103 103
104static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
105{
106 struct fib_result *result = (struct fib_result *) arg->result;
107 struct net_device *dev = result->fi->fib_dev;
108
109 /* do not accept result if the route does
110 * not meet the required prefix length
111 */
112 if (result->prefixlen <= rule->suppress_prefixlen)
113 goto suppress_route;
114
115 /* do not accept result if the route uses a device
116 * belonging to a forbidden interface group
117 */
118 if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup)
119 goto suppress_route;
120
121 return false;
122
123suppress_route:
124 if (!(arg->flags & FIB_LOOKUP_NOREF))
125 fib_info_put(result->fi);
126 return true;
127}
104 128
105static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 129static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
106{ 130{
@@ -267,6 +291,7 @@ static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = {
267 .rule_size = sizeof(struct fib4_rule), 291 .rule_size = sizeof(struct fib4_rule),
268 .addr_size = sizeof(u32), 292 .addr_size = sizeof(u32),
269 .action = fib4_rule_action, 293 .action = fib4_rule_action,
294 .suppress = fib4_rule_suppress,
270 .match = fib4_rule_match, 295 .match = fib4_rule_match,
271 .configure = fib4_rule_configure, 296 .configure = fib4_rule_configure,
272 .delete = fib4_rule_delete, 297 .delete = fib4_rule_delete,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index cd71190d2962..d6c0e64ec97f 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -88,6 +88,7 @@
88#include <linux/if_arp.h> 88#include <linux/if_arp.h>
89#include <linux/rtnetlink.h> 89#include <linux/rtnetlink.h>
90#include <linux/times.h> 90#include <linux/times.h>
91#include <linux/pkt_sched.h>
91 92
92#include <net/net_namespace.h> 93#include <net/net_namespace.h>
93#include <net/arp.h> 94#include <net/arp.h>
@@ -113,7 +114,8 @@
113 114
114#define IGMP_V1_Router_Present_Timeout (400*HZ) 115#define IGMP_V1_Router_Present_Timeout (400*HZ)
115#define IGMP_V2_Router_Present_Timeout (400*HZ) 116#define IGMP_V2_Router_Present_Timeout (400*HZ)
116#define IGMP_Unsolicited_Report_Interval (10*HZ) 117#define IGMP_V2_Unsolicited_Report_Interval (10*HZ)
118#define IGMP_V3_Unsolicited_Report_Interval (1*HZ)
117#define IGMP_Query_Response_Interval (10*HZ) 119#define IGMP_Query_Response_Interval (10*HZ)
118#define IGMP_Unsolicited_Report_Count 2 120#define IGMP_Unsolicited_Report_Count 2
119 121
@@ -138,6 +140,29 @@
138 ((in_dev)->mr_v2_seen && \ 140 ((in_dev)->mr_v2_seen && \
139 time_before(jiffies, (in_dev)->mr_v2_seen))) 141 time_before(jiffies, (in_dev)->mr_v2_seen)))
140 142
143static int unsolicited_report_interval(struct in_device *in_dev)
144{
145 int interval_ms, interval_jiffies;
146
147 if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
148 interval_ms = IN_DEV_CONF_GET(
149 in_dev,
150 IGMPV2_UNSOLICITED_REPORT_INTERVAL);
151 else /* v3 */
152 interval_ms = IN_DEV_CONF_GET(
153 in_dev,
154 IGMPV3_UNSOLICITED_REPORT_INTERVAL);
155
156 interval_jiffies = msecs_to_jiffies(interval_ms);
157
158 /* _timer functions can't handle a delay of 0 jiffies so ensure
159 * we always return a positive value.
160 */
161 if (interval_jiffies <= 0)
162 interval_jiffies = 1;
163 return interval_jiffies;
164}
165
141static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); 166static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
142static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); 167static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr);
143static void igmpv3_clear_delrec(struct in_device *in_dev); 168static void igmpv3_clear_delrec(struct in_device *in_dev);
@@ -315,6 +340,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
315 if (size < 256) 340 if (size < 256)
316 return NULL; 341 return NULL;
317 } 342 }
343 skb->priority = TC_PRIO_CONTROL;
318 igmp_skb_size(skb) = size; 344 igmp_skb_size(skb) = size;
319 345
320 rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0, 346 rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
@@ -670,6 +696,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
670 ip_rt_put(rt); 696 ip_rt_put(rt);
671 return -1; 697 return -1;
672 } 698 }
699 skb->priority = TC_PRIO_CONTROL;
673 700
674 skb_dst_set(skb, &rt->dst); 701 skb_dst_set(skb, &rt->dst);
675 702
@@ -719,7 +746,8 @@ static void igmp_ifc_timer_expire(unsigned long data)
719 igmpv3_send_cr(in_dev); 746 igmpv3_send_cr(in_dev);
720 if (in_dev->mr_ifc_count) { 747 if (in_dev->mr_ifc_count) {
721 in_dev->mr_ifc_count--; 748 in_dev->mr_ifc_count--;
722 igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval); 749 igmp_ifc_start_timer(in_dev,
750 unsolicited_report_interval(in_dev));
723 } 751 }
724 __in_dev_put(in_dev); 752 __in_dev_put(in_dev);
725} 753}
@@ -744,7 +772,7 @@ static void igmp_timer_expire(unsigned long data)
744 772
745 if (im->unsolicit_count) { 773 if (im->unsolicit_count) {
746 im->unsolicit_count--; 774 im->unsolicit_count--;
747 igmp_start_timer(im, IGMP_Unsolicited_Report_Interval); 775 igmp_start_timer(im, unsolicited_report_interval(in_dev));
748 } 776 }
749 im->reporter = 1; 777 im->reporter = 1;
750 spin_unlock(&im->lock); 778 spin_unlock(&im->lock);
@@ -1323,16 +1351,17 @@ out:
1323EXPORT_SYMBOL(ip_mc_inc_group); 1351EXPORT_SYMBOL(ip_mc_inc_group);
1324 1352
1325/* 1353/*
1326 * Resend IGMP JOIN report; used for bonding. 1354 * Resend IGMP JOIN report; used by netdev notifier.
1327 * Called with rcu_read_lock()
1328 */ 1355 */
1329void ip_mc_rejoin_groups(struct in_device *in_dev) 1356static void ip_mc_rejoin_groups(struct in_device *in_dev)
1330{ 1357{
1331#ifdef CONFIG_IP_MULTICAST 1358#ifdef CONFIG_IP_MULTICAST
1332 struct ip_mc_list *im; 1359 struct ip_mc_list *im;
1333 int type; 1360 int type;
1334 1361
1335 for_each_pmc_rcu(in_dev, im) { 1362 ASSERT_RTNL();
1363
1364 for_each_pmc_rtnl(in_dev, im) {
1336 if (im->multiaddr == IGMP_ALL_HOSTS) 1365 if (im->multiaddr == IGMP_ALL_HOSTS)
1337 continue; 1366 continue;
1338 1367
@@ -1349,7 +1378,6 @@ void ip_mc_rejoin_groups(struct in_device *in_dev)
1349 } 1378 }
1350#endif 1379#endif
1351} 1380}
1352EXPORT_SYMBOL(ip_mc_rejoin_groups);
1353 1381
1354/* 1382/*
1355 * A socket has left a multicast group on device dev 1383 * A socket has left a multicast group on device dev
@@ -2735,8 +2763,42 @@ static struct pernet_operations igmp_net_ops = {
2735 .exit = igmp_net_exit, 2763 .exit = igmp_net_exit,
2736}; 2764};
2737 2765
2766static int igmp_netdev_event(struct notifier_block *this,
2767 unsigned long event, void *ptr)
2768{
2769 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2770 struct in_device *in_dev;
2771
2772 switch (event) {
2773 case NETDEV_RESEND_IGMP:
2774 in_dev = __in_dev_get_rtnl(dev);
2775 if (in_dev)
2776 ip_mc_rejoin_groups(in_dev);
2777 break;
2778 default:
2779 break;
2780 }
2781 return NOTIFY_DONE;
2782}
2783
2784static struct notifier_block igmp_notifier = {
2785 .notifier_call = igmp_netdev_event,
2786};
2787
2738int __init igmp_mc_proc_init(void) 2788int __init igmp_mc_proc_init(void)
2739{ 2789{
2740 return register_pernet_subsys(&igmp_net_ops); 2790 int err;
2791
2792 err = register_pernet_subsys(&igmp_net_ops);
2793 if (err)
2794 return err;
2795 err = register_netdevice_notifier(&igmp_notifier);
2796 if (err)
2797 goto reg_notif_fail;
2798 return 0;
2799
2800reg_notif_fail:
2801 unregister_pernet_subsys(&igmp_net_ops);
2802 return err;
2741} 2803}
2742#endif 2804#endif
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8d6939eeb492..d7aea4c5b940 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -534,7 +534,7 @@ static int __net_init ipgre_init_net(struct net *net)
534static void __net_exit ipgre_exit_net(struct net *net) 534static void __net_exit ipgre_exit_net(struct net *net)
535{ 535{
536 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); 536 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
537 ip_tunnel_delete_net(itn); 537 ip_tunnel_delete_net(itn, &ipgre_link_ops);
538} 538}
539 539
540static struct pernet_operations ipgre_net_ops = { 540static struct pernet_operations ipgre_net_ops = {
@@ -767,7 +767,7 @@ static int __net_init ipgre_tap_init_net(struct net *net)
767static void __net_exit ipgre_tap_exit_net(struct net *net) 767static void __net_exit ipgre_tap_exit_net(struct net *net)
768{ 768{
769 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); 769 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
770 ip_tunnel_delete_net(itn); 770 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
771} 771}
772 772
773static struct pernet_operations ipgre_tap_net_ops = { 773static struct pernet_operations ipgre_tap_net_ops = {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 15e3e683adec..054a3e97d822 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -141,6 +141,7 @@
141#include <net/icmp.h> 141#include <net/icmp.h>
142#include <net/raw.h> 142#include <net/raw.h>
143#include <net/checksum.h> 143#include <net/checksum.h>
144#include <net/inet_ecn.h>
144#include <linux/netfilter_ipv4.h> 145#include <linux/netfilter_ipv4.h>
145#include <net/xfrm.h> 146#include <net/xfrm.h>
146#include <linux/mroute.h> 147#include <linux/mroute.h>
@@ -410,6 +411,13 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
410 if (iph->ihl < 5 || iph->version != 4) 411 if (iph->ihl < 5 || iph->version != 4)
411 goto inhdr_error; 412 goto inhdr_error;
412 413
414 BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
415 BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
416 BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
417 IP_ADD_STATS_BH(dev_net(dev),
418 IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
419 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
420
413 if (!pskb_may_pull(skb, iph->ihl*4)) 421 if (!pskb_may_pull(skb, iph->ihl*4))
414 goto inhdr_error; 422 goto inhdr_error;
415 423
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ca1cb2d5f6e2..ac9fabe0300f 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -350,7 +350,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
350 struct flowi4 fl4; 350 struct flowi4 fl4;
351 struct rtable *rt; 351 struct rtable *rt;
352 352
353 rt = ip_route_output_tunnel(dev_net(dev), &fl4, 353 rt = ip_route_output_tunnel(tunnel->net, &fl4,
354 tunnel->parms.iph.protocol, 354 tunnel->parms.iph.protocol,
355 iph->daddr, iph->saddr, 355 iph->daddr, iph->saddr,
356 tunnel->parms.o_key, 356 tunnel->parms.o_key,
@@ -365,7 +365,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
365 } 365 }
366 366
367 if (!tdev && tunnel->parms.link) 367 if (!tdev && tunnel->parms.link)
368 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 368 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
369 369
370 if (tdev) { 370 if (tdev) {
371 hlen = tdev->hard_header_len + tdev->needed_headroom; 371 hlen = tdev->hard_header_len + tdev->needed_headroom;
@@ -454,15 +454,15 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
454 tstats->rx_bytes += skb->len; 454 tstats->rx_bytes += skb->len;
455 u64_stats_update_end(&tstats->syncp); 455 u64_stats_update_end(&tstats->syncp);
456 456
457 if (tunnel->net != dev_net(tunnel->dev))
458 skb_scrub_packet(skb);
459
460 if (tunnel->dev->type == ARPHRD_ETHER) { 457 if (tunnel->dev->type == ARPHRD_ETHER) {
461 skb->protocol = eth_type_trans(skb, tunnel->dev); 458 skb->protocol = eth_type_trans(skb, tunnel->dev);
462 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 459 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
463 } else { 460 } else {
464 skb->dev = tunnel->dev; 461 skb->dev = tunnel->dev;
465 } 462 }
463
464 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
465
466 gro_cells_receive(&tunnel->gro_cells, skb); 466 gro_cells_receive(&tunnel->gro_cells, skb);
467 return 0; 467 return 0;
468 468
@@ -613,9 +613,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
613 goto tx_error; 613 goto tx_error;
614 } 614 }
615 615
616 if (tunnel->net != dev_net(dev))
617 skb_scrub_packet(skb);
618
619 if (tunnel->err_count > 0) { 616 if (tunnel->err_count > 0) {
620 if (time_before(jiffies, 617 if (time_before(jiffies,
621 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 618 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
@@ -653,9 +650,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
653 } 650 }
654 } 651 }
655 652
656 err = iptunnel_xmit(dev_net(dev), rt, skb, 653 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
657 fl4.saddr, fl4.daddr, protocol, 654 ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df,
658 ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df); 655 !net_eq(tunnel->net, dev_net(dev)));
659 iptunnel_xmit_stats(err, &dev->stats, dev->tstats); 656 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
660 657
661 return; 658 return;
@@ -820,11 +817,10 @@ static void ip_tunnel_dev_free(struct net_device *dev)
820 817
821void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) 818void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
822{ 819{
823 struct net *net = dev_net(dev);
824 struct ip_tunnel *tunnel = netdev_priv(dev); 820 struct ip_tunnel *tunnel = netdev_priv(dev);
825 struct ip_tunnel_net *itn; 821 struct ip_tunnel_net *itn;
826 822
827 itn = net_generic(net, tunnel->ip_tnl_net_id); 823 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
828 824
829 if (itn->fb_tunnel_dev != dev) { 825 if (itn->fb_tunnel_dev != dev) {
830 ip_tunnel_del(netdev_priv(dev)); 826 ip_tunnel_del(netdev_priv(dev));
@@ -838,56 +834,68 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
838{ 834{
839 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); 835 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
840 struct ip_tunnel_parm parms; 836 struct ip_tunnel_parm parms;
837 unsigned int i;
841 838
842 itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); 839 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
843 if (!itn->tunnels) 840 INIT_HLIST_HEAD(&itn->tunnels[i]);
844 return -ENOMEM;
845 841
846 if (!ops) { 842 if (!ops) {
847 itn->fb_tunnel_dev = NULL; 843 itn->fb_tunnel_dev = NULL;
848 return 0; 844 return 0;
849 } 845 }
846
850 memset(&parms, 0, sizeof(parms)); 847 memset(&parms, 0, sizeof(parms));
851 if (devname) 848 if (devname)
852 strlcpy(parms.name, devname, IFNAMSIZ); 849 strlcpy(parms.name, devname, IFNAMSIZ);
853 850
854 rtnl_lock(); 851 rtnl_lock();
855 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); 852 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
853 /* FB netdevice is special: we have one, and only one per netns.
854 * Allowing to move it to another netns is clearly unsafe.
855 */
856 if (!IS_ERR(itn->fb_tunnel_dev))
857 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
856 rtnl_unlock(); 858 rtnl_unlock();
857 if (IS_ERR(itn->fb_tunnel_dev)) {
858 kfree(itn->tunnels);
859 return PTR_ERR(itn->fb_tunnel_dev);
860 }
861 859
862 return 0; 860 return PTR_RET(itn->fb_tunnel_dev);
863} 861}
864EXPORT_SYMBOL_GPL(ip_tunnel_init_net); 862EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
865 863
866static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) 864static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
865 struct rtnl_link_ops *ops)
867{ 866{
867 struct net *net = dev_net(itn->fb_tunnel_dev);
868 struct net_device *dev, *aux;
868 int h; 869 int h;
869 870
871 for_each_netdev_safe(net, dev, aux)
872 if (dev->rtnl_link_ops == ops)
873 unregister_netdevice_queue(dev, head);
874
870 for (h = 0; h < IP_TNL_HASH_SIZE; h++) { 875 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
871 struct ip_tunnel *t; 876 struct ip_tunnel *t;
872 struct hlist_node *n; 877 struct hlist_node *n;
873 struct hlist_head *thead = &itn->tunnels[h]; 878 struct hlist_head *thead = &itn->tunnels[h];
874 879
875 hlist_for_each_entry_safe(t, n, thead, hash_node) 880 hlist_for_each_entry_safe(t, n, thead, hash_node)
876 unregister_netdevice_queue(t->dev, head); 881 /* If dev is in the same netns, it has already
882 * been added to the list by the previous loop.
883 */
884 if (!net_eq(dev_net(t->dev), net))
885 unregister_netdevice_queue(t->dev, head);
877 } 886 }
878 if (itn->fb_tunnel_dev) 887 if (itn->fb_tunnel_dev)
879 unregister_netdevice_queue(itn->fb_tunnel_dev, head); 888 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
880} 889}
881 890
882void ip_tunnel_delete_net(struct ip_tunnel_net *itn) 891void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
883{ 892{
884 LIST_HEAD(list); 893 LIST_HEAD(list);
885 894
886 rtnl_lock(); 895 rtnl_lock();
887 ip_tunnel_destroy(itn, &list); 896 ip_tunnel_destroy(itn, &list, ops);
888 unregister_netdevice_many(&list); 897 unregister_netdevice_many(&list);
889 rtnl_unlock(); 898 rtnl_unlock();
890 kfree(itn->tunnels);
891} 899}
892EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); 900EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
893 901
@@ -929,23 +937,21 @@ EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
929int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], 937int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
930 struct ip_tunnel_parm *p) 938 struct ip_tunnel_parm *p)
931{ 939{
932 struct ip_tunnel *t, *nt; 940 struct ip_tunnel *t;
933 struct net *net = dev_net(dev);
934 struct ip_tunnel *tunnel = netdev_priv(dev); 941 struct ip_tunnel *tunnel = netdev_priv(dev);
942 struct net *net = tunnel->net;
935 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); 943 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
936 944
937 if (dev == itn->fb_tunnel_dev) 945 if (dev == itn->fb_tunnel_dev)
938 return -EINVAL; 946 return -EINVAL;
939 947
940 nt = netdev_priv(dev);
941
942 t = ip_tunnel_find(itn, p, dev->type); 948 t = ip_tunnel_find(itn, p, dev->type);
943 949
944 if (t) { 950 if (t) {
945 if (t->dev != dev) 951 if (t->dev != dev)
946 return -EEXIST; 952 return -EEXIST;
947 } else { 953 } else {
948 t = nt; 954 t = tunnel;
949 955
950 if (dev->type != ARPHRD_ETHER) { 956 if (dev->type != ARPHRD_ETHER) {
951 unsigned int nflags = 0; 957 unsigned int nflags = 0;
@@ -984,6 +990,7 @@ int ip_tunnel_init(struct net_device *dev)
984 } 990 }
985 991
986 tunnel->dev = dev; 992 tunnel->dev = dev;
993 tunnel->net = dev_net(dev);
987 strcpy(tunnel->parms.name, dev->name); 994 strcpy(tunnel->parms.name, dev->name);
988 iph->version = 4; 995 iph->version = 4;
989 iph->ihl = 5; 996 iph->ihl = 5;
@@ -994,8 +1001,8 @@ EXPORT_SYMBOL_GPL(ip_tunnel_init);
994 1001
995void ip_tunnel_uninit(struct net_device *dev) 1002void ip_tunnel_uninit(struct net_device *dev)
996{ 1003{
997 struct net *net = dev_net(dev);
998 struct ip_tunnel *tunnel = netdev_priv(dev); 1004 struct ip_tunnel *tunnel = netdev_priv(dev);
1005 struct net *net = tunnel->net;
999 struct ip_tunnel_net *itn; 1006 struct ip_tunnel_net *itn;
1000 1007
1001 itn = net_generic(net, tunnel->ip_tnl_net_id); 1008 itn = net_generic(net, tunnel->ip_tnl_net_id);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 850525b34899..d6c856b17fd4 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -46,19 +46,17 @@
46#include <net/netns/generic.h> 46#include <net/netns/generic.h>
47#include <net/rtnetlink.h> 47#include <net/rtnetlink.h>
48 48
49int iptunnel_xmit(struct net *net, struct rtable *rt, 49int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
50 struct sk_buff *skb,
51 __be32 src, __be32 dst, __u8 proto, 50 __be32 src, __be32 dst, __u8 proto,
52 __u8 tos, __u8 ttl, __be16 df) 51 __u8 tos, __u8 ttl, __be16 df, bool xnet)
53{ 52{
54 int pkt_len = skb->len; 53 int pkt_len = skb->len;
55 struct iphdr *iph; 54 struct iphdr *iph;
56 int err; 55 int err;
57 56
58 nf_reset(skb); 57 skb_scrub_packet(skb, xnet);
59 secpath_reset(skb); 58
60 skb->rxhash = 0; 59 skb->rxhash = 0;
61 skb_dst_drop(skb);
62 skb_dst_set(skb, &rt->dst); 60 skb_dst_set(skb, &rt->dst);
63 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 61 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
64 62
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 17cc0ffa8c0d..e805e7b3030e 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -44,176 +44,10 @@
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
45#include <net/netns/generic.h> 45#include <net/netns/generic.h>
46 46
47#define HASH_SIZE 16
48#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))
49
50static struct rtnl_link_ops vti_link_ops __read_mostly; 47static struct rtnl_link_ops vti_link_ops __read_mostly;
51 48
52static int vti_net_id __read_mostly; 49static int vti_net_id __read_mostly;
53struct vti_net {
54 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
55 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
56 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
57 struct ip_tunnel __rcu *tunnels_wc[1];
58 struct ip_tunnel __rcu **tunnels[4];
59
60 struct net_device *fb_tunnel_dev;
61};
62
63static int vti_fb_tunnel_init(struct net_device *dev);
64static int vti_tunnel_init(struct net_device *dev); 50static int vti_tunnel_init(struct net_device *dev);
65static void vti_tunnel_setup(struct net_device *dev);
66static void vti_dev_free(struct net_device *dev);
67static int vti_tunnel_bind_dev(struct net_device *dev);
68
69#define VTI_XMIT(stats1, stats2) do { \
70 int err; \
71 int pkt_len = skb->len; \
72 err = dst_output(skb); \
73 if (net_xmit_eval(err) == 0) { \
74 u64_stats_update_begin(&(stats1)->syncp); \
75 (stats1)->tx_bytes += pkt_len; \
76 (stats1)->tx_packets++; \
77 u64_stats_update_end(&(stats1)->syncp); \
78 } else { \
79 (stats2)->tx_errors++; \
80 (stats2)->tx_aborted_errors++; \
81 } \
82} while (0)
83
84
85static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
86 __be32 remote, __be32 local)
87{
88 unsigned h0 = HASH(remote);
89 unsigned h1 = HASH(local);
90 struct ip_tunnel *t;
91 struct vti_net *ipn = net_generic(net, vti_net_id);
92
93 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
94 if (local == t->parms.iph.saddr &&
95 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
96 return t;
97 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
98 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
99 return t;
100
101 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
102 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
103 return t;
104
105 for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
106 if (t && (t->dev->flags&IFF_UP))
107 return t;
108 return NULL;
109}
110
111static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
112 struct ip_tunnel_parm *parms)
113{
114 __be32 remote = parms->iph.daddr;
115 __be32 local = parms->iph.saddr;
116 unsigned h = 0;
117 int prio = 0;
118
119 if (remote) {
120 prio |= 2;
121 h ^= HASH(remote);
122 }
123 if (local) {
124 prio |= 1;
125 h ^= HASH(local);
126 }
127 return &ipn->tunnels[prio][h];
128}
129
130static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
131 struct ip_tunnel *t)
132{
133 return __vti_bucket(ipn, &t->parms);
134}
135
136static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
137{
138 struct ip_tunnel __rcu **tp;
139 struct ip_tunnel *iter;
140
141 for (tp = vti_bucket(ipn, t);
142 (iter = rtnl_dereference(*tp)) != NULL;
143 tp = &iter->next) {
144 if (t == iter) {
145 rcu_assign_pointer(*tp, t->next);
146 break;
147 }
148 }
149}
150
151static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
152{
153 struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);
154
155 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
156 rcu_assign_pointer(*tp, t);
157}
158
159static struct ip_tunnel *vti_tunnel_locate(struct net *net,
160 struct ip_tunnel_parm *parms,
161 int create)
162{
163 __be32 remote = parms->iph.daddr;
164 __be32 local = parms->iph.saddr;
165 struct ip_tunnel *t, *nt;
166 struct ip_tunnel __rcu **tp;
167 struct net_device *dev;
168 char name[IFNAMSIZ];
169 struct vti_net *ipn = net_generic(net, vti_net_id);
170
171 for (tp = __vti_bucket(ipn, parms);
172 (t = rtnl_dereference(*tp)) != NULL;
173 tp = &t->next) {
174 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
175 return t;
176 }
177 if (!create)
178 return NULL;
179
180 if (parms->name[0])
181 strlcpy(name, parms->name, IFNAMSIZ);
182 else
183 strcpy(name, "vti%d");
184
185 dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
186 if (dev == NULL)
187 return NULL;
188
189 dev_net_set(dev, net);
190
191 nt = netdev_priv(dev);
192 nt->parms = *parms;
193 dev->rtnl_link_ops = &vti_link_ops;
194
195 vti_tunnel_bind_dev(dev);
196
197 if (register_netdevice(dev) < 0)
198 goto failed_free;
199
200 dev_hold(dev);
201 vti_tunnel_link(ipn, nt);
202 return nt;
203
204failed_free:
205 free_netdev(dev);
206 return NULL;
207}
208
209static void vti_tunnel_uninit(struct net_device *dev)
210{
211 struct net *net = dev_net(dev);
212 struct vti_net *ipn = net_generic(net, vti_net_id);
213
214 vti_tunnel_unlink(ipn, netdev_priv(dev));
215 dev_put(dev);
216}
217 51
218static int vti_err(struct sk_buff *skb, u32 info) 52static int vti_err(struct sk_buff *skb, u32 info)
219{ 53{
@@ -222,6 +56,8 @@ static int vti_err(struct sk_buff *skb, u32 info)
222 * 8 bytes of packet payload. It means, that precise relaying of 56 * 8 bytes of packet payload. It means, that precise relaying of
223 * ICMP in the real Internet is absolutely infeasible. 57 * ICMP in the real Internet is absolutely infeasible.
224 */ 58 */
59 struct net *net = dev_net(skb->dev);
60 struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
225 struct iphdr *iph = (struct iphdr *)skb->data; 61 struct iphdr *iph = (struct iphdr *)skb->data;
226 const int type = icmp_hdr(skb)->type; 62 const int type = icmp_hdr(skb)->type;
227 const int code = icmp_hdr(skb)->code; 63 const int code = icmp_hdr(skb)->code;
@@ -252,7 +88,8 @@ static int vti_err(struct sk_buff *skb, u32 info)
252 88
253 err = -ENOENT; 89 err = -ENOENT;
254 90
255 t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); 91 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
92 iph->daddr, iph->saddr, 0);
256 if (t == NULL) 93 if (t == NULL)
257 goto out; 94 goto out;
258 95
@@ -281,8 +118,11 @@ static int vti_rcv(struct sk_buff *skb)
281{ 118{
282 struct ip_tunnel *tunnel; 119 struct ip_tunnel *tunnel;
283 const struct iphdr *iph = ip_hdr(skb); 120 const struct iphdr *iph = ip_hdr(skb);
121 struct net *net = dev_net(skb->dev);
122 struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
284 123
285 tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); 124 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
125 iph->saddr, iph->daddr, 0);
286 if (tunnel != NULL) { 126 if (tunnel != NULL) {
287 struct pcpu_tstats *tstats; 127 struct pcpu_tstats *tstats;
288 128
@@ -311,7 +151,6 @@ static int vti_rcv(struct sk_buff *skb)
311static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 151static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
312{ 152{
313 struct ip_tunnel *tunnel = netdev_priv(dev); 153 struct ip_tunnel *tunnel = netdev_priv(dev);
314 struct pcpu_tstats *tstats;
315 struct iphdr *tiph = &tunnel->parms.iph; 154 struct iphdr *tiph = &tunnel->parms.iph;
316 u8 tos; 155 u8 tos;
317 struct rtable *rt; /* Route to the other host */ 156 struct rtable *rt; /* Route to the other host */
@@ -319,6 +158,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
319 struct iphdr *old_iph = ip_hdr(skb); 158 struct iphdr *old_iph = ip_hdr(skb);
320 __be32 dst = tiph->daddr; 159 __be32 dst = tiph->daddr;
321 struct flowi4 fl4; 160 struct flowi4 fl4;
161 int err;
322 162
323 if (skb->protocol != htons(ETH_P_IP)) 163 if (skb->protocol != htons(ETH_P_IP))
324 goto tx_error; 164 goto tx_error;
@@ -367,8 +207,10 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
367 nf_reset(skb); 207 nf_reset(skb);
368 skb->dev = skb_dst(skb)->dev; 208 skb->dev = skb_dst(skb)->dev;
369 209
370 tstats = this_cpu_ptr(dev->tstats); 210 err = dst_output(skb);
371 VTI_XMIT(tstats, &dev->stats); 211 if (net_xmit_eval(err) == 0)
212 err = skb->len;
213 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
372 return NETDEV_TX_OK; 214 return NETDEV_TX_OK;
373 215
374tx_error_icmp: 216tx_error_icmp:
@@ -379,198 +221,57 @@ tx_error:
379 return NETDEV_TX_OK; 221 return NETDEV_TX_OK;
380} 222}
381 223
382static int vti_tunnel_bind_dev(struct net_device *dev)
383{
384 struct net_device *tdev = NULL;
385 struct ip_tunnel *tunnel;
386 struct iphdr *iph;
387
388 tunnel = netdev_priv(dev);
389 iph = &tunnel->parms.iph;
390
391 if (iph->daddr) {
392 struct rtable *rt;
393 struct flowi4 fl4;
394 memset(&fl4, 0, sizeof(fl4));
395 flowi4_init_output(&fl4, tunnel->parms.link,
396 be32_to_cpu(tunnel->parms.i_key),
397 RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
398 IPPROTO_IPIP, 0,
399 iph->daddr, iph->saddr, 0, 0);
400 rt = ip_route_output_key(dev_net(dev), &fl4);
401 if (!IS_ERR(rt)) {
402 tdev = rt->dst.dev;
403 ip_rt_put(rt);
404 }
405 dev->flags |= IFF_POINTOPOINT;
406 }
407
408 if (!tdev && tunnel->parms.link)
409 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
410
411 if (tdev) {
412 dev->hard_header_len = tdev->hard_header_len +
413 sizeof(struct iphdr);
414 dev->mtu = tdev->mtu;
415 }
416 dev->iflink = tunnel->parms.link;
417 return dev->mtu;
418}
419
420static int 224static int
421vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 225vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
422{ 226{
423 int err = 0; 227 int err = 0;
424 struct ip_tunnel_parm p; 228 struct ip_tunnel_parm p;
425 struct ip_tunnel *t;
426 struct net *net = dev_net(dev);
427 struct vti_net *ipn = net_generic(net, vti_net_id);
428
429 switch (cmd) {
430 case SIOCGETTUNNEL:
431 t = NULL;
432 if (dev == ipn->fb_tunnel_dev) {
433 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
434 sizeof(p))) {
435 err = -EFAULT;
436 break;
437 }
438 t = vti_tunnel_locate(net, &p, 0);
439 }
440 if (t == NULL)
441 t = netdev_priv(dev);
442 memcpy(&p, &t->parms, sizeof(p));
443 p.i_flags |= GRE_KEY | VTI_ISVTI;
444 p.o_flags |= GRE_KEY;
445 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
446 err = -EFAULT;
447 break;
448
449 case SIOCADDTUNNEL:
450 case SIOCCHGTUNNEL:
451 err = -EPERM;
452 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
453 goto done;
454 229
455 err = -EFAULT; 230 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
456 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 231 return -EFAULT;
457 goto done;
458 232
459 err = -EINVAL; 233 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
460 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || 234 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
461 p.iph.ihl != 5) 235 p.iph.ihl != 5)
462 goto done; 236 return -EINVAL;
463 237 }
464 t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
465
466 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
467 if (t != NULL) {
468 if (t->dev != dev) {
469 err = -EEXIST;
470 break;
471 }
472 } else {
473 if (((dev->flags&IFF_POINTOPOINT) &&
474 !p.iph.daddr) ||
475 (!(dev->flags&IFF_POINTOPOINT) &&
476 p.iph.daddr)) {
477 err = -EINVAL;
478 break;
479 }
480 t = netdev_priv(dev);
481 vti_tunnel_unlink(ipn, t);
482 synchronize_net();
483 t->parms.iph.saddr = p.iph.saddr;
484 t->parms.iph.daddr = p.iph.daddr;
485 t->parms.i_key = p.i_key;
486 t->parms.o_key = p.o_key;
487 t->parms.iph.protocol = IPPROTO_IPIP;
488 memcpy(dev->dev_addr, &p.iph.saddr, 4);
489 memcpy(dev->broadcast, &p.iph.daddr, 4);
490 vti_tunnel_link(ipn, t);
491 netdev_state_change(dev);
492 }
493 }
494
495 if (t) {
496 err = 0;
497 if (cmd == SIOCCHGTUNNEL) {
498 t->parms.i_key = p.i_key;
499 t->parms.o_key = p.o_key;
500 if (t->parms.link != p.link) {
501 t->parms.link = p.link;
502 vti_tunnel_bind_dev(dev);
503 netdev_state_change(dev);
504 }
505 }
506 p.i_flags |= GRE_KEY | VTI_ISVTI;
507 p.o_flags |= GRE_KEY;
508 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
509 sizeof(p)))
510 err = -EFAULT;
511 } else
512 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
513 break;
514 238
515 case SIOCDELTUNNEL: 239 err = ip_tunnel_ioctl(dev, &p, cmd);
516 err = -EPERM; 240 if (err)
517 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 241 return err;
518 goto done;
519
520 if (dev == ipn->fb_tunnel_dev) {
521 err = -EFAULT;
522 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
523 sizeof(p)))
524 goto done;
525 err = -ENOENT;
526
527 t = vti_tunnel_locate(net, &p, 0);
528 if (t == NULL)
529 goto done;
530 err = -EPERM;
531 if (t->dev == ipn->fb_tunnel_dev)
532 goto done;
533 dev = t->dev;
534 }
535 unregister_netdevice(dev);
536 err = 0;
537 break;
538 242
539 default: 243 if (cmd != SIOCDELTUNNEL) {
540 err = -EINVAL; 244 p.i_flags |= GRE_KEY | VTI_ISVTI;
245 p.o_flags |= GRE_KEY;
541 } 246 }
542 247
543done: 248 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
544 return err; 249 return -EFAULT;
545}
546
547static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
548{
549 if (new_mtu < 68 || new_mtu > 0xFFF8)
550 return -EINVAL;
551 dev->mtu = new_mtu;
552 return 0; 250 return 0;
553} 251}
554 252
555static const struct net_device_ops vti_netdev_ops = { 253static const struct net_device_ops vti_netdev_ops = {
556 .ndo_init = vti_tunnel_init, 254 .ndo_init = vti_tunnel_init,
557 .ndo_uninit = vti_tunnel_uninit, 255 .ndo_uninit = ip_tunnel_uninit,
558 .ndo_start_xmit = vti_tunnel_xmit, 256 .ndo_start_xmit = vti_tunnel_xmit,
559 .ndo_do_ioctl = vti_tunnel_ioctl, 257 .ndo_do_ioctl = vti_tunnel_ioctl,
560 .ndo_change_mtu = vti_tunnel_change_mtu, 258 .ndo_change_mtu = ip_tunnel_change_mtu,
561 .ndo_get_stats64 = ip_tunnel_get_stats64, 259 .ndo_get_stats64 = ip_tunnel_get_stats64,
562}; 260};
563 261
564static void vti_dev_free(struct net_device *dev) 262static void vti_tunnel_setup(struct net_device *dev)
565{ 263{
566 free_percpu(dev->tstats); 264 dev->netdev_ops = &vti_netdev_ops;
567 free_netdev(dev); 265 ip_tunnel_setup(dev, vti_net_id);
568} 266}
569 267
570static void vti_tunnel_setup(struct net_device *dev) 268static int vti_tunnel_init(struct net_device *dev)
571{ 269{
572 dev->netdev_ops = &vti_netdev_ops; 270 struct ip_tunnel *tunnel = netdev_priv(dev);
573 dev->destructor = vti_dev_free; 271 struct iphdr *iph = &tunnel->parms.iph;
272
273 memcpy(dev->dev_addr, &iph->saddr, 4);
274 memcpy(dev->broadcast, &iph->daddr, 4);
574 275
575 dev->type = ARPHRD_TUNNEL; 276 dev->type = ARPHRD_TUNNEL;
576 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 277 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
@@ -581,38 +282,18 @@ static void vti_tunnel_setup(struct net_device *dev)
581 dev->features |= NETIF_F_NETNS_LOCAL; 282 dev->features |= NETIF_F_NETNS_LOCAL;
582 dev->features |= NETIF_F_LLTX; 283 dev->features |= NETIF_F_LLTX;
583 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 284 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
584}
585 285
586static int vti_tunnel_init(struct net_device *dev) 286 return ip_tunnel_init(dev);
587{
588 struct ip_tunnel *tunnel = netdev_priv(dev);
589
590 tunnel->dev = dev;
591 strcpy(tunnel->parms.name, dev->name);
592
593 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
594 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
595
596 dev->tstats = alloc_percpu(struct pcpu_tstats);
597 if (!dev->tstats)
598 return -ENOMEM;
599
600 return 0;
601} 287}
602 288
603static int __net_init vti_fb_tunnel_init(struct net_device *dev) 289static void __net_init vti_fb_tunnel_init(struct net_device *dev)
604{ 290{
605 struct ip_tunnel *tunnel = netdev_priv(dev); 291 struct ip_tunnel *tunnel = netdev_priv(dev);
606 struct iphdr *iph = &tunnel->parms.iph; 292 struct iphdr *iph = &tunnel->parms.iph;
607 struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);
608 293
609 iph->version = 4; 294 iph->version = 4;
610 iph->protocol = IPPROTO_IPIP; 295 iph->protocol = IPPROTO_IPIP;
611 iph->ihl = 5; 296 iph->ihl = 5;
612
613 dev_hold(dev);
614 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
615 return 0;
616} 297}
617 298
618static struct xfrm_tunnel vti_handler __read_mostly = { 299static struct xfrm_tunnel vti_handler __read_mostly = {
@@ -621,76 +302,30 @@ static struct xfrm_tunnel vti_handler __read_mostly = {
621 .priority = 1, 302 .priority = 1,
622}; 303};
623 304
624static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
625{
626 int prio;
627
628 for (prio = 1; prio < 4; prio++) {
629 int h;
630 for (h = 0; h < HASH_SIZE; h++) {
631 struct ip_tunnel *t;
632
633 t = rtnl_dereference(ipn->tunnels[prio][h]);
634 while (t != NULL) {
635 unregister_netdevice_queue(t->dev, head);
636 t = rtnl_dereference(t->next);
637 }
638 }
639 }
640}
641
642static int __net_init vti_init_net(struct net *net) 305static int __net_init vti_init_net(struct net *net)
643{ 306{
644 int err; 307 int err;
645 struct vti_net *ipn = net_generic(net, vti_net_id); 308 struct ip_tunnel_net *itn;
646
647 ipn->tunnels[0] = ipn->tunnels_wc;
648 ipn->tunnels[1] = ipn->tunnels_l;
649 ipn->tunnels[2] = ipn->tunnels_r;
650 ipn->tunnels[3] = ipn->tunnels_r_l;
651
652 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
653 "ip_vti0",
654 vti_tunnel_setup);
655 if (!ipn->fb_tunnel_dev) {
656 err = -ENOMEM;
657 goto err_alloc_dev;
658 }
659 dev_net_set(ipn->fb_tunnel_dev, net);
660
661 err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
662 if (err)
663 goto err_reg_dev;
664 ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;
665 309
666 err = register_netdev(ipn->fb_tunnel_dev); 310 err = ip_tunnel_init_net(net, vti_net_id, &vti_link_ops, "ip_vti0");
667 if (err) 311 if (err)
668 goto err_reg_dev; 312 return err;
313 itn = net_generic(net, vti_net_id);
314 vti_fb_tunnel_init(itn->fb_tunnel_dev);
669 return 0; 315 return 0;
670
671err_reg_dev:
672 vti_dev_free(ipn->fb_tunnel_dev);
673err_alloc_dev:
674 /* nothing */
675 return err;
676} 316}
677 317
678static void __net_exit vti_exit_net(struct net *net) 318static void __net_exit vti_exit_net(struct net *net)
679{ 319{
680 struct vti_net *ipn = net_generic(net, vti_net_id); 320 struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
681 LIST_HEAD(list); 321 ip_tunnel_delete_net(itn, &vti_link_ops);
682
683 rtnl_lock();
684 vti_destroy_tunnels(ipn, &list);
685 unregister_netdevice_many(&list);
686 rtnl_unlock();
687} 322}
688 323
689static struct pernet_operations vti_net_ops = { 324static struct pernet_operations vti_net_ops = {
690 .init = vti_init_net, 325 .init = vti_init_net,
691 .exit = vti_exit_net, 326 .exit = vti_exit_net,
692 .id = &vti_net_id, 327 .id = &vti_net_id,
693 .size = sizeof(struct vti_net), 328 .size = sizeof(struct ip_tunnel_net),
694}; 329};
695 330
696static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) 331static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -728,78 +363,19 @@ static void vti_netlink_parms(struct nlattr *data[],
728static int vti_newlink(struct net *src_net, struct net_device *dev, 363static int vti_newlink(struct net *src_net, struct net_device *dev,
729 struct nlattr *tb[], struct nlattr *data[]) 364 struct nlattr *tb[], struct nlattr *data[])
730{ 365{
731 struct ip_tunnel *nt; 366 struct ip_tunnel_parm parms;
732 struct net *net = dev_net(dev);
733 struct vti_net *ipn = net_generic(net, vti_net_id);
734 int mtu;
735 int err;
736
737 nt = netdev_priv(dev);
738 vti_netlink_parms(data, &nt->parms);
739
740 if (vti_tunnel_locate(net, &nt->parms, 0))
741 return -EEXIST;
742 367
743 mtu = vti_tunnel_bind_dev(dev); 368 vti_netlink_parms(data, &parms);
744 if (!tb[IFLA_MTU]) 369 return ip_tunnel_newlink(dev, tb, &parms);
745 dev->mtu = mtu;
746
747 err = register_netdevice(dev);
748 if (err)
749 goto out;
750
751 dev_hold(dev);
752 vti_tunnel_link(ipn, nt);
753
754out:
755 return err;
756} 370}
757 371
758static int vti_changelink(struct net_device *dev, struct nlattr *tb[], 372static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
759 struct nlattr *data[]) 373 struct nlattr *data[])
760{ 374{
761 struct ip_tunnel *t, *nt;
762 struct net *net = dev_net(dev);
763 struct vti_net *ipn = net_generic(net, vti_net_id);
764 struct ip_tunnel_parm p; 375 struct ip_tunnel_parm p;
765 int mtu;
766
767 if (dev == ipn->fb_tunnel_dev)
768 return -EINVAL;
769 376
770 nt = netdev_priv(dev);
771 vti_netlink_parms(data, &p); 377 vti_netlink_parms(data, &p);
772 378 return ip_tunnel_changelink(dev, tb, &p);
773 t = vti_tunnel_locate(net, &p, 0);
774
775 if (t) {
776 if (t->dev != dev)
777 return -EEXIST;
778 } else {
779 t = nt;
780
781 vti_tunnel_unlink(ipn, t);
782 t->parms.iph.saddr = p.iph.saddr;
783 t->parms.iph.daddr = p.iph.daddr;
784 t->parms.i_key = p.i_key;
785 t->parms.o_key = p.o_key;
786 if (dev->type != ARPHRD_ETHER) {
787 memcpy(dev->dev_addr, &p.iph.saddr, 4);
788 memcpy(dev->broadcast, &p.iph.daddr, 4);
789 }
790 vti_tunnel_link(ipn, t);
791 netdev_state_change(dev);
792 }
793
794 if (t->parms.link != p.link) {
795 t->parms.link = p.link;
796 mtu = vti_tunnel_bind_dev(dev);
797 if (!tb[IFLA_MTU])
798 dev->mtu = mtu;
799 netdev_state_change(dev);
800 }
801
802 return 0;
803} 379}
804 380
805static size_t vti_get_size(const struct net_device *dev) 381static size_t vti_get_size(const struct net_device *dev)
@@ -865,7 +441,7 @@ static int __init vti_init(void)
865 err = xfrm4_mode_tunnel_input_register(&vti_handler); 441 err = xfrm4_mode_tunnel_input_register(&vti_handler);
866 if (err < 0) { 442 if (err < 0) {
867 unregister_pernet_device(&vti_net_ops); 443 unregister_pernet_device(&vti_net_ops);
868 pr_info(KERN_INFO "vti init: can't register tunnel\n"); 444 pr_info("vti init: can't register tunnel\n");
869 } 445 }
870 446
871 err = rtnl_link_register(&vti_link_ops); 447 err = rtnl_link_register(&vti_link_ops);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index b3ac3c3f6219..7f80fb4b82d3 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -285,7 +285,6 @@ static void ipip_tunnel_setup(struct net_device *dev)
285 dev->flags = IFF_NOARP; 285 dev->flags = IFF_NOARP;
286 dev->iflink = 0; 286 dev->iflink = 0;
287 dev->addr_len = 4; 287 dev->addr_len = 4;
288 dev->features |= NETIF_F_NETNS_LOCAL;
289 dev->features |= NETIF_F_LLTX; 288 dev->features |= NETIF_F_LLTX;
290 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 289 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
291 290
@@ -436,7 +435,7 @@ static int __net_init ipip_init_net(struct net *net)
436static void __net_exit ipip_exit_net(struct net *net) 435static void __net_exit ipip_exit_net(struct net *net)
437{ 436{
438 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); 437 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
439 ip_tunnel_delete_net(itn); 438 ip_tunnel_delete_net(itn, &ipip_link_ops);
440} 439}
441 440
442static struct pernet_operations ipip_net_ops = { 441static struct pernet_operations ipip_net_ops = {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 132a09664704..9ae54b09254f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -127,9 +127,9 @@ static struct kmem_cache *mrt_cachep __read_mostly;
127static struct mr_table *ipmr_new_table(struct net *net, u32 id); 127static struct mr_table *ipmr_new_table(struct net *net, u32 id);
128static void ipmr_free_table(struct mr_table *mrt); 128static void ipmr_free_table(struct mr_table *mrt);
129 129
130static int ip_mr_forward(struct net *net, struct mr_table *mrt, 130static void ip_mr_forward(struct net *net, struct mr_table *mrt,
131 struct sk_buff *skb, struct mfc_cache *cache, 131 struct sk_buff *skb, struct mfc_cache *cache,
132 int local); 132 int local);
133static int ipmr_cache_report(struct mr_table *mrt, 133static int ipmr_cache_report(struct mr_table *mrt,
134 struct sk_buff *pkt, vifi_t vifi, int assert); 134 struct sk_buff *pkt, vifi_t vifi, int assert);
135static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 135static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
@@ -1795,9 +1795,9 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1795 1795
1796/* "local" means that we should preserve one skb (for local delivery) */ 1796/* "local" means that we should preserve one skb (for local delivery) */
1797 1797
1798static int ip_mr_forward(struct net *net, struct mr_table *mrt, 1798static void ip_mr_forward(struct net *net, struct mr_table *mrt,
1799 struct sk_buff *skb, struct mfc_cache *cache, 1799 struct sk_buff *skb, struct mfc_cache *cache,
1800 int local) 1800 int local)
1801{ 1801{
1802 int psend = -1; 1802 int psend = -1;
1803 int vif, ct; 1803 int vif, ct;
@@ -1903,14 +1903,13 @@ last_forward:
1903 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 1903 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1904 } else { 1904 } else {
1905 ipmr_queue_xmit(net, mrt, skb, cache, psend); 1905 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1906 return 0; 1906 return;
1907 } 1907 }
1908 } 1908 }
1909 1909
1910dont_forward: 1910dont_forward:
1911 if (!local) 1911 if (!local)
1912 kfree_skb(skb); 1912 kfree_skb(skb);
1913 return 0;
1914} 1913}
1915 1914
1916static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 1915static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
@@ -2068,9 +2067,8 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
2068 skb_reset_network_header(skb); 2067 skb_reset_network_header(skb);
2069 skb->protocol = htons(ETH_P_IP); 2068 skb->protocol = htons(ETH_P_IP);
2070 skb->ip_summed = CHECKSUM_NONE; 2069 skb->ip_summed = CHECKSUM_NONE;
2071 skb->pkt_type = PACKET_HOST;
2072 2070
2073 skb_tunnel_rx(skb, reg_dev); 2071 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
2074 2072
2075 netif_rx(skb); 2073 netif_rx(skb);
2076 2074
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 4e9028017428..1657e39b291f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -110,6 +110,19 @@ config IP_NF_TARGET_REJECT
110 110
111 To compile it as a module, choose M here. If unsure, say N. 111 To compile it as a module, choose M here. If unsure, say N.
112 112
113config IP_NF_TARGET_SYNPROXY
114 tristate "SYNPROXY target support"
115 depends on NF_CONNTRACK && NETFILTER_ADVANCED
116 select NETFILTER_SYNPROXY
117 select SYN_COOKIES
118 help
119 The SYNPROXY target allows you to intercept TCP connections and
120 establish them using syncookies before they are passed on to the
121 server. This allows to avoid conntrack and server resource usage
122 during SYN-flood attacks.
123
124 To compile it as a module, choose M here. If unsure, say N.
125
113config IP_NF_TARGET_ULOG 126config IP_NF_TARGET_ULOG
114 tristate "ULOG target support (obsolete)" 127 tristate "ULOG target support (obsolete)"
115 default m if NETFILTER_ADVANCED=n 128 default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 007b128eecc9..3622b248b6dd 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
46obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o 46obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
47obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o 47obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
48obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o 48obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
49obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
49obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o 50obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
50 51
51# generic ARP tables 52# generic ARP tables
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index eadab1ed6500..a865f6f94013 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -48,7 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
48 net->ipv4.arptable_filter = 48 net->ipv4.arptable_filter =
49 arpt_register_table(net, &packet_filter, repl); 49 arpt_register_table(net, &packet_filter, repl);
50 kfree(repl); 50 kfree(repl);
51 return PTR_RET(net->ipv4.arptable_filter); 51 return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter);
52} 52}
53 53
54static void __net_exit arptable_filter_net_exit(struct net *net) 54static void __net_exit arptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 30e4de940567..00352ce0f0de 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -118,7 +118,7 @@ static int masq_device_event(struct notifier_block *this,
118 NF_CT_ASSERT(dev->ifindex != 0); 118 NF_CT_ASSERT(dev->ifindex != 0);
119 119
120 nf_ct_iterate_cleanup(net, device_cmp, 120 nf_ct_iterate_cleanup(net, device_cmp,
121 (void *)(long)dev->ifindex); 121 (void *)(long)dev->ifindex, 0, 0);
122 } 122 }
123 123
124 return NOTIFY_DONE; 124 return NOTIFY_DONE;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 04b18c1ac345..b969131ad1c1 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -119,7 +119,26 @@ static void send_reset(struct sk_buff *oldskb, int hook)
119 119
120 nf_ct_attach(nskb, oldskb); 120 nf_ct_attach(nskb, oldskb);
121 121
122 ip_local_out(nskb); 122#ifdef CONFIG_BRIDGE_NETFILTER
123 /* If we use ip_local_out for bridged traffic, the MAC source on
124 * the RST will be ours, instead of the destination's. This confuses
125 * some routers/firewalls, and they drop the packet. So we need to
126 * build the eth header using the original destination's MAC as the
127 * source, and send the RST packet directly.
128 */
129 if (oldskb->nf_bridge) {
130 struct ethhdr *oeth = eth_hdr(oldskb);
131 nskb->dev = oldskb->nf_bridge->physindev;
132 niph->tot_len = htons(nskb->len);
133 ip_send_check(niph);
134 if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
135 oeth->h_source, oeth->h_dest, nskb->len) < 0)
136 goto free_nskb;
137 dev_queue_xmit(nskb);
138 } else
139#endif
140 ip_local_out(nskb);
141
123 return; 142 return;
124 143
125 free_nskb: 144 free_nskb:
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
new file mode 100644
index 000000000000..67e17dcda65e
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -0,0 +1,476 @@
1/*
2 * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <net/tcp.h>
12
13#include <linux/netfilter_ipv4/ip_tables.h>
14#include <linux/netfilter/x_tables.h>
15#include <linux/netfilter/xt_SYNPROXY.h>
16#include <net/netfilter/nf_conntrack.h>
17#include <net/netfilter/nf_conntrack_seqadj.h>
18#include <net/netfilter/nf_conntrack_synproxy.h>
19
20static struct iphdr *
21synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr)
22{
23 struct iphdr *iph;
24
25 skb_reset_network_header(skb);
26 iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
27 iph->version = 4;
28 iph->ihl = sizeof(*iph) / 4;
29 iph->tos = 0;
30 iph->id = 0;
31 iph->frag_off = htons(IP_DF);
32 iph->ttl = sysctl_ip_default_ttl;
33 iph->protocol = IPPROTO_TCP;
34 iph->check = 0;
35 iph->saddr = saddr;
36 iph->daddr = daddr;
37
38 return iph;
39}
40
41static void
42synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
43 struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
44 struct iphdr *niph, struct tcphdr *nth,
45 unsigned int tcp_hdr_size)
46{
47 nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
48 nskb->ip_summed = CHECKSUM_PARTIAL;
49 nskb->csum_start = (unsigned char *)nth - nskb->head;
50 nskb->csum_offset = offsetof(struct tcphdr, check);
51
52 skb_dst_set_noref(nskb, skb_dst(skb));
53 nskb->protocol = htons(ETH_P_IP);
54 if (ip_route_me_harder(nskb, RTN_UNSPEC))
55 goto free_nskb;
56
57 if (nfct) {
58 nskb->nfct = nfct;
59 nskb->nfctinfo = ctinfo;
60 nf_conntrack_get(nfct);
61 }
62
63 ip_local_out(nskb);
64 return;
65
66free_nskb:
67 kfree_skb(nskb);
68}
69
70static void
71synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
72 const struct synproxy_options *opts)
73{
74 struct sk_buff *nskb;
75 struct iphdr *iph, *niph;
76 struct tcphdr *nth;
77 unsigned int tcp_hdr_size;
78 u16 mss = opts->mss;
79
80 iph = ip_hdr(skb);
81
82 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
83 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
84 GFP_ATOMIC);
85 if (nskb == NULL)
86 return;
87 skb_reserve(nskb, MAX_TCP_HEADER);
88
89 niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
90
91 skb_reset_transport_header(nskb);
92 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
93 nth->source = th->dest;
94 nth->dest = th->source;
95 nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
96 nth->ack_seq = htonl(ntohl(th->seq) + 1);
97 tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
98 if (opts->options & XT_SYNPROXY_OPT_ECN)
99 tcp_flag_word(nth) |= TCP_FLAG_ECE;
100 nth->doff = tcp_hdr_size / 4;
101 nth->window = 0;
102 nth->check = 0;
103 nth->urg_ptr = 0;
104
105 synproxy_build_options(nth, opts);
106
107 synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
108 niph, nth, tcp_hdr_size);
109}
110
111static void
112synproxy_send_server_syn(const struct synproxy_net *snet,
113 const struct sk_buff *skb, const struct tcphdr *th,
114 const struct synproxy_options *opts, u32 recv_seq)
115{
116 struct sk_buff *nskb;
117 struct iphdr *iph, *niph;
118 struct tcphdr *nth;
119 unsigned int tcp_hdr_size;
120
121 iph = ip_hdr(skb);
122
123 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
124 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
125 GFP_ATOMIC);
126 if (nskb == NULL)
127 return;
128 skb_reserve(nskb, MAX_TCP_HEADER);
129
130 niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
131
132 skb_reset_transport_header(nskb);
133 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
134 nth->source = th->source;
135 nth->dest = th->dest;
136 nth->seq = htonl(recv_seq - 1);
137 /* ack_seq is used to relay our ISN to the synproxy hook to initialize
138 * sequence number translation once a connection tracking entry exists.
139 */
140 nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
141 tcp_flag_word(nth) = TCP_FLAG_SYN;
142 if (opts->options & XT_SYNPROXY_OPT_ECN)
143 tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
144 nth->doff = tcp_hdr_size / 4;
145 nth->window = th->window;
146 nth->check = 0;
147 nth->urg_ptr = 0;
148
149 synproxy_build_options(nth, opts);
150
151 synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
152 niph, nth, tcp_hdr_size);
153}
154
155static void
156synproxy_send_server_ack(const struct synproxy_net *snet,
157 const struct ip_ct_tcp *state,
158 const struct sk_buff *skb, const struct tcphdr *th,
159 const struct synproxy_options *opts)
160{
161 struct sk_buff *nskb;
162 struct iphdr *iph, *niph;
163 struct tcphdr *nth;
164 unsigned int tcp_hdr_size;
165
166 iph = ip_hdr(skb);
167
168 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
169 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
170 GFP_ATOMIC);
171 if (nskb == NULL)
172 return;
173 skb_reserve(nskb, MAX_TCP_HEADER);
174
175 niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
176
177 skb_reset_transport_header(nskb);
178 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
179 nth->source = th->dest;
180 nth->dest = th->source;
181 nth->seq = htonl(ntohl(th->ack_seq));
182 nth->ack_seq = htonl(ntohl(th->seq) + 1);
183 tcp_flag_word(nth) = TCP_FLAG_ACK;
184 nth->doff = tcp_hdr_size / 4;
185 nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
186 nth->check = 0;
187 nth->urg_ptr = 0;
188
189 synproxy_build_options(nth, opts);
190
191 synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
192}
193
194static void
195synproxy_send_client_ack(const struct synproxy_net *snet,
196 const struct sk_buff *skb, const struct tcphdr *th,
197 const struct synproxy_options *opts)
198{
199 struct sk_buff *nskb;
200 struct iphdr *iph, *niph;
201 struct tcphdr *nth;
202 unsigned int tcp_hdr_size;
203
204 iph = ip_hdr(skb);
205
206 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
207 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
208 GFP_ATOMIC);
209 if (nskb == NULL)
210 return;
211 skb_reserve(nskb, MAX_TCP_HEADER);
212
213 niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
214
215 skb_reset_transport_header(nskb);
216 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
217 nth->source = th->source;
218 nth->dest = th->dest;
219 nth->seq = htonl(ntohl(th->seq) + 1);
220 nth->ack_seq = th->ack_seq;
221 tcp_flag_word(nth) = TCP_FLAG_ACK;
222 nth->doff = tcp_hdr_size / 4;
223 nth->window = ntohs(htons(th->window) >> opts->wscale);
224 nth->check = 0;
225 nth->urg_ptr = 0;
226
227 synproxy_build_options(nth, opts);
228
229 synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
230}
231
232static bool
233synproxy_recv_client_ack(const struct synproxy_net *snet,
234 const struct sk_buff *skb, const struct tcphdr *th,
235 struct synproxy_options *opts, u32 recv_seq)
236{
237 int mss;
238
239 mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
240 if (mss == 0) {
241 this_cpu_inc(snet->stats->cookie_invalid);
242 return false;
243 }
244
245 this_cpu_inc(snet->stats->cookie_valid);
246 opts->mss = mss;
247
248 if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
249 synproxy_check_timestamp_cookie(opts);
250
251 synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
252 return true;
253}
254
255static unsigned int
256synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
257{
258 const struct xt_synproxy_info *info = par->targinfo;
259 struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
260 struct synproxy_options opts = {};
261 struct tcphdr *th, _th;
262
263 if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
264 return NF_DROP;
265
266 th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
267 if (th == NULL)
268 return NF_DROP;
269
270 synproxy_parse_options(skb, par->thoff, th, &opts);
271
272 if (th->syn && !(th->ack || th->fin || th->rst)) {
273 /* Initial SYN from client */
274 this_cpu_inc(snet->stats->syn_received);
275
276 if (th->ece && th->cwr)
277 opts.options |= XT_SYNPROXY_OPT_ECN;
278
279 opts.options &= info->options;
280 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
281 synproxy_init_timestamp_cookie(info, &opts);
282 else
283 opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
284 XT_SYNPROXY_OPT_SACK_PERM |
285 XT_SYNPROXY_OPT_ECN);
286
287 synproxy_send_client_synack(skb, th, &opts);
288 return NF_DROP;
289
290 } else if (th->ack && !(th->fin || th->rst || th->syn)) {
291 /* ACK from client */
292 synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
293 return NF_DROP;
294 }
295
296 return XT_CONTINUE;
297}
298
299static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
300 struct sk_buff *skb,
301 const struct net_device *in,
302 const struct net_device *out,
303 int (*okfn)(struct sk_buff *))
304{
305 struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
306 enum ip_conntrack_info ctinfo;
307 struct nf_conn *ct;
308 struct nf_conn_synproxy *synproxy;
309 struct synproxy_options opts = {};
310 const struct ip_ct_tcp *state;
311 struct tcphdr *th, _th;
312 unsigned int thoff;
313
314 ct = nf_ct_get(skb, &ctinfo);
315 if (ct == NULL)
316 return NF_ACCEPT;
317
318 synproxy = nfct_synproxy(ct);
319 if (synproxy == NULL)
320 return NF_ACCEPT;
321
322 if (nf_is_loopback_packet(skb))
323 return NF_ACCEPT;
324
325 thoff = ip_hdrlen(skb);
326 th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
327 if (th == NULL)
328 return NF_DROP;
329
330 state = &ct->proto.tcp;
331 switch (state->state) {
332 case TCP_CONNTRACK_CLOSE:
333 if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
334 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
335 ntohl(th->seq) + 1);
336 break;
337 }
338
339 if (!th->syn || th->ack ||
340 CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
341 break;
342
343 /* Reopened connection - reset the sequence number and timestamp
344 * adjustments, they will get initialized once the connection is
345 * reestablished.
346 */
347 nf_ct_seqadj_init(ct, ctinfo, 0);
348 synproxy->tsoff = 0;
349 this_cpu_inc(snet->stats->conn_reopened);
350
351 /* fall through */
352 case TCP_CONNTRACK_SYN_SENT:
353 synproxy_parse_options(skb, thoff, th, &opts);
354
355 if (!th->syn && th->ack &&
356 CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
357 /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
358 * therefore we need to add 1 to make the SYN sequence
359 * number match the one of first SYN.
360 */
361 if (synproxy_recv_client_ack(snet, skb, th, &opts,
362 ntohl(th->seq) + 1))
363 this_cpu_inc(snet->stats->cookie_retrans);
364
365 return NF_DROP;
366 }
367
368 synproxy->isn = ntohl(th->ack_seq);
369 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
370 synproxy->its = opts.tsecr;
371 break;
372 case TCP_CONNTRACK_SYN_RECV:
373 if (!th->syn || !th->ack)
374 break;
375
376 synproxy_parse_options(skb, thoff, th, &opts);
377 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
378 synproxy->tsoff = opts.tsval - synproxy->its;
379
380 opts.options &= ~(XT_SYNPROXY_OPT_MSS |
381 XT_SYNPROXY_OPT_WSCALE |
382 XT_SYNPROXY_OPT_SACK_PERM);
383
384 swap(opts.tsval, opts.tsecr);
385 synproxy_send_server_ack(snet, state, skb, th, &opts);
386
387 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
388
389 swap(opts.tsval, opts.tsecr);
390 synproxy_send_client_ack(snet, skb, th, &opts);
391
392 consume_skb(skb);
393 return NF_STOLEN;
394 default:
395 break;
396 }
397
398 synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
399 return NF_ACCEPT;
400}
401
402static int synproxy_tg4_check(const struct xt_tgchk_param *par)
403{
404 const struct ipt_entry *e = par->entryinfo;
405
406 if (e->ip.proto != IPPROTO_TCP ||
407 e->ip.invflags & XT_INV_PROTO)
408 return -EINVAL;
409
410 return nf_ct_l3proto_try_module_get(par->family);
411}
412
413static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
414{
415 nf_ct_l3proto_module_put(par->family);
416}
417
418static struct xt_target synproxy_tg4_reg __read_mostly = {
419 .name = "SYNPROXY",
420 .family = NFPROTO_IPV4,
421 .target = synproxy_tg4,
422 .targetsize = sizeof(struct xt_synproxy_info),
423 .checkentry = synproxy_tg4_check,
424 .destroy = synproxy_tg4_destroy,
425 .me = THIS_MODULE,
426};
427
428static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
429 {
430 .hook = ipv4_synproxy_hook,
431 .owner = THIS_MODULE,
432 .pf = NFPROTO_IPV4,
433 .hooknum = NF_INET_LOCAL_IN,
434 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
435 },
436 {
437 .hook = ipv4_synproxy_hook,
438 .owner = THIS_MODULE,
439 .pf = NFPROTO_IPV4,
440 .hooknum = NF_INET_POST_ROUTING,
441 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
442 },
443};
444
445static int __init synproxy_tg4_init(void)
446{
447 int err;
448
449 err = nf_register_hooks(ipv4_synproxy_ops,
450 ARRAY_SIZE(ipv4_synproxy_ops));
451 if (err < 0)
452 goto err1;
453
454 err = xt_register_target(&synproxy_tg4_reg);
455 if (err < 0)
456 goto err2;
457
458 return 0;
459
460err2:
461 nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
462err1:
463 return err;
464}
465
466static void __exit synproxy_tg4_exit(void)
467{
468 xt_unregister_target(&synproxy_tg4_reg);
469 nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
470}
471
472module_init(synproxy_tg4_init);
473module_exit(synproxy_tg4_exit);
474
475MODULE_LICENSE("GPL");
476MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 6b3da5cf54e9..50af5b45c050 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -69,7 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
69 net->ipv4.iptable_filter = 69 net->ipv4.iptable_filter =
70 ipt_register_table(net, &packet_filter, repl); 70 ipt_register_table(net, &packet_filter, repl);
71 kfree(repl); 71 kfree(repl);
72 return PTR_RET(net->ipv4.iptable_filter); 72 return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter);
73} 73}
74 74
75static void __net_exit iptable_filter_net_exit(struct net *net) 75static void __net_exit iptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index cba5658ec82c..0d8cd82e0fad 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -107,7 +107,7 @@ static int __net_init iptable_mangle_net_init(struct net *net)
107 net->ipv4.iptable_mangle = 107 net->ipv4.iptable_mangle =
108 ipt_register_table(net, &packet_mangler, repl); 108 ipt_register_table(net, &packet_mangler, repl);
109 kfree(repl); 109 kfree(repl);
110 return PTR_RET(net->ipv4.iptable_mangle); 110 return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle);
111} 111}
112 112
113static void __net_exit iptable_mangle_net_exit(struct net *net) 113static void __net_exit iptable_mangle_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 6383273d54e1..683bfaffed65 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -292,7 +292,7 @@ static int __net_init iptable_nat_net_init(struct net *net)
292 return -ENOMEM; 292 return -ENOMEM;
293 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); 293 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
294 kfree(repl); 294 kfree(repl);
295 return PTR_RET(net->ipv4.nat_table); 295 return PTR_ERR_OR_ZERO(net->ipv4.nat_table);
296} 296}
297 297
298static void __net_exit iptable_nat_net_exit(struct net *net) 298static void __net_exit iptable_nat_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 03d9696d3c6e..1f82aea11df6 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -48,7 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net)
48 net->ipv4.iptable_raw = 48 net->ipv4.iptable_raw =
49 ipt_register_table(net, &packet_raw, repl); 49 ipt_register_table(net, &packet_raw, repl);
50 kfree(repl); 50 kfree(repl);
51 return PTR_RET(net->ipv4.iptable_raw); 51 return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw);
52} 52}
53 53
54static void __net_exit iptable_raw_net_exit(struct net *net) 54static void __net_exit iptable_raw_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index b283d8e2601a..f867a8d38bf7 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -66,7 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net)
66 net->ipv4.iptable_security = 66 net->ipv4.iptable_security =
67 ipt_register_table(net, &security_table, repl); 67 ipt_register_table(net, &security_table, repl);
68 kfree(repl); 68 kfree(repl);
69 return PTR_RET(net->ipv4.iptable_security); 69 return PTR_ERR_OR_ZERO(net->ipv4.iptable_security);
70} 70}
71 71
72static void __net_exit iptable_security_net_exit(struct net *net) 72static void __net_exit iptable_security_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 0a2e0e3e95ba..86f5b34a4ed1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -25,6 +25,7 @@
25#include <net/netfilter/nf_conntrack_l3proto.h> 25#include <net/netfilter/nf_conntrack_l3proto.h>
26#include <net/netfilter/nf_conntrack_zones.h> 26#include <net/netfilter/nf_conntrack_zones.h>
27#include <net/netfilter/nf_conntrack_core.h> 27#include <net/netfilter/nf_conntrack_core.h>
28#include <net/netfilter/nf_conntrack_seqadj.h>
28#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
29#include <net/netfilter/nf_nat_helper.h> 30#include <net/netfilter/nf_nat_helper.h>
30#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 31#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
@@ -136,11 +137,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
136 /* adjust seqs for loopback traffic only in outgoing direction */ 137 /* adjust seqs for loopback traffic only in outgoing direction */
137 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 138 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
138 !nf_is_loopback_packet(skb)) { 139 !nf_is_loopback_packet(skb)) {
139 typeof(nf_nat_seq_adjust_hook) seq_adjust; 140 if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
140
141 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
142 if (!seq_adjust ||
143 !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
144 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 141 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
145 return NF_DROP; 142 return NF_DROP;
146 } 143 }
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 746427c9e719..d7d9882d4cae 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1082,7 +1082,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
1082 __u16 srcp = ntohs(inet->inet_sport); 1082 __u16 srcp = ntohs(inet->inet_sport);
1083 1083
1084 seq_printf(f, "%5d: %08X:%04X %08X:%04X" 1084 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
1085 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n", 1085 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n",
1086 bucket, src, srcp, dest, destp, sp->sk_state, 1086 bucket, src, srcp, dest, destp, sp->sk_state,
1087 sk_wmem_alloc_get(sp), 1087 sk_wmem_alloc_get(sp),
1088 sk_rmem_alloc_get(sp), 1088 sk_rmem_alloc_get(sp),
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 463bd1273346..4a0335854b89 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -111,7 +111,7 @@ static const struct snmp_mib snmp4_ipstats_list[] = {
111 SNMP_MIB_SENTINEL 111 SNMP_MIB_SENTINEL
112}; 112};
113 113
114/* Following RFC4293 items are displayed in /proc/net/netstat */ 114/* Following items are displayed in /proc/net/netstat */
115static const struct snmp_mib snmp4_ipextstats_list[] = { 115static const struct snmp_mib snmp4_ipextstats_list[] = {
116 SNMP_MIB_ITEM("InNoRoutes", IPSTATS_MIB_INNOROUTES), 116 SNMP_MIB_ITEM("InNoRoutes", IPSTATS_MIB_INNOROUTES),
117 SNMP_MIB_ITEM("InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS), 117 SNMP_MIB_ITEM("InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS),
@@ -125,7 +125,12 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
125 SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), 125 SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
126 SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), 126 SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
127 SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), 127 SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
128 /* Non RFC4293 fields */
128 SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS), 129 SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),
130 SNMP_MIB_ITEM("InNoECTPkts", IPSTATS_MIB_NOECTPKTS),
131 SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
132 SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
133 SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS),
129 SNMP_MIB_SENTINEL 134 SNMP_MIB_SENTINEL
130}; 135};
131 136
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 61e60d67adca..a86c7ae71881 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -988,7 +988,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
988 srcp = inet->inet_num; 988 srcp = inet->inet_num;
989 989
990 seq_printf(seq, "%4d: %08X:%04X %08X:%04X" 990 seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
991 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", 991 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
992 i, src, srcp, dest, destp, sp->sk_state, 992 i, src, srcp, dest, destp, sp->sk_state,
993 sk_wmem_alloc_get(sp), 993 sk_wmem_alloc_get(sp),
994 sk_rmem_alloc_get(sp), 994 sk_rmem_alloc_get(sp),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a9a54a236832..727f4365bcdf 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,8 @@
112#define RT_FL_TOS(oldflp4) \ 112#define RT_FL_TOS(oldflp4) \
113 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) 113 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
114 114
115#define IP_MAX_MTU 0xFFF0 115/* IPv4 datagram length is stored into 16bit field (tot_len) */
116#define IP_MAX_MTU 0xFFFF
116 117
117#define RT_GC_TIMEOUT (300*HZ) 118#define RT_GC_TIMEOUT (300*HZ)
118 119
@@ -435,12 +436,12 @@ static inline int ip_rt_proc_init(void)
435 436
436static inline bool rt_is_expired(const struct rtable *rth) 437static inline bool rt_is_expired(const struct rtable *rth)
437{ 438{
438 return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); 439 return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
439} 440}
440 441
441void rt_cache_flush(struct net *net) 442void rt_cache_flush(struct net *net)
442{ 443{
443 rt_genid_bump(net); 444 rt_genid_bump_ipv4(net);
444} 445}
445 446
446static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, 447static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -1227,10 +1228,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
1227 mtu = 576; 1228 mtu = 576;
1228 } 1229 }
1229 1230
1230 if (mtu > IP_MAX_MTU) 1231 return min_t(unsigned int, mtu, IP_MAX_MTU);
1231 mtu = IP_MAX_MTU;
1232
1233 return mtu;
1234} 1232}
1235 1233
1236static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) 1234static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
@@ -1458,7 +1456,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1458#endif 1456#endif
1459 rth->dst.output = ip_rt_bug; 1457 rth->dst.output = ip_rt_bug;
1460 1458
1461 rth->rt_genid = rt_genid(dev_net(dev)); 1459 rth->rt_genid = rt_genid_ipv4(dev_net(dev));
1462 rth->rt_flags = RTCF_MULTICAST; 1460 rth->rt_flags = RTCF_MULTICAST;
1463 rth->rt_type = RTN_MULTICAST; 1461 rth->rt_type = RTN_MULTICAST;
1464 rth->rt_is_input= 1; 1462 rth->rt_is_input= 1;
@@ -1589,7 +1587,7 @@ static int __mkroute_input(struct sk_buff *skb,
1589 goto cleanup; 1587 goto cleanup;
1590 } 1588 }
1591 1589
1592 rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); 1590 rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
1593 rth->rt_flags = flags; 1591 rth->rt_flags = flags;
1594 rth->rt_type = res->type; 1592 rth->rt_type = res->type;
1595 rth->rt_is_input = 1; 1593 rth->rt_is_input = 1;
@@ -1760,7 +1758,7 @@ local_input:
1760 rth->dst.tclassid = itag; 1758 rth->dst.tclassid = itag;
1761#endif 1759#endif
1762 1760
1763 rth->rt_genid = rt_genid(net); 1761 rth->rt_genid = rt_genid_ipv4(net);
1764 rth->rt_flags = flags|RTCF_LOCAL; 1762 rth->rt_flags = flags|RTCF_LOCAL;
1765 rth->rt_type = res.type; 1763 rth->rt_type = res.type;
1766 rth->rt_is_input = 1; 1764 rth->rt_is_input = 1;
@@ -1945,7 +1943,7 @@ add:
1945 1943
1946 rth->dst.output = ip_output; 1944 rth->dst.output = ip_output;
1947 1945
1948 rth->rt_genid = rt_genid(dev_net(dev_out)); 1946 rth->rt_genid = rt_genid_ipv4(dev_net(dev_out));
1949 rth->rt_flags = flags; 1947 rth->rt_flags = flags;
1950 rth->rt_type = type; 1948 rth->rt_type = type;
1951 rth->rt_is_input = 0; 1949 rth->rt_is_input = 0;
@@ -2227,7 +2225,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
2227 rt->rt_iif = ort->rt_iif; 2225 rt->rt_iif = ort->rt_iif;
2228 rt->rt_pmtu = ort->rt_pmtu; 2226 rt->rt_pmtu = ort->rt_pmtu;
2229 2227
2230 rt->rt_genid = rt_genid(net); 2228 rt->rt_genid = rt_genid_ipv4(net);
2231 rt->rt_flags = ort->rt_flags; 2229 rt->rt_flags = ort->rt_flags;
2232 rt->rt_type = ort->rt_type; 2230 rt->rt_type = ort->rt_type;
2233 rt->rt_gateway = ort->rt_gateway; 2231 rt->rt_gateway = ort->rt_gateway;
@@ -2665,7 +2663,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
2665 2663
2666static __net_init int rt_genid_init(struct net *net) 2664static __net_init int rt_genid_init(struct net *net)
2667{ 2665{
2668 atomic_set(&net->rt_genid, 0); 2666 atomic_set(&net->ipv4.rt_genid, 0);
2669 atomic_set(&net->fnhe_genid, 0); 2667 atomic_set(&net->fnhe_genid, 0);
2670 get_random_bytes(&net->ipv4.dev_addr_genid, 2668 get_random_bytes(&net->ipv4.dev_addr_genid,
2671 sizeof(net->ipv4.dev_addr_genid)); 2669 sizeof(net->ipv4.dev_addr_genid));
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b05c96e7af8b..14a15c49129d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -160,26 +160,33 @@ static __u16 const msstab[] = {
160 * Generate a syncookie. mssp points to the mss, which is returned 160 * Generate a syncookie. mssp points to the mss, which is returned
161 * rounded down to the value encoded in the cookie. 161 * rounded down to the value encoded in the cookie.
162 */ 162 */
163__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) 163u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
164 u16 *mssp)
164{ 165{
165 const struct iphdr *iph = ip_hdr(skb);
166 const struct tcphdr *th = tcp_hdr(skb);
167 int mssind; 166 int mssind;
168 const __u16 mss = *mssp; 167 const __u16 mss = *mssp;
169 168
170 tcp_synq_overflow(sk);
171
172 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) 169 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
173 if (mss >= msstab[mssind]) 170 if (mss >= msstab[mssind])
174 break; 171 break;
175 *mssp = msstab[mssind]; 172 *mssp = msstab[mssind];
176 173
177 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
178
179 return secure_tcp_syn_cookie(iph->saddr, iph->daddr, 174 return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
180 th->source, th->dest, ntohl(th->seq), 175 th->source, th->dest, ntohl(th->seq),
181 jiffies / (HZ * 60), mssind); 176 jiffies / (HZ * 60), mssind);
182} 177}
178EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
179
180__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
181{
182 const struct iphdr *iph = ip_hdr(skb);
183 const struct tcphdr *th = tcp_hdr(skb);
184
185 tcp_synq_overflow(sk);
186 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
187
188 return __cookie_v4_init_sequence(iph, th, mssp);
189}
183 190
184/* 191/*
185 * This (misnamed) value is the age of syncookie which is permitted. 192 * This (misnamed) value is the age of syncookie which is permitted.
@@ -192,10 +199,9 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
192 * Check if a ack sequence number is a valid syncookie. 199 * Check if a ack sequence number is a valid syncookie.
193 * Return the decoded mss if it is, or 0 if not. 200 * Return the decoded mss if it is, or 0 if not.
194 */ 201 */
195static inline int cookie_check(struct sk_buff *skb, __u32 cookie) 202int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
203 u32 cookie)
196{ 204{
197 const struct iphdr *iph = ip_hdr(skb);
198 const struct tcphdr *th = tcp_hdr(skb);
199 __u32 seq = ntohl(th->seq) - 1; 205 __u32 seq = ntohl(th->seq) - 1;
200 __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, 206 __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
201 th->source, th->dest, seq, 207 th->source, th->dest, seq,
@@ -204,6 +210,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
204 210
205 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; 211 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
206} 212}
213EXPORT_SYMBOL_GPL(__cookie_v4_check);
207 214
208static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, 215static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
209 struct request_sock *req, 216 struct request_sock *req,
@@ -284,7 +291,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
284 goto out; 291 goto out;
285 292
286 if (tcp_synq_no_recent_overflow(sk) || 293 if (tcp_synq_no_recent_overflow(sk) ||
287 (mss = cookie_check(skb, cookie)) == 0) { 294 (mss = __cookie_v4_check(ip_hdr(skb), th, cookie)) == 0) {
288 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); 295 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
289 goto out; 296 goto out;
290 } 297 }
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 610e324348d1..540279f4c531 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -29,6 +29,7 @@
29static int zero; 29static int zero;
30static int one = 1; 30static int one = 1;
31static int four = 4; 31static int four = 4;
32static int gso_max_segs = GSO_MAX_SEGS;
32static int tcp_retr1_max = 255; 33static int tcp_retr1_max = 255;
33static int ip_local_port_range_min[] = { 1, 1 }; 34static int ip_local_port_range_min[] = { 1, 1 };
34static int ip_local_port_range_max[] = { 65535, 65535 }; 35static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -559,6 +560,13 @@ static struct ctl_table ipv4_table[] = {
559 .extra1 = &one, 560 .extra1 = &one,
560 }, 561 },
561 { 562 {
563 .procname = "tcp_notsent_lowat",
564 .data = &sysctl_tcp_notsent_lowat,
565 .maxlen = sizeof(sysctl_tcp_notsent_lowat),
566 .mode = 0644,
567 .proc_handler = proc_dointvec,
568 },
569 {
562 .procname = "tcp_rmem", 570 .procname = "tcp_rmem",
563 .data = &sysctl_tcp_rmem, 571 .data = &sysctl_tcp_rmem,
564 .maxlen = sizeof(sysctl_tcp_rmem), 572 .maxlen = sizeof(sysctl_tcp_rmem),
@@ -754,6 +762,15 @@ static struct ctl_table ipv4_table[] = {
754 .extra2 = &four, 762 .extra2 = &four,
755 }, 763 },
756 { 764 {
765 .procname = "tcp_min_tso_segs",
766 .data = &sysctl_tcp_min_tso_segs,
767 .maxlen = sizeof(int),
768 .mode = 0644,
769 .proc_handler = proc_dointvec_minmax,
770 .extra1 = &zero,
771 .extra2 = &gso_max_segs,
772 },
773 {
757 .procname = "udp_mem", 774 .procname = "udp_mem",
758 .data = &sysctl_udp_mem, 775 .data = &sysctl_udp_mem,
759 .maxlen = sizeof(sysctl_udp_mem), 776 .maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b2f6c74861af..6e5617b9f9db 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,6 +283,8 @@
283 283
284int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; 284int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
285 285
286int sysctl_tcp_min_tso_segs __read_mostly = 2;
287
286struct percpu_counter tcp_orphan_count; 288struct percpu_counter tcp_orphan_count;
287EXPORT_SYMBOL_GPL(tcp_orphan_count); 289EXPORT_SYMBOL_GPL(tcp_orphan_count);
288 290
@@ -410,10 +412,6 @@ void tcp_init_sock(struct sock *sk)
410 412
411 icsk->icsk_sync_mss = tcp_sync_mss; 413 icsk->icsk_sync_mss = tcp_sync_mss;
412 414
413 /* Presumed zeroed, in order of appearance:
414 * cookie_in_always, cookie_out_never,
415 * s_data_constant, s_data_in, s_data_out
416 */
417 sk->sk_sndbuf = sysctl_tcp_wmem[1]; 415 sk->sk_sndbuf = sysctl_tcp_wmem[1];
418 sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 416 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
419 417
@@ -499,7 +497,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
499 mask |= POLLIN | POLLRDNORM; 497 mask |= POLLIN | POLLRDNORM;
500 498
501 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 499 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
502 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { 500 if (sk_stream_is_writeable(sk)) {
503 mask |= POLLOUT | POLLWRNORM; 501 mask |= POLLOUT | POLLWRNORM;
504 } else { /* send SIGIO later */ 502 } else { /* send SIGIO later */
505 set_bit(SOCK_ASYNC_NOSPACE, 503 set_bit(SOCK_ASYNC_NOSPACE,
@@ -510,7 +508,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
510 * wspace test but before the flags are set, 508 * wspace test but before the flags are set,
511 * IO signal will be lost. 509 * IO signal will be lost.
512 */ 510 */
513 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) 511 if (sk_stream_is_writeable(sk))
514 mask |= POLLOUT | POLLWRNORM; 512 mask |= POLLOUT | POLLWRNORM;
515 } 513 }
516 } else 514 } else
@@ -789,12 +787,28 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
789 xmit_size_goal = mss_now; 787 xmit_size_goal = mss_now;
790 788
791 if (large_allowed && sk_can_gso(sk)) { 789 if (large_allowed && sk_can_gso(sk)) {
792 xmit_size_goal = ((sk->sk_gso_max_size - 1) - 790 u32 gso_size, hlen;
793 inet_csk(sk)->icsk_af_ops->net_header_len - 791
794 inet_csk(sk)->icsk_ext_hdr_len - 792 /* Maybe we should/could use sk->sk_prot->max_header here ? */
795 tp->tcp_header_len); 793 hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
794 inet_csk(sk)->icsk_ext_hdr_len +
795 tp->tcp_header_len;
796
797 /* Goal is to send at least one packet per ms,
798 * not one big TSO packet every 100 ms.
799 * This preserves ACK clocking and is consistent
800 * with tcp_tso_should_defer() heuristic.
801 */
802 gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
803 gso_size = max_t(u32, gso_size,
804 sysctl_tcp_min_tso_segs * mss_now);
805
806 xmit_size_goal = min_t(u32, gso_size,
807 sk->sk_gso_max_size - 1 - hlen);
796 808
797 /* TSQ : try to have two TSO segments in flight */ 809 /* TSQ : try to have at least two segments in flight
810 * (one in NIC TX ring, another in Qdisc)
811 */
798 xmit_size_goal = min_t(u32, xmit_size_goal, 812 xmit_size_goal = min_t(u32, xmit_size_goal,
799 sysctl_tcp_limit_output_bytes >> 1); 813 sysctl_tcp_limit_output_bytes >> 1);
800 814
@@ -2454,10 +2468,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2454 case TCP_THIN_DUPACK: 2468 case TCP_THIN_DUPACK:
2455 if (val < 0 || val > 1) 2469 if (val < 0 || val > 1)
2456 err = -EINVAL; 2470 err = -EINVAL;
2457 else 2471 else {
2458 tp->thin_dupack = val; 2472 tp->thin_dupack = val;
2459 if (tp->thin_dupack) 2473 if (tp->thin_dupack)
2460 tcp_disable_early_retrans(tp); 2474 tcp_disable_early_retrans(tp);
2475 }
2461 break; 2476 break;
2462 2477
2463 case TCP_REPAIR: 2478 case TCP_REPAIR:
@@ -2638,6 +2653,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2638 else 2653 else
2639 tp->tsoffset = val - tcp_time_stamp; 2654 tp->tsoffset = val - tcp_time_stamp;
2640 break; 2655 break;
2656 case TCP_NOTSENT_LOWAT:
2657 tp->notsent_lowat = val;
2658 sk->sk_write_space(sk);
2659 break;
2641 default: 2660 default:
2642 err = -ENOPROTOOPT; 2661 err = -ENOPROTOOPT;
2643 break; 2662 break;
@@ -2854,6 +2873,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2854 case TCP_TIMESTAMP: 2873 case TCP_TIMESTAMP:
2855 val = tcp_time_stamp + tp->tsoffset; 2874 val = tcp_time_stamp + tp->tsoffset;
2856 break; 2875 break;
2876 case TCP_NOTSENT_LOWAT:
2877 val = tp->notsent_lowat;
2878 break;
2857 default: 2879 default:
2858 return -ENOPROTOOPT; 2880 return -ENOPROTOOPT;
2859 } 2881 }
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 8f7ef0ad80e5..ab7bd35bb312 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -58,23 +58,22 @@ error: kfree(ctx);
58 return err; 58 return err;
59} 59}
60 60
61/* Computes the fastopen cookie for the peer. 61/* Computes the fastopen cookie for the IP path.
62 * The peer address is a 128 bits long (pad with zeros for IPv4). 62 * The path is a 128 bits long (pad with zeros for IPv4).
63 * 63 *
64 * The caller must check foc->len to determine if a valid cookie 64 * The caller must check foc->len to determine if a valid cookie
65 * has been generated successfully. 65 * has been generated successfully.
66*/ 66*/
67void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc) 67void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
68 struct tcp_fastopen_cookie *foc)
68{ 69{
69 __be32 peer_addr[4] = { addr, 0, 0, 0 }; 70 __be32 path[4] = { src, dst, 0, 0 };
70 struct tcp_fastopen_context *ctx; 71 struct tcp_fastopen_context *ctx;
71 72
72 rcu_read_lock(); 73 rcu_read_lock();
73 ctx = rcu_dereference(tcp_fastopen_ctx); 74 ctx = rcu_dereference(tcp_fastopen_ctx);
74 if (ctx) { 75 if (ctx) {
75 crypto_cipher_encrypt_one(ctx->tfm, 76 crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path);
76 foc->val,
77 (__u8 *)peer_addr);
78 foc->len = TCP_FASTOPEN_COOKIE_SIZE; 77 foc->len = TCP_FASTOPEN_COOKIE_SIZE;
79 } 78 }
80 rcu_read_unlock(); 79 rcu_read_unlock();
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ca2139a130b..25a89eaa669d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -688,6 +688,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
688 } 688 }
689} 689}
690 690
691/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
692 * Note: TCP stack does not yet implement pacing.
693 * FQ packet scheduler can be used to implement cheap but effective
694 * TCP pacing, to smooth the burst on large writes when packets
695 * in flight is significantly lower than cwnd (or rwin)
696 */
697static void tcp_update_pacing_rate(struct sock *sk)
698{
699 const struct tcp_sock *tp = tcp_sk(sk);
700 u64 rate;
701
702 /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
703 rate = (u64)tp->mss_cache * 2 * (HZ << 3);
704
705 rate *= max(tp->snd_cwnd, tp->packets_out);
706
707 /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
708 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
709 * We probably need usec resolution in the future.
710 * Note: This also takes care of possible srtt=0 case,
711 * when tcp_rtt_estimator() was not yet called.
712 */
713 if (tp->srtt > 8 + 2)
714 do_div(rate, tp->srtt);
715
716 sk->sk_pacing_rate = min_t(u64, rate, ~0U);
717}
718
691/* Calculate rto without backoff. This is the second half of Van Jacobson's 719/* Calculate rto without backoff. This is the second half of Van Jacobson's
692 * routine referred to above. 720 * routine referred to above.
693 */ 721 */
@@ -1048,6 +1076,7 @@ struct tcp_sacktag_state {
1048 int reord; 1076 int reord;
1049 int fack_count; 1077 int fack_count;
1050 int flag; 1078 int flag;
1079 s32 rtt; /* RTT measured by SACKing never-retransmitted data */
1051}; 1080};
1052 1081
1053/* Check if skb is fully within the SACK block. In presence of GSO skbs, 1082/* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1108,7 +1137,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1108static u8 tcp_sacktag_one(struct sock *sk, 1137static u8 tcp_sacktag_one(struct sock *sk,
1109 struct tcp_sacktag_state *state, u8 sacked, 1138 struct tcp_sacktag_state *state, u8 sacked,
1110 u32 start_seq, u32 end_seq, 1139 u32 start_seq, u32 end_seq,
1111 bool dup_sack, int pcount) 1140 int dup_sack, int pcount, u32 xmit_time)
1112{ 1141{
1113 struct tcp_sock *tp = tcp_sk(sk); 1142 struct tcp_sock *tp = tcp_sk(sk);
1114 int fack_count = state->fack_count; 1143 int fack_count = state->fack_count;
@@ -1148,6 +1177,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
1148 state->reord); 1177 state->reord);
1149 if (!after(end_seq, tp->high_seq)) 1178 if (!after(end_seq, tp->high_seq))
1150 state->flag |= FLAG_ORIG_SACK_ACKED; 1179 state->flag |= FLAG_ORIG_SACK_ACKED;
1180 /* Pick the earliest sequence sacked for RTT */
1181 if (state->rtt < 0)
1182 state->rtt = tcp_time_stamp - xmit_time;
1151 } 1183 }
1152 1184
1153 if (sacked & TCPCB_LOST) { 1185 if (sacked & TCPCB_LOST) {
@@ -1205,7 +1237,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1205 * tcp_highest_sack_seq() when skb is highest_sack. 1237 * tcp_highest_sack_seq() when skb is highest_sack.
1206 */ 1238 */
1207 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, 1239 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1208 start_seq, end_seq, dup_sack, pcount); 1240 start_seq, end_seq, dup_sack, pcount,
1241 TCP_SKB_CB(skb)->when);
1209 1242
1210 if (skb == tp->lost_skb_hint) 1243 if (skb == tp->lost_skb_hint)
1211 tp->lost_cnt_hint += pcount; 1244 tp->lost_cnt_hint += pcount;
@@ -1479,7 +1512,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1479 TCP_SKB_CB(skb)->seq, 1512 TCP_SKB_CB(skb)->seq,
1480 TCP_SKB_CB(skb)->end_seq, 1513 TCP_SKB_CB(skb)->end_seq,
1481 dup_sack, 1514 dup_sack,
1482 tcp_skb_pcount(skb)); 1515 tcp_skb_pcount(skb),
1516 TCP_SKB_CB(skb)->when);
1483 1517
1484 if (!before(TCP_SKB_CB(skb)->seq, 1518 if (!before(TCP_SKB_CB(skb)->seq,
1485 tcp_highest_sack_seq(tp))) 1519 tcp_highest_sack_seq(tp)))
@@ -1536,7 +1570,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
1536 1570
1537static int 1571static int
1538tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, 1572tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1539 u32 prior_snd_una) 1573 u32 prior_snd_una, s32 *sack_rtt)
1540{ 1574{
1541 struct tcp_sock *tp = tcp_sk(sk); 1575 struct tcp_sock *tp = tcp_sk(sk);
1542 const unsigned char *ptr = (skb_transport_header(ack_skb) + 1576 const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1554,6 +1588,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1554 1588
1555 state.flag = 0; 1589 state.flag = 0;
1556 state.reord = tp->packets_out; 1590 state.reord = tp->packets_out;
1591 state.rtt = -1;
1557 1592
1558 if (!tp->sacked_out) { 1593 if (!tp->sacked_out) {
1559 if (WARN_ON(tp->fackets_out)) 1594 if (WARN_ON(tp->fackets_out))
@@ -1737,6 +1772,7 @@ out:
1737 WARN_ON((int)tp->retrans_out < 0); 1772 WARN_ON((int)tp->retrans_out < 0);
1738 WARN_ON((int)tcp_packets_in_flight(tp) < 0); 1773 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1739#endif 1774#endif
1775 *sack_rtt = state.rtt;
1740 return state.flag; 1776 return state.flag;
1741} 1777}
1742 1778
@@ -1869,8 +1905,13 @@ void tcp_enter_loss(struct sock *sk, int how)
1869 } 1905 }
1870 tcp_verify_left_out(tp); 1906 tcp_verify_left_out(tp);
1871 1907
1872 tp->reordering = min_t(unsigned int, tp->reordering, 1908 /* Timeout in disordered state after receiving substantial DUPACKs
1873 sysctl_tcp_reordering); 1909 * suggests that the degree of reordering is over-estimated.
1910 */
1911 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
1912 tp->sacked_out >= sysctl_tcp_reordering)
1913 tp->reordering = min_t(unsigned int, tp->reordering,
1914 sysctl_tcp_reordering);
1874 tcp_set_ca_state(sk, TCP_CA_Loss); 1915 tcp_set_ca_state(sk, TCP_CA_Loss);
1875 tp->high_seq = tp->snd_nxt; 1916 tp->high_seq = tp->snd_nxt;
1876 TCP_ECN_queue_cwr(tp); 1917 TCP_ECN_queue_cwr(tp);
@@ -2472,8 +2513,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
2472 2513
2473 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2514 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2474 tcp_try_keep_open(sk); 2515 tcp_try_keep_open(sk);
2475 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2476 tcp_moderate_cwnd(tp);
2477 } else { 2516 } else {
2478 tcp_cwnd_reduction(sk, prior_unsacked, 0); 2517 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2479 } 2518 }
@@ -2792,65 +2831,51 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2792 tcp_xmit_retransmit_queue(sk); 2831 tcp_xmit_retransmit_queue(sk);
2793} 2832}
2794 2833
2795void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) 2834static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2835 s32 seq_rtt, s32 sack_rtt)
2796{ 2836{
2797 tcp_rtt_estimator(sk, seq_rtt); 2837 const struct tcp_sock *tp = tcp_sk(sk);
2798 tcp_set_rto(sk); 2838
2799 inet_csk(sk)->icsk_backoff = 0; 2839 /* Prefer RTT measured from ACK's timing to TS-ECR. This is because
2800} 2840 * broken middle-boxes or peers may corrupt TS-ECR fields. But
2801EXPORT_SYMBOL(tcp_valid_rtt_meas); 2841 * Karn's algorithm forbids taking RTT if some retransmitted data
2842 * is acked (RFC6298).
2843 */
2844 if (flag & FLAG_RETRANS_DATA_ACKED)
2845 seq_rtt = -1;
2846
2847 if (seq_rtt < 0)
2848 seq_rtt = sack_rtt;
2802 2849
2803/* Read draft-ietf-tcplw-high-performance before mucking
2804 * with this code. (Supersedes RFC1323)
2805 */
2806static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
2807{
2808 /* RTTM Rule: A TSecr value received in a segment is used to 2850 /* RTTM Rule: A TSecr value received in a segment is used to
2809 * update the averaged RTT measurement only if the segment 2851 * update the averaged RTT measurement only if the segment
2810 * acknowledges some new data, i.e., only if it advances the 2852 * acknowledges some new data, i.e., only if it advances the
2811 * left edge of the send window. 2853 * left edge of the send window.
2812 *
2813 * See draft-ietf-tcplw-high-performance-00, section 3.3. 2854 * See draft-ietf-tcplw-high-performance-00, section 3.3.
2814 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
2815 *
2816 * Changed: reset backoff as soon as we see the first valid sample.
2817 * If we do not, we get strongly overestimated rto. With timestamps
2818 * samples are accepted even from very old segments: f.e., when rtt=1
2819 * increases to 8, we retransmit 5 times and after 8 seconds delayed
2820 * answer arrives rto becomes 120 seconds! If at least one of segments
2821 * in window is lost... Voila. --ANK (010210)
2822 */ 2855 */
2823 struct tcp_sock *tp = tcp_sk(sk); 2856 if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
2824 2857 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
2825 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
2826}
2827 2858
2828static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) 2859 if (seq_rtt < 0)
2829{ 2860 return false;
2830 /* We don't have a timestamp. Can only use
2831 * packets that are not retransmitted to determine
2832 * rtt estimates. Also, we must not reset the
2833 * backoff for rto until we get a non-retransmitted
2834 * packet. This allows us to deal with a situation
2835 * where the network delay has increased suddenly.
2836 * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2837 */
2838 2861
2839 if (flag & FLAG_RETRANS_DATA_ACKED) 2862 tcp_rtt_estimator(sk, seq_rtt);
2840 return; 2863 tcp_set_rto(sk);
2841 2864
2842 tcp_valid_rtt_meas(sk, seq_rtt); 2865 /* RFC6298: only reset backoff on valid RTT measurement. */
2866 inet_csk(sk)->icsk_backoff = 0;
2867 return true;
2843} 2868}
2844 2869
2845static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, 2870/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
2846 const s32 seq_rtt) 2871static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
2847{ 2872{
2848 const struct tcp_sock *tp = tcp_sk(sk); 2873 struct tcp_sock *tp = tcp_sk(sk);
2849 /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ 2874 s32 seq_rtt = -1;
2850 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) 2875
2851 tcp_ack_saw_tstamp(sk, flag); 2876 if (tp->lsndtime && !tp->total_retrans)
2852 else if (seq_rtt >= 0) 2877 seq_rtt = tcp_time_stamp - tp->lsndtime;
2853 tcp_ack_no_tstamp(sk, seq_rtt, flag); 2878 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
2854} 2879}
2855 2880
2856static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) 2881static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
@@ -2939,7 +2964,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
2939 * arrived at the other end. 2964 * arrived at the other end.
2940 */ 2965 */
2941static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, 2966static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
2942 u32 prior_snd_una) 2967 u32 prior_snd_una, s32 sack_rtt)
2943{ 2968{
2944 struct tcp_sock *tp = tcp_sk(sk); 2969 struct tcp_sock *tp = tcp_sk(sk);
2945 const struct inet_connection_sock *icsk = inet_csk(sk); 2970 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2978,8 +3003,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
2978 if (sacked & TCPCB_SACKED_RETRANS) 3003 if (sacked & TCPCB_SACKED_RETRANS)
2979 tp->retrans_out -= acked_pcount; 3004 tp->retrans_out -= acked_pcount;
2980 flag |= FLAG_RETRANS_DATA_ACKED; 3005 flag |= FLAG_RETRANS_DATA_ACKED;
2981 ca_seq_rtt = -1;
2982 seq_rtt = -1;
2983 } else { 3006 } else {
2984 ca_seq_rtt = now - scb->when; 3007 ca_seq_rtt = now - scb->when;
2985 last_ackt = skb->tstamp; 3008 last_ackt = skb->tstamp;
@@ -3031,6 +3054,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3031 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) 3054 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3032 flag |= FLAG_SACK_RENEGING; 3055 flag |= FLAG_SACK_RENEGING;
3033 3056
3057 if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
3058 (flag & FLAG_ACKED))
3059 tcp_rearm_rto(sk);
3060
3034 if (flag & FLAG_ACKED) { 3061 if (flag & FLAG_ACKED) {
3035 const struct tcp_congestion_ops *ca_ops 3062 const struct tcp_congestion_ops *ca_ops
3036 = inet_csk(sk)->icsk_ca_ops; 3063 = inet_csk(sk)->icsk_ca_ops;
@@ -3040,9 +3067,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3040 tcp_mtup_probe_success(sk); 3067 tcp_mtup_probe_success(sk);
3041 } 3068 }
3042 3069
3043 tcp_ack_update_rtt(sk, flag, seq_rtt);
3044 tcp_rearm_rto(sk);
3045
3046 if (tcp_is_reno(tp)) { 3070 if (tcp_is_reno(tp)) {
3047 tcp_remove_reno_sacks(sk, pkts_acked); 3071 tcp_remove_reno_sacks(sk, pkts_acked);
3048 } else { 3072 } else {
@@ -3130,11 +3154,22 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3130 inet_csk(sk)->icsk_ca_state != TCP_CA_Open; 3154 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3131} 3155}
3132 3156
3157/* Decide wheather to run the increase function of congestion control. */
3133static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) 3158static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3134{ 3159{
3135 const struct tcp_sock *tp = tcp_sk(sk); 3160 if (tcp_in_cwnd_reduction(sk))
3136 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && 3161 return false;
3137 !tcp_in_cwnd_reduction(sk); 3162
3163 /* If reordering is high then always grow cwnd whenever data is
3164 * delivered regardless of its ordering. Otherwise stay conservative
3165 * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
3166 * new SACK or ECE mark may first advance cwnd here and later reduce
3167 * cwnd in tcp_fastretrans_alert() based on more states.
3168 */
3169 if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
3170 return flag & FLAG_FORWARD_PROGRESS;
3171
3172 return flag & FLAG_DATA_ACKED;
3138} 3173}
3139 3174
3140/* Check that window update is acceptable. 3175/* Check that window update is acceptable.
@@ -3269,11 +3304,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3269 u32 ack_seq = TCP_SKB_CB(skb)->seq; 3304 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3270 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3305 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3271 bool is_dupack = false; 3306 bool is_dupack = false;
3272 u32 prior_in_flight; 3307 u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
3273 u32 prior_fackets; 3308 u32 prior_fackets;
3274 int prior_packets = tp->packets_out; 3309 int prior_packets = tp->packets_out;
3275 const int prior_unsacked = tp->packets_out - tp->sacked_out; 3310 const int prior_unsacked = tp->packets_out - tp->sacked_out;
3276 int acked = 0; /* Number of packets newly acked */ 3311 int acked = 0; /* Number of packets newly acked */
3312 s32 sack_rtt = -1;
3277 3313
3278 /* If the ack is older than previous acks 3314 /* If the ack is older than previous acks
3279 * then we can probably ignore it. 3315 * then we can probably ignore it.
@@ -3330,7 +3366,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3330 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); 3366 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3331 3367
3332 if (TCP_SKB_CB(skb)->sacked) 3368 if (TCP_SKB_CB(skb)->sacked)
3333 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); 3369 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3370 &sack_rtt);
3334 3371
3335 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb))) 3372 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3336 flag |= FLAG_ECE; 3373 flag |= FLAG_ECE;
@@ -3349,21 +3386,18 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3349 3386
3350 /* See if we can take anything off of the retransmit queue. */ 3387 /* See if we can take anything off of the retransmit queue. */
3351 acked = tp->packets_out; 3388 acked = tp->packets_out;
3352 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); 3389 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
3353 acked -= tp->packets_out; 3390 acked -= tp->packets_out;
3354 3391
3392 /* Advance cwnd if state allows */
3393 if (tcp_may_raise_cwnd(sk, flag))
3394 tcp_cong_avoid(sk, ack, prior_in_flight);
3395
3355 if (tcp_ack_is_dubious(sk, flag)) { 3396 if (tcp_ack_is_dubious(sk, flag)) {
3356 /* Advance CWND, if state allows this. */
3357 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
3358 tcp_cong_avoid(sk, ack, prior_in_flight);
3359 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3397 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3360 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3398 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3361 is_dupack, flag); 3399 is_dupack, flag);
3362 } else {
3363 if (flag & FLAG_DATA_ACKED)
3364 tcp_cong_avoid(sk, ack, prior_in_flight);
3365 } 3400 }
3366
3367 if (tp->tlp_high_seq) 3401 if (tp->tlp_high_seq)
3368 tcp_process_tlp_ack(sk, ack, flag); 3402 tcp_process_tlp_ack(sk, ack, flag);
3369 3403
@@ -3375,6 +3409,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3375 3409
3376 if (icsk->icsk_pending == ICSK_TIME_RETRANS) 3410 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3377 tcp_schedule_loss_probe(sk); 3411 tcp_schedule_loss_probe(sk);
3412 if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
3413 tcp_update_pacing_rate(sk);
3378 return 1; 3414 return 1;
3379 3415
3380no_queue: 3416no_queue:
@@ -3402,7 +3438,8 @@ old_ack:
3402 * If data was DSACKed, see if we can undo a cwnd reduction. 3438 * If data was DSACKed, see if we can undo a cwnd reduction.
3403 */ 3439 */
3404 if (TCP_SKB_CB(skb)->sacked) { 3440 if (TCP_SKB_CB(skb)->sacked) {
3405 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); 3441 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3442 &sack_rtt);
3406 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3443 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3407 is_dupack, flag); 3444 is_dupack, flag);
3408 } 3445 }
@@ -4102,6 +4139,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4102 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { 4139 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
4103 __skb_queue_after(&tp->out_of_order_queue, skb1, skb); 4140 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4104 } else { 4141 } else {
4142 tcp_grow_window(sk, skb);
4105 kfree_skb_partial(skb, fragstolen); 4143 kfree_skb_partial(skb, fragstolen);
4106 skb = NULL; 4144 skb = NULL;
4107 } 4145 }
@@ -4177,8 +4215,10 @@ add_sack:
4177 if (tcp_is_sack(tp)) 4215 if (tcp_is_sack(tp))
4178 tcp_sack_new_ofo_skb(sk, seq, end_seq); 4216 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4179end: 4217end:
4180 if (skb) 4218 if (skb) {
4219 tcp_grow_window(sk, skb);
4181 skb_set_owner_r(skb, sk); 4220 skb_set_owner_r(skb, sk);
4221 }
4182} 4222}
4183 4223
4184static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, 4224static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
@@ -5013,8 +5053,8 @@ discard:
5013 * the rest is checked inline. Fast processing is turned on in 5053 * the rest is checked inline. Fast processing is turned on in
5014 * tcp_data_queue when everything is OK. 5054 * tcp_data_queue when everything is OK.
5015 */ 5055 */
5016int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, 5056void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5017 const struct tcphdr *th, unsigned int len) 5057 const struct tcphdr *th, unsigned int len)
5018{ 5058{
5019 struct tcp_sock *tp = tcp_sk(sk); 5059 struct tcp_sock *tp = tcp_sk(sk);
5020 5060
@@ -5091,7 +5131,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5091 tcp_ack(sk, skb, 0); 5131 tcp_ack(sk, skb, 0);
5092 __kfree_skb(skb); 5132 __kfree_skb(skb);
5093 tcp_data_snd_check(sk); 5133 tcp_data_snd_check(sk);
5094 return 0; 5134 return;
5095 } else { /* Header too small */ 5135 } else { /* Header too small */
5096 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 5136 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5097 goto discard; 5137 goto discard;
@@ -5184,7 +5224,7 @@ no_ack:
5184 if (eaten) 5224 if (eaten)
5185 kfree_skb_partial(skb, fragstolen); 5225 kfree_skb_partial(skb, fragstolen);
5186 sk->sk_data_ready(sk, 0); 5226 sk->sk_data_ready(sk, 0);
5187 return 0; 5227 return;
5188 } 5228 }
5189 } 5229 }
5190 5230
@@ -5200,7 +5240,7 @@ slow_path:
5200 */ 5240 */
5201 5241
5202 if (!tcp_validate_incoming(sk, skb, th, 1)) 5242 if (!tcp_validate_incoming(sk, skb, th, 1))
5203 return 0; 5243 return;
5204 5244
5205step5: 5245step5:
5206 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) 5246 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
@@ -5216,7 +5256,7 @@ step5:
5216 5256
5217 tcp_data_snd_check(sk); 5257 tcp_data_snd_check(sk);
5218 tcp_ack_snd_check(sk); 5258 tcp_ack_snd_check(sk);
5219 return 0; 5259 return;
5220 5260
5221csum_error: 5261csum_error:
5222 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); 5262 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
@@ -5224,7 +5264,6 @@ csum_error:
5224 5264
5225discard: 5265discard:
5226 __kfree_skb(skb); 5266 __kfree_skb(skb);
5227 return 0;
5228} 5267}
5229EXPORT_SYMBOL(tcp_rcv_established); 5268EXPORT_SYMBOL(tcp_rcv_established);
5230 5269
@@ -5627,9 +5666,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5627 * so release it. 5666 * so release it.
5628 */ 5667 */
5629 if (req) { 5668 if (req) {
5630 tcp_synack_rtt_meas(sk, req);
5631 tp->total_retrans = req->num_retrans; 5669 tp->total_retrans = req->num_retrans;
5632
5633 reqsk_fastopen_remove(sk, req, false); 5670 reqsk_fastopen_remove(sk, req, false);
5634 } else { 5671 } else {
5635 /* Make sure socket is routed, for correct metrics. */ 5672 /* Make sure socket is routed, for correct metrics. */
@@ -5654,6 +5691,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5654 tp->snd_una = TCP_SKB_CB(skb)->ack_seq; 5691 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5655 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; 5692 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
5656 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); 5693 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5694 tcp_synack_rtt_meas(sk, req);
5657 5695
5658 if (tp->rx_opt.tstamp_ok) 5696 if (tp->rx_opt.tstamp_ok)
5659 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 5697 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b299da5ff499..b14266bb91eb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -821,8 +821,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
821 */ 821 */
822static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 822static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
823 struct request_sock *req, 823 struct request_sock *req,
824 u16 queue_mapping, 824 u16 queue_mapping)
825 bool nocache)
826{ 825{
827 const struct inet_request_sock *ireq = inet_rsk(req); 826 const struct inet_request_sock *ireq = inet_rsk(req);
828 struct flowi4 fl4; 827 struct flowi4 fl4;
@@ -852,7 +851,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
852 851
853static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) 852static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
854{ 853{
855 int res = tcp_v4_send_synack(sk, NULL, req, 0, false); 854 int res = tcp_v4_send_synack(sk, NULL, req, 0);
856 855
857 if (!res) 856 if (!res)
858 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 857 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -890,7 +889,7 @@ bool tcp_syn_flood_action(struct sock *sk,
890 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 889 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
891 890
892 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; 891 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
893 if (!lopt->synflood_warned) { 892 if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
894 lopt->synflood_warned = 1; 893 lopt->synflood_warned = 1;
895 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", 894 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
896 proto, ntohs(tcp_hdr(skb)->dest), msg); 895 proto, ntohs(tcp_hdr(skb)->dest), msg);
@@ -1316,9 +1315,11 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
1316 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1315 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1317 return true; 1316 return true;
1318 } 1317 }
1318
1319 if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { 1319 if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
1320 if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { 1320 if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
1321 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); 1321 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
1322 ip_hdr(skb)->daddr, valid_foc);
1322 if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || 1323 if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
1323 memcmp(&foc->val[0], &valid_foc->val[0], 1324 memcmp(&foc->val[0], &valid_foc->val[0],
1324 TCP_FASTOPEN_COOKIE_SIZE) != 0) 1325 TCP_FASTOPEN_COOKIE_SIZE) != 0)
@@ -1329,14 +1330,16 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
1329 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1330 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1330 return true; 1331 return true;
1331 } else if (foc->len == 0) { /* Client requesting a cookie */ 1332 } else if (foc->len == 0) { /* Client requesting a cookie */
1332 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); 1333 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
1334 ip_hdr(skb)->daddr, valid_foc);
1333 NET_INC_STATS_BH(sock_net(sk), 1335 NET_INC_STATS_BH(sock_net(sk),
1334 LINUX_MIB_TCPFASTOPENCOOKIEREQD); 1336 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
1335 } else { 1337 } else {
1336 /* Client sent a cookie with wrong size. Treat it 1338 /* Client sent a cookie with wrong size. Treat it
1337 * the same as invalid and return a valid one. 1339 * the same as invalid and return a valid one.
1338 */ 1340 */
1339 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); 1341 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
1342 ip_hdr(skb)->daddr, valid_foc);
1340 } 1343 }
1341 return false; 1344 return false;
1342} 1345}
@@ -1462,7 +1465,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1462 * limitations, they conserve resources and peer is 1465 * limitations, they conserve resources and peer is
1463 * evidently real one. 1466 * evidently real one.
1464 */ 1467 */
1465 if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 1468 if ((sysctl_tcp_syncookies == 2 ||
1469 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
1466 want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); 1470 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1467 if (!want_cookie) 1471 if (!want_cookie)
1468 goto drop; 1472 goto drop;
@@ -1671,8 +1675,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1671 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1675 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1672 1676
1673 tcp_initialize_rcv_mss(newsk); 1677 tcp_initialize_rcv_mss(newsk);
1674 tcp_synack_rtt_meas(newsk, req);
1675 newtp->total_retrans = req->num_retrans;
1676 1678
1677#ifdef CONFIG_TCP_MD5SIG 1679#ifdef CONFIG_TCP_MD5SIG
1678 /* Copy over the MD5 key from the original socket */ 1680 /* Copy over the MD5 key from the original socket */
@@ -1797,10 +1799,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1797 sk->sk_rx_dst = NULL; 1799 sk->sk_rx_dst = NULL;
1798 } 1800 }
1799 } 1801 }
1800 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1802 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1801 rsk = sk;
1802 goto reset;
1803 }
1804 return 0; 1803 return 0;
1805 } 1804 }
1806 1805
@@ -2605,7 +2604,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2605 long delta = req->expires - jiffies; 2604 long delta = req->expires - jiffies;
2606 2605
2607 seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2606 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2608 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", 2607 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n",
2609 i, 2608 i,
2610 ireq->loc_addr, 2609 ireq->loc_addr,
2611 ntohs(inet_sk(sk)->inet_sport), 2610 ntohs(inet_sk(sk)->inet_sport),
@@ -2663,7 +2662,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2663 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 2662 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2664 2663
2665 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2664 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2666 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n", 2665 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d%n",
2667 i, src, srcp, dest, destp, sk->sk_state, 2666 i, src, srcp, dest, destp, sk->sk_state,
2668 tp->write_seq - tp->snd_una, 2667 tp->write_seq - tp->snd_una,
2669 rx_queue, 2668 rx_queue,
@@ -2802,6 +2801,7 @@ struct proto tcp_prot = {
2802 .unhash = inet_unhash, 2801 .unhash = inet_unhash,
2803 .get_port = inet_csk_get_port, 2802 .get_port = inet_csk_get_port,
2804 .enter_memory_pressure = tcp_enter_memory_pressure, 2803 .enter_memory_pressure = tcp_enter_memory_pressure,
2804 .stream_memory_free = tcp_stream_memory_free,
2805 .sockets_allocated = &tcp_sockets_allocated, 2805 .sockets_allocated = &tcp_sockets_allocated,
2806 .orphan_count = &tcp_orphan_count, 2806 .orphan_count = &tcp_orphan_count,
2807 .memory_allocated = &tcp_memory_allocated, 2807 .memory_allocated = &tcp_memory_allocated,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index da14436c1735..559d4ae6ebf4 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -87,8 +87,8 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
87 if (!cg_proto) 87 if (!cg_proto)
88 return -EINVAL; 88 return -EINVAL;
89 89
90 if (val > RESOURCE_MAX) 90 if (val > RES_COUNTER_MAX)
91 val = RESOURCE_MAX; 91 val = RES_COUNTER_MAX;
92 92
93 tcp = tcp_from_cgproto(cg_proto); 93 tcp = tcp_from_cgproto(cg_proto);
94 94
@@ -101,9 +101,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
101 tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, 101 tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
102 net->ipv4.sysctl_tcp_mem[i]); 102 net->ipv4.sysctl_tcp_mem[i]);
103 103
104 if (val == RESOURCE_MAX) 104 if (val == RES_COUNTER_MAX)
105 clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); 105 clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
106 else if (val != RESOURCE_MAX) { 106 else if (val != RES_COUNTER_MAX) {
107 /* 107 /*
108 * The active bit needs to be written after the static_key 108 * The active bit needs to be written after the static_key
109 * update. This is what guarantees that the socket activation 109 * update. This is what guarantees that the socket activation
@@ -132,10 +132,10 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
132 return 0; 132 return 0;
133} 133}
134 134
135static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, 135static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
136 const char *buffer) 136 const char *buffer)
137{ 137{
138 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 138 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
139 unsigned long long val; 139 unsigned long long val;
140 int ret = 0; 140 int ret = 0;
141 141
@@ -180,14 +180,14 @@ static u64 tcp_read_usage(struct mem_cgroup *memcg)
180 return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); 180 return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
181} 181}
182 182
183static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft) 183static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
184{ 184{
185 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 185 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
186 u64 val; 186 u64 val;
187 187
188 switch (cft->private) { 188 switch (cft->private) {
189 case RES_LIMIT: 189 case RES_LIMIT:
190 val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); 190 val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
191 break; 191 break;
192 case RES_USAGE: 192 case RES_USAGE:
193 val = tcp_read_usage(memcg); 193 val = tcp_read_usage(memcg);
@@ -202,13 +202,13 @@ static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
202 return val; 202 return val;
203} 203}
204 204
205static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event) 205static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
206{ 206{
207 struct mem_cgroup *memcg; 207 struct mem_cgroup *memcg;
208 struct tcp_memcontrol *tcp; 208 struct tcp_memcontrol *tcp;
209 struct cg_proto *cg_proto; 209 struct cg_proto *cg_proto;
210 210
211 memcg = mem_cgroup_from_cont(cont); 211 memcg = mem_cgroup_from_css(css);
212 cg_proto = tcp_prot.proto_cgroup(memcg); 212 cg_proto = tcp_prot.proto_cgroup(memcg);
213 if (!cg_proto) 213 if (!cg_proto)
214 return 0; 214 return 0;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f6a005c485a9..4a22f3e715df 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -443,7 +443,7 @@ void tcp_init_metrics(struct sock *sk)
443 struct dst_entry *dst = __sk_dst_get(sk); 443 struct dst_entry *dst = __sk_dst_get(sk);
444 struct tcp_sock *tp = tcp_sk(sk); 444 struct tcp_sock *tp = tcp_sk(sk);
445 struct tcp_metrics_block *tm; 445 struct tcp_metrics_block *tm;
446 u32 val; 446 u32 val, crtt = 0; /* cached RTT scaled by 8 */
447 447
448 if (dst == NULL) 448 if (dst == NULL)
449 goto reset; 449 goto reset;
@@ -478,15 +478,19 @@ void tcp_init_metrics(struct sock *sk)
478 tp->reordering = val; 478 tp->reordering = val;
479 } 479 }
480 480
481 val = tcp_metric_get(tm, TCP_METRIC_RTT); 481 crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
482 if (val == 0 || tp->srtt == 0) { 482 rcu_read_unlock();
483 rcu_read_unlock(); 483reset:
484 goto reset; 484 /* The initial RTT measurement from the SYN/SYN-ACK is not ideal
485 } 485 * to seed the RTO for later data packets because SYN packets are
486 /* Initial rtt is determined from SYN,SYN-ACK. 486 * small. Use the per-dst cached values to seed the RTO but keep
487 * The segment is small and rtt may appear much 487 * the RTT estimator variables intact (e.g., srtt, mdev, rttvar).
488 * less than real one. Use per-dst memory 488 * Later the RTO will be updated immediately upon obtaining the first
489 * to make it more realistic. 489 * data RTT sample (tcp_rtt_estimator()). Hence the cached RTT only
490 * influences the first RTO but not later RTT estimation.
491 *
492 * But if RTT is not available from the SYN (due to retransmits or
493 * syn cookies) or the cache, force a conservative 3secs timeout.
490 * 494 *
491 * A bit of theory. RTT is time passed after "normal" sized packet 495 * A bit of theory. RTT is time passed after "normal" sized packet
492 * is sent until it is ACKed. In normal circumstances sending small 496 * is sent until it is ACKed. In normal circumstances sending small
@@ -497,21 +501,9 @@ void tcp_init_metrics(struct sock *sk)
497 * to low value, and then abruptly stops to do it and starts to delay 501 * to low value, and then abruptly stops to do it and starts to delay
498 * ACKs, wait for troubles. 502 * ACKs, wait for troubles.
499 */ 503 */
500 val = msecs_to_jiffies(val); 504 if (crtt > tp->srtt) {
501 if (val > tp->srtt) { 505 inet_csk(sk)->icsk_rto = crtt + max(crtt >> 2, tcp_rto_min(sk));
502 tp->srtt = val; 506 } else if (tp->srtt == 0) {
503 tp->rtt_seq = tp->snd_nxt;
504 }
505 val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
506 if (val > tp->mdev) {
507 tp->mdev = val;
508 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
509 }
510 rcu_read_unlock();
511
512 tcp_set_rto(sk);
513reset:
514 if (tp->srtt == 0) {
515 /* RFC6298: 5.7 We've failed to get a valid RTT sample from 507 /* RFC6298: 5.7 We've failed to get a valid RTT sample from
516 * 3WHS. This is most likely due to retransmission, 508 * 3WHS. This is most likely due to retransmission,
517 * including spurious one. Reset the RTO back to 3secs 509 * including spurious one. Reset the RTO back to 3secs
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ab1c08658528..58a3e69aef64 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -411,6 +411,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
411 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 411 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
412 tcp_enable_early_retrans(newtp); 412 tcp_enable_early_retrans(newtp);
413 newtp->tlp_high_seq = 0; 413 newtp->tlp_high_seq = 0;
414 newtp->lsndtime = treq->snt_synack;
415 newtp->total_retrans = req->num_retrans;
414 416
415 /* So many TCP implementations out there (incorrectly) count the 417 /* So many TCP implementations out there (incorrectly) count the
416 * initial SYN frame in their delayed-ACK and congestion control 418 * initial SYN frame in their delayed-ACK and congestion control
@@ -666,12 +668,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
666 if (!(flg & TCP_FLAG_ACK)) 668 if (!(flg & TCP_FLAG_ACK))
667 return NULL; 669 return NULL;
668 670
669 /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
670 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
671 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
672 else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */
673 tcp_rsk(req)->snt_synack = 0;
674
675 /* For Fast Open no more processing is needed (sk is the 671 /* For Fast Open no more processing is needed (sk is the
676 * child socket). 672 * child socket).
677 */ 673 */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 170737a9d56d..7c83cb8bf137 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,6 +65,9 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
65/* By default, RFC2861 behavior. */ 65/* By default, RFC2861 behavior. */
66int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 66int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
67 67
68unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
69EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
70
68static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 71static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
69 int push_one, gfp_t gfp); 72 int push_one, gfp_t gfp);
70 73
@@ -1628,7 +1631,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1628 1631
1629 /* If a full-sized TSO skb can be sent, do it. */ 1632 /* If a full-sized TSO skb can be sent, do it. */
1630 if (limit >= min_t(unsigned int, sk->sk_gso_max_size, 1633 if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
1631 sk->sk_gso_max_segs * tp->mss_cache)) 1634 tp->xmit_size_goal_segs * tp->mss_cache))
1632 goto send_now; 1635 goto send_now;
1633 1636
1634 /* Middle in queue won't get any more data, full sendable already? */ 1637 /* Middle in queue won't get any more data, full sendable already? */
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index d4943f67aff2..611beab38a00 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -46,6 +46,10 @@ static unsigned int bufsize __read_mostly = 4096;
46MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); 46MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
47module_param(bufsize, uint, 0); 47module_param(bufsize, uint, 0);
48 48
49static unsigned int fwmark __read_mostly = 0;
50MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
51module_param(fwmark, uint, 0);
52
49static int full __read_mostly; 53static int full __read_mostly;
50MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); 54MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
51module_param(full, int, 0); 55module_param(full, int, 0);
@@ -54,12 +58,16 @@ static const char procname[] = "tcpprobe";
54 58
55struct tcp_log { 59struct tcp_log {
56 ktime_t tstamp; 60 ktime_t tstamp;
57 __be32 saddr, daddr; 61 union {
58 __be16 sport, dport; 62 struct sockaddr raw;
63 struct sockaddr_in v4;
64 struct sockaddr_in6 v6;
65 } src, dst;
59 u16 length; 66 u16 length;
60 u32 snd_nxt; 67 u32 snd_nxt;
61 u32 snd_una; 68 u32 snd_una;
62 u32 snd_wnd; 69 u32 snd_wnd;
70 u32 rcv_wnd;
63 u32 snd_cwnd; 71 u32 snd_cwnd;
64 u32 ssthresh; 72 u32 ssthresh;
65 u32 srtt; 73 u32 srtt;
@@ -86,19 +94,45 @@ static inline int tcp_probe_avail(void)
86 return bufsize - tcp_probe_used() - 1; 94 return bufsize - tcp_probe_used() - 1;
87} 95}
88 96
97#define tcp_probe_copy_fl_to_si4(inet, si4, mem) \
98 do { \
99 si4.sin_family = AF_INET; \
100 si4.sin_port = inet->inet_##mem##port; \
101 si4.sin_addr.s_addr = inet->inet_##mem##addr; \
102 } while (0) \
103
104#if IS_ENABLED(CONFIG_IPV6)
105#define tcp_probe_copy_fl_to_si6(inet, si6, mem) \
106 do { \
107 struct ipv6_pinfo *pi6 = inet->pinet6; \
108 si6.sin6_family = AF_INET6; \
109 si6.sin6_port = inet->inet_##mem##port; \
110 si6.sin6_addr = pi6->mem##addr; \
111 si6.sin6_flowinfo = 0; /* No need here. */ \
112 si6.sin6_scope_id = 0; /* No need here. */ \
113 } while (0)
114#else
115#define tcp_probe_copy_fl_to_si6(fl, si6, mem) \
116 do { \
117 memset(&si6, 0, sizeof(si6)); \
118 } while (0)
119#endif
120
89/* 121/*
90 * Hook inserted to be called before each receive packet. 122 * Hook inserted to be called before each receive packet.
91 * Note: arguments must match tcp_rcv_established()! 123 * Note: arguments must match tcp_rcv_established()!
92 */ 124 */
93static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, 125static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
94 struct tcphdr *th, unsigned int len) 126 const struct tcphdr *th, unsigned int len)
95{ 127{
96 const struct tcp_sock *tp = tcp_sk(sk); 128 const struct tcp_sock *tp = tcp_sk(sk);
97 const struct inet_sock *inet = inet_sk(sk); 129 const struct inet_sock *inet = inet_sk(sk);
98 130
99 /* Only update if port matches */ 131 /* Only update if port or skb mark matches */
100 if ((port == 0 || ntohs(inet->inet_dport) == port || 132 if (((port == 0 && fwmark == 0) ||
101 ntohs(inet->inet_sport) == port) && 133 ntohs(inet->inet_dport) == port ||
134 ntohs(inet->inet_sport) == port ||
135 (fwmark > 0 && skb->mark == fwmark)) &&
102 (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { 136 (full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
103 137
104 spin_lock(&tcp_probe.lock); 138 spin_lock(&tcp_probe.lock);
@@ -107,15 +141,25 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
107 struct tcp_log *p = tcp_probe.log + tcp_probe.head; 141 struct tcp_log *p = tcp_probe.log + tcp_probe.head;
108 142
109 p->tstamp = ktime_get(); 143 p->tstamp = ktime_get();
110 p->saddr = inet->inet_saddr; 144 switch (sk->sk_family) {
111 p->sport = inet->inet_sport; 145 case AF_INET:
112 p->daddr = inet->inet_daddr; 146 tcp_probe_copy_fl_to_si4(inet, p->src.v4, s);
113 p->dport = inet->inet_dport; 147 tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d);
148 break;
149 case AF_INET6:
150 tcp_probe_copy_fl_to_si6(inet, p->src.v6, s);
151 tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d);
152 break;
153 default:
154 BUG();
155 }
156
114 p->length = skb->len; 157 p->length = skb->len;
115 p->snd_nxt = tp->snd_nxt; 158 p->snd_nxt = tp->snd_nxt;
116 p->snd_una = tp->snd_una; 159 p->snd_una = tp->snd_una;
117 p->snd_cwnd = tp->snd_cwnd; 160 p->snd_cwnd = tp->snd_cwnd;
118 p->snd_wnd = tp->snd_wnd; 161 p->snd_wnd = tp->snd_wnd;
162 p->rcv_wnd = tp->rcv_wnd;
119 p->ssthresh = tcp_current_ssthresh(sk); 163 p->ssthresh = tcp_current_ssthresh(sk);
120 p->srtt = tp->srtt >> 3; 164 p->srtt = tp->srtt >> 3;
121 165
@@ -128,7 +172,6 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
128 } 172 }
129 173
130 jprobe_return(); 174 jprobe_return();
131 return 0;
132} 175}
133 176
134static struct jprobe tcp_jprobe = { 177static struct jprobe tcp_jprobe = {
@@ -157,13 +200,11 @@ static int tcpprobe_sprint(char *tbuf, int n)
157 = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); 200 = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
158 201
159 return scnprintf(tbuf, n, 202 return scnprintf(tbuf, n,
160 "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n", 203 "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n",
161 (unsigned long) tv.tv_sec, 204 (unsigned long) tv.tv_sec,
162 (unsigned long) tv.tv_nsec, 205 (unsigned long) tv.tv_nsec,
163 &p->saddr, ntohs(p->sport), 206 &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una,
164 &p->daddr, ntohs(p->dport), 207 p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd);
165 p->length, p->snd_nxt, p->snd_una,
166 p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt);
167} 208}
168 209
169static ssize_t tcpprobe_read(struct file *file, char __user *buf, 210static ssize_t tcpprobe_read(struct file *file, char __user *buf,
@@ -176,7 +217,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf,
176 return -EINVAL; 217 return -EINVAL;
177 218
178 while (cnt < len) { 219 while (cnt < len) {
179 char tbuf[164]; 220 char tbuf[256];
180 int width; 221 int width;
181 222
182 /* Wait for data in buffer */ 223 /* Wait for data in buffer */
@@ -223,6 +264,13 @@ static __init int tcpprobe_init(void)
223{ 264{
224 int ret = -ENOMEM; 265 int ret = -ENOMEM;
225 266
267 /* Warning: if the function signature of tcp_rcv_established,
268 * has been changed, you also have to change the signature of
269 * jtcp_rcv_established, otherwise you end up right here!
270 */
271 BUILD_BUG_ON(__same_type(tcp_rcv_established,
272 jtcp_rcv_established) == 0);
273
226 init_waitqueue_head(&tcp_probe.wait); 274 init_waitqueue_head(&tcp_probe.wait);
227 spin_lock_init(&tcp_probe.lock); 275 spin_lock_init(&tcp_probe.lock);
228 276
@@ -241,7 +289,8 @@ static __init int tcpprobe_init(void)
241 if (ret) 289 if (ret)
242 goto err1; 290 goto err1;
243 291
244 pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize); 292 pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
293 port, fwmark, bufsize);
245 return 0; 294 return 0;
246 err1: 295 err1:
247 remove_proc_entry(procname, init_net.proc_net); 296 remove_proc_entry(procname, init_net.proc_net);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 766e6bab9113..74d2c95db57f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -704,7 +704,7 @@ EXPORT_SYMBOL(udp_flush_pending_frames);
704 * @src: source IP address 704 * @src: source IP address
705 * @dst: destination IP address 705 * @dst: destination IP address
706 */ 706 */
707static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) 707void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
708{ 708{
709 struct udphdr *uh = udp_hdr(skb); 709 struct udphdr *uh = udp_hdr(skb);
710 struct sk_buff *frags = skb_shinfo(skb)->frag_list; 710 struct sk_buff *frags = skb_shinfo(skb)->frag_list;
@@ -740,6 +740,7 @@ static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
740 uh->check = CSUM_MANGLED_0; 740 uh->check = CSUM_MANGLED_0;
741 } 741 }
742} 742}
743EXPORT_SYMBOL_GPL(udp4_hwcsum);
743 744
744static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) 745static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
745{ 746{
@@ -2158,7 +2159,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
2158 __u16 srcp = ntohs(inet->inet_sport); 2159 __u16 srcp = ntohs(inet->inet_sport);
2159 2160
2160 seq_printf(f, "%5d: %08X:%04X %08X:%04X" 2161 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
2161 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n", 2162 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n",
2162 bucket, src, srcp, dest, destp, sp->sk_state, 2163 bucket, src, srcp, dest, destp, sp->sk_state,
2163 sk_wmem_alloc_get(sp), 2164 sk_wmem_alloc_get(sp),
2164 sk_rmem_alloc_get(sp), 2165 sk_rmem_alloc_get(sp),
@@ -2336,7 +2337,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2336 uh->len = htons(skb->len - udp_offset); 2337 uh->len = htons(skb->len - udp_offset);
2337 2338
2338 /* csum segment if tunnel sets skb with csum. */ 2339 /* csum segment if tunnel sets skb with csum. */
2339 if (unlikely(uh->check)) { 2340 if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) {
2340 struct iphdr *iph = ip_hdr(skb); 2341 struct iphdr *iph = ip_hdr(skb);
2341 2342
2342 uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 2343 uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
@@ -2347,7 +2348,18 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2347 if (uh->check == 0) 2348 if (uh->check == 0)
2348 uh->check = CSUM_MANGLED_0; 2349 uh->check = CSUM_MANGLED_0;
2349 2350
2351 } else if (protocol == htons(ETH_P_IPV6)) {
2352 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
2353 u32 len = skb->len - udp_offset;
2354
2355 uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
2356 len, IPPROTO_UDP, 0);
2357 uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0));
2358 if (uh->check == 0)
2359 uh->check = CSUM_MANGLED_0;
2360 skb->ip_summed = CHECKSUM_NONE;
2350 } 2361 }
2362
2351 skb->protocol = protocol; 2363 skb->protocol = protocol;
2352 } while ((skb = skb->next)); 2364 } while ((skb = skb->next));
2353out: 2365out:
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 498ea99194af..d6ff12617f36 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -99,9 +99,9 @@
99#define ACONF_DEBUG 2 99#define ACONF_DEBUG 2
100 100
101#if ACONF_DEBUG >= 3 101#if ACONF_DEBUG >= 3
102#define ADBG(x) printk x 102#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
103#else 103#else
104#define ADBG(x) 104#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
105#endif 105#endif
106 106
107#define INFINITY_LIFE_TIME 0xFFFFFFFF 107#define INFINITY_LIFE_TIME 0xFFFFFFFF
@@ -177,6 +177,8 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
177 .accept_redirects = 1, 177 .accept_redirects = 1,
178 .autoconf = 1, 178 .autoconf = 1,
179 .force_mld_version = 0, 179 .force_mld_version = 0,
180 .mldv1_unsolicited_report_interval = 10 * HZ,
181 .mldv2_unsolicited_report_interval = HZ,
180 .dad_transmits = 1, 182 .dad_transmits = 1,
181 .rtr_solicits = MAX_RTR_SOLICITATIONS, 183 .rtr_solicits = MAX_RTR_SOLICITATIONS,
182 .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, 184 .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
@@ -202,6 +204,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
202 .accept_source_route = 0, /* we do not accept RH0 by default. */ 204 .accept_source_route = 0, /* we do not accept RH0 by default. */
203 .disable_ipv6 = 0, 205 .disable_ipv6 = 0,
204 .accept_dad = 1, 206 .accept_dad = 1,
207 .suppress_frag_ndisc = 1,
205}; 208};
206 209
207static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { 210static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -211,6 +214,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
211 .accept_ra = 1, 214 .accept_ra = 1,
212 .accept_redirects = 1, 215 .accept_redirects = 1,
213 .autoconf = 1, 216 .autoconf = 1,
217 .force_mld_version = 0,
218 .mldv1_unsolicited_report_interval = 10 * HZ,
219 .mldv2_unsolicited_report_interval = HZ,
214 .dad_transmits = 1, 220 .dad_transmits = 1,
215 .rtr_solicits = MAX_RTR_SOLICITATIONS, 221 .rtr_solicits = MAX_RTR_SOLICITATIONS,
216 .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, 222 .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
@@ -236,17 +242,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
236 .accept_source_route = 0, /* we do not accept RH0 by default. */ 242 .accept_source_route = 0, /* we do not accept RH0 by default. */
237 .disable_ipv6 = 0, 243 .disable_ipv6 = 0,
238 .accept_dad = 1, 244 .accept_dad = 1,
245 .suppress_frag_ndisc = 1,
239}; 246};
240 247
241/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
242const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
243const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
244const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
245const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
246const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
247const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
248const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
249
250/* Check if a valid qdisc is available */ 248/* Check if a valid qdisc is available */
251static inline bool addrconf_qdisc_ok(const struct net_device *dev) 249static inline bool addrconf_qdisc_ok(const struct net_device *dev)
252{ 250{
@@ -306,36 +304,6 @@ err_ip:
306 return -ENOMEM; 304 return -ENOMEM;
307} 305}
308 306
309static void snmp6_free_dev(struct inet6_dev *idev)
310{
311 kfree(idev->stats.icmpv6msgdev);
312 kfree(idev->stats.icmpv6dev);
313 snmp_mib_free((void __percpu **)idev->stats.ipv6);
314}
315
316/* Nobody refers to this device, we may destroy it. */
317
318void in6_dev_finish_destroy(struct inet6_dev *idev)
319{
320 struct net_device *dev = idev->dev;
321
322 WARN_ON(!list_empty(&idev->addr_list));
323 WARN_ON(idev->mc_list != NULL);
324 WARN_ON(timer_pending(&idev->rs_timer));
325
326#ifdef NET_REFCNT_DEBUG
327 pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
328#endif
329 dev_put(dev);
330 if (!idev->dead) {
331 pr_warn("Freeing alive inet6 device %p\n", idev);
332 return;
333 }
334 snmp6_free_dev(idev);
335 kfree_rcu(idev, rcu);
336}
337EXPORT_SYMBOL(in6_dev_finish_destroy);
338
339static struct inet6_dev *ipv6_add_dev(struct net_device *dev) 307static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
340{ 308{
341 struct inet6_dev *ndev; 309 struct inet6_dev *ndev;
@@ -369,9 +337,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
369 dev_hold(dev); 337 dev_hold(dev);
370 338
371 if (snmp6_alloc_dev(ndev) < 0) { 339 if (snmp6_alloc_dev(ndev) < 0) {
372 ADBG((KERN_WARNING 340 ADBG(KERN_WARNING
373 "%s: cannot allocate memory for statistics; dev=%s.\n", 341 "%s: cannot allocate memory for statistics; dev=%s.\n",
374 __func__, dev->name)); 342 __func__, dev->name);
375 neigh_parms_release(&nd_tbl, ndev->nd_parms); 343 neigh_parms_release(&nd_tbl, ndev->nd_parms);
376 dev_put(dev); 344 dev_put(dev);
377 kfree(ndev); 345 kfree(ndev);
@@ -379,9 +347,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
379 } 347 }
380 348
381 if (snmp6_register_dev(ndev) < 0) { 349 if (snmp6_register_dev(ndev) < 0) {
382 ADBG((KERN_WARNING 350 ADBG(KERN_WARNING
383 "%s: cannot create /proc/net/dev_snmp6/%s\n", 351 "%s: cannot create /proc/net/dev_snmp6/%s\n",
384 __func__, dev->name)); 352 __func__, dev->name);
385 neigh_parms_release(&nd_tbl, ndev->nd_parms); 353 neigh_parms_release(&nd_tbl, ndev->nd_parms);
386 ndev->dead = 1; 354 ndev->dead = 1;
387 in6_dev_finish_destroy(ndev); 355 in6_dev_finish_destroy(ndev);
@@ -844,7 +812,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
844 812
845 /* Ignore adding duplicate addresses on an interface */ 813 /* Ignore adding duplicate addresses on an interface */
846 if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { 814 if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
847 ADBG(("ipv6_add_addr: already assigned\n")); 815 ADBG("ipv6_add_addr: already assigned\n");
848 err = -EEXIST; 816 err = -EEXIST;
849 goto out; 817 goto out;
850 } 818 }
@@ -852,7 +820,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
852 ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); 820 ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
853 821
854 if (ifa == NULL) { 822 if (ifa == NULL) {
855 ADBG(("ipv6_add_addr: malloc failed\n")); 823 ADBG("ipv6_add_addr: malloc failed\n");
856 err = -ENOBUFS; 824 err = -ENOBUFS;
857 goto out; 825 goto out;
858 } 826 }
@@ -1054,7 +1022,6 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
1054 unsigned long regen_advance; 1022 unsigned long regen_advance;
1055 int tmp_plen; 1023 int tmp_plen;
1056 int ret = 0; 1024 int ret = 0;
1057 int max_addresses;
1058 u32 addr_flags; 1025 u32 addr_flags;
1059 unsigned long now = jiffies; 1026 unsigned long now = jiffies;
1060 1027
@@ -1100,7 +1067,6 @@ retry:
1100 idev->cnf.temp_prefered_lft + age - 1067 idev->cnf.temp_prefered_lft + age -
1101 idev->cnf.max_desync_factor); 1068 idev->cnf.max_desync_factor);
1102 tmp_plen = ifp->prefix_len; 1069 tmp_plen = ifp->prefix_len;
1103 max_addresses = idev->cnf.max_addresses;
1104 tmp_tstamp = ifp->tstamp; 1070 tmp_tstamp = ifp->tstamp;
1105 spin_unlock_bh(&ifp->lock); 1071 spin_unlock_bh(&ifp->lock);
1106 1072
@@ -1807,6 +1773,16 @@ static int addrconf_ifid_gre(u8 *eui, struct net_device *dev)
1807 return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr); 1773 return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
1808} 1774}
1809 1775
1776static int addrconf_ifid_ip6tnl(u8 *eui, struct net_device *dev)
1777{
1778 memcpy(eui, dev->perm_addr, 3);
1779 memcpy(eui + 5, dev->perm_addr + 3, 3);
1780 eui[3] = 0xFF;
1781 eui[4] = 0xFE;
1782 eui[0] ^= 2;
1783 return 0;
1784}
1785
1810static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) 1786static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
1811{ 1787{
1812 switch (dev->type) { 1788 switch (dev->type) {
@@ -1825,6 +1801,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
1825 return addrconf_ifid_eui64(eui, dev); 1801 return addrconf_ifid_eui64(eui, dev);
1826 case ARPHRD_IEEE1394: 1802 case ARPHRD_IEEE1394:
1827 return addrconf_ifid_ieee1394(eui, dev); 1803 return addrconf_ifid_ieee1394(eui, dev);
1804 case ARPHRD_TUNNEL6:
1805 return addrconf_ifid_ip6tnl(eui, dev);
1828 } 1806 }
1829 return -1; 1807 return -1;
1830} 1808}
@@ -2050,7 +2028,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
2050 pinfo = (struct prefix_info *) opt; 2028 pinfo = (struct prefix_info *) opt;
2051 2029
2052 if (len < sizeof(struct prefix_info)) { 2030 if (len < sizeof(struct prefix_info)) {
2053 ADBG(("addrconf: prefix option too short\n")); 2031 ADBG("addrconf: prefix option too short\n");
2054 return; 2032 return;
2055 } 2033 }
2056 2034
@@ -2702,7 +2680,8 @@ static void addrconf_dev_config(struct net_device *dev)
2702 (dev->type != ARPHRD_ARCNET) && 2680 (dev->type != ARPHRD_ARCNET) &&
2703 (dev->type != ARPHRD_INFINIBAND) && 2681 (dev->type != ARPHRD_INFINIBAND) &&
2704 (dev->type != ARPHRD_IEEE802154) && 2682 (dev->type != ARPHRD_IEEE802154) &&
2705 (dev->type != ARPHRD_IEEE1394)) { 2683 (dev->type != ARPHRD_IEEE1394) &&
2684 (dev->type != ARPHRD_TUNNEL6)) {
2706 /* Alas, we support only Ethernet autoconfiguration. */ 2685 /* Alas, we support only Ethernet autoconfiguration. */
2707 return; 2686 return;
2708 } 2687 }
@@ -2788,44 +2767,6 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
2788 return -1; 2767 return -1;
2789} 2768}
2790 2769
2791static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
2792{
2793 struct net_device *link_dev;
2794 struct net *net = dev_net(idev->dev);
2795
2796 /* first try to inherit the link-local address from the link device */
2797 if (idev->dev->iflink &&
2798 (link_dev = __dev_get_by_index(net, idev->dev->iflink))) {
2799 if (!ipv6_inherit_linklocal(idev, link_dev))
2800 return;
2801 }
2802 /* then try to inherit it from any device */
2803 for_each_netdev(net, link_dev) {
2804 if (!ipv6_inherit_linklocal(idev, link_dev))
2805 return;
2806 }
2807 pr_debug("init ip6-ip6: add_linklocal failed\n");
2808}
2809
2810/*
2811 * Autoconfigure tunnel with a link-local address so routing protocols,
2812 * DHCPv6, MLD etc. can be run over the virtual link
2813 */
2814
2815static void addrconf_ip6_tnl_config(struct net_device *dev)
2816{
2817 struct inet6_dev *idev;
2818
2819 ASSERT_RTNL();
2820
2821 idev = addrconf_add_dev(dev);
2822 if (IS_ERR(idev)) {
2823 pr_debug("init ip6-ip6: add_dev failed\n");
2824 return;
2825 }
2826 ip6_tnl_add_linklocal(idev);
2827}
2828
2829static int addrconf_notify(struct notifier_block *this, unsigned long event, 2770static int addrconf_notify(struct notifier_block *this, unsigned long event,
2830 void *ptr) 2771 void *ptr)
2831{ 2772{
@@ -2893,9 +2834,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2893 addrconf_gre_config(dev); 2834 addrconf_gre_config(dev);
2894 break; 2835 break;
2895#endif 2836#endif
2896 case ARPHRD_TUNNEL6:
2897 addrconf_ip6_tnl_config(dev);
2898 break;
2899 case ARPHRD_LOOPBACK: 2837 case ARPHRD_LOOPBACK:
2900 init_loopback(dev); 2838 init_loopback(dev);
2901 break; 2839 break;
@@ -3120,6 +3058,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3120static void addrconf_rs_timer(unsigned long data) 3058static void addrconf_rs_timer(unsigned long data)
3121{ 3059{
3122 struct inet6_dev *idev = (struct inet6_dev *)data; 3060 struct inet6_dev *idev = (struct inet6_dev *)data;
3061 struct net_device *dev = idev->dev;
3123 struct in6_addr lladdr; 3062 struct in6_addr lladdr;
3124 3063
3125 write_lock(&idev->lock); 3064 write_lock(&idev->lock);
@@ -3134,12 +3073,14 @@ static void addrconf_rs_timer(unsigned long data)
3134 goto out; 3073 goto out;
3135 3074
3136 if (idev->rs_probes++ < idev->cnf.rtr_solicits) { 3075 if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
3137 if (!__ipv6_get_lladdr(idev, &lladdr, IFA_F_TENTATIVE)) 3076 write_unlock(&idev->lock);
3138 ndisc_send_rs(idev->dev, &lladdr, 3077 if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
3078 ndisc_send_rs(dev, &lladdr,
3139 &in6addr_linklocal_allrouters); 3079 &in6addr_linklocal_allrouters);
3140 else 3080 else
3141 goto out; 3081 goto put;
3142 3082
3083 write_lock(&idev->lock);
3143 /* The wait after the last probe can be shorter */ 3084 /* The wait after the last probe can be shorter */
3144 addrconf_mod_rs_timer(idev, (idev->rs_probes == 3085 addrconf_mod_rs_timer(idev, (idev->rs_probes ==
3145 idev->cnf.rtr_solicits) ? 3086 idev->cnf.rtr_solicits) ?
@@ -3155,6 +3096,7 @@ static void addrconf_rs_timer(unsigned long data)
3155 3096
3156out: 3097out:
3157 write_unlock(&idev->lock); 3098 write_unlock(&idev->lock);
3099put:
3158 in6_dev_put(idev); 3100 in6_dev_put(idev);
3159} 3101}
3160 3102
@@ -3630,8 +3572,8 @@ restart:
3630 if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX)) 3572 if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
3631 next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX; 3573 next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
3632 3574
3633 ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", 3575 ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
3634 now, next, next_sec, next_sched)); 3576 now, next, next_sec, next_sched);
3635 3577
3636 addr_chk_timer.expires = next_sched; 3578 addr_chk_timer.expires = next_sched;
3637 add_timer(&addr_chk_timer); 3579 add_timer(&addr_chk_timer);
@@ -4177,6 +4119,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
4177 array[DEVCONF_RTR_SOLICIT_DELAY] = 4119 array[DEVCONF_RTR_SOLICIT_DELAY] =
4178 jiffies_to_msecs(cnf->rtr_solicit_delay); 4120 jiffies_to_msecs(cnf->rtr_solicit_delay);
4179 array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; 4121 array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
4122 array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
4123 jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
4124 array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
4125 jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
4180#ifdef CONFIG_IPV6_PRIVACY 4126#ifdef CONFIG_IPV6_PRIVACY
4181 array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; 4127 array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
4182 array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft; 4128 array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
@@ -4207,6 +4153,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
4207 array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; 4153 array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
4208 array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; 4154 array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
4209 array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; 4155 array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
4156 array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
4210} 4157}
4211 4158
4212static inline size_t inet6_ifla6_size(void) 4159static inline size_t inet6_ifla6_size(void)
@@ -4652,6 +4599,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
4652 break; 4599 break;
4653 } 4600 }
4654 atomic_inc(&net->ipv6.dev_addr_genid); 4601 atomic_inc(&net->ipv6.dev_addr_genid);
4602 rt_genid_bump_ipv6(net);
4655} 4603}
4656 4604
4657static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 4605static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -4859,6 +4807,22 @@ static struct addrconf_sysctl_table
4859 .mode = 0644, 4807 .mode = 0644,
4860 .proc_handler = proc_dointvec, 4808 .proc_handler = proc_dointvec,
4861 }, 4809 },
4810 {
4811 .procname = "mldv1_unsolicited_report_interval",
4812 .data =
4813 &ipv6_devconf.mldv1_unsolicited_report_interval,
4814 .maxlen = sizeof(int),
4815 .mode = 0644,
4816 .proc_handler = proc_dointvec_ms_jiffies,
4817 },
4818 {
4819 .procname = "mldv2_unsolicited_report_interval",
4820 .data =
4821 &ipv6_devconf.mldv2_unsolicited_report_interval,
4822 .maxlen = sizeof(int),
4823 .mode = 0644,
4824 .proc_handler = proc_dointvec_ms_jiffies,
4825 },
4862#ifdef CONFIG_IPV6_PRIVACY 4826#ifdef CONFIG_IPV6_PRIVACY
4863 { 4827 {
4864 .procname = "use_tempaddr", 4828 .procname = "use_tempaddr",
@@ -5004,6 +4968,13 @@ static struct addrconf_sysctl_table
5004 .proc_handler = proc_dointvec 4968 .proc_handler = proc_dointvec
5005 }, 4969 },
5006 { 4970 {
4971 .procname = "suppress_frag_ndisc",
4972 .data = &ipv6_devconf.suppress_frag_ndisc,
4973 .maxlen = sizeof(int),
4974 .mode = 0644,
4975 .proc_handler = proc_dointvec
4976 },
4977 {
5007 /* sentinel */ 4978 /* sentinel */
5008 } 4979 }
5009 }, 4980 },
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index d2f87427244b..4c11cbcf8308 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -6,6 +6,7 @@
6#include <linux/export.h> 6#include <linux/export.h>
7#include <net/ipv6.h> 7#include <net/ipv6.h>
8#include <net/addrconf.h> 8#include <net/addrconf.h>
9#include <net/ip.h>
9 10
10#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) 11#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
11 12
@@ -98,3 +99,52 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v)
98 return atomic_notifier_call_chain(&inet6addr_chain, val, v); 99 return atomic_notifier_call_chain(&inet6addr_chain, val, v);
99} 100}
100EXPORT_SYMBOL(inet6addr_notifier_call_chain); 101EXPORT_SYMBOL(inet6addr_notifier_call_chain);
102
103const struct ipv6_stub *ipv6_stub __read_mostly;
104EXPORT_SYMBOL_GPL(ipv6_stub);
105
106/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
107const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
108EXPORT_SYMBOL(in6addr_loopback);
109const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
110EXPORT_SYMBOL(in6addr_any);
111const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
112EXPORT_SYMBOL(in6addr_linklocal_allnodes);
113const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
114EXPORT_SYMBOL(in6addr_linklocal_allrouters);
115const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
116EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
117const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
118EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
119const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
120EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
121
122static void snmp6_free_dev(struct inet6_dev *idev)
123{
124 kfree(idev->stats.icmpv6msgdev);
125 kfree(idev->stats.icmpv6dev);
126 snmp_mib_free((void __percpu **)idev->stats.ipv6);
127}
128
129/* Nobody refers to this device, we may destroy it. */
130
131void in6_dev_finish_destroy(struct inet6_dev *idev)
132{
133 struct net_device *dev = idev->dev;
134
135 WARN_ON(!list_empty(&idev->addr_list));
136 WARN_ON(idev->mc_list != NULL);
137 WARN_ON(timer_pending(&idev->rs_timer));
138
139#ifdef NET_REFCNT_DEBUG
140 pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
141#endif
142 dev_put(dev);
143 if (!idev->dead) {
144 pr_warn("Freeing alive inet6 device %p\n", idev);
145 return;
146 }
147 snmp6_free_dev(idev);
148 kfree_rcu(idev, rcu);
149}
150EXPORT_SYMBOL(in6_dev_finish_destroy);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index f083a583a05c..b30ad3741b46 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -251,38 +251,36 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
251/* add a label */ 251/* add a label */
252static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 252static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
253{ 253{
254 struct hlist_node *n;
255 struct ip6addrlbl_entry *last = NULL, *p = NULL;
254 int ret = 0; 256 int ret = 0;
255 257
256 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", 258 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
257 __func__, 259 replace);
258 newp, replace);
259 260
260 if (hlist_empty(&ip6addrlbl_table.head)) { 261 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
261 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 262 if (p->prefixlen == newp->prefixlen &&
262 } else { 263 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
263 struct hlist_node *n; 264 p->ifindex == newp->ifindex &&
264 struct ip6addrlbl_entry *p = NULL; 265 ipv6_addr_equal(&p->prefix, &newp->prefix)) {
265 hlist_for_each_entry_safe(p, n, 266 if (!replace) {
266 &ip6addrlbl_table.head, list) { 267 ret = -EEXIST;
267 if (p->prefixlen == newp->prefixlen &&
268 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
269 p->ifindex == newp->ifindex &&
270 ipv6_addr_equal(&p->prefix, &newp->prefix)) {
271 if (!replace) {
272 ret = -EEXIST;
273 goto out;
274 }
275 hlist_replace_rcu(&p->list, &newp->list);
276 ip6addrlbl_put(p);
277 goto out;
278 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
279 (p->prefixlen < newp->prefixlen)) {
280 hlist_add_before_rcu(&newp->list, &p->list);
281 goto out; 268 goto out;
282 } 269 }
270 hlist_replace_rcu(&p->list, &newp->list);
271 ip6addrlbl_put(p);
272 goto out;
273 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
274 (p->prefixlen < newp->prefixlen)) {
275 hlist_add_before_rcu(&newp->list, &p->list);
276 goto out;
283 } 277 }
284 hlist_add_after_rcu(&p->list, &newp->list); 278 last = p;
285 } 279 }
280 if (last)
281 hlist_add_after_rcu(&last->list, &newp->list);
282 else
283 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
286out: 284out:
287 if (!ret) 285 if (!ret)
288 ip6addrlbl_table.seq++; 286 ip6addrlbl_table.seq++;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a5ac969aeefe..7c96100b021e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -56,6 +56,7 @@
56#include <net/transp_v6.h> 56#include <net/transp_v6.h>
57#include <net/ip6_route.h> 57#include <net/ip6_route.h>
58#include <net/addrconf.h> 58#include <net/addrconf.h>
59#include <net/ndisc.h>
59#ifdef CONFIG_IPV6_TUNNEL 60#ifdef CONFIG_IPV6_TUNNEL
60#include <net/ip6_tunnel.h> 61#include <net/ip6_tunnel.h>
61#endif 62#endif
@@ -766,6 +767,7 @@ static int __net_init inet6_net_init(struct net *net)
766 767
767 net->ipv6.sysctl.bindv6only = 0; 768 net->ipv6.sysctl.bindv6only = 0;
768 net->ipv6.sysctl.icmpv6_time = 1*HZ; 769 net->ipv6.sysctl.icmpv6_time = 1*HZ;
770 atomic_set(&net->ipv6.rt_genid, 0);
769 771
770 err = ipv6_init_mibs(net); 772 err = ipv6_init_mibs(net);
771 if (err) 773 if (err)
@@ -809,6 +811,15 @@ static struct pernet_operations inet6_net_ops = {
809 .exit = inet6_net_exit, 811 .exit = inet6_net_exit,
810}; 812};
811 813
814static const struct ipv6_stub ipv6_stub_impl = {
815 .ipv6_sock_mc_join = ipv6_sock_mc_join,
816 .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
817 .ipv6_dst_lookup = ip6_dst_lookup,
818 .udpv6_encap_enable = udpv6_encap_enable,
819 .ndisc_send_na = ndisc_send_na,
820 .nd_tbl = &nd_tbl,
821};
822
812static int __init inet6_init(void) 823static int __init inet6_init(void)
813{ 824{
814 struct list_head *r; 825 struct list_head *r;
@@ -883,6 +894,9 @@ static int __init inet6_init(void)
883 err = igmp6_init(); 894 err = igmp6_init();
884 if (err) 895 if (err)
885 goto igmp_fail; 896 goto igmp_fail;
897
898 ipv6_stub = &ipv6_stub_impl;
899
886 err = ipv6_netfilter_init(); 900 err = ipv6_netfilter_init();
887 if (err) 901 if (err)
888 goto netfilter_fail; 902 goto netfilter_fail;
@@ -901,6 +915,9 @@ static int __init inet6_init(void)
901 err = ip6_route_init(); 915 err = ip6_route_init();
902 if (err) 916 if (err)
903 goto ip6_route_fail; 917 goto ip6_route_fail;
918 err = ndisc_late_init();
919 if (err)
920 goto ndisc_late_fail;
904 err = ip6_flowlabel_init(); 921 err = ip6_flowlabel_init();
905 if (err) 922 if (err)
906 goto ip6_flowlabel_fail; 923 goto ip6_flowlabel_fail;
@@ -967,6 +984,8 @@ ipv6_exthdrs_fail:
967addrconf_fail: 984addrconf_fail:
968 ip6_flowlabel_cleanup(); 985 ip6_flowlabel_cleanup();
969ip6_flowlabel_fail: 986ip6_flowlabel_fail:
987 ndisc_late_cleanup();
988ndisc_late_fail:
970 ip6_route_cleanup(); 989 ip6_route_cleanup();
971ip6_route_fail: 990ip6_route_fail:
972#ifdef CONFIG_PROC_FS 991#ifdef CONFIG_PROC_FS
@@ -1029,6 +1048,7 @@ static void __exit inet6_exit(void)
1029 ipv6_exthdrs_exit(); 1048 ipv6_exthdrs_exit();
1030 addrconf_cleanup(); 1049 addrconf_cleanup();
1031 ip6_flowlabel_cleanup(); 1050 ip6_flowlabel_cleanup();
1051 ndisc_late_cleanup();
1032 ip6_route_cleanup(); 1052 ip6_route_cleanup();
1033#ifdef CONFIG_PROC_FS 1053#ifdef CONFIG_PROC_FS
1034 1054
@@ -1039,6 +1059,7 @@ static void __exit inet6_exit(void)
1039 raw6_proc_exit(); 1059 raw6_proc_exit();
1040#endif 1060#endif
1041 ipv6_netfilter_fini(); 1061 ipv6_netfilter_fini();
1062 ipv6_stub = NULL;
1042 igmp6_cleanup(); 1063 igmp6_cleanup();
1043 ndisc_cleanup(); 1064 ndisc_cleanup();
1044 ip6_mr_cleanup(); 1065 ip6_mr_cleanup();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index bb02e176cb70..73784c3d4642 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -628,7 +628,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
628 return; 628 return;
629 629
630 if (type == NDISC_REDIRECT) 630 if (type == NDISC_REDIRECT)
631 ip6_redirect(skb, net, 0, 0); 631 ip6_redirect(skb, net, skb->dev->ifindex, 0);
632 else 632 else
633 ip6_update_pmtu(skb, net, info, 0, 0); 633 ip6_update_pmtu(skb, net, info, 0, 0);
634 xfrm_state_put(x); 634 xfrm_state_put(x);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 197e6f4a2b74..48b6bd2a9a14 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -890,7 +890,7 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
890 src = &np->rcv_saddr; 890 src = &np->rcv_saddr;
891 seq_printf(seq, 891 seq_printf(seq,
892 "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 892 "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
893 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", 893 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
894 bucket, 894 bucket,
895 src->s6_addr32[0], src->s6_addr32[1], 895 src->s6_addr32[0], src->s6_addr32[1],
896 src->s6_addr32[2], src->s6_addr32[3], srcp, 896 src->s6_addr32[2], src->s6_addr32[3], srcp,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index aeac0dc3635d..d3618a78fcac 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -447,7 +447,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
447 return; 447 return;
448 448
449 if (type == NDISC_REDIRECT) 449 if (type == NDISC_REDIRECT)
450 ip6_redirect(skb, net, 0, 0); 450 ip6_redirect(skb, net, skb->dev->ifindex, 0);
451 else 451 else
452 ip6_update_pmtu(skb, net, info, 0, 0); 452 ip6_update_pmtu(skb, net, info, 0, 0);
453 xfrm_state_put(x); 453 xfrm_state_put(x);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 07a7d65a7cb6..8d67900aa003 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -162,12 +162,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
162 off += optlen; 162 off += optlen;
163 len -= optlen; 163 len -= optlen;
164 } 164 }
165 /* This case will not be caught by above check since its padding
166 * length is smaller than 7:
167 * 1 byte NH + 1 byte Length + 6 bytes Padding
168 */
169 if ((padlen == 6) && ((off - skb_network_header_len(skb)) == 8))
170 goto bad;
171 165
172 if (len == 0) 166 if (len == 0)
173 return true; 167 return true;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2e1a432867c0..e27591635f92 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -55,26 +55,33 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
55 struct fib6_table *table; 55 struct fib6_table *table;
56 struct net *net = rule->fr_net; 56 struct net *net = rule->fr_net;
57 pol_lookup_t lookup = arg->lookup_ptr; 57 pol_lookup_t lookup = arg->lookup_ptr;
58 int err = 0;
58 59
59 switch (rule->action) { 60 switch (rule->action) {
60 case FR_ACT_TO_TBL: 61 case FR_ACT_TO_TBL:
61 break; 62 break;
62 case FR_ACT_UNREACHABLE: 63 case FR_ACT_UNREACHABLE:
64 err = -ENETUNREACH;
63 rt = net->ipv6.ip6_null_entry; 65 rt = net->ipv6.ip6_null_entry;
64 goto discard_pkt; 66 goto discard_pkt;
65 default: 67 default:
66 case FR_ACT_BLACKHOLE: 68 case FR_ACT_BLACKHOLE:
69 err = -EINVAL;
67 rt = net->ipv6.ip6_blk_hole_entry; 70 rt = net->ipv6.ip6_blk_hole_entry;
68 goto discard_pkt; 71 goto discard_pkt;
69 case FR_ACT_PROHIBIT: 72 case FR_ACT_PROHIBIT:
73 err = -EACCES;
70 rt = net->ipv6.ip6_prohibit_entry; 74 rt = net->ipv6.ip6_prohibit_entry;
71 goto discard_pkt; 75 goto discard_pkt;
72 } 76 }
73 77
74 table = fib6_get_table(net, rule->table); 78 table = fib6_get_table(net, rule->table);
75 if (table) 79 if (!table) {
76 rt = lookup(net, table, flp6, flags); 80 err = -EAGAIN;
81 goto out;
82 }
77 83
84 rt = lookup(net, table, flp6, flags);
78 if (rt != net->ipv6.ip6_null_entry) { 85 if (rt != net->ipv6.ip6_null_entry) {
79 struct fib6_rule *r = (struct fib6_rule *)rule; 86 struct fib6_rule *r = (struct fib6_rule *)rule;
80 87
@@ -101,6 +108,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
101 } 108 }
102again: 109again:
103 ip6_rt_put(rt); 110 ip6_rt_put(rt);
111 err = -EAGAIN;
104 rt = NULL; 112 rt = NULL;
105 goto out; 113 goto out;
106 114
@@ -108,9 +116,31 @@ discard_pkt:
108 dst_hold(&rt->dst); 116 dst_hold(&rt->dst);
109out: 117out:
110 arg->result = rt; 118 arg->result = rt;
111 return rt == NULL ? -EAGAIN : 0; 119 return err;
112} 120}
113 121
122static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
123{
124 struct rt6_info *rt = (struct rt6_info *) arg->result;
125 struct net_device *dev = rt->rt6i_idev->dev;
126 /* do not accept result if the route does
127 * not meet the required prefix length
128 */
129 if (rt->rt6i_dst.plen <= rule->suppress_prefixlen)
130 goto suppress_route;
131
132 /* do not accept result if the route uses a device
133 * belonging to a forbidden interface group
134 */
135 if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup)
136 goto suppress_route;
137
138 return false;
139
140suppress_route:
141 ip6_rt_put(rt);
142 return true;
143}
114 144
115static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 145static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
116{ 146{
@@ -244,6 +274,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
244 .addr_size = sizeof(struct in6_addr), 274 .addr_size = sizeof(struct in6_addr),
245 .action = fib6_rule_action, 275 .action = fib6_rule_action,
246 .match = fib6_rule_match, 276 .match = fib6_rule_match,
277 .suppress = fib6_rule_suppress,
247 .configure = fib6_rule_configure, 278 .configure = fib6_rule_configure,
248 .compare = fib6_rule_compare, 279 .compare = fib6_rule_compare,
249 .fill = fib6_rule_fill, 280 .fill = fib6_rule_fill,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 7cfc8d284870..eef8d945b362 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -92,7 +92,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
92 if (type == ICMPV6_PKT_TOOBIG) 92 if (type == ICMPV6_PKT_TOOBIG)
93 ip6_update_pmtu(skb, net, info, 0, 0); 93 ip6_update_pmtu(skb, net, info, 0, 0);
94 else if (type == NDISC_REDIRECT) 94 else if (type == NDISC_REDIRECT)
95 ip6_redirect(skb, net, 0, 0); 95 ip6_redirect(skb, net, skb->dev->ifindex, 0);
96 96
97 if (!(type & ICMPV6_INFOMSG_MASK)) 97 if (!(type & ICMPV6_INFOMSG_MASK))
98 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) 98 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
@@ -940,6 +940,14 @@ static const struct icmp6_err {
940 .err = ECONNREFUSED, 940 .err = ECONNREFUSED,
941 .fatal = 1, 941 .fatal = 1,
942 }, 942 },
943 { /* POLICY_FAIL */
944 .err = EACCES,
945 .fatal = 1,
946 },
947 { /* REJECT_ROUTE */
948 .err = EACCES,
949 .fatal = 1,
950 },
943}; 951};
944 952
945int icmpv6_err_convert(u8 type, u8 code, int *err) 953int icmpv6_err_convert(u8 type, u8 code, int *err)
@@ -951,7 +959,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
951 switch (type) { 959 switch (type) {
952 case ICMPV6_DEST_UNREACH: 960 case ICMPV6_DEST_UNREACH:
953 fatal = 1; 961 fatal = 1;
954 if (code <= ICMPV6_PORT_UNREACH) { 962 if (code < ARRAY_SIZE(tab_unreach)) {
955 *err = tab_unreach[code].err; 963 *err = tab_unreach[code].err;
956 fatal = tab_unreach[code].fatal; 964 fatal = tab_unreach[code].fatal;
957 } 965 }
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index c4ff5bbb45c4..5bec666aba61 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -425,8 +425,8 @@ out:
425 * node. 425 * node.
426 */ 426 */
427 427
428static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, 428static struct fib6_node *fib6_add_1(struct fib6_node *root,
429 int addrlen, int plen, 429 struct in6_addr *addr, int plen,
430 int offset, int allow_create, 430 int offset, int allow_create,
431 int replace_required) 431 int replace_required)
432{ 432{
@@ -543,7 +543,7 @@ insert_above:
543 but if it is >= plen, the value is ignored in any case. 543 but if it is >= plen, the value is ignored in any case.
544 */ 544 */
545 545
546 bit = __ipv6_addr_diff(addr, &key->addr, addrlen); 546 bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
547 547
548 /* 548 /*
549 * (intermediate)[in] 549 * (intermediate)[in]
@@ -822,12 +822,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
822 if (!allow_create && !replace_required) 822 if (!allow_create && !replace_required)
823 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); 823 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
824 824
825 fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), 825 fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
826 rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), 826 offsetof(struct rt6_info, rt6i_dst), allow_create,
827 allow_create, replace_required); 827 replace_required);
828
829 if (IS_ERR(fn)) { 828 if (IS_ERR(fn)) {
830 err = PTR_ERR(fn); 829 err = PTR_ERR(fn);
830 fn = NULL;
831 goto out; 831 goto out;
832 } 832 }
833 833
@@ -863,7 +863,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
863 /* Now add the first leaf node to new subtree */ 863 /* Now add the first leaf node to new subtree */
864 864
865 sn = fib6_add_1(sfn, &rt->rt6i_src.addr, 865 sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
866 sizeof(struct in6_addr), rt->rt6i_src.plen, 866 rt->rt6i_src.plen,
867 offsetof(struct rt6_info, rt6i_src), 867 offsetof(struct rt6_info, rt6i_src),
868 allow_create, replace_required); 868 allow_create, replace_required);
869 869
@@ -882,7 +882,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
882 fn->subtree = sfn; 882 fn->subtree = sfn;
883 } else { 883 } else {
884 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, 884 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
885 sizeof(struct in6_addr), rt->rt6i_src.plen, 885 rt->rt6i_src.plen,
886 offsetof(struct rt6_info, rt6i_src), 886 offsetof(struct rt6_info, rt6i_src),
887 allow_create, replace_required); 887 allow_create, replace_required);
888 888
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 90747f1973fe..6b26e9feafb9 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -335,6 +335,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
335 dev->rtnl_link_ops = &ip6gre_link_ops; 335 dev->rtnl_link_ops = &ip6gre_link_ops;
336 336
337 nt->dev = dev; 337 nt->dev = dev;
338 nt->net = dev_net(dev);
338 ip6gre_tnl_link_config(nt, 1); 339 ip6gre_tnl_link_config(nt, 1);
339 340
340 if (register_netdevice(dev) < 0) 341 if (register_netdevice(dev) < 0)
@@ -508,8 +509,6 @@ static int ip6gre_rcv(struct sk_buff *skb)
508 goto drop; 509 goto drop;
509 } 510 }
510 511
511 secpath_reset(skb);
512
513 skb->protocol = gre_proto; 512 skb->protocol = gre_proto;
514 /* WCCP version 1 and 2 protocol decoding. 513 /* WCCP version 1 and 2 protocol decoding.
515 * - Change protocol to IP 514 * - Change protocol to IP
@@ -524,7 +523,6 @@ static int ip6gre_rcv(struct sk_buff *skb)
524 skb->mac_header = skb->network_header; 523 skb->mac_header = skb->network_header;
525 __pskb_pull(skb, offset); 524 __pskb_pull(skb, offset);
526 skb_postpull_rcsum(skb, skb_transport_header(skb), offset); 525 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
527 skb->pkt_type = PACKET_HOST;
528 526
529 if (((flags&GRE_CSUM) && csum) || 527 if (((flags&GRE_CSUM) && csum) ||
530 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 528 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
@@ -556,7 +554,7 @@ static int ip6gre_rcv(struct sk_buff *skb)
556 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 554 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
557 } 555 }
558 556
559 __skb_tunnel_rx(skb, tunnel->dev); 557 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
560 558
561 skb_reset_network_header(skb); 559 skb_reset_network_header(skb);
562 560
@@ -693,6 +691,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
693 tunnel->err_count = 0; 691 tunnel->err_count = 0;
694 } 692 }
695 693
694 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
695
696 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; 696 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
697 697
698 if (skb_headroom(skb) < max_headroom || skb_shared(skb) || 698 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
@@ -709,8 +709,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
709 skb = new_skb; 709 skb = new_skb;
710 } 710 }
711 711
712 skb_dst_drop(skb);
713
714 if (fl6->flowi6_mark) { 712 if (fl6->flowi6_mark) {
715 skb_dst_set(skb, dst); 713 skb_dst_set(skb, dst);
716 ndst = NULL; 714 ndst = NULL;
@@ -1260,6 +1258,7 @@ static int ip6gre_tunnel_init(struct net_device *dev)
1260 tunnel = netdev_priv(dev); 1258 tunnel = netdev_priv(dev);
1261 1259
1262 tunnel->dev = dev; 1260 tunnel->dev = dev;
1261 tunnel->net = dev_net(dev);
1263 strcpy(tunnel->parms.name, dev->name); 1262 strcpy(tunnel->parms.name, dev->name);
1264 1263
1265 memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); 1264 memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
@@ -1280,6 +1279,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
1280 struct ip6_tnl *tunnel = netdev_priv(dev); 1279 struct ip6_tnl *tunnel = netdev_priv(dev);
1281 1280
1282 tunnel->dev = dev; 1281 tunnel->dev = dev;
1282 tunnel->net = dev_net(dev);
1283 strcpy(tunnel->parms.name, dev->name); 1283 strcpy(tunnel->parms.name, dev->name);
1284 1284
1285 tunnel->hlen = sizeof(struct ipv6hdr) + 4; 1285 tunnel->hlen = sizeof(struct ipv6hdr) + 4;
@@ -1455,6 +1455,7 @@ static int ip6gre_tap_init(struct net_device *dev)
1455 tunnel = netdev_priv(dev); 1455 tunnel = netdev_priv(dev);
1456 1456
1457 tunnel->dev = dev; 1457 tunnel->dev = dev;
1458 tunnel->net = dev_net(dev);
1458 strcpy(tunnel->parms.name, dev->name); 1459 strcpy(tunnel->parms.name, dev->name);
1459 1460
1460 ip6gre_tnl_link_config(tunnel, 1); 1461 ip6gre_tnl_link_config(tunnel, 1);
@@ -1506,6 +1507,7 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1506 eth_hw_addr_random(dev); 1507 eth_hw_addr_random(dev);
1507 1508
1508 nt->dev = dev; 1509 nt->dev = dev;
1510 nt->net = dev_net(dev);
1509 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); 1511 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1510 1512
1511 /* Can use a lockless transmit, unless we generate output sequences */ 1513 /* Can use a lockless transmit, unless we generate output sequences */
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 2bab2aa59745..302d6fb1ff2b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -44,7 +44,7 @@
44#include <net/ip6_route.h> 44#include <net/ip6_route.h>
45#include <net/addrconf.h> 45#include <net/addrconf.h>
46#include <net/xfrm.h> 46#include <net/xfrm.h>
47 47#include <net/inet_ecn.h>
48 48
49 49
50int ip6_rcv_finish(struct sk_buff *skb) 50int ip6_rcv_finish(struct sk_buff *skb)
@@ -109,6 +109,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
109 if (hdr->version != 6) 109 if (hdr->version != 6)
110 goto err; 110 goto err;
111 111
112 IP6_ADD_STATS_BH(dev_net(dev), idev,
113 IPSTATS_MIB_NOECTPKTS +
114 (ipv6_get_dsfield(hdr) & INET_ECN_MASK),
115 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
112 /* 116 /*
113 * RFC4291 2.5.3 117 * RFC4291 2.5.3
114 * A packet received on an interface with a destination address 118 * A packet received on an interface with a destination address
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index a263b990ee11..d82de7228100 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -91,6 +91,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
91 unsigned int unfrag_ip6hlen; 91 unsigned int unfrag_ip6hlen;
92 u8 *prevhdr; 92 u8 *prevhdr;
93 int offset = 0; 93 int offset = 0;
94 bool tunnel;
94 95
95 if (unlikely(skb_shinfo(skb)->gso_type & 96 if (unlikely(skb_shinfo(skb)->gso_type &
96 ~(SKB_GSO_UDP | 97 ~(SKB_GSO_UDP |
@@ -106,6 +107,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
106 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) 107 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
107 goto out; 108 goto out;
108 109
110 tunnel = skb->encapsulation;
109 ipv6h = ipv6_hdr(skb); 111 ipv6h = ipv6_hdr(skb);
110 __skb_pull(skb, sizeof(*ipv6h)); 112 __skb_pull(skb, sizeof(*ipv6h));
111 segs = ERR_PTR(-EPROTONOSUPPORT); 113 segs = ERR_PTR(-EPROTONOSUPPORT);
@@ -126,7 +128,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
126 ipv6h = ipv6_hdr(skb); 128 ipv6h = ipv6_hdr(skb);
127 ipv6h->payload_len = htons(skb->len - skb->mac_len - 129 ipv6h->payload_len = htons(skb->len - skb->mac_len -
128 sizeof(*ipv6h)); 130 sizeof(*ipv6h));
129 if (proto == IPPROTO_UDP) { 131 if (!tunnel && proto == IPPROTO_UDP) {
130 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); 132 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
131 fptr = (struct frag_hdr *)(skb_network_header(skb) + 133 fptr = (struct frag_hdr *)(skb_network_header(skb) +
132 unfrag_ip6hlen); 134 unfrag_ip6hlen);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e7ceb6c871d1..3a692d529163 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,31 +56,6 @@
56#include <net/checksum.h> 56#include <net/checksum.h>
57#include <linux/mroute6.h> 57#include <linux/mroute6.h>
58 58
59int __ip6_local_out(struct sk_buff *skb)
60{
61 int len;
62
63 len = skb->len - sizeof(struct ipv6hdr);
64 if (len > IPV6_MAXPLEN)
65 len = 0;
66 ipv6_hdr(skb)->payload_len = htons(len);
67
68 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
69 skb_dst(skb)->dev, dst_output);
70}
71
72int ip6_local_out(struct sk_buff *skb)
73{
74 int err;
75
76 err = __ip6_local_out(skb);
77 if (likely(err == 1))
78 err = dst_output(skb);
79
80 return err;
81}
82EXPORT_SYMBOL_GPL(ip6_local_out);
83
84static int ip6_finish_output2(struct sk_buff *skb) 59static int ip6_finish_output2(struct sk_buff *skb)
85{ 60{
86 struct dst_entry *dst = skb_dst(skb); 61 struct dst_entry *dst = skb_dst(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 46ba243605a3..61355f7f4da5 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -41,6 +41,7 @@
41#include <linux/netfilter_ipv6.h> 41#include <linux/netfilter_ipv6.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/hash.h> 43#include <linux/hash.h>
44#include <linux/etherdevice.h>
44 45
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
46#include <linux/atomic.h> 47#include <linux/atomic.h>
@@ -315,6 +316,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
315 316
316 t = netdev_priv(dev); 317 t = netdev_priv(dev);
317 t->parms = *p; 318 t->parms = *p;
319 t->net = dev_net(dev);
318 err = ip6_tnl_create2(dev); 320 err = ip6_tnl_create2(dev);
319 if (err < 0) 321 if (err < 0)
320 goto failed_free; 322 goto failed_free;
@@ -374,7 +376,7 @@ static void
374ip6_tnl_dev_uninit(struct net_device *dev) 376ip6_tnl_dev_uninit(struct net_device *dev)
375{ 377{
376 struct ip6_tnl *t = netdev_priv(dev); 378 struct ip6_tnl *t = netdev_priv(dev);
377 struct net *net = dev_net(dev); 379 struct net *net = t->net;
378 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 380 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
379 381
380 if (dev == ip6n->fb_tnl_dev) 382 if (dev == ip6n->fb_tnl_dev)
@@ -741,7 +743,7 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
741{ 743{
742 struct __ip6_tnl_parm *p = &t->parms; 744 struct __ip6_tnl_parm *p = &t->parms;
743 int ret = 0; 745 int ret = 0;
744 struct net *net = dev_net(t->dev); 746 struct net *net = t->net;
745 747
746 if ((p->flags & IP6_TNL_F_CAP_RCV) || 748 if ((p->flags & IP6_TNL_F_CAP_RCV) ||
747 ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && 749 ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
@@ -800,14 +802,12 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
800 rcu_read_unlock(); 802 rcu_read_unlock();
801 goto discard; 803 goto discard;
802 } 804 }
803 secpath_reset(skb);
804 skb->mac_header = skb->network_header; 805 skb->mac_header = skb->network_header;
805 skb_reset_network_header(skb); 806 skb_reset_network_header(skb);
806 skb->protocol = htons(protocol); 807 skb->protocol = htons(protocol);
807 skb->pkt_type = PACKET_HOST;
808 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 808 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
809 809
810 __skb_tunnel_rx(skb, t->dev); 810 __skb_tunnel_rx(skb, t->dev, t->net);
811 811
812 err = dscp_ecn_decapsulate(t, ipv6h, skb); 812 err = dscp_ecn_decapsulate(t, ipv6h, skb);
813 if (unlikely(err)) { 813 if (unlikely(err)) {
@@ -895,7 +895,7 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
895{ 895{
896 struct __ip6_tnl_parm *p = &t->parms; 896 struct __ip6_tnl_parm *p = &t->parms;
897 int ret = 0; 897 int ret = 0;
898 struct net *net = dev_net(t->dev); 898 struct net *net = t->net;
899 899
900 if (p->flags & IP6_TNL_F_CAP_XMIT) { 900 if (p->flags & IP6_TNL_F_CAP_XMIT) {
901 struct net_device *ldev = NULL; 901 struct net_device *ldev = NULL;
@@ -945,8 +945,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
945 int encap_limit, 945 int encap_limit,
946 __u32 *pmtu) 946 __u32 *pmtu)
947{ 947{
948 struct net *net = dev_net(dev);
949 struct ip6_tnl *t = netdev_priv(dev); 948 struct ip6_tnl *t = netdev_priv(dev);
949 struct net *net = t->net;
950 struct net_device_stats *stats = &t->dev->stats; 950 struct net_device_stats *stats = &t->dev->stats;
951 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 951 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
952 struct ipv6_tel_txoption opt; 952 struct ipv6_tel_txoption opt;
@@ -996,6 +996,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
996 goto tx_err_dst_release; 996 goto tx_err_dst_release;
997 } 997 }
998 998
999 skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
1000
999 /* 1001 /*
1000 * Okay, now see if we can stuff it in the buffer as-is. 1002 * Okay, now see if we can stuff it in the buffer as-is.
1001 */ 1003 */
@@ -1013,7 +1015,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1013 consume_skb(skb); 1015 consume_skb(skb);
1014 skb = new_skb; 1016 skb = new_skb;
1015 } 1017 }
1016 skb_dst_drop(skb);
1017 if (fl6->flowi6_mark) { 1018 if (fl6->flowi6_mark) {
1018 skb_dst_set(skb, dst); 1019 skb_dst_set(skb, dst);
1019 ndst = NULL; 1020 ndst = NULL;
@@ -1208,7 +1209,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1208 int strict = (ipv6_addr_type(&p->raddr) & 1209 int strict = (ipv6_addr_type(&p->raddr) &
1209 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); 1210 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1210 1211
1211 struct rt6_info *rt = rt6_lookup(dev_net(dev), 1212 struct rt6_info *rt = rt6_lookup(t->net,
1212 &p->raddr, &p->laddr, 1213 &p->raddr, &p->laddr,
1213 p->link, strict); 1214 p->link, strict);
1214 1215
@@ -1257,7 +1258,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1257 1258
1258static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) 1259static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
1259{ 1260{
1260 struct net *net = dev_net(t->dev); 1261 struct net *net = t->net;
1261 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1262 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1262 int err; 1263 int err;
1263 1264
@@ -1469,8 +1470,10 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
1469 dev->mtu-=8; 1470 dev->mtu-=8;
1470 dev->flags |= IFF_NOARP; 1471 dev->flags |= IFF_NOARP;
1471 dev->addr_len = sizeof(struct in6_addr); 1472 dev->addr_len = sizeof(struct in6_addr);
1472 dev->features |= NETIF_F_NETNS_LOCAL;
1473 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 1473 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1474 /* This perm addr will be used as interface identifier by IPv6 */
1475 dev->addr_assign_type = NET_ADDR_RANDOM;
1476 eth_random_addr(dev->perm_addr);
1474} 1477}
1475 1478
1476 1479
@@ -1485,6 +1488,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
1485 struct ip6_tnl *t = netdev_priv(dev); 1488 struct ip6_tnl *t = netdev_priv(dev);
1486 1489
1487 t->dev = dev; 1490 t->dev = dev;
1491 t->net = dev_net(dev);
1488 dev->tstats = alloc_percpu(struct pcpu_tstats); 1492 dev->tstats = alloc_percpu(struct pcpu_tstats);
1489 if (!dev->tstats) 1493 if (!dev->tstats)
1490 return -ENOMEM; 1494 return -ENOMEM;
@@ -1602,9 +1606,9 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
1602static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], 1606static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
1603 struct nlattr *data[]) 1607 struct nlattr *data[])
1604{ 1608{
1605 struct ip6_tnl *t; 1609 struct ip6_tnl *t = netdev_priv(dev);
1606 struct __ip6_tnl_parm p; 1610 struct __ip6_tnl_parm p;
1607 struct net *net = dev_net(dev); 1611 struct net *net = t->net;
1608 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1612 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1609 1613
1610 if (dev == ip6n->fb_tnl_dev) 1614 if (dev == ip6n->fb_tnl_dev)
@@ -1705,14 +1709,24 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
1705 1709
1706static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) 1710static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
1707{ 1711{
1712 struct net *net = dev_net(ip6n->fb_tnl_dev);
1713 struct net_device *dev, *aux;
1708 int h; 1714 int h;
1709 struct ip6_tnl *t; 1715 struct ip6_tnl *t;
1710 LIST_HEAD(list); 1716 LIST_HEAD(list);
1711 1717
1718 for_each_netdev_safe(net, dev, aux)
1719 if (dev->rtnl_link_ops == &ip6_link_ops)
1720 unregister_netdevice_queue(dev, &list);
1721
1712 for (h = 0; h < HASH_SIZE; h++) { 1722 for (h = 0; h < HASH_SIZE; h++) {
1713 t = rtnl_dereference(ip6n->tnls_r_l[h]); 1723 t = rtnl_dereference(ip6n->tnls_r_l[h]);
1714 while (t != NULL) { 1724 while (t != NULL) {
1715 unregister_netdevice_queue(t->dev, &list); 1725 /* If dev is in the same netns, it has already
1726 * been added to the list by the previous loop.
1727 */
1728 if (!net_eq(dev_net(t->dev), net))
1729 unregister_netdevice_queue(t->dev, &list);
1716 t = rtnl_dereference(t->next); 1730 t = rtnl_dereference(t->next);
1717 } 1731 }
1718 } 1732 }
@@ -1738,6 +1752,10 @@ static int __net_init ip6_tnl_init_net(struct net *net)
1738 if (!ip6n->fb_tnl_dev) 1752 if (!ip6n->fb_tnl_dev)
1739 goto err_alloc_dev; 1753 goto err_alloc_dev;
1740 dev_net_set(ip6n->fb_tnl_dev, net); 1754 dev_net_set(ip6n->fb_tnl_dev, net);
1755 /* FB netdevice is special: we have one, and only one per netns.
1756 * Allowing to move it to another netns is clearly unsafe.
1757 */
1758 ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
1741 1759
1742 err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); 1760 err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1743 if (err < 0) 1761 if (err < 0)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 03986d31fa41..f365310bfcca 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -110,8 +110,8 @@ static struct kmem_cache *mrt_cachep __read_mostly;
110static struct mr6_table *ip6mr_new_table(struct net *net, u32 id); 110static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
111static void ip6mr_free_table(struct mr6_table *mrt); 111static void ip6mr_free_table(struct mr6_table *mrt);
112 112
113static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, 113static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
114 struct sk_buff *skb, struct mfc6_cache *cache); 114 struct sk_buff *skb, struct mfc6_cache *cache);
115static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, 115static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
116 mifi_t mifi, int assert); 116 mifi_t mifi, int assert);
117static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, 117static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
@@ -672,9 +672,8 @@ static int pim6_rcv(struct sk_buff *skb)
672 skb_reset_network_header(skb); 672 skb_reset_network_header(skb);
673 skb->protocol = htons(ETH_P_IPV6); 673 skb->protocol = htons(ETH_P_IPV6);
674 skb->ip_summed = CHECKSUM_NONE; 674 skb->ip_summed = CHECKSUM_NONE;
675 skb->pkt_type = PACKET_HOST;
676 675
677 skb_tunnel_rx(skb, reg_dev); 676 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
678 677
679 netif_rx(skb); 678 netif_rx(skb);
680 679
@@ -2074,8 +2073,8 @@ static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2074 return ct; 2073 return ct;
2075} 2074}
2076 2075
2077static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, 2076static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2078 struct sk_buff *skb, struct mfc6_cache *cache) 2077 struct sk_buff *skb, struct mfc6_cache *cache)
2079{ 2078{
2080 int psend = -1; 2079 int psend = -1;
2081 int vif, ct; 2080 int vif, ct;
@@ -2156,12 +2155,11 @@ forward:
2156last_forward: 2155last_forward:
2157 if (psend != -1) { 2156 if (psend != -1) {
2158 ip6mr_forward2(net, mrt, skb, cache, psend); 2157 ip6mr_forward2(net, mrt, skb, cache, psend);
2159 return 0; 2158 return;
2160 } 2159 }
2161 2160
2162dont_forward: 2161dont_forward:
2163 kfree_skb(skb); 2162 kfree_skb(skb);
2164 return 0;
2165} 2163}
2166 2164
2167 2165
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 7af5aee75d98..5636a912074a 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -76,7 +76,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
76 return; 76 return;
77 77
78 if (type == NDISC_REDIRECT) 78 if (type == NDISC_REDIRECT)
79 ip6_redirect(skb, net, 0, 0); 79 ip6_redirect(skb, net, skb->dev->ifindex, 0);
80 else 80 else
81 ip6_update_pmtu(skb, net, info, 0, 0); 81 ip6_update_pmtu(skb, net, info, 0, 0);
82 xfrm_state_put(x); 82 xfrm_state_put(x);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 99cd65c715cd..096cd67b737c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -44,6 +44,7 @@
44#include <linux/proc_fs.h> 44#include <linux/proc_fs.h>
45#include <linux/seq_file.h> 45#include <linux/seq_file.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <linux/pkt_sched.h>
47#include <net/mld.h> 48#include <net/mld.h>
48 49
49#include <linux/netfilter.h> 50#include <linux/netfilter.h>
@@ -94,6 +95,7 @@ static void mld_ifc_event(struct inet6_dev *idev);
94static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); 95static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
95static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr); 96static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr);
96static void mld_clear_delrec(struct inet6_dev *idev); 97static void mld_clear_delrec(struct inet6_dev *idev);
98static bool mld_in_v1_mode(const struct inet6_dev *idev);
97static int sf_setstate(struct ifmcaddr6 *pmc); 99static int sf_setstate(struct ifmcaddr6 *pmc);
98static void sf_markstate(struct ifmcaddr6 *pmc); 100static void sf_markstate(struct ifmcaddr6 *pmc);
99static void ip6_mc_clear_src(struct ifmcaddr6 *pmc); 101static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
@@ -106,14 +108,15 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
106static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, 108static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
107 struct inet6_dev *idev); 109 struct inet6_dev *idev);
108 110
109
110#define IGMP6_UNSOLICITED_IVAL (10*HZ)
111#define MLD_QRV_DEFAULT 2 111#define MLD_QRV_DEFAULT 2
112/* RFC3810, 9.2. Query Interval */
113#define MLD_QI_DEFAULT (125 * HZ)
114/* RFC3810, 9.3. Query Response Interval */
115#define MLD_QRI_DEFAULT (10 * HZ)
112 116
113#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \ 117/* RFC3810, 8.1 Query Version Distinctions */
114 (idev)->cnf.force_mld_version == 1 || \ 118#define MLD_V1_QUERY_LEN 24
115 ((idev)->mc_v1_seen && \ 119#define MLD_V2_QUERY_LEN_MIN 28
116 time_before(jiffies, (idev)->mc_v1_seen)))
117 120
118#define IPV6_MLD_MAX_MSF 64 121#define IPV6_MLD_MAX_MSF 64
119 122
@@ -128,6 +131,18 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
128 pmc != NULL; \ 131 pmc != NULL; \
129 pmc = rcu_dereference(pmc->next)) 132 pmc = rcu_dereference(pmc->next))
130 133
134static int unsolicited_report_interval(struct inet6_dev *idev)
135{
136 int iv;
137
138 if (mld_in_v1_mode(idev))
139 iv = idev->cnf.mldv1_unsolicited_report_interval;
140 else
141 iv = idev->cnf.mldv2_unsolicited_report_interval;
142
143 return iv > 0 ? iv : 1;
144}
145
131int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) 146int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
132{ 147{
133 struct net_device *dev = NULL; 148 struct net_device *dev = NULL;
@@ -676,7 +691,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
676 if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT)) 691 if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
677 return; 692 return;
678 693
679 if (MLD_V1_SEEN(mc->idev)) { 694 if (mld_in_v1_mode(mc->idev)) {
680 igmp6_join_group(mc); 695 igmp6_join_group(mc);
681 return; 696 return;
682 } 697 }
@@ -984,29 +999,49 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
984 999
985static void mld_gq_start_timer(struct inet6_dev *idev) 1000static void mld_gq_start_timer(struct inet6_dev *idev)
986{ 1001{
987 int tv = net_random() % idev->mc_maxdelay; 1002 unsigned long tv = net_random() % idev->mc_maxdelay;
988 1003
989 idev->mc_gq_running = 1; 1004 idev->mc_gq_running = 1;
990 if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2)) 1005 if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
991 in6_dev_hold(idev); 1006 in6_dev_hold(idev);
992} 1007}
993 1008
994static void mld_ifc_start_timer(struct inet6_dev *idev, int delay) 1009static void mld_gq_stop_timer(struct inet6_dev *idev)
995{ 1010{
996 int tv = net_random() % delay; 1011 idev->mc_gq_running = 0;
1012 if (del_timer(&idev->mc_gq_timer))
1013 __in6_dev_put(idev);
1014}
1015
1016static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
1017{
1018 unsigned long tv = net_random() % delay;
997 1019
998 if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2)) 1020 if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
999 in6_dev_hold(idev); 1021 in6_dev_hold(idev);
1000} 1022}
1001 1023
1002static void mld_dad_start_timer(struct inet6_dev *idev, int delay) 1024static void mld_ifc_stop_timer(struct inet6_dev *idev)
1003{ 1025{
1004 int tv = net_random() % delay; 1026 idev->mc_ifc_count = 0;
1027 if (del_timer(&idev->mc_ifc_timer))
1028 __in6_dev_put(idev);
1029}
1030
1031static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
1032{
1033 unsigned long tv = net_random() % delay;
1005 1034
1006 if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2)) 1035 if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
1007 in6_dev_hold(idev); 1036 in6_dev_hold(idev);
1008} 1037}
1009 1038
1039static void mld_dad_stop_timer(struct inet6_dev *idev)
1040{
1041 if (del_timer(&idev->mc_dad_timer))
1042 __in6_dev_put(idev);
1043}
1044
1010/* 1045/*
1011 * IGMP handling (alias multicast ICMPv6 messages) 1046 * IGMP handling (alias multicast ICMPv6 messages)
1012 */ 1047 */
@@ -1025,12 +1060,9 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
1025 delay = ma->mca_timer.expires - jiffies; 1060 delay = ma->mca_timer.expires - jiffies;
1026 } 1061 }
1027 1062
1028 if (delay >= resptime) { 1063 if (delay >= resptime)
1029 if (resptime) 1064 delay = net_random() % resptime;
1030 delay = net_random() % resptime; 1065
1031 else
1032 delay = 1;
1033 }
1034 ma->mca_timer.expires = jiffies + delay; 1066 ma->mca_timer.expires = jiffies + delay;
1035 if (!mod_timer(&ma->mca_timer, jiffies + delay)) 1067 if (!mod_timer(&ma->mca_timer, jiffies + delay))
1036 atomic_inc(&ma->mca_refcnt); 1068 atomic_inc(&ma->mca_refcnt);
@@ -1097,6 +1129,158 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1097 return true; 1129 return true;
1098} 1130}
1099 1131
1132static int mld_force_mld_version(const struct inet6_dev *idev)
1133{
1134 /* Normally, both are 0 here. If enforcement to a particular is
1135 * being used, individual device enforcement will have a lower
1136 * precedence over 'all' device (.../conf/all/force_mld_version).
1137 */
1138
1139 if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0)
1140 return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version;
1141 else
1142 return idev->cnf.force_mld_version;
1143}
1144
1145static bool mld_in_v2_mode_only(const struct inet6_dev *idev)
1146{
1147 return mld_force_mld_version(idev) == 2;
1148}
1149
1150static bool mld_in_v1_mode_only(const struct inet6_dev *idev)
1151{
1152 return mld_force_mld_version(idev) == 1;
1153}
1154
1155static bool mld_in_v1_mode(const struct inet6_dev *idev)
1156{
1157 if (mld_in_v2_mode_only(idev))
1158 return false;
1159 if (mld_in_v1_mode_only(idev))
1160 return true;
1161 if (idev->mc_v1_seen && time_before(jiffies, idev->mc_v1_seen))
1162 return true;
1163
1164 return false;
1165}
1166
1167static void mld_set_v1_mode(struct inet6_dev *idev)
1168{
1169 /* RFC3810, relevant sections:
1170 * - 9.1. Robustness Variable
1171 * - 9.2. Query Interval
1172 * - 9.3. Query Response Interval
1173 * - 9.12. Older Version Querier Present Timeout
1174 */
1175 unsigned long switchback;
1176
1177 switchback = (idev->mc_qrv * idev->mc_qi) + idev->mc_qri;
1178
1179 idev->mc_v1_seen = jiffies + switchback;
1180}
1181
1182static void mld_update_qrv(struct inet6_dev *idev,
1183 const struct mld2_query *mlh2)
1184{
1185 /* RFC3810, relevant sections:
1186 * - 5.1.8. QRV (Querier's Robustness Variable)
1187 * - 9.1. Robustness Variable
1188 */
1189
1190 /* The value of the Robustness Variable MUST NOT be zero,
1191 * and SHOULD NOT be one. Catch this here if we ever run
1192 * into such a case in future.
1193 */
1194 WARN_ON(idev->mc_qrv == 0);
1195
1196 if (mlh2->mld2q_qrv > 0)
1197 idev->mc_qrv = mlh2->mld2q_qrv;
1198
1199 if (unlikely(idev->mc_qrv < 2)) {
1200 net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n",
1201 idev->mc_qrv, MLD_QRV_DEFAULT);
1202 idev->mc_qrv = MLD_QRV_DEFAULT;
1203 }
1204}
1205
1206static void mld_update_qi(struct inet6_dev *idev,
1207 const struct mld2_query *mlh2)
1208{
1209 /* RFC3810, relevant sections:
1210 * - 5.1.9. QQIC (Querier's Query Interval Code)
1211 * - 9.2. Query Interval
1212 * - 9.12. Older Version Querier Present Timeout
1213 * (the [Query Interval] in the last Query received)
1214 */
1215 unsigned long mc_qqi;
1216
1217 if (mlh2->mld2q_qqic < 128) {
1218 mc_qqi = mlh2->mld2q_qqic;
1219 } else {
1220 unsigned long mc_man, mc_exp;
1221
1222 mc_exp = MLDV2_QQIC_EXP(mlh2->mld2q_qqic);
1223 mc_man = MLDV2_QQIC_MAN(mlh2->mld2q_qqic);
1224
1225 mc_qqi = (mc_man | 0x10) << (mc_exp + 3);
1226 }
1227
1228 idev->mc_qi = mc_qqi * HZ;
1229}
1230
1231static void mld_update_qri(struct inet6_dev *idev,
1232 const struct mld2_query *mlh2)
1233{
1234 /* RFC3810, relevant sections:
1235 * - 5.1.3. Maximum Response Code
1236 * - 9.3. Query Response Interval
1237 */
1238 idev->mc_qri = msecs_to_jiffies(mldv2_mrc(mlh2));
1239}
1240
1241static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
1242 unsigned long *max_delay)
1243{
1244 unsigned long mldv1_md;
1245
1246 /* Ignore v1 queries */
1247 if (mld_in_v2_mode_only(idev))
1248 return -EINVAL;
1249
1250 /* MLDv1 router present */
1251 mldv1_md = ntohs(mld->mld_maxdelay);
1252 *max_delay = max(msecs_to_jiffies(mldv1_md), 1UL);
1253
1254 mld_set_v1_mode(idev);
1255
1256 /* cancel MLDv2 report timer */
1257 mld_gq_stop_timer(idev);
1258 /* cancel the interface change timer */
1259 mld_ifc_stop_timer(idev);
1260 /* clear deleted report items */
1261 mld_clear_delrec(idev);
1262
1263 return 0;
1264}
1265
1266static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
1267 unsigned long *max_delay)
1268{
1269 /* hosts need to stay in MLDv1 mode, discard MLDv2 queries */
1270 if (mld_in_v1_mode(idev))
1271 return -EINVAL;
1272
1273 *max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
1274
1275 mld_update_qrv(idev, mld);
1276 mld_update_qi(idev, mld);
1277 mld_update_qri(idev, mld);
1278
1279 idev->mc_maxdelay = *max_delay;
1280
1281 return 0;
1282}
1283
1100/* called with rcu_read_lock() */ 1284/* called with rcu_read_lock() */
1101int igmp6_event_query(struct sk_buff *skb) 1285int igmp6_event_query(struct sk_buff *skb)
1102{ 1286{
@@ -1108,7 +1292,7 @@ int igmp6_event_query(struct sk_buff *skb)
1108 struct mld_msg *mld; 1292 struct mld_msg *mld;
1109 int group_type; 1293 int group_type;
1110 int mark = 0; 1294 int mark = 0;
1111 int len; 1295 int len, err;
1112 1296
1113 if (!pskb_may_pull(skb, sizeof(struct in6_addr))) 1297 if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
1114 return -EINVAL; 1298 return -EINVAL;
@@ -1122,7 +1306,6 @@ int igmp6_event_query(struct sk_buff *skb)
1122 return -EINVAL; 1306 return -EINVAL;
1123 1307
1124 idev = __in6_dev_get(skb->dev); 1308 idev = __in6_dev_get(skb->dev);
1125
1126 if (idev == NULL) 1309 if (idev == NULL)
1127 return 0; 1310 return 0;
1128 1311
@@ -1134,35 +1317,23 @@ int igmp6_event_query(struct sk_buff *skb)
1134 !(group_type&IPV6_ADDR_MULTICAST)) 1317 !(group_type&IPV6_ADDR_MULTICAST))
1135 return -EINVAL; 1318 return -EINVAL;
1136 1319
1137 if (len == 24) { 1320 if (len == MLD_V1_QUERY_LEN) {
1138 int switchback; 1321 err = mld_process_v1(idev, mld, &max_delay);
1139 /* MLDv1 router present */ 1322 if (err < 0)
1140 1323 return err;
1141 /* Translate milliseconds to jiffies */ 1324 } else if (len >= MLD_V2_QUERY_LEN_MIN) {
1142 max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000;
1143
1144 switchback = (idev->mc_qrv + 1) * max_delay;
1145 idev->mc_v1_seen = jiffies + switchback;
1146
1147 /* cancel the interface change timer */
1148 idev->mc_ifc_count = 0;
1149 if (del_timer(&idev->mc_ifc_timer))
1150 __in6_dev_put(idev);
1151 /* clear deleted report items */
1152 mld_clear_delrec(idev);
1153 } else if (len >= 28) {
1154 int srcs_offset = sizeof(struct mld2_query) - 1325 int srcs_offset = sizeof(struct mld2_query) -
1155 sizeof(struct icmp6hdr); 1326 sizeof(struct icmp6hdr);
1327
1156 if (!pskb_may_pull(skb, srcs_offset)) 1328 if (!pskb_may_pull(skb, srcs_offset))
1157 return -EINVAL; 1329 return -EINVAL;
1158 1330
1159 mlh2 = (struct mld2_query *)skb_transport_header(skb); 1331 mlh2 = (struct mld2_query *)skb_transport_header(skb);
1160 max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; 1332
1161 if (!max_delay) 1333 err = mld_process_v2(idev, mlh2, &max_delay);
1162 max_delay = 1; 1334 if (err < 0)
1163 idev->mc_maxdelay = max_delay; 1335 return err;
1164 if (mlh2->mld2q_qrv) 1336
1165 idev->mc_qrv = mlh2->mld2q_qrv;
1166 if (group_type == IPV6_ADDR_ANY) { /* general query */ 1337 if (group_type == IPV6_ADDR_ANY) { /* general query */
1167 if (mlh2->mld2q_nsrcs) 1338 if (mlh2->mld2q_nsrcs)
1168 return -EINVAL; /* no sources allowed */ 1339 return -EINVAL; /* no sources allowed */
@@ -1376,6 +1547,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size)
1376 if (!skb) 1547 if (!skb)
1377 return NULL; 1548 return NULL;
1378 1549
1550 skb->priority = TC_PRIO_CONTROL;
1379 skb_reserve(skb, hlen); 1551 skb_reserve(skb, hlen);
1380 1552
1381 if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { 1553 if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
@@ -1769,7 +1941,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1769 rcu_read_unlock(); 1941 rcu_read_unlock();
1770 return; 1942 return;
1771 } 1943 }
1772 1944 skb->priority = TC_PRIO_CONTROL;
1773 skb_reserve(skb, hlen); 1945 skb_reserve(skb, hlen);
1774 1946
1775 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { 1947 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
@@ -1827,7 +1999,7 @@ err_out:
1827 1999
1828static void mld_resend_report(struct inet6_dev *idev) 2000static void mld_resend_report(struct inet6_dev *idev)
1829{ 2001{
1830 if (MLD_V1_SEEN(idev)) { 2002 if (mld_in_v1_mode(idev)) {
1831 struct ifmcaddr6 *mcaddr; 2003 struct ifmcaddr6 *mcaddr;
1832 read_lock_bh(&idev->lock); 2004 read_lock_bh(&idev->lock);
1833 for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) { 2005 for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) {
@@ -1891,7 +2063,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
1891 else 2063 else
1892 pmc->mca_sources = psf->sf_next; 2064 pmc->mca_sources = psf->sf_next;
1893 if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) && 2065 if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
1894 !MLD_V1_SEEN(idev)) { 2066 !mld_in_v1_mode(idev)) {
1895 psf->sf_crcount = idev->mc_qrv; 2067 psf->sf_crcount = idev->mc_qrv;
1896 psf->sf_next = pmc->mca_tomb; 2068 psf->sf_next = pmc->mca_tomb;
1897 pmc->mca_tomb = psf; 2069 pmc->mca_tomb = psf;
@@ -2156,7 +2328,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
2156 2328
2157 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); 2329 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
2158 2330
2159 delay = net_random() % IGMP6_UNSOLICITED_IVAL; 2331 delay = net_random() % unsolicited_report_interval(ma->idev);
2160 2332
2161 spin_lock_bh(&ma->mca_lock); 2333 spin_lock_bh(&ma->mca_lock);
2162 if (del_timer(&ma->mca_timer)) { 2334 if (del_timer(&ma->mca_timer)) {
@@ -2191,7 +2363,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
2191 2363
2192static void igmp6_leave_group(struct ifmcaddr6 *ma) 2364static void igmp6_leave_group(struct ifmcaddr6 *ma)
2193{ 2365{
2194 if (MLD_V1_SEEN(ma->idev)) { 2366 if (mld_in_v1_mode(ma->idev)) {
2195 if (ma->mca_flags & MAF_LAST_REPORTER) 2367 if (ma->mca_flags & MAF_LAST_REPORTER)
2196 igmp6_send(&ma->mca_addr, ma->idev->dev, 2368 igmp6_send(&ma->mca_addr, ma->idev->dev,
2197 ICMPV6_MGM_REDUCTION); 2369 ICMPV6_MGM_REDUCTION);
@@ -2225,7 +2397,7 @@ static void mld_ifc_timer_expire(unsigned long data)
2225 2397
2226static void mld_ifc_event(struct inet6_dev *idev) 2398static void mld_ifc_event(struct inet6_dev *idev)
2227{ 2399{
2228 if (MLD_V1_SEEN(idev)) 2400 if (mld_in_v1_mode(idev))
2229 return; 2401 return;
2230 idev->mc_ifc_count = idev->mc_qrv; 2402 idev->mc_ifc_count = idev->mc_qrv;
2231 mld_ifc_start_timer(idev, 1); 2403 mld_ifc_start_timer(idev, 1);
@@ -2236,7 +2408,7 @@ static void igmp6_timer_handler(unsigned long data)
2236{ 2408{
2237 struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; 2409 struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
2238 2410
2239 if (MLD_V1_SEEN(ma->idev)) 2411 if (mld_in_v1_mode(ma->idev))
2240 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); 2412 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
2241 else 2413 else
2242 mld_send_report(ma->idev, ma); 2414 mld_send_report(ma->idev, ma);
@@ -2276,14 +2448,9 @@ void ipv6_mc_down(struct inet6_dev *idev)
2276 /* Withdraw multicast list */ 2448 /* Withdraw multicast list */
2277 2449
2278 read_lock_bh(&idev->lock); 2450 read_lock_bh(&idev->lock);
2279 idev->mc_ifc_count = 0; 2451 mld_ifc_stop_timer(idev);
2280 if (del_timer(&idev->mc_ifc_timer)) 2452 mld_gq_stop_timer(idev);
2281 __in6_dev_put(idev); 2453 mld_dad_stop_timer(idev);
2282 idev->mc_gq_running = 0;
2283 if (del_timer(&idev->mc_gq_timer))
2284 __in6_dev_put(idev);
2285 if (del_timer(&idev->mc_dad_timer))
2286 __in6_dev_put(idev);
2287 2454
2288 for (i = idev->mc_list; i; i=i->next) 2455 for (i = idev->mc_list; i; i=i->next)
2289 igmp6_group_dropped(i); 2456 igmp6_group_dropped(i);
@@ -2322,8 +2489,12 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
2322 (unsigned long)idev); 2489 (unsigned long)idev);
2323 setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire, 2490 setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire,
2324 (unsigned long)idev); 2491 (unsigned long)idev);
2492
2325 idev->mc_qrv = MLD_QRV_DEFAULT; 2493 idev->mc_qrv = MLD_QRV_DEFAULT;
2326 idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; 2494 idev->mc_qi = MLD_QI_DEFAULT;
2495 idev->mc_qri = MLD_QRI_DEFAULT;
2496
2497 idev->mc_maxdelay = unsolicited_report_interval(idev);
2327 idev->mc_v1_seen = 0; 2498 idev->mc_v1_seen = 0;
2328 write_unlock_bh(&idev->lock); 2499 write_unlock_bh(&idev->lock);
2329} 2500}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 04d31c2fbef1..f8a55ff1971b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -372,14 +372,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
372 int tlen = dev->needed_tailroom; 372 int tlen = dev->needed_tailroom;
373 struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; 373 struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
374 struct sk_buff *skb; 374 struct sk_buff *skb;
375 int err;
376 375
377 skb = sock_alloc_send_skb(sk, 376 skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
378 hlen + sizeof(struct ipv6hdr) + len + tlen,
379 1, &err);
380 if (!skb) { 377 if (!skb) {
381 ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", 378 ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
382 __func__, err); 379 __func__);
383 return NULL; 380 return NULL;
384 } 381 }
385 382
@@ -389,6 +386,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
389 skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); 386 skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
390 skb_reset_transport_header(skb); 387 skb_reset_transport_header(skb);
391 388
389 /* Manually assign socket ownership as we avoid calling
390 * sock_alloc_send_pskb() to bypass wmem buffer limits
391 */
392 skb_set_owner_w(skb, sk);
393
392 return skb; 394 return skb;
393} 395}
394 396
@@ -428,7 +430,6 @@ static void ndisc_send_skb(struct sk_buff *skb,
428 type = icmp6h->icmp6_type; 430 type = icmp6h->icmp6_type;
429 431
430 if (!dst) { 432 if (!dst) {
431 struct sock *sk = net->ipv6.ndisc_sk;
432 struct flowi6 fl6; 433 struct flowi6 fl6;
433 434
434 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); 435 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
@@ -462,10 +463,10 @@ static void ndisc_send_skb(struct sk_buff *skb,
462 rcu_read_unlock(); 463 rcu_read_unlock();
463} 464}
464 465
465static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, 466void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
466 const struct in6_addr *daddr, 467 const struct in6_addr *daddr,
467 const struct in6_addr *solicited_addr, 468 const struct in6_addr *solicited_addr,
468 bool router, bool solicited, bool override, bool inc_opt) 469 bool router, bool solicited, bool override, bool inc_opt)
469{ 470{
470 struct sk_buff *skb; 471 struct sk_buff *skb;
471 struct in6_addr tmpaddr; 472 struct in6_addr tmpaddr;
@@ -663,9 +664,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
663 } 664 }
664 ndisc_send_ns(dev, neigh, target, target, saddr); 665 ndisc_send_ns(dev, neigh, target, target, saddr);
665 } else if ((probes -= neigh->parms->app_probes) < 0) { 666 } else if ((probes -= neigh->parms->app_probes) < 0) {
666#ifdef CONFIG_ARPD
667 neigh_app_ns(neigh); 667 neigh_app_ns(neigh);
668#endif
669 } else { 668 } else {
670 addrconf_addr_solict_mult(target, &mcaddr); 669 addrconf_addr_solict_mult(target, &mcaddr);
671 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr); 670 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
@@ -1370,7 +1369,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1370 return; 1369 return;
1371 1370
1372 if (!ndopts.nd_opts_rh) { 1371 if (!ndopts.nd_opts_rh) {
1373 ip6_redirect_no_header(skb, dev_net(skb->dev), 0, 0); 1372 ip6_redirect_no_header(skb, dev_net(skb->dev),
1373 skb->dev->ifindex, 0);
1374 return; 1374 return;
1375 } 1375 }
1376 1376
@@ -1519,10 +1519,27 @@ static void pndisc_redo(struct sk_buff *skb)
1519 kfree_skb(skb); 1519 kfree_skb(skb);
1520} 1520}
1521 1521
1522static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
1523{
1524 struct inet6_dev *idev = __in6_dev_get(skb->dev);
1525
1526 if (!idev)
1527 return true;
1528 if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
1529 idev->cnf.suppress_frag_ndisc) {
1530 net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
1531 return true;
1532 }
1533 return false;
1534}
1535
1522int ndisc_rcv(struct sk_buff *skb) 1536int ndisc_rcv(struct sk_buff *skb)
1523{ 1537{
1524 struct nd_msg *msg; 1538 struct nd_msg *msg;
1525 1539
1540 if (ndisc_suppress_frag_ndisc(skb))
1541 return 0;
1542
1526 if (skb_linearize(skb)) 1543 if (skb_linearize(skb))
1527 return 0; 1544 return 0;
1528 1545
@@ -1710,24 +1727,28 @@ int __init ndisc_init(void)
1710 if (err) 1727 if (err)
1711 goto out_unregister_pernet; 1728 goto out_unregister_pernet;
1712#endif 1729#endif
1713 err = register_netdevice_notifier(&ndisc_netdev_notifier);
1714 if (err)
1715 goto out_unregister_sysctl;
1716out: 1730out:
1717 return err; 1731 return err;
1718 1732
1719out_unregister_sysctl:
1720#ifdef CONFIG_SYSCTL 1733#ifdef CONFIG_SYSCTL
1721 neigh_sysctl_unregister(&nd_tbl.parms);
1722out_unregister_pernet: 1734out_unregister_pernet:
1723#endif
1724 unregister_pernet_subsys(&ndisc_net_ops); 1735 unregister_pernet_subsys(&ndisc_net_ops);
1725 goto out; 1736 goto out;
1737#endif
1726} 1738}
1727 1739
1728void ndisc_cleanup(void) 1740int __init ndisc_late_init(void)
1741{
1742 return register_netdevice_notifier(&ndisc_netdev_notifier);
1743}
1744
1745void ndisc_late_cleanup(void)
1729{ 1746{
1730 unregister_netdevice_notifier(&ndisc_netdev_notifier); 1747 unregister_netdevice_notifier(&ndisc_netdev_notifier);
1748}
1749
1750void ndisc_cleanup(void)
1751{
1731#ifdef CONFIG_SYSCTL 1752#ifdef CONFIG_SYSCTL
1732 neigh_sysctl_unregister(&nd_tbl.parms); 1753 neigh_sysctl_unregister(&nd_tbl.parms);
1733#endif 1754#endif
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4433ab40e7de..a7f842b29b67 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -153,6 +153,19 @@ config IP6_NF_TARGET_REJECT
153 153
154 To compile it as a module, choose M here. If unsure, say N. 154 To compile it as a module, choose M here. If unsure, say N.
155 155
156config IP6_NF_TARGET_SYNPROXY
157 tristate "SYNPROXY target support"
158 depends on NF_CONNTRACK && NETFILTER_ADVANCED
159 select NETFILTER_SYNPROXY
160 select SYN_COOKIES
161 help
162 The SYNPROXY target allows you to intercept TCP connections and
163 establish them using syncookies before they are passed on to the
164 server. This allows to avoid conntrack and server resource usage
165 during SYN-flood attacks.
166
167 To compile it as a module, choose M here. If unsure, say N.
168
156config IP6_NF_MANGLE 169config IP6_NF_MANGLE
157 tristate "Packet mangling" 170 tristate "Packet mangling"
158 default m if NETFILTER_ADVANCED=n 171 default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2d11fcc2cf3c..2b53738f798c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
14nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o 14nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
15 15
16# l3 independent conntrack 16# l3 independent conntrack
17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o 17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
18 18
19nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o 19nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
20obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o 20obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
@@ -37,3 +37,4 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
37obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o 37obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
38obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o 38obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
39obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o 39obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
40obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 47bff6107519..3e4e92d5e157 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -76,7 +76,7 @@ static int masq_device_event(struct notifier_block *this,
76 76
77 if (event == NETDEV_DOWN) 77 if (event == NETDEV_DOWN)
78 nf_ct_iterate_cleanup(net, device_cmp, 78 nf_ct_iterate_cleanup(net, device_cmp,
79 (void *)(long)dev->ifindex); 79 (void *)(long)dev->ifindex, 0, 0);
80 80
81 return NOTIFY_DONE; 81 return NOTIFY_DONE;
82} 82}
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 70f9abc0efe9..56eef30ee5f6 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -169,7 +169,25 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
169 169
170 nf_ct_attach(nskb, oldskb); 170 nf_ct_attach(nskb, oldskb);
171 171
172 ip6_local_out(nskb); 172#ifdef CONFIG_BRIDGE_NETFILTER
173 /* If we use ip6_local_out for bridged traffic, the MAC source on
174 * the RST will be ours, instead of the destination's. This confuses
175 * some routers/firewalls, and they drop the packet. So we need to
176 * build the eth header using the original destination's MAC as the
177 * source, and send the RST packet directly.
178 */
179 if (oldskb->nf_bridge) {
180 struct ethhdr *oeth = eth_hdr(oldskb);
181 nskb->dev = oldskb->nf_bridge->physindev;
182 nskb->protocol = htons(ETH_P_IPV6);
183 ip6h->payload_len = htons(sizeof(struct tcphdr));
184 if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
185 oeth->h_source, oeth->h_dest, nskb->len) < 0)
186 return;
187 dev_queue_xmit(nskb);
188 } else
189#endif
190 ip6_local_out(nskb);
173} 191}
174 192
175static inline void 193static inline void
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
new file mode 100644
index 000000000000..19cfea8dbcaa
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -0,0 +1,499 @@
1/*
2 * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <net/ip6_checksum.h>
12#include <net/ip6_route.h>
13#include <net/tcp.h>
14
15#include <linux/netfilter_ipv6/ip6_tables.h>
16#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter/xt_SYNPROXY.h>
18#include <net/netfilter/nf_conntrack.h>
19#include <net/netfilter/nf_conntrack_seqadj.h>
20#include <net/netfilter/nf_conntrack_synproxy.h>
21
/*
 * Append an IPv6 header to @skb and fill it in for a TCP segment going
 * from @saddr to @daddr.
 *
 * The network header offset of @skb is reset before the header is added
 * with skb_put().  The flow word is initialised with ip6_flow_hdr(iph, 0, 0),
 * the next header is TCP and the hop limit is a hard-coded 64 (flagged
 * "XXX" by the author).  payload_len is not written here.
 *
 * Returns a pointer to the new header inside @skb.
 */
22static struct ipv6hdr *
23synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
24 const struct in6_addr *daddr)
25{
26 struct ipv6hdr *iph;
27
28 skb_reset_network_header(skb);
29 iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph));
30 ip6_flow_hdr(iph, 0, 0);
31 iph->hop_limit = 64; //XXX
32 iph->nexthdr = IPPROTO_TCP;
33 iph->saddr = *saddr;
34 iph->daddr = *daddr;
35
36 return iph;
37}
38
/*
 * Finish and transmit a TCP segment prepared by one of the
 * synproxy_send_*() helpers.
 *
 * @skb:          packet that triggered this segment; only handed to
 *                security_skb_classify_flow()
 * @nskb:         segment to transmit; freed here when routing fails
 * @nfct/@ctinfo: conntrack entry and state to attach to @nskb, or a NULL
 *                @nfct to attach nothing
 * @niph/@nth:    IPv6 and TCP headers already placed in @nskb
 * @tcp_hdr_size: TCP header length including options
 *
 * NOTE(review): @nfct is passed to nf_ct_net() in the first declaration,
 * before the "if (nfct)" test further down, and synproxy_send_server_ack()
 * and synproxy_send_client_ack() call this function with nfct == NULL.
 * Confirm that nf_ct_net() tolerates a NULL conntrack.
 */
39static void
40synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
41 struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
42 struct ipv6hdr *niph, struct tcphdr *nth,
43 unsigned int tcp_hdr_size)
44{
45 struct net *net = nf_ct_net((struct nf_conn *)nfct);
46 struct dst_entry *dst;
47 struct flowi6 fl6;
48
 /* Store the complemented pseudo-header sum and mark the skb
  * CHECKSUM_PARTIAL, with csum_start/csum_offset pointing at the TCP
  * checksum field.
  */
49 nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
50 nskb->ip_summed = CHECKSUM_PARTIAL;
51 nskb->csum_start = (unsigned char *)nth - nskb->head;
52 nskb->csum_offset = offsetof(struct tcphdr, check);
53
 /* Describe the flow from the headers we just built and route it. */
54 memset(&fl6, 0, sizeof(fl6));
55 fl6.flowi6_proto = IPPROTO_TCP;
56 fl6.saddr = niph->saddr;
57 fl6.daddr = niph->daddr;
58 fl6.fl6_sport = nth->source;
59 fl6.fl6_dport = nth->dest;
60 security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
61 dst = ip6_route_output(net, NULL, &fl6);
62 if (dst == NULL || dst->error) {
63 dst_release(dst);
64 goto free_nskb;
65 }
 /* On error xfrm_lookup() hands back an ERR_PTR; no dst_release() is
  * done on that path.
  */
66 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
67 if (IS_ERR(dst))
68 goto free_nskb;
69
70 skb_dst_set(nskb, dst);
71
 /* Attach the conntrack entry, taking a reference for nskb. */
72 if (nfct) {
73 nskb->nfct = nfct;
74 nskb->nfctinfo = ctinfo;
75 nf_conntrack_get(nfct);
76 }
77
78 ip6_local_out(nskb);
79 return;
80
81free_nskb:
82 kfree_skb(nskb);
83}
84
/*
 * Answer the client's SYN (@skb/@th) with a SYN-ACK.
 *
 * Addresses and ports are swapped relative to @skb.  The sequence number
 * comes from __cookie_v6_init_sequence(), the acknowledgment number is the
 * client's sequence number plus one, and the advertised window is zero.
 * ECE is set when @opts has XT_SYNPROXY_OPT_ECN.  TCP options are written
 * from @opts.  The segment is sent with skb->nfct attached as
 * IP_CT_ESTABLISHED_REPLY.
 *
 * Returns silently, sending nothing, when the skb allocation fails.
 */
85static void
86synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
87 const struct synproxy_options *opts)
88{
89 struct sk_buff *nskb;
90 struct ipv6hdr *iph, *niph;
91 struct tcphdr *nth;
92 unsigned int tcp_hdr_size;
 /* Local copy: the cookie helper takes a pointer to the MSS while
  * @opts is const.
  */
93 u16 mss = opts->mss;
94
95 iph = ipv6_hdr(skb);
96
97 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
98 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
99 GFP_ATOMIC);
100 if (nskb == NULL)
101 return;
102 skb_reserve(nskb, MAX_TCP_HEADER);
103
 /* Reply direction: source and destination addresses swapped. */
104 niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
105
106 skb_reset_transport_header(nskb);
107 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
108 nth->source = th->dest;
109 nth->dest = th->source;
110 nth->seq = htonl(__cookie_v6_init_sequence(iph, th, &mss));
111 nth->ack_seq = htonl(ntohl(th->seq) + 1);
112 tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
113 if (opts->options & XT_SYNPROXY_OPT_ECN)
114 tcp_flag_word(nth) |= TCP_FLAG_ECE;
115 nth->doff = tcp_hdr_size / 4;
116 nth->window = 0;
117 nth->check = 0;
118 nth->urg_ptr = 0;
119
120 synproxy_build_options(nth, opts);
121
122 synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
123 niph, nth, tcp_hdr_size);
124}
125
/*
 * Send a SYN in the same direction as @skb, i.e. with the addresses and
 * ports of @skb/@th unchanged.  Called from synproxy_recv_client_ack()
 * once the client's cookie has been validated.
 *
 * The sequence number is @recv_seq - 1 and the window is copied from @th.
 * CWR and ECE are both set when @opts has XT_SYNPROXY_OPT_ECN.  The segment
 * is sent with the per-netns template conntrack (snet->tmpl) attached as
 * IP_CT_NEW.
 *
 * Returns silently, sending nothing, when the skb allocation fails.
 */
126static void
127synproxy_send_server_syn(const struct synproxy_net *snet,
128 const struct sk_buff *skb, const struct tcphdr *th,
129 const struct synproxy_options *opts, u32 recv_seq)
130{
131 struct sk_buff *nskb;
132 struct ipv6hdr *iph, *niph;
133 struct tcphdr *nth;
134 unsigned int tcp_hdr_size;
135
136 iph = ipv6_hdr(skb);
137
138 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
139 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
140 GFP_ATOMIC);
141 if (nskb == NULL)
142 return;
143 skb_reserve(nskb, MAX_TCP_HEADER);
144
145 niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
146
147 skb_reset_transport_header(nskb);
148 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
149 nth->source = th->source;
150 nth->dest = th->dest;
151 nth->seq = htonl(recv_seq - 1);
152 /* ack_seq is used to relay our ISN to the synproxy hook to initialize
153 * sequence number translation once a connection tracking entry exists.
154 */
155 nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
156 tcp_flag_word(nth) = TCP_FLAG_SYN;
157 if (opts->options & XT_SYNPROXY_OPT_ECN)
158 tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
159 nth->doff = tcp_hdr_size / 4;
160 nth->window = th->window;
161 nth->check = 0;
162 nth->urg_ptr = 0;
163
164 synproxy_build_options(nth, opts);
165
166 synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
167 niph, nth, tcp_hdr_size);
168}
169
/*
 * Reply to @skb with a bare ACK (addresses and ports swapped).  Called
 * from ipv6_synproxy_hook() for a SYN/ACK seen while the conntrack is in
 * TCP_CONNTRACK_SYN_RECV.
 *
 * The sequence number is the received acknowledgment number, the
 * acknowledgment number is the received sequence number plus one, and the
 * window is taken from state->seen[IP_CT_DIR_ORIGINAL].td_maxwin.  No
 * conntrack entry is attached to the segment.
 *
 * @snet is not used in the body.
 *
 * Returns silently, sending nothing, when the skb allocation fails.
 */
170static void
171synproxy_send_server_ack(const struct synproxy_net *snet,
172 const struct ip_ct_tcp *state,
173 const struct sk_buff *skb, const struct tcphdr *th,
174 const struct synproxy_options *opts)
175{
176 struct sk_buff *nskb;
177 struct ipv6hdr *iph, *niph;
178 struct tcphdr *nth;
179 unsigned int tcp_hdr_size;
180
181 iph = ipv6_hdr(skb);
182
183 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
184 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
185 GFP_ATOMIC);
186 if (nskb == NULL)
187 return;
188 skb_reserve(nskb, MAX_TCP_HEADER);
189
190 niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
191
192 skb_reset_transport_header(nskb);
193 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
194 nth->source = th->dest;
195 nth->dest = th->source;
 /* The ntohl()/htonl() round trip leaves th->ack_seq unchanged. */
196 nth->seq = htonl(ntohl(th->ack_seq));
197 nth->ack_seq = htonl(ntohl(th->seq) + 1);
198 tcp_flag_word(nth) = TCP_FLAG_ACK;
199 nth->doff = tcp_hdr_size / 4;
200 nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
201 nth->check = 0;
202 nth->urg_ptr = 0;
203
204 synproxy_build_options(nth, opts);
205
206 synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
207}
208
/*
 * Send a bare ACK in the same direction as @skb (addresses and ports
 * unchanged).  Called from ipv6_synproxy_hook() right after
 * synproxy_send_server_ack() for a SYN/ACK seen in TCP_CONNTRACK_SYN_RECV.
 *
 * The sequence number is the received sequence number plus one and the
 * acknowledgment number is copied unchanged.  The window is the received
 * window shifted right by opts->wscale.  No conntrack entry is attached.
 *
 * @snet is not used in the body.
 *
 * Returns silently, sending nothing, when the skb allocation fails.
 */
209static void
210synproxy_send_client_ack(const struct synproxy_net *snet,
211 const struct sk_buff *skb, const struct tcphdr *th,
212 const struct synproxy_options *opts)
213{
214 struct sk_buff *nskb;
215 struct ipv6hdr *iph, *niph;
216 struct tcphdr *nth;
217 unsigned int tcp_hdr_size;
218
219 iph = ipv6_hdr(skb);
220
221 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
222 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
223 GFP_ATOMIC);
224 if (nskb == NULL)
225 return;
226 skb_reserve(nskb, MAX_TCP_HEADER);
227
228 niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
229
230 skb_reset_transport_header(nskb);
231 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
232 nth->source = th->source;
233 nth->dest = th->dest;
234 nth->seq = htonl(ntohl(th->seq) + 1);
235 nth->ack_seq = th->ack_seq;
236 tcp_flag_word(nth) = TCP_FLAG_ACK;
237 nth->doff = tcp_hdr_size / 4;
 /* NOTE(review): th->window is a wire-order field, so the conversion
  * names look swapped here (htons() on input, ntohs() on output).
  * Presumably harmless because both perform the same 16-bit swap -
  * confirm.
  */
238 nth->window = ntohs(htons(th->window) >> opts->wscale);
239 nth->check = 0;
240 nth->urg_ptr = 0;
241
242 synproxy_build_options(nth, opts);
243
244 synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
245}
246
/*
 * Validate the syncookie carried by a client ACK and, when it is valid,
 * open the server side of the connection.
 *
 * The cookie is the received acknowledgment number minus one.  When
 * __cookie_v6_check() returns 0 the cookie_invalid counter is bumped and
 * false is returned.  Otherwise cookie_valid is bumped, the returned MSS
 * is stored in @opts, the timestamp cookie is checked if @opts carries
 * XT_SYNPROXY_OPT_TIMESTAMP, and a SYN is sent with
 * synproxy_send_server_syn() using @recv_seq.
 *
 * Returns true when the cookie was accepted.
 */
247static bool
248synproxy_recv_client_ack(const struct synproxy_net *snet,
249 const struct sk_buff *skb, const struct tcphdr *th,
250 struct synproxy_options *opts, u32 recv_seq)
251{
252 int mss;
253
254 mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
255 if (mss == 0) {
256 this_cpu_inc(snet->stats->cookie_invalid);
257 return false;
258 }
259
260 this_cpu_inc(snet->stats->cookie_valid);
261 opts->mss = mss;
262
263 if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
264 synproxy_check_timestamp_cookie(opts);
265
266 synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
267 return true;
268}
269
/*
 * SYNPROXY target handler for IPv6.
 *
 * Returns NF_DROP when nf_ip6_checksum() reports a bad checksum or the TCP
 * header cannot be read.  Otherwise:
 *  - SYN without ACK/FIN/RST: count it, answer with a cookie SYN-ACK via
 *    synproxy_send_client_synack() and return NF_DROP;
 *  - ACK without SYN/FIN/RST: hand it to synproxy_recv_client_ack() and
 *    return NF_DROP whatever the outcome;
 *  - anything else: return XT_CONTINUE.
 *
 * NOTE(review): par->in is dereferenced unconditionally through dev_net().
 * Presumably the target is only reachable from hooks that have an input
 * device - confirm.
 */
270static unsigned int
271synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
272{
273 const struct xt_synproxy_info *info = par->targinfo;
274 struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
275 struct synproxy_options opts = {};
276 struct tcphdr *th, _th;
277
278 if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
279 return NF_DROP;
280
281 th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
282 if (th == NULL)
283 return NF_DROP;
284
285 synproxy_parse_options(skb, par->thoff, th, &opts);
286
287 if (th->syn && !(th->ack || th->fin || th->rst)) {
288 /* Initial SYN from client */
289 this_cpu_inc(snet->stats->syn_received);
290
291 if (th->ece && th->cwr)
292 opts.options |= XT_SYNPROXY_OPT_ECN;
293
 /* Keep only the options the rule allows.  Without the timestamp
  * option, window scaling, SACK-permitted and ECN are dropped too
  * (presumably because they are carried in the timestamp cookie -
  * confirm).
  */
294 opts.options &= info->options;
295 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
296 synproxy_init_timestamp_cookie(info, &opts);
297 else
298 opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
299 XT_SYNPROXY_OPT_SACK_PERM |
300 XT_SYNPROXY_OPT_ECN);
301
302 synproxy_send_client_synack(skb, th, &opts);
303 return NF_DROP;
304
305 } else if (th->ack && !(th->fin || th->rst || th->syn)) {
306 /* ACK from client */
307 synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
308 return NF_DROP;
309 }
310
311 return XT_CONTINUE;
312}
313
/*
 * Netfilter hook (registered through ipv6_synproxy_ops) that tracks the
 * handshake of connections carrying the synproxy conntrack extension.
 *
 * Returns:
 *  NF_ACCEPT  - no conntrack entry, no synproxy extension, loopback packet
 *               or ipv6_skip_exthdr() failure; also the normal exit after
 *               synproxy_tstamp_adjust() has been applied
 *  NF_DROP    - the TCP header cannot be read, or a client ACK was handed
 *               to synproxy_recv_client_ack() while in SYN_SENT (or a
 *               reopened CLOSE)
 *  NF_STOLEN  - a SYN/ACK in SYN_RECV was answered in both directions and
 *               the skb was consumed here
 *
 * NOTE(review): the nexthdr value produced by ipv6_skip_exthdr() is never
 * compared with IPPROTO_TCP before the payload is read as a TCP header.
 * Presumably only TCP conntracks carry the synproxy extension - confirm.
 */
314static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
315 struct sk_buff *skb,
316 const struct net_device *in,
317 const struct net_device *out,
318 int (*okfn)(struct sk_buff *))
319{
320 struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
321 enum ip_conntrack_info ctinfo;
322 struct nf_conn *ct;
323 struct nf_conn_synproxy *synproxy;
324 struct synproxy_options opts = {};
325 const struct ip_ct_tcp *state;
326 struct tcphdr *th, _th;
327 __be16 frag_off;
328 u8 nexthdr;
329 int thoff;
330
331 ct = nf_ct_get(skb, &ctinfo);
332 if (ct == NULL)
333 return NF_ACCEPT;
334
335 synproxy = nfct_synproxy(ct);
336 if (synproxy == NULL)
337 return NF_ACCEPT;
338
339 if (nf_is_loopback_packet(skb))
340 return NF_ACCEPT;
341
 /* Skip extension headers to find the transport header offset. */
342 nexthdr = ipv6_hdr(skb)->nexthdr;
343 thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
344 &frag_off);
345 if (thoff < 0)
346 return NF_ACCEPT;
347
348 th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
349 if (th == NULL)
350 return NF_DROP;
351
352 state = &ct->proto.tcp;
353 switch (state->state) {
354 case TCP_CONNTRACK_CLOSE:
 /* RST before any reply was seen: initialise the sequence
  * adjustment from the stored ISN and accept the packet.
  */
355 if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
356 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
357 ntohl(th->seq) + 1);
358 break;
359 }
360
 /* Only a pure SYN in the original direction counts as a reopen. */
361 if (!th->syn || th->ack ||
362 CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
363 break;
364
365 /* Reopened connection - reset the sequence number and timestamp
366 * adjustments, they will get initialized once the connection is
367 * reestablished.
368 */
369 nf_ct_seqadj_init(ct, ctinfo, 0);
370 synproxy->tsoff = 0;
371 this_cpu_inc(snet->stats->conn_reopened);
372
373 /* fall through */
374 case TCP_CONNTRACK_SYN_SENT:
375 synproxy_parse_options(skb, thoff, th, &opts);
376
377 if (!th->syn && th->ack &&
378 CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
379 /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
380 * therefore we need to add 1 to make the SYN sequence
381 * number match the one of first SYN.
382 */
383 if (synproxy_recv_client_ack(snet, skb, th, &opts,
384 ntohl(th->seq) + 1))
385 this_cpu_inc(snet->stats->cookie_retrans);
386
387 return NF_DROP;
388 }
389
 /* Remember the acknowledgment number (and the echoed timestamp,
  * if present) for the SYN_RECV and CLOSE cases.
  */
390 synproxy->isn = ntohl(th->ack_seq);
391 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
392 synproxy->its = opts.tsecr;
393 break;
394 case TCP_CONNTRACK_SYN_RECV:
395 if (!th->syn || !th->ack)
396 break;
397
398 synproxy_parse_options(skb, thoff, th, &opts);
399 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
400 synproxy->tsoff = opts.tsval - synproxy->its;
401
 /* The two ACKs sent below never carry MSS, window scale or
  * SACK-permitted options.
  */
402 opts.options &= ~(XT_SYNPROXY_OPT_MSS |
403 XT_SYNPROXY_OPT_WSCALE |
404 XT_SYNPROXY_OPT_SACK_PERM);
405
 /* tsval and tsecr are swapped for the ACK sent back towards the
  * sender of this packet, then swapped back for the ACK sent in
  * the packet's own direction.
  */
406 swap(opts.tsval, opts.tsecr);
407 synproxy_send_server_ack(snet, state, skb, th, &opts);
408
409 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
410
411 swap(opts.tsval, opts.tsecr);
412 synproxy_send_client_ack(snet, skb, th, &opts);
413
 /* The original packet is released here, hence NF_STOLEN. */
414 consume_skb(skb);
415 return NF_STOLEN;
416 default:
417 break;
418 }
419
420 synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
421 return NF_ACCEPT;
422}
423
/*
 * checkentry callback: accept the rule only when it matches on protocol
 * TCP (IP6T_F_PROTO set, proto == IPPROTO_TCP, no protocol inversion),
 * otherwise return -EINVAL.  On success the result of
 * nf_ct_l3proto_try_module_get() for the rule's family is returned; the
 * matching put is in synproxy_tg6_destroy().
 */
424static int synproxy_tg6_check(const struct xt_tgchk_param *par)
425{
426 const struct ip6t_entry *e = par->entryinfo;
427
428 if (!(e->ipv6.flags & IP6T_F_PROTO) ||
429 e->ipv6.proto != IPPROTO_TCP ||
430 e->ipv6.invflags & XT_INV_PROTO)
431 return -EINVAL;
432
433 return nf_ct_l3proto_try_module_get(par->family);
434}
435
/*
 * destroy callback: drop the conntrack l3proto module reference taken in
 * synproxy_tg6_check().
 */
436static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
437{
438 nf_ct_l3proto_module_put(par->family);
439}
440
/*
 * xtables registration for the IPv6 "SYNPROXY" target; the target data is
 * a struct xt_synproxy_info.
 */
441static struct xt_target synproxy_tg6_reg __read_mostly = {
442 .name = "SYNPROXY",
443 .family = NFPROTO_IPV6,
444 .target = synproxy_tg6,
445 .targetsize = sizeof(struct xt_synproxy_info),
446 .checkentry = synproxy_tg6_check,
447 .destroy = synproxy_tg6_destroy,
448 .me = THIS_MODULE,
449};
450
/*
 * ipv6_synproxy_hook() is installed at LOCAL_IN and POST_ROUTING, both at
 * priority NF_IP_PRI_CONNTRACK_CONFIRM - 1 (presumably so that it runs
 * just before conntrack confirmation - confirm).
 *
 * NOTE(review): the priority constant is the IPv4-named one although the
 * hooks are NFPROTO_IPV6; check whether an IPv6-named equivalent is
 * intended.
 */
451static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
452 {
453 .hook = ipv6_synproxy_hook,
454 .owner = THIS_MODULE,
455 .pf = NFPROTO_IPV6,
456 .hooknum = NF_INET_LOCAL_IN,
457 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
458 },
459 {
460 .hook = ipv6_synproxy_hook,
461 .owner = THIS_MODULE,
462 .pf = NFPROTO_IPV6,
463 .hooknum = NF_INET_POST_ROUTING,
464 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
465 },
466};
467
/*
 * Module init: register the netfilter hooks first, then the xtables
 * target.  If the target registration fails the hooks are unregistered
 * again.  Returns 0 on success or the negative error of the failing step.
 */
468static int __init synproxy_tg6_init(void)
469{
470 int err;
471
472 err = nf_register_hooks(ipv6_synproxy_ops,
473 ARRAY_SIZE(ipv6_synproxy_ops));
474 if (err < 0)
475 goto err1;
476
477 err = xt_register_target(&synproxy_tg6_reg);
478 if (err < 0)
479 goto err2;
480
481 return 0;
482
483err2:
484 nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
485err1:
486 return err;
487}
488
/*
 * Module exit: undo synproxy_tg6_init() in reverse order (target first,
 * then the hooks).
 */
489static void __exit synproxy_tg6_exit(void)
490{
491 xt_unregister_target(&synproxy_tg6_reg);
492 nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
493}
494
495module_init(synproxy_tg6_init);
496module_exit(synproxy_tg6_exit);
497
498MODULE_LICENSE("GPL");
499MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index beb5777d2043..29b44b14c5ea 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -61,7 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
61 net->ipv6.ip6table_filter = 61 net->ipv6.ip6table_filter =
62 ip6t_register_table(net, &packet_filter, repl); 62 ip6t_register_table(net, &packet_filter, repl);
63 kfree(repl); 63 kfree(repl);
64 return PTR_RET(net->ipv6.ip6table_filter); 64 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter);
65} 65}
66 66
67static void __net_exit ip6table_filter_net_exit(struct net *net) 67static void __net_exit ip6table_filter_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index e075399d8b72..c705907ae6ab 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -101,7 +101,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
101 net->ipv6.ip6table_mangle = 101 net->ipv6.ip6table_mangle =
102 ip6t_register_table(net, &packet_mangler, repl); 102 ip6t_register_table(net, &packet_mangler, repl);
103 kfree(repl); 103 kfree(repl);
104 return PTR_RET(net->ipv6.ip6table_mangle); 104 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle);
105} 105}
106 106
107static void __net_exit ip6table_mangle_net_exit(struct net *net) 107static void __net_exit ip6table_mangle_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 6383f90efda8..9b076d2d3a7b 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -293,7 +293,7 @@ static int __net_init ip6table_nat_net_init(struct net *net)
293 return -ENOMEM; 293 return -ENOMEM;
294 net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); 294 net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
295 kfree(repl); 295 kfree(repl);
296 return PTR_RET(net->ipv6.ip6table_nat); 296 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat);
297} 297}
298 298
299static void __net_exit ip6table_nat_net_exit(struct net *net) 299static void __net_exit ip6table_nat_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 60d1bddff7a0..9a626d86720f 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,7 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
40 net->ipv6.ip6table_raw = 40 net->ipv6.ip6table_raw =
41 ip6t_register_table(net, &packet_raw, repl); 41 ip6t_register_table(net, &packet_raw, repl);
42 kfree(repl); 42 kfree(repl);
43 return PTR_RET(net->ipv6.ip6table_raw); 43 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw);
44} 44}
45 45
46static void __net_exit ip6table_raw_net_exit(struct net *net) 46static void __net_exit ip6table_raw_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index db155351339c..ce88d1d7e525 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -58,7 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
58 net->ipv6.ip6table_security = 58 net->ipv6.ip6table_security =
59 ip6t_register_table(net, &security_table, repl); 59 ip6t_register_table(net, &security_table, repl);
60 kfree(repl); 60 kfree(repl);
61 return PTR_RET(net->ipv6.ip6table_security); 61 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security);
62} 62}
63 63
64static void __net_exit ip6table_security_net_exit(struct net *net) 64static void __net_exit ip6table_security_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c9b6a6e6a1e8..d6e4dd8b58df 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -28,6 +28,7 @@
28#include <net/netfilter/nf_conntrack_l3proto.h> 28#include <net/netfilter/nf_conntrack_l3proto.h>
29#include <net/netfilter/nf_conntrack_core.h> 29#include <net/netfilter/nf_conntrack_core.h>
30#include <net/netfilter/nf_conntrack_zones.h> 30#include <net/netfilter/nf_conntrack_zones.h>
31#include <net/netfilter/nf_conntrack_seqadj.h>
31#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 32#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
32#include <net/netfilter/nf_nat_helper.h> 33#include <net/netfilter/nf_nat_helper.h>
33#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 34#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
@@ -158,11 +159,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
158 /* adjust seqs for loopback traffic only in outgoing direction */ 159 /* adjust seqs for loopback traffic only in outgoing direction */
159 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 160 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
160 !nf_is_loopback_packet(skb)) { 161 !nf_is_loopback_packet(skb)) {
161 typeof(nf_nat_seq_adjust_hook) seq_adjust; 162 if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
162
163 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
164 if (!seq_adjust ||
165 !seq_adjust(skb, ct, ctinfo, protoff)) {
166 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 163 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
167 return NF_DROP; 164 return NF_DROP;
168 } 165 }
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index ab92a3673fbb..827f795209cf 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -5,6 +5,7 @@
5#include <linux/export.h> 5#include <linux/export.h>
6#include <net/ipv6.h> 6#include <net/ipv6.h>
7#include <net/ip6_fib.h> 7#include <net/ip6_fib.h>
8#include <net/addrconf.h>
8 9
9void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) 10void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
10{ 11{
@@ -75,3 +76,50 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
75 return offset; 76 return offset;
76} 77}
77EXPORT_SYMBOL(ip6_find_1stfragopt); 78EXPORT_SYMBOL(ip6_find_1stfragopt);
79
80#if IS_ENABLED(CONFIG_IPV6)
/*
 * Return the hop limit to use for @dst.
 *
 * The RTAX_HOPLIMIT route metric is used when it is non-zero.  Otherwise
 * the value falls back to the hop_limit setting of the inet6 device of
 * dst->dev or, when that device has no inet6 device, to the namespace-wide
 * devconf_all setting.  The device lookup is done under rcu_read_lock().
 */
81int ip6_dst_hoplimit(struct dst_entry *dst)
82{
83 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
84 if (hoplimit == 0) {
85 struct net_device *dev = dst->dev;
86 struct inet6_dev *idev;
87
88 rcu_read_lock();
89 idev = __in6_dev_get(dev);
90 if (idev)
91 hoplimit = idev->cnf.hop_limit;
92 else
93 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
94 rcu_read_unlock();
95 }
96 return hoplimit;
97}
98EXPORT_SYMBOL(ip6_dst_hoplimit);
99#endif
100
/*
 * Fill in the IPv6 payload length of @skb and run the NF_INET_LOCAL_OUT
 * netfilter hook, with dst_output() as the continuation.
 *
 * payload_len is skb->len minus the fixed IPv6 header, or 0 when that
 * would exceed IPV6_MAXPLEN.  The output device is taken from the skb's
 * dst entry.
 *
 * Returns the value of nf_hook().
 */
101int __ip6_local_out(struct sk_buff *skb)
102{
103 int len;
104
105 len = skb->len - sizeof(struct ipv6hdr);
106 if (len > IPV6_MAXPLEN)
107 len = 0;
108 ipv6_hdr(skb)->payload_len = htons(len);
109
110 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
111 skb_dst(skb)->dev, dst_output);
112}
113EXPORT_SYMBOL_GPL(__ip6_local_out);
114
/*
 * Send a locally generated IPv6 packet: run __ip6_local_out() and, when it
 * returns 1, pass @skb on to dst_output().
 *
 * Returns the result of dst_output() in that case, otherwise the value
 * returned by __ip6_local_out().
 */
115int ip6_local_out(struct sk_buff *skb)
116{
117 int err;
118
119 err = __ip6_local_out(skb);
120 if (likely(err == 1))
121 err = dst_output(skb);
122
123 return err;
124}
125EXPORT_SYMBOL_GPL(ip6_local_out);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 51c3285b5d9b..091d066a57b3 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -91,6 +91,10 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
91 SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), 91 SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
92 SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), 92 SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
93 /* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */ 93 /* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */
94 SNMP_MIB_ITEM("Ip6InNoECTPkts", IPSTATS_MIB_NOECTPKTS),
95 SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
96 SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
97 SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS),
94 SNMP_MIB_SENTINEL 98 SNMP_MIB_SENTINEL
95}; 99};
96 100
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index cdaed47ba932..58916bbb1728 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -63,6 +63,8 @@
63#include <linux/seq_file.h> 63#include <linux/seq_file.h>
64#include <linux/export.h> 64#include <linux/export.h>
65 65
66#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */
67
66static struct raw_hashinfo raw_v6_hashinfo = { 68static struct raw_hashinfo raw_v6_hashinfo = {
67 .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), 69 .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
68}; 70};
@@ -108,11 +110,14 @@ found:
108 */ 110 */
109static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) 111static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
110{ 112{
111 struct icmp6hdr *_hdr; 113 struct icmp6hdr _hdr;
112 const struct icmp6hdr *hdr; 114 const struct icmp6hdr *hdr;
113 115
116 /* We require only the four bytes of the ICMPv6 header, not any
117 * additional bytes of message body in "struct icmp6hdr".
118 */
114 hdr = skb_header_pointer(skb, skb_transport_offset(skb), 119 hdr = skb_header_pointer(skb, skb_transport_offset(skb),
115 sizeof(_hdr), &_hdr); 120 ICMPV6_HDRLEN, &_hdr);
116 if (hdr) { 121 if (hdr) {
117 const __u32 *data = &raw6_sk(sk)->filter.data[0]; 122 const __u32 *data = &raw6_sk(sk)->filter.data[0];
118 unsigned int type = hdr->icmp6_type; 123 unsigned int type = hdr->icmp6_type;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8d9a93ed9c59..c979dd96d82a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -283,9 +283,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
283 283
284 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 284 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
285 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); 285 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
286 rt->rt6i_genid = rt_genid(net); 286 rt->rt6i_genid = rt_genid_ipv6(net);
287 INIT_LIST_HEAD(&rt->rt6i_siblings); 287 INIT_LIST_HEAD(&rt->rt6i_siblings);
288 rt->rt6i_nsiblings = 0;
289 } 288 }
290 return rt; 289 return rt;
291} 290}
@@ -1062,7 +1061,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1062 * DST_OBSOLETE_FORCE_CHK which forces validation calls down 1061 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1063 * into this function always. 1062 * into this function always.
1064 */ 1063 */
1065 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) 1064 if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
1066 return NULL; 1065 return NULL;
1067 1066
1068 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 1067 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
@@ -1157,6 +1156,77 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1157} 1156}
1158EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); 1157EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1159 1158
1159/* Handle redirects */
/*
 * Flow key for redirect lookups: a flowi6 followed by the gateway address
 * to match.  fl6 must stay the first member because
 * __ip6_route_redirect() casts the struct flowi6 pointer it receives back
 * to a struct ip6rd_flowi pointer.
 */
1160struct ip6rd_flowi {
1161 struct flowi6 fl6;
1162 struct in6_addr gateway;
1163};
1164
/*
 * Per-table lookup callback handed to fib6_rule_lookup() by
 * ip6_route_redirect().
 *
 * @fl6 is really the fl6 member of a struct ip6rd_flowi; the cast below
 * recovers the gateway address to match.  The route list of the looked-up
 * node is walked under the table read lock.  Expired routes, non-gateway
 * routes, routes on another interface than fl6->flowi6_oif and routes
 * through a different gateway are skipped.  The walk stops at the first
 * match or at the first route that has dst.error set.
 *
 * @flags is not referenced directly in this body.
 *
 * Returns the matching route, or net->ipv6.ip6_null_entry, with a
 * reference taken through dst_hold().
 *
 * NOTE(review): the function body is followed by a stray ';' after the
 * closing brace.
 */
1165static struct rt6_info *__ip6_route_redirect(struct net *net,
1166 struct fib6_table *table,
1167 struct flowi6 *fl6,
1168 int flags)
1169{
1170 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1171 struct rt6_info *rt;
1172 struct fib6_node *fn;
1173
1174 /* Get the "current" route for this destination and
1175 * check if the redirect has come from appropriate router.
1176 *
1177 * RFC 4861 specifies that redirects should only be
1178 * accepted if they come from the nexthop to the target.
1179 * Due to the way the routes are chosen, this notion
1180 * is a bit fuzzy and one might need to check all possible
1181 * routes.
1182 */
1183
1184 read_lock_bh(&table->tb6_lock);
1185 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 /* "restart" is not targeted by any goto in this body; presumably the
  * BACKTRACK() macro below jumps to it - confirm.
  */
1186restart:
1187 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1188 if (rt6_check_expired(rt))
1189 continue;
1190 if (rt->dst.error)
1191 break;
1192 if (!(rt->rt6i_flags & RTF_GATEWAY))
1193 continue;
1194 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1195 continue;
1196 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1197 continue;
1198 break;
1199 }
1200
 /* No match: fall back to the null entry and let BACKTRACK() run.
  * An error route also maps to the null entry but skips BACKTRACK().
  */
1201 if (!rt)
1202 rt = net->ipv6.ip6_null_entry;
1203 else if (rt->dst.error) {
1204 rt = net->ipv6.ip6_null_entry;
1205 goto out;
1206 }
1207 BACKTRACK(net, &fl6->saddr);
1208out:
1209 dst_hold(&rt->dst);
1210
1211 read_unlock_bh(&table->tb6_lock);
1212
1213 return rt;
1214};
1215
/*
 * Look up the route that a redirect sent by @gateway for flow @fl6 would
 * apply to.
 *
 * @fl6 and @gateway are copied into a struct ip6rd_flowi on the stack so
 * that the gateway reaches __ip6_route_redirect() through
 * fib6_rule_lookup(), which is called with RT6_LOOKUP_F_HAS_SADDR.
 *
 * Returns whatever fib6_rule_lookup() returns.
 */
1216static struct dst_entry *ip6_route_redirect(struct net *net,
1217 const struct flowi6 *fl6,
1218 const struct in6_addr *gateway)
1219{
1220 int flags = RT6_LOOKUP_F_HAS_SADDR;
1221 struct ip6rd_flowi rdfl;
1222
1223 rdfl.fl6 = *fl6;
1224 rdfl.gateway = *gateway;
1225
1226 return fib6_rule_lookup(net, &rdfl.fl6,
1227 flags, __ip6_route_redirect);
1228}
1229
1160void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) 1230void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1161{ 1231{
1162 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 1232 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
@@ -1171,9 +1241,8 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1171 fl6.saddr = iph->saddr; 1241 fl6.saddr = iph->saddr;
1172 fl6.flowlabel = ip6_flowinfo(iph); 1242 fl6.flowlabel = ip6_flowinfo(iph);
1173 1243
1174 dst = ip6_route_output(net, NULL, &fl6); 1244 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1175 if (!dst->error) 1245 rt6_do_redirect(dst, NULL, skb);
1176 rt6_do_redirect(dst, NULL, skb);
1177 dst_release(dst); 1246 dst_release(dst);
1178} 1247}
1179EXPORT_SYMBOL_GPL(ip6_redirect); 1248EXPORT_SYMBOL_GPL(ip6_redirect);
@@ -1193,9 +1262,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1193 fl6.daddr = msg->dest; 1262 fl6.daddr = msg->dest;
1194 fl6.saddr = iph->daddr; 1263 fl6.saddr = iph->daddr;
1195 1264
1196 dst = ip6_route_output(net, NULL, &fl6); 1265 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1197 if (!dst->error) 1266 rt6_do_redirect(dst, NULL, skb);
1198 rt6_do_redirect(dst, NULL, skb);
1199 dst_release(dst); 1267 dst_release(dst);
1200} 1268}
1201 1269
@@ -1355,25 +1423,6 @@ out:
1355 return entries > rt_max_size; 1423 return entries > rt_max_size;
1356} 1424}
1357 1425
1358int ip6_dst_hoplimit(struct dst_entry *dst)
1359{
1360 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1361 if (hoplimit == 0) {
1362 struct net_device *dev = dst->dev;
1363 struct inet6_dev *idev;
1364
1365 rcu_read_lock();
1366 idev = __in6_dev_get(dev);
1367 if (idev)
1368 hoplimit = idev->cnf.hop_limit;
1369 else
1370 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1371 rcu_read_unlock();
1372 }
1373 return hoplimit;
1374}
1375EXPORT_SYMBOL(ip6_dst_hoplimit);
1376
1377/* 1426/*
1378 * 1427 *
1379 */ 1428 */
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 21b25dd8466b..7ee5cb96db34 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -581,12 +581,10 @@ static int ipip6_rcv(struct sk_buff *skb)
581 tunnel->parms.iph.protocol != 0) 581 tunnel->parms.iph.protocol != 0)
582 goto out; 582 goto out;
583 583
584 secpath_reset(skb);
585 skb->mac_header = skb->network_header; 584 skb->mac_header = skb->network_header;
586 skb_reset_network_header(skb); 585 skb_reset_network_header(skb);
587 IPCB(skb)->flags = 0; 586 IPCB(skb)->flags = 0;
588 skb->protocol = htons(ETH_P_IPV6); 587 skb->protocol = htons(ETH_P_IPV6);
589 skb->pkt_type = PACKET_HOST;
590 588
591 if (tunnel->dev->priv_flags & IFF_ISATAP) { 589 if (tunnel->dev->priv_flags & IFF_ISATAP) {
592 if (!isatap_chksrc(skb, iph, tunnel)) { 590 if (!isatap_chksrc(skb, iph, tunnel)) {
@@ -603,7 +601,7 @@ static int ipip6_rcv(struct sk_buff *skb)
603 } 601 }
604 } 602 }
605 603
606 __skb_tunnel_rx(skb, tunnel->dev); 604 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
607 605
608 err = IP_ECN_decapsulate(iph, skb); 606 err = IP_ECN_decapsulate(iph, skb);
609 if (unlikely(err)) { 607 if (unlikely(err)) {
@@ -621,8 +619,6 @@ static int ipip6_rcv(struct sk_buff *skb)
621 tstats->rx_packets++; 619 tstats->rx_packets++;
622 tstats->rx_bytes += skb->len; 620 tstats->rx_bytes += skb->len;
623 621
624 if (tunnel->net != dev_net(tunnel->dev))
625 skb_scrub_packet(skb);
626 netif_rx(skb); 622 netif_rx(skb);
627 623
628 return 0; 624 return 0;
@@ -858,9 +854,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
858 tunnel->err_count = 0; 854 tunnel->err_count = 0;
859 } 855 }
860 856
861 if (tunnel->net != dev_net(dev))
862 skb_scrub_packet(skb);
863
864 /* 857 /*
865 * Okay, now see if we can stuff it in the buffer as-is. 858 * Okay, now see if we can stuff it in the buffer as-is.
866 */ 859 */
@@ -891,8 +884,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
891 skb->encapsulation = 1; 884 skb->encapsulation = 1;
892 } 885 }
893 886
894 err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr, 887 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
895 IPPROTO_IPV6, tos, ttl, df); 888 ttl, df, !net_eq(tunnel->net, dev_net(dev)));
896 iptunnel_xmit_stats(err, &dev->stats, dev->tstats); 889 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
897 return NETDEV_TX_OK; 890 return NETDEV_TX_OK;
898 891
@@ -1592,7 +1585,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
1592 /* If dev is in the same netns, it has already 1585 /* If dev is in the same netns, it has already
1593 * been added to the list by the previous loop. 1586 * been added to the list by the previous loop.
1594 */ 1587 */
1595 if (dev_net(t->dev) != net) 1588 if (!net_eq(dev_net(t->dev), net))
1596 unregister_netdevice_queue(t->dev, 1589 unregister_netdevice_queue(t->dev,
1597 head); 1590 head);
1598 t = rtnl_dereference(t->next); 1591 t = rtnl_dereference(t->next);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d5dda20bd717..bf63ac8a49b9 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -112,32 +112,38 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
112 & COOKIEMASK; 112 & COOKIEMASK;
113} 113}
114 114
115__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) 115u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
116 const struct tcphdr *th, __u16 *mssp)
116{ 117{
117 const struct ipv6hdr *iph = ipv6_hdr(skb);
118 const struct tcphdr *th = tcp_hdr(skb);
119 int mssind; 118 int mssind;
120 const __u16 mss = *mssp; 119 const __u16 mss = *mssp;
121 120
122 tcp_synq_overflow(sk);
123
124 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) 121 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
125 if (mss >= msstab[mssind]) 122 if (mss >= msstab[mssind])
126 break; 123 break;
127 124
128 *mssp = msstab[mssind]; 125 *mssp = msstab[mssind];
129 126
130 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
131
132 return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, 127 return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
133 th->dest, ntohl(th->seq), 128 th->dest, ntohl(th->seq),
134 jiffies / (HZ * 60), mssind); 129 jiffies / (HZ * 60), mssind);
135} 130}
131EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
136 132
137static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) 133__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
138{ 134{
139 const struct ipv6hdr *iph = ipv6_hdr(skb); 135 const struct ipv6hdr *iph = ipv6_hdr(skb);
140 const struct tcphdr *th = tcp_hdr(skb); 136 const struct tcphdr *th = tcp_hdr(skb);
137
138 tcp_synq_overflow(sk);
139 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
140
141 return __cookie_v6_init_sequence(iph, th, mssp);
142}
143
144int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
145 __u32 cookie)
146{
141 __u32 seq = ntohl(th->seq) - 1; 147 __u32 seq = ntohl(th->seq) - 1;
142 __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, 148 __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
143 th->source, th->dest, seq, 149 th->source, th->dest, seq,
@@ -145,6 +151,7 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
145 151
146 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; 152 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
147} 153}
154EXPORT_SYMBOL_GPL(__cookie_v6_check);
148 155
149struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) 156struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
150{ 157{
@@ -167,7 +174,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
167 goto out; 174 goto out;
168 175
169 if (tcp_synq_no_recent_overflow(sk) || 176 if (tcp_synq_no_recent_overflow(sk) ||
170 (mss = cookie_check(skb, cookie)) == 0) { 177 (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) {
171 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); 178 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
172 goto out; 179 goto out;
173 } 180 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6e1649d58533..5c71501fc917 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -963,7 +963,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
963 if (!ipv6_unicast_destination(skb)) 963 if (!ipv6_unicast_destination(skb))
964 goto drop; 964 goto drop;
965 965
966 if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 966 if ((sysctl_tcp_syncookies == 2 ||
967 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
967 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); 968 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
968 if (!want_cookie) 969 if (!want_cookie)
969 goto drop; 970 goto drop;
@@ -1237,8 +1238,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1237 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1238 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1238 1239
1239 tcp_initialize_rcv_mss(newsk); 1240 tcp_initialize_rcv_mss(newsk);
1240 tcp_synack_rtt_meas(newsk, req);
1241 newtp->total_retrans = req->num_retrans;
1242 1241
1243 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1242 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1244 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1243 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
@@ -1361,8 +1360,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1361 } 1360 }
1362 } 1361 }
1363 1362
1364 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) 1363 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1365 goto reset;
1366 if (opt_skb) 1364 if (opt_skb)
1367 goto ipv6_pktoptions; 1365 goto ipv6_pktoptions;
1368 return 0; 1366 return 0;
@@ -1427,7 +1425,7 @@ ipv6_pktoptions:
1427 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1425 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1428 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1426 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1429 if (np->rxopt.bits.rxtclass) 1427 if (np->rxopt.bits.rxtclass)
1430 np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); 1428 np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb));
1431 if (ipv6_opt_accepted(sk, opt_skb)) { 1429 if (ipv6_opt_accepted(sk, opt_skb)) {
1432 skb_set_owner_r(opt_skb, sk); 1430 skb_set_owner_r(opt_skb, sk);
1433 opt_skb = xchg(&np->pktoptions, opt_skb); 1431 opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1732,7 +1730,7 @@ static void get_openreq6(struct seq_file *seq,
1732 1730
1733 seq_printf(seq, 1731 seq_printf(seq,
1734 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1732 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1735 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 1733 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1736 i, 1734 i,
1737 src->s6_addr32[0], src->s6_addr32[1], 1735 src->s6_addr32[0], src->s6_addr32[1],
1738 src->s6_addr32[2], src->s6_addr32[3], 1736 src->s6_addr32[2], src->s6_addr32[3],
@@ -1783,7 +1781,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1783 1781
1784 seq_printf(seq, 1782 seq_printf(seq,
1785 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1783 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1786 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n", 1784 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1787 i, 1785 i,
1788 src->s6_addr32[0], src->s6_addr32[1], 1786 src->s6_addr32[0], src->s6_addr32[1],
1789 src->s6_addr32[2], src->s6_addr32[3], srcp, 1787 src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1926,6 +1924,7 @@ struct proto tcpv6_prot = {
1926 .unhash = inet_unhash, 1924 .unhash = inet_unhash,
1927 .get_port = inet_csk_get_port, 1925 .get_port = inet_csk_get_port,
1928 .enter_memory_pressure = tcp_enter_memory_pressure, 1926 .enter_memory_pressure = tcp_enter_memory_pressure,
1927 .stream_memory_free = tcp_stream_memory_free,
1929 .sockets_allocated = &tcp_sockets_allocated, 1928 .sockets_allocated = &tcp_sockets_allocated,
1930 .memory_allocated = &tcp_memory_allocated, 1929 .memory_allocated = &tcp_memory_allocated,
1931 .memory_pressure = &tcp_memory_pressure, 1930 .memory_pressure = &tcp_memory_pressure,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 5d1b8d7ac993..60559511bd9c 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -21,26 +21,25 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
21 const struct ipv6hdr *ipv6h; 21 const struct ipv6hdr *ipv6h;
22 struct udphdr *uh; 22 struct udphdr *uh;
23 23
24 /* UDP Tunnel offload on ipv6 is not yet supported. */
25 if (skb->encapsulation)
26 return -EINVAL;
27
28 if (!pskb_may_pull(skb, sizeof(*uh))) 24 if (!pskb_may_pull(skb, sizeof(*uh)))
29 return -EINVAL; 25 return -EINVAL;
30 26
31 ipv6h = ipv6_hdr(skb); 27 if (likely(!skb->encapsulation)) {
32 uh = udp_hdr(skb); 28 ipv6h = ipv6_hdr(skb);
29 uh = udp_hdr(skb);
30
31 uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
32 IPPROTO_UDP, 0);
33 skb->csum_start = skb_transport_header(skb) - skb->head;
34 skb->csum_offset = offsetof(struct udphdr, check);
35 skb->ip_summed = CHECKSUM_PARTIAL;
36 }
33 37
34 uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
35 IPPROTO_UDP, 0);
36 skb->csum_start = skb_transport_header(skb) - skb->head;
37 skb->csum_offset = offsetof(struct udphdr, check);
38 skb->ip_summed = CHECKSUM_PARTIAL;
39 return 0; 38 return 0;
40} 39}
41 40
42static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, 41static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
43 netdev_features_t features) 42 netdev_features_t features)
44{ 43{
45 struct sk_buff *segs = ERR_PTR(-EINVAL); 44 struct sk_buff *segs = ERR_PTR(-EINVAL);
46 unsigned int mss; 45 unsigned int mss;
@@ -75,47 +74,51 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
75 goto out; 74 goto out;
76 } 75 }
77 76
78 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot 77 if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
79 * do checksum of UDP packets sent as multiple IP fragments. 78 segs = skb_udp_tunnel_segment(skb, features);
80 */ 79 else {
81 offset = skb_checksum_start_offset(skb); 80 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
82 csum = skb_checksum(skb, offset, skb->len - offset, 0); 81 * do checksum of UDP packets sent as multiple IP fragments.
83 offset += skb->csum_offset; 82 */
84 *(__sum16 *)(skb->data + offset) = csum_fold(csum); 83 offset = skb_checksum_start_offset(skb);
85 skb->ip_summed = CHECKSUM_NONE; 84 csum = skb_checksum(skb, offset, skb->len - offset, 0);
86 85 offset += skb->csum_offset;
87 /* Check if there is enough headroom to insert fragment header. */ 86 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
88 tnl_hlen = skb_tnl_header_len(skb); 87 skb->ip_summed = CHECKSUM_NONE;
89 if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { 88
90 if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) 89 /* Check if there is enough headroom to insert fragment header. */
91 goto out; 90 tnl_hlen = skb_tnl_header_len(skb);
91 if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
92 if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
93 goto out;
94 }
95
96 /* Find the unfragmentable header and shift it left by frag_hdr_sz
97 * bytes to insert fragment header.
98 */
99 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
100 nexthdr = *prevhdr;
101 *prevhdr = NEXTHDR_FRAGMENT;
102 unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
103 unfrag_ip6hlen + tnl_hlen;
104 packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
105 memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
106
107 SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
108 skb->mac_header -= frag_hdr_sz;
109 skb->network_header -= frag_hdr_sz;
110
111 fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
112 fptr->nexthdr = nexthdr;
113 fptr->reserved = 0;
114 ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
115
116 /* Fragment the skb. ipv6 header and the remaining fields of the
117 * fragment header are updated in ipv6_gso_segment()
118 */
119 segs = skb_segment(skb, features);
92 } 120 }
93 121
94 /* Find the unfragmentable header and shift it left by frag_hdr_sz
95 * bytes to insert fragment header.
96 */
97 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
98 nexthdr = *prevhdr;
99 *prevhdr = NEXTHDR_FRAGMENT;
100 unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
101 unfrag_ip6hlen + tnl_hlen;
102 packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
103 memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
104
105 SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
106 skb->mac_header -= frag_hdr_sz;
107 skb->network_header -= frag_hdr_sz;
108
109 fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
110 fptr->nexthdr = nexthdr;
111 fptr->reserved = 0;
112 ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
113
114 /* Fragment the skb. ipv6 header and the remaining fields of the
115 * fragment header are updated in ipv6_gso_segment()
116 */
117 segs = skb_segment(skb, features);
118
119out: 122out:
120 return segs; 123 return segs;
121} 124}
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 65e8833a2510..e15c16a517e7 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -213,7 +213,7 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v)
213 ntohs(ipxs->dest_addr.sock)); 213 ntohs(ipxs->dest_addr.sock));
214 } 214 }
215 215
216 seq_printf(seq, "%08X %08X %02X %03d\n", 216 seq_printf(seq, "%08X %08X %02X %03u\n",
217 sk_wmem_alloc_get(s), 217 sk_wmem_alloc_get(s),
218 sk_rmem_alloc_get(s), 218 sk_rmem_alloc_get(s),
219 s->sk_state, 219 s->sk_state,
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index ae43c62f9045..85372cfa7b9f 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -75,7 +75,7 @@ static pi_minor_info_t pi_minor_call_table[] = {
75 { NULL, 0 }, /* 0x00 */ 75 { NULL, 0 }, /* 0x00 */
76 { irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */ 76 { irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */
77}; 77};
78static pi_major_info_t pi_major_call_table[] = {{ pi_minor_call_table, 2 }}; 78static pi_major_info_t pi_major_call_table[] = { { pi_minor_call_table, 2 } };
79static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 }; 79static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 };
80 80
81/************************ GLOBAL PROCEDURES ************************/ 81/************************ GLOBAL PROCEDURES ************************/
@@ -205,7 +205,7 @@ static void irttp_todo_expired(unsigned long data)
205 */ 205 */
206static void irttp_flush_queues(struct tsap_cb *self) 206static void irttp_flush_queues(struct tsap_cb *self)
207{ 207{
208 struct sk_buff* skb; 208 struct sk_buff *skb;
209 209
210 IRDA_DEBUG(4, "%s()\n", __func__); 210 IRDA_DEBUG(4, "%s()\n", __func__);
211 211
@@ -400,7 +400,7 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
400 /* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to 400 /* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to
401 * use only 0x01-0x6F. Of course, we can use LSAP_ANY as well. 401 * use only 0x01-0x6F. Of course, we can use LSAP_ANY as well.
402 * JeanII */ 402 * JeanII */
403 if((stsap_sel != LSAP_ANY) && 403 if ((stsap_sel != LSAP_ANY) &&
404 ((stsap_sel < 0x01) || (stsap_sel >= 0x70))) { 404 ((stsap_sel < 0x01) || (stsap_sel >= 0x70))) {
405 IRDA_DEBUG(0, "%s(), invalid tsap!\n", __func__); 405 IRDA_DEBUG(0, "%s(), invalid tsap!\n", __func__);
406 return NULL; 406 return NULL;
@@ -427,7 +427,7 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
427 ttp_notify.data_indication = irttp_data_indication; 427 ttp_notify.data_indication = irttp_data_indication;
428 ttp_notify.udata_indication = irttp_udata_indication; 428 ttp_notify.udata_indication = irttp_udata_indication;
429 ttp_notify.flow_indication = irttp_flow_indication; 429 ttp_notify.flow_indication = irttp_flow_indication;
430 if(notify->status_indication != NULL) 430 if (notify->status_indication != NULL)
431 ttp_notify.status_indication = irttp_status_indication; 431 ttp_notify.status_indication = irttp_status_indication;
432 ttp_notify.instance = self; 432 ttp_notify.instance = self;
433 strncpy(ttp_notify.name, notify->name, NOTIFY_MAX_NAME); 433 strncpy(ttp_notify.name, notify->name, NOTIFY_MAX_NAME);
@@ -639,8 +639,7 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
639 */ 639 */
640 if ((self->tx_max_sdu_size != 0) && 640 if ((self->tx_max_sdu_size != 0) &&
641 (self->tx_max_sdu_size != TTP_SAR_UNBOUND) && 641 (self->tx_max_sdu_size != TTP_SAR_UNBOUND) &&
642 (skb->len > self->tx_max_sdu_size)) 642 (skb->len > self->tx_max_sdu_size)) {
643 {
644 IRDA_ERROR("%s: SAR enabled, but data is larger than TxMaxSduSize!\n", 643 IRDA_ERROR("%s: SAR enabled, but data is larger than TxMaxSduSize!\n",
645 __func__); 644 __func__);
646 ret = -EMSGSIZE; 645 ret = -EMSGSIZE;
@@ -733,8 +732,7 @@ static void irttp_run_tx_queue(struct tsap_cb *self)
733 * poll us through irttp_flow_indication() - Jean II */ 732 * poll us through irttp_flow_indication() - Jean II */
734 while ((self->send_credit > 0) && 733 while ((self->send_credit > 0) &&
735 (!irlmp_lap_tx_queue_full(self->lsap)) && 734 (!irlmp_lap_tx_queue_full(self->lsap)) &&
736 (skb = skb_dequeue(&self->tx_queue))) 735 (skb = skb_dequeue(&self->tx_queue))) {
737 {
738 /* 736 /*
739 * Since we can transmit and receive frames concurrently, 737 * Since we can transmit and receive frames concurrently,
740 * the code below is a critical region and we must assure that 738 * the code below is a critical region and we must assure that
@@ -798,8 +796,7 @@ static void irttp_run_tx_queue(struct tsap_cb *self)
798 * where we can spend a bit of time doing stuff. - Jean II */ 796 * where we can spend a bit of time doing stuff. - Jean II */
799 if ((self->tx_sdu_busy) && 797 if ((self->tx_sdu_busy) &&
800 (skb_queue_len(&self->tx_queue) < TTP_TX_LOW_THRESHOLD) && 798 (skb_queue_len(&self->tx_queue) < TTP_TX_LOW_THRESHOLD) &&
801 (!self->close_pend)) 799 (!self->close_pend)) {
802 {
803 if (self->notify.flow_indication) 800 if (self->notify.flow_indication)
804 self->notify.flow_indication(self->notify.instance, 801 self->notify.flow_indication(self->notify.instance,
805 self, FLOW_START); 802 self, FLOW_START);
@@ -892,7 +889,7 @@ static int irttp_udata_indication(void *instance, void *sap,
892 /* Just pass data to layer above */ 889 /* Just pass data to layer above */
893 if (self->notify.udata_indication) { 890 if (self->notify.udata_indication) {
894 err = self->notify.udata_indication(self->notify.instance, 891 err = self->notify.udata_indication(self->notify.instance,
895 self,skb); 892 self, skb);
896 /* Same comment as in irttp_do_data_indication() */ 893 /* Same comment as in irttp_do_data_indication() */
897 if (!err) 894 if (!err)
898 return 0; 895 return 0;
@@ -1057,7 +1054,7 @@ static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
1057 * to do that. Jean II */ 1054 * to do that. Jean II */
1058 1055
1059 /* If we need to send disconnect. try to do it now */ 1056 /* If we need to send disconnect. try to do it now */
1060 if(self->disconnect_pend) 1057 if (self->disconnect_pend)
1061 irttp_start_todo_timer(self, 0); 1058 irttp_start_todo_timer(self, 0);
1062} 1059}
1063 1060
@@ -1116,7 +1113,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
1116 IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -EBADR;); 1113 IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -EBADR;);
1117 1114
1118 if (self->connected) { 1115 if (self->connected) {
1119 if(userdata) 1116 if (userdata)
1120 dev_kfree_skb(userdata); 1117 dev_kfree_skb(userdata);
1121 return -EISCONN; 1118 return -EISCONN;
1122 } 1119 }
@@ -1137,7 +1134,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
1137 * headers 1134 * headers
1138 */ 1135 */
1139 IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER, 1136 IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
1140 { dev_kfree_skb(userdata); return -1; } ); 1137 { dev_kfree_skb(userdata); return -1; });
1141 } 1138 }
1142 1139
1143 /* Initialize connection parameters */ 1140 /* Initialize connection parameters */
@@ -1157,7 +1154,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
1157 * Give away max 127 credits for now 1154 * Give away max 127 credits for now
1158 */ 1155 */
1159 if (n > 127) { 1156 if (n > 127) {
1160 self->avail_credit=n-127; 1157 self->avail_credit = n - 127;
1161 n = 127; 1158 n = 127;
1162 } 1159 }
1163 1160
@@ -1166,10 +1163,10 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
1166 /* SAR enabled? */ 1163 /* SAR enabled? */
1167 if (max_sdu_size > 0) { 1164 if (max_sdu_size > 0) {
1168 IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER), 1165 IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
1169 { dev_kfree_skb(tx_skb); return -1; } ); 1166 { dev_kfree_skb(tx_skb); return -1; });
1170 1167
1171 /* Insert SAR parameters */ 1168 /* Insert SAR parameters */
1172 frame = skb_push(tx_skb, TTP_HEADER+TTP_SAR_HEADER); 1169 frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER);
1173 1170
1174 frame[0] = TTP_PARAMETERS | n; 1171 frame[0] = TTP_PARAMETERS | n;
1175 frame[1] = 0x04; /* Length */ 1172 frame[1] = 0x04; /* Length */
@@ -1386,7 +1383,7 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
1386 * headers 1383 * headers
1387 */ 1384 */
1388 IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER, 1385 IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
1389 { dev_kfree_skb(userdata); return -1; } ); 1386 { dev_kfree_skb(userdata); return -1; });
1390 } 1387 }
1391 1388
1392 self->avail_credit = 0; 1389 self->avail_credit = 0;
@@ -1409,10 +1406,10 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
1409 /* SAR enabled? */ 1406 /* SAR enabled? */
1410 if (max_sdu_size > 0) { 1407 if (max_sdu_size > 0) {
1411 IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER), 1408 IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
1412 { dev_kfree_skb(tx_skb); return -1; } ); 1409 { dev_kfree_skb(tx_skb); return -1; });
1413 1410
1414 /* Insert TTP header with SAR parameters */ 1411 /* Insert TTP header with SAR parameters */
1415 frame = skb_push(tx_skb, TTP_HEADER+TTP_SAR_HEADER); 1412 frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER);
1416 1413
1417 frame[0] = TTP_PARAMETERS | n; 1414 frame[0] = TTP_PARAMETERS | n;
1418 frame[1] = 0x04; /* Length */ 1415 frame[1] = 0x04; /* Length */
@@ -1522,7 +1519,7 @@ int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata,
1522 * function may be called from various context, like user, timer 1519 * function may be called from various context, like user, timer
1523 * for following a disconnect_indication() (i.e. net_bh). 1520 * for following a disconnect_indication() (i.e. net_bh).
1524 * Jean II */ 1521 * Jean II */
1525 if(test_and_set_bit(0, &self->disconnect_pend)) { 1522 if (test_and_set_bit(0, &self->disconnect_pend)) {
1526 IRDA_DEBUG(0, "%s(), disconnect already pending\n", 1523 IRDA_DEBUG(0, "%s(), disconnect already pending\n",
1527 __func__); 1524 __func__);
1528 if (userdata) 1525 if (userdata)
@@ -1627,7 +1624,7 @@ static void irttp_disconnect_indication(void *instance, void *sap,
1627 * Jean II */ 1624 * Jean II */
1628 1625
1629 /* No need to notify the client if has already tried to disconnect */ 1626 /* No need to notify the client if has already tried to disconnect */
1630 if(self->notify.disconnect_indication) 1627 if (self->notify.disconnect_indication)
1631 self->notify.disconnect_indication(self->notify.instance, self, 1628 self->notify.disconnect_indication(self->notify.instance, self,
1632 reason, skb); 1629 reason, skb);
1633 else 1630 else
@@ -1738,8 +1735,7 @@ static void irttp_run_rx_queue(struct tsap_cb *self)
1738 * This is the last fragment, so time to reassemble! 1735 * This is the last fragment, so time to reassemble!
1739 */ 1736 */
1740 if ((self->rx_sdu_size <= self->rx_max_sdu_size) || 1737 if ((self->rx_sdu_size <= self->rx_max_sdu_size) ||
1741 (self->rx_max_sdu_size == TTP_SAR_UNBOUND)) 1738 (self->rx_max_sdu_size == TTP_SAR_UNBOUND)) {
1742 {
1743 /* 1739 /*
1744 * A little optimizing. Only queue the fragment if 1740 * A little optimizing. Only queue the fragment if
1745 * there are other fragments. Since if this is the 1741 * there are other fragments. Since if this is the
@@ -1860,7 +1856,7 @@ static int irttp_seq_show(struct seq_file *seq, void *v)
1860 seq_printf(seq, "dtsap_sel: %02x\n", 1856 seq_printf(seq, "dtsap_sel: %02x\n",
1861 self->dtsap_sel); 1857 self->dtsap_sel);
1862 seq_printf(seq, " connected: %s, ", 1858 seq_printf(seq, " connected: %s, ",
1863 self->connected? "TRUE":"FALSE"); 1859 self->connected ? "TRUE" : "FALSE");
1864 seq_printf(seq, "avail credit: %d, ", 1860 seq_printf(seq, "avail credit: %d, ",
1865 self->avail_credit); 1861 self->avail_credit);
1866 seq_printf(seq, "remote credit: %d, ", 1862 seq_printf(seq, "remote credit: %d, ",
@@ -1876,9 +1872,9 @@ static int irttp_seq_show(struct seq_file *seq, void *v)
1876 seq_printf(seq, "rx_queue len: %u\n", 1872 seq_printf(seq, "rx_queue len: %u\n",
1877 skb_queue_len(&self->rx_queue)); 1873 skb_queue_len(&self->rx_queue));
1878 seq_printf(seq, " tx_sdu_busy: %s, ", 1874 seq_printf(seq, " tx_sdu_busy: %s, ",
1879 self->tx_sdu_busy? "TRUE":"FALSE"); 1875 self->tx_sdu_busy ? "TRUE" : "FALSE");
1880 seq_printf(seq, "rx_sdu_busy: %s\n", 1876 seq_printf(seq, "rx_sdu_busy: %s\n",
1881 self->rx_sdu_busy? "TRUE":"FALSE"); 1877 self->rx_sdu_busy ? "TRUE" : "FALSE");
1882 seq_printf(seq, " max_seg_size: %u, ", 1878 seq_printf(seq, " max_seg_size: %u, ",
1883 self->max_seg_size); 1879 self->max_seg_size);
1884 seq_printf(seq, "tx_max_sdu_size: %u, ", 1880 seq_printf(seq, "tx_max_sdu_size: %u, ",
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ab8bd2cabfa0..9d585370c5b4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -45,7 +45,7 @@ struct netns_pfkey {
45static DEFINE_MUTEX(pfkey_mutex); 45static DEFINE_MUTEX(pfkey_mutex);
46 46
47#define DUMMY_MARK 0 47#define DUMMY_MARK 0
48static struct xfrm_mark dummy_mark = {0, 0}; 48static const struct xfrm_mark dummy_mark = {0, 0};
49struct pfkey_sock { 49struct pfkey_sock {
50 /* struct sock must be the first member of struct pfkey_sock */ 50 /* struct sock must be the first member of struct pfkey_sock */
51 struct sock sk; 51 struct sock sk;
@@ -338,7 +338,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
338 return 0; 338 return 0;
339} 339}
340 340
341static u8 sadb_ext_min_len[] = { 341static const u8 sadb_ext_min_len[] = {
342 [SADB_EXT_RESERVED] = (u8) 0, 342 [SADB_EXT_RESERVED] = (u8) 0,
343 [SADB_EXT_SA] = (u8) sizeof(struct sadb_sa), 343 [SADB_EXT_SA] = (u8) sizeof(struct sadb_sa),
344 [SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime), 344 [SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime),
@@ -1196,10 +1196,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1196 1196
1197 x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 1197 x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1198 &x->props.saddr); 1198 &x->props.saddr);
1199 if (!x->props.family) {
1200 err = -EAFNOSUPPORT;
1201 goto out;
1202 }
1203 pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1], 1199 pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1],
1204 &x->id.daddr); 1200 &x->id.daddr);
1205 1201
@@ -2205,10 +2201,6 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
2205 2201
2206 sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; 2202 sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
2207 xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr); 2203 xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr);
2208 if (!xp->family) {
2209 err = -EINVAL;
2210 goto out;
2211 }
2212 xp->selector.family = xp->family; 2204 xp->selector.family = xp->family;
2213 xp->selector.prefixlen_s = sa->sadb_address_prefixlen; 2205 xp->selector.prefixlen_s = sa->sadb_address_prefixlen;
2214 xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); 2206 xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
@@ -2737,7 +2729,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
2737 2729
2738typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, 2730typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
2739 const struct sadb_msg *hdr, void * const *ext_hdrs); 2731 const struct sadb_msg *hdr, void * const *ext_hdrs);
2740static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { 2732static const pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
2741 [SADB_RESERVED] = pfkey_reserved, 2733 [SADB_RESERVED] = pfkey_reserved,
2742 [SADB_GETSPI] = pfkey_getspi, 2734 [SADB_GETSPI] = pfkey_getspi,
2743 [SADB_UPDATE] = pfkey_add, 2735 [SADB_UPDATE] = pfkey_add,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 48aaa89253e0..6cba486353e8 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -321,12 +321,12 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
321 if (llc->dev) { 321 if (llc->dev) {
322 if (!addr->sllc_arphrd) 322 if (!addr->sllc_arphrd)
323 addr->sllc_arphrd = llc->dev->type; 323 addr->sllc_arphrd = llc->dev->type;
324 if (llc_mac_null(addr->sllc_mac)) 324 if (is_zero_ether_addr(addr->sllc_mac))
325 memcpy(addr->sllc_mac, llc->dev->dev_addr, 325 memcpy(addr->sllc_mac, llc->dev->dev_addr,
326 IFHWADDRLEN); 326 IFHWADDRLEN);
327 if (addr->sllc_arphrd != llc->dev->type || 327 if (addr->sllc_arphrd != llc->dev->type ||
328 !llc_mac_match(addr->sllc_mac, 328 !ether_addr_equal(addr->sllc_mac,
329 llc->dev->dev_addr)) { 329 llc->dev->dev_addr)) {
330 rc = -EINVAL; 330 rc = -EINVAL;
331 llc->dev = NULL; 331 llc->dev = NULL;
332 } 332 }
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 0d0d416dfab6..cd8724177965 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -478,8 +478,8 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
478 478
479 return llc->laddr.lsap == laddr->lsap && 479 return llc->laddr.lsap == laddr->lsap &&
480 llc->daddr.lsap == daddr->lsap && 480 llc->daddr.lsap == daddr->lsap &&
481 llc_mac_match(llc->laddr.mac, laddr->mac) && 481 ether_addr_equal(llc->laddr.mac, laddr->mac) &&
482 llc_mac_match(llc->daddr.mac, daddr->mac); 482 ether_addr_equal(llc->daddr.mac, daddr->mac);
483} 483}
484 484
485/** 485/**
@@ -550,7 +550,7 @@ static inline bool llc_listener_match(const struct llc_sap *sap,
550 550
551 return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN && 551 return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
552 llc->laddr.lsap == laddr->lsap && 552 llc->laddr.lsap == laddr->lsap &&
553 llc_mac_match(llc->laddr.mac, laddr->mac); 553 ether_addr_equal(llc->laddr.mac, laddr->mac);
554} 554}
555 555
556static struct sock *__llc_lookup_listener(struct llc_sap *sap, 556static struct sock *__llc_lookup_listener(struct llc_sap *sap,
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 7b4799cfbf8d..1a3c7e0f5d0d 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -147,7 +147,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v)
147 } 147 }
148 seq_printf(seq, "@%02X ", llc->sap->laddr.lsap); 148 seq_printf(seq, "@%02X ", llc->sap->laddr.lsap);
149 llc_ui_format_mac(seq, llc->daddr.mac); 149 llc_ui_format_mac(seq, llc->daddr.mac);
150 seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap, 150 seq_printf(seq, "@%02X %8d %8d %2d %3u %4d\n", llc->daddr.lsap,
151 sk_wmem_alloc_get(sk), 151 sk_wmem_alloc_get(sk),
152 sk_rmem_alloc_get(sk) - llc->copied_seq, 152 sk_rmem_alloc_get(sk) - llc->copied_seq,
153 sk->sk_state, 153 sk->sk_state,
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 78be45cda5c1..e5850699098e 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -302,7 +302,7 @@ static inline bool llc_dgram_match(const struct llc_sap *sap,
302 302
303 return sk->sk_type == SOCK_DGRAM && 303 return sk->sk_type == SOCK_DGRAM &&
304 llc->laddr.lsap == laddr->lsap && 304 llc->laddr.lsap == laddr->lsap &&
305 llc_mac_match(llc->laddr.mac, laddr->mac); 305 ether_addr_equal(llc->laddr.mac, laddr->mac);
306} 306}
307 307
308/** 308/**
@@ -425,7 +425,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb)
425 llc_pdu_decode_da(skb, laddr.mac); 425 llc_pdu_decode_da(skb, laddr.mac);
426 llc_pdu_decode_dsap(skb, &laddr.lsap); 426 llc_pdu_decode_dsap(skb, &laddr.lsap);
427 427
428 if (llc_mac_multicast(laddr.mac)) { 428 if (is_multicast_ether_addr(laddr.mac)) {
429 llc_sap_mcast(sap, &laddr, skb); 429 llc_sap_mcast(sap, &laddr, skb);
430 kfree_skb(skb); 430 kfree_skb(skb);
431 } else { 431 } else {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 43dd7525bfcb..2e7855a1b10d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -395,9 +395,13 @@ void sta_set_rate_info_tx(struct sta_info *sta,
395 rinfo->nss = ieee80211_rate_get_vht_nss(rate); 395 rinfo->nss = ieee80211_rate_get_vht_nss(rate);
396 } else { 396 } else {
397 struct ieee80211_supported_band *sband; 397 struct ieee80211_supported_band *sband;
398 int shift = ieee80211_vif_get_shift(&sta->sdata->vif);
399 u16 brate;
400
398 sband = sta->local->hw.wiphy->bands[ 401 sband = sta->local->hw.wiphy->bands[
399 ieee80211_get_sdata_band(sta->sdata)]; 402 ieee80211_get_sdata_band(sta->sdata)];
400 rinfo->legacy = sband->bitrates[rate->idx].bitrate; 403 brate = sband->bitrates[rate->idx].bitrate;
404 rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
401 } 405 }
402 if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) 406 if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
403 rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; 407 rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
@@ -422,11 +426,13 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
422 rinfo->mcs = sta->last_rx_rate_idx; 426 rinfo->mcs = sta->last_rx_rate_idx;
423 } else { 427 } else {
424 struct ieee80211_supported_band *sband; 428 struct ieee80211_supported_band *sband;
429 int shift = ieee80211_vif_get_shift(&sta->sdata->vif);
430 u16 brate;
425 431
426 sband = sta->local->hw.wiphy->bands[ 432 sband = sta->local->hw.wiphy->bands[
427 ieee80211_get_sdata_band(sta->sdata)]; 433 ieee80211_get_sdata_band(sta->sdata)];
428 rinfo->legacy = 434 brate = sband->bitrates[sta->last_rx_rate_idx].bitrate;
429 sband->bitrates[sta->last_rx_rate_idx].bitrate; 435 rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
430 } 436 }
431 437
432 if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) 438 if (sta->last_rx_rate_flag & RX_FLAG_40MHZ)
@@ -856,8 +862,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
856 return 0; 862 return 0;
857} 863}
858 864
859static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, 865int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
860 struct cfg80211_beacon_data *params) 866 struct cfg80211_beacon_data *params)
861{ 867{
862 struct beacon_data *new, *old; 868 struct beacon_data *new, *old;
863 int new_head_len, new_tail_len; 869 int new_head_len, new_tail_len;
@@ -1020,6 +1026,12 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
1020 1026
1021 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1027 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1022 1028
1029 /* don't allow changing the beacon while CSA is in place - offset
1030 * of channel switch counter may change
1031 */
1032 if (sdata->vif.csa_active)
1033 return -EBUSY;
1034
1023 old = rtnl_dereference(sdata->u.ap.beacon); 1035 old = rtnl_dereference(sdata->u.ap.beacon);
1024 if (!old) 1036 if (!old)
1025 return -ENOENT; 1037 return -ENOENT;
@@ -1044,6 +1056,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
1044 return -ENOENT; 1056 return -ENOENT;
1045 old_probe_resp = rtnl_dereference(sdata->u.ap.probe_resp); 1057 old_probe_resp = rtnl_dereference(sdata->u.ap.probe_resp);
1046 1058
1059 /* abort any running channel switch */
1060 sdata->vif.csa_active = false;
1061 cancel_work_sync(&sdata->csa_finalize_work);
1062
1047 /* turn off carrier for this interface and dependent VLANs */ 1063 /* turn off carrier for this interface and dependent VLANs */
1048 list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) 1064 list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
1049 netif_carrier_off(vlan->dev); 1065 netif_carrier_off(vlan->dev);
@@ -1192,8 +1208,6 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1192 struct station_parameters *params) 1208 struct station_parameters *params)
1193{ 1209{
1194 int ret = 0; 1210 int ret = 0;
1195 u32 rates;
1196 int i, j;
1197 struct ieee80211_supported_band *sband; 1211 struct ieee80211_supported_band *sband;
1198 struct ieee80211_sub_if_data *sdata = sta->sdata; 1212 struct ieee80211_sub_if_data *sdata = sta->sdata;
1199 enum ieee80211_band band = ieee80211_get_sdata_band(sdata); 1213 enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
@@ -1286,16 +1300,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1286 sta->listen_interval = params->listen_interval; 1300 sta->listen_interval = params->listen_interval;
1287 1301
1288 if (params->supported_rates) { 1302 if (params->supported_rates) {
1289 rates = 0; 1303 ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
1290 1304 sband, params->supported_rates,
1291 for (i = 0; i < params->supported_rates_len; i++) { 1305 params->supported_rates_len,
1292 int rate = (params->supported_rates[i] & 0x7f) * 5; 1306 &sta->sta.supp_rates[band]);
1293 for (j = 0; j < sband->n_bitrates; j++) {
1294 if (sband->bitrates[j].bitrate == rate)
1295 rates |= BIT(j);
1296 }
1297 }
1298 sta->sta.supp_rates[band] = rates;
1299 } 1307 }
1300 1308
1301 if (params->ht_capa) 1309 if (params->ht_capa)
@@ -1958,18 +1966,11 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
1958 } 1966 }
1959 1967
1960 if (params->basic_rates) { 1968 if (params->basic_rates) {
1961 int i, j; 1969 ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
1962 u32 rates = 0; 1970 wiphy->bands[band],
1963 struct ieee80211_supported_band *sband = wiphy->bands[band]; 1971 params->basic_rates,
1964 1972 params->basic_rates_len,
1965 for (i = 0; i < params->basic_rates_len; i++) { 1973 &sdata->vif.bss_conf.basic_rates);
1966 int rate = (params->basic_rates[i] & 0x7f) * 5;
1967 for (j = 0; j < sband->n_bitrates; j++) {
1968 if (sband->bitrates[j].bitrate == rate)
1969 rates |= BIT(j);
1970 }
1971 }
1972 sdata->vif.bss_conf.basic_rates = rates;
1973 changed |= BSS_CHANGED_BASIC_RATES; 1974 changed |= BSS_CHANGED_BASIC_RATES;
1974 } 1975 }
1975 1976
@@ -2301,14 +2302,25 @@ static void ieee80211_rfkill_poll(struct wiphy *wiphy)
2301} 2302}
2302 2303
2303#ifdef CONFIG_NL80211_TESTMODE 2304#ifdef CONFIG_NL80211_TESTMODE
2304static int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len) 2305static int ieee80211_testmode_cmd(struct wiphy *wiphy,
2306 struct wireless_dev *wdev,
2307 void *data, int len)
2305{ 2308{
2306 struct ieee80211_local *local = wiphy_priv(wiphy); 2309 struct ieee80211_local *local = wiphy_priv(wiphy);
2310 struct ieee80211_vif *vif = NULL;
2307 2311
2308 if (!local->ops->testmode_cmd) 2312 if (!local->ops->testmode_cmd)
2309 return -EOPNOTSUPP; 2313 return -EOPNOTSUPP;
2310 2314
2311 return local->ops->testmode_cmd(&local->hw, data, len); 2315 if (wdev) {
2316 struct ieee80211_sub_if_data *sdata;
2317
2318 sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
2319 if (sdata->flags & IEEE80211_SDATA_IN_DRIVER)
2320 vif = &sdata->vif;
2321 }
2322
2323 return local->ops->testmode_cmd(&local->hw, vif, data, len);
2312} 2324}
2313 2325
2314static int ieee80211_testmode_dump(struct wiphy *wiphy, 2326static int ieee80211_testmode_dump(struct wiphy *wiphy,
@@ -2786,6 +2798,178 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
2786 return 0; 2798 return 0;
2787} 2799}
2788 2800
2801static struct cfg80211_beacon_data *
2802cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
2803{
2804 struct cfg80211_beacon_data *new_beacon;
2805 u8 *pos;
2806 int len;
2807
2808 len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len +
2809 beacon->proberesp_ies_len + beacon->assocresp_ies_len +
2810 beacon->probe_resp_len;
2811
2812 new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL);
2813 if (!new_beacon)
2814 return NULL;
2815
2816 pos = (u8 *)(new_beacon + 1);
2817 if (beacon->head_len) {
2818 new_beacon->head_len = beacon->head_len;
2819 new_beacon->head = pos;
2820 memcpy(pos, beacon->head, beacon->head_len);
2821 pos += beacon->head_len;
2822 }
2823 if (beacon->tail_len) {
2824 new_beacon->tail_len = beacon->tail_len;
2825 new_beacon->tail = pos;
2826 memcpy(pos, beacon->tail, beacon->tail_len);
2827 pos += beacon->tail_len;
2828 }
2829 if (beacon->beacon_ies_len) {
2830 new_beacon->beacon_ies_len = beacon->beacon_ies_len;
2831 new_beacon->beacon_ies = pos;
2832 memcpy(pos, beacon->beacon_ies, beacon->beacon_ies_len);
2833 pos += beacon->beacon_ies_len;
2834 }
2835 if (beacon->proberesp_ies_len) {
2836 new_beacon->proberesp_ies_len = beacon->proberesp_ies_len;
2837 new_beacon->proberesp_ies = pos;
2838 memcpy(pos, beacon->proberesp_ies, beacon->proberesp_ies_len);
2839 pos += beacon->proberesp_ies_len;
2840 }
2841 if (beacon->assocresp_ies_len) {
2842 new_beacon->assocresp_ies_len = beacon->assocresp_ies_len;
2843 new_beacon->assocresp_ies = pos;
2844 memcpy(pos, beacon->assocresp_ies, beacon->assocresp_ies_len);
2845 pos += beacon->assocresp_ies_len;
2846 }
2847 if (beacon->probe_resp_len) {
2848 new_beacon->probe_resp_len = beacon->probe_resp_len;
2849 beacon->probe_resp = pos;
2850 memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
2851 pos += beacon->probe_resp_len;
2852 }
2853
2854 return new_beacon;
2855}
2856
2857void ieee80211_csa_finalize_work(struct work_struct *work)
2858{
2859 struct ieee80211_sub_if_data *sdata =
2860 container_of(work, struct ieee80211_sub_if_data,
2861 csa_finalize_work);
2862 struct ieee80211_local *local = sdata->local;
2863 int err, changed;
2864
2865 if (!ieee80211_sdata_running(sdata))
2866 return;
2867
2868 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
2869 return;
2870
2871 sdata->radar_required = sdata->csa_radar_required;
2872 err = ieee80211_vif_change_channel(sdata, &local->csa_chandef,
2873 &changed);
2874 if (WARN_ON(err < 0))
2875 return;
2876
2877 err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon);
2878 if (err < 0)
2879 return;
2880
2881 changed |= err;
2882 kfree(sdata->u.ap.next_beacon);
2883 sdata->u.ap.next_beacon = NULL;
2884 sdata->vif.csa_active = false;
2885
2886 ieee80211_wake_queues_by_reason(&sdata->local->hw,
2887 IEEE80211_MAX_QUEUE_MAP,
2888 IEEE80211_QUEUE_STOP_REASON_CSA);
2889
2890 ieee80211_bss_info_change_notify(sdata, changed);
2891
2892 cfg80211_ch_switch_notify(sdata->dev, &local->csa_chandef);
2893}
2894
2895static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
2896 struct cfg80211_csa_settings *params)
2897{
2898 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
2899 struct ieee80211_local *local = sdata->local;
2900 struct ieee80211_chanctx_conf *chanctx_conf;
2901 struct ieee80211_chanctx *chanctx;
2902 int err, num_chanctx;
2903
2904 if (!list_empty(&local->roc_list) || local->scanning)
2905 return -EBUSY;
2906
2907 if (sdata->wdev.cac_started)
2908 return -EBUSY;
2909
2910 if (cfg80211_chandef_identical(&params->chandef,
2911 &sdata->vif.bss_conf.chandef))
2912 return -EINVAL;
2913
2914 rcu_read_lock();
2915 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
2916 if (!chanctx_conf) {
2917 rcu_read_unlock();
2918 return -EBUSY;
2919 }
2920
2921 /* don't handle for multi-VIF cases */
2922 chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf);
2923 if (chanctx->refcount > 1) {
2924 rcu_read_unlock();
2925 return -EBUSY;
2926 }
2927 num_chanctx = 0;
2928 list_for_each_entry_rcu(chanctx, &local->chanctx_list, list)
2929 num_chanctx++;
2930 rcu_read_unlock();
2931
2932 if (num_chanctx > 1)
2933 return -EBUSY;
2934
2935 /* don't allow another channel switch if one is already active. */
2936 if (sdata->vif.csa_active)
2937 return -EBUSY;
2938
2939 /* only handle AP for now. */
2940 switch (sdata->vif.type) {
2941 case NL80211_IFTYPE_AP:
2942 break;
2943 default:
2944 return -EOPNOTSUPP;
2945 }
2946
2947 sdata->u.ap.next_beacon = cfg80211_beacon_dup(&params->beacon_after);
2948 if (!sdata->u.ap.next_beacon)
2949 return -ENOMEM;
2950
2951 sdata->csa_counter_offset_beacon = params->counter_offset_beacon;
2952 sdata->csa_counter_offset_presp = params->counter_offset_presp;
2953 sdata->csa_radar_required = params->radar_required;
2954
2955 if (params->block_tx)
2956 ieee80211_stop_queues_by_reason(&local->hw,
2957 IEEE80211_MAX_QUEUE_MAP,
2958 IEEE80211_QUEUE_STOP_REASON_CSA);
2959
2960 err = ieee80211_assign_beacon(sdata, &params->beacon_csa);
2961 if (err < 0)
2962 return err;
2963
2964 local->csa_chandef = params->chandef;
2965 sdata->vif.csa_active = true;
2966
2967 ieee80211_bss_info_change_notify(sdata, err);
2968 drv_channel_switch_beacon(sdata, &params->chandef);
2969
2970 return 0;
2971}
2972
2789static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, 2973static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
2790 struct ieee80211_channel *chan, bool offchan, 2974 struct ieee80211_channel *chan, bool offchan,
2791 unsigned int wait, const u8 *buf, size_t len, 2975 unsigned int wait, const u8 *buf, size_t len,
@@ -3503,4 +3687,5 @@ struct cfg80211_ops mac80211_config_ops = {
3503 .get_et_strings = ieee80211_get_et_strings, 3687 .get_et_strings = ieee80211_get_et_strings,
3504 .get_channel = ieee80211_cfg_get_channel, 3688 .get_channel = ieee80211_cfg_get_channel,
3505 .start_radar_detection = ieee80211_start_radar_detection, 3689 .start_radar_detection = ieee80211_start_radar_detection,
3690 .channel_switch = ieee80211_channel_switch,
3506}; 3691};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 03e8d2e3270e..3a4764b2869e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -410,6 +410,64 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
410 return ret; 410 return ret;
411} 411}
412 412
413int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
414 const struct cfg80211_chan_def *chandef,
415 u32 *changed)
416{
417 struct ieee80211_local *local = sdata->local;
418 struct ieee80211_chanctx_conf *conf;
419 struct ieee80211_chanctx *ctx;
420 int ret;
421 u32 chanctx_changed = 0;
422
423 /* should never be called if not performing a channel switch. */
424 if (WARN_ON(!sdata->vif.csa_active))
425 return -EINVAL;
426
427 if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
428 IEEE80211_CHAN_DISABLED))
429 return -EINVAL;
430
431 mutex_lock(&local->chanctx_mtx);
432 conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
433 lockdep_is_held(&local->chanctx_mtx));
434 if (!conf) {
435 ret = -EINVAL;
436 goto out;
437 }
438
439 ctx = container_of(conf, struct ieee80211_chanctx, conf);
440 if (ctx->refcount != 1) {
441 ret = -EINVAL;
442 goto out;
443 }
444
445 if (sdata->vif.bss_conf.chandef.width != chandef->width) {
446 chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH;
447 *changed |= BSS_CHANGED_BANDWIDTH;
448 }
449
450 sdata->vif.bss_conf.chandef = *chandef;
451 ctx->conf.def = *chandef;
452
453 chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL;
454 drv_change_chanctx(local, ctx, chanctx_changed);
455
456 if (!local->use_chanctx) {
457 local->_oper_chandef = *chandef;
458 ieee80211_hw_config(local, 0);
459 }
460
461 ieee80211_recalc_chanctx_chantype(local, ctx);
462 ieee80211_recalc_smps_chanctx(local, ctx);
463 ieee80211_recalc_radar_chanctx(local, ctx);
464
465 ret = 0;
466 out:
467 mutex_unlock(&local->chanctx_mtx);
468 return ret;
469}
470
413int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, 471int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
414 const struct cfg80211_chan_def *chandef, 472 const struct cfg80211_chan_def *chandef,
415 u32 *changed) 473 u32 *changed)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 44e201d60a13..19c54a44ed47 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -455,6 +455,15 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
455 DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count); 455 DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count);
456 DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count); 456 DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count);
457 457
458 if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
459 debugfs_create_x32("driver_buffered_tids", 0400,
460 sta->debugfs.dir,
461 (u32 *)&sta->driver_buffered_tids);
462 else
463 debugfs_create_x64("driver_buffered_tids", 0400,
464 sta->debugfs.dir,
465 (u64 *)&sta->driver_buffered_tids);
466
458 drv_sta_add_debugfs(local, sdata, &sta->sta, sta->debugfs.dir); 467 drv_sta_add_debugfs(local, sdata, &sta->sta, sta->debugfs.dir);
459} 468}
460 469
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index b931c96a596f..b3ea11f3d526 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1072,4 +1072,17 @@ static inline void drv_ipv6_addr_change(struct ieee80211_local *local,
1072} 1072}
1073#endif 1073#endif
1074 1074
1075static inline void
1076drv_channel_switch_beacon(struct ieee80211_sub_if_data *sdata,
1077 struct cfg80211_chan_def *chandef)
1078{
1079 struct ieee80211_local *local = sdata->local;
1080
1081 if (local->ops->channel_switch_beacon) {
1082 trace_drv_channel_switch_beacon(local, sdata, chandef);
1083 local->ops->channel_switch_beacon(&local->hw, &sdata->vif,
1084 chandef);
1085 }
1086}
1087
1075#endif /* __MAC80211_DRIVER_OPS */ 1088#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index f83534f6a2ee..529bf58bc145 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -19,13 +19,14 @@
19#include "ieee80211_i.h" 19#include "ieee80211_i.h"
20#include "rate.h" 20#include "rate.h"
21 21
22static void __check_htcap_disable(struct ieee80211_sub_if_data *sdata, 22static void __check_htcap_disable(struct ieee80211_ht_cap *ht_capa,
23 struct ieee80211_ht_cap *ht_capa_mask,
23 struct ieee80211_sta_ht_cap *ht_cap, 24 struct ieee80211_sta_ht_cap *ht_cap,
24 u16 flag) 25 u16 flag)
25{ 26{
26 __le16 le_flag = cpu_to_le16(flag); 27 __le16 le_flag = cpu_to_le16(flag);
27 if (sdata->u.mgd.ht_capa_mask.cap_info & le_flag) { 28 if (ht_capa_mask->cap_info & le_flag) {
28 if (!(sdata->u.mgd.ht_capa.cap_info & le_flag)) 29 if (!(ht_capa->cap_info & le_flag))
29 ht_cap->cap &= ~flag; 30 ht_cap->cap &= ~flag;
30 } 31 }
31} 32}
@@ -33,13 +34,30 @@ static void __check_htcap_disable(struct ieee80211_sub_if_data *sdata,
33void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, 34void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
34 struct ieee80211_sta_ht_cap *ht_cap) 35 struct ieee80211_sta_ht_cap *ht_cap)
35{ 36{
36 u8 *scaps = (u8 *)(&sdata->u.mgd.ht_capa.mcs.rx_mask); 37 struct ieee80211_ht_cap *ht_capa, *ht_capa_mask;
37 u8 *smask = (u8 *)(&sdata->u.mgd.ht_capa_mask.mcs.rx_mask); 38 u8 *scaps, *smask;
38 int i; 39 int i;
39 40
40 if (!ht_cap->ht_supported) 41 if (!ht_cap->ht_supported)
41 return; 42 return;
42 43
44 switch (sdata->vif.type) {
45 case NL80211_IFTYPE_STATION:
46 ht_capa = &sdata->u.mgd.ht_capa;
47 ht_capa_mask = &sdata->u.mgd.ht_capa_mask;
48 break;
49 case NL80211_IFTYPE_ADHOC:
50 ht_capa = &sdata->u.ibss.ht_capa;
51 ht_capa_mask = &sdata->u.ibss.ht_capa_mask;
52 break;
53 default:
54 WARN_ON_ONCE(1);
55 return;
56 }
57
58 scaps = (u8 *)(&ht_capa->mcs.rx_mask);
59 smask = (u8 *)(&ht_capa_mask->mcs.rx_mask);
60
43 /* NOTE: If you add more over-rides here, update register_hw 61 /* NOTE: If you add more over-rides here, update register_hw
44 * ht_capa_mod_msk logic in main.c as well. 62 * ht_capa_mod_msk logic in main.c as well.
45 * And, if this method can ever change ht_cap.ht_supported, fix 63 * And, if this method can ever change ht_cap.ht_supported, fix
@@ -55,28 +73,32 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
55 } 73 }
56 74
57 /* Force removal of HT-40 capabilities? */ 75 /* Force removal of HT-40 capabilities? */
58 __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SUP_WIDTH_20_40); 76 __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
59 __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_40); 77 IEEE80211_HT_CAP_SUP_WIDTH_20_40);
78 __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
79 IEEE80211_HT_CAP_SGI_40);
60 80
61 /* Allow user to disable SGI-20 (SGI-40 is handled above) */ 81 /* Allow user to disable SGI-20 (SGI-40 is handled above) */
62 __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_20); 82 __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
83 IEEE80211_HT_CAP_SGI_20);
63 84
64 /* Allow user to disable the max-AMSDU bit. */ 85 /* Allow user to disable the max-AMSDU bit. */
65 __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); 86 __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
87 IEEE80211_HT_CAP_MAX_AMSDU);
66 88
67 /* Allow user to decrease AMPDU factor */ 89 /* Allow user to decrease AMPDU factor */
68 if (sdata->u.mgd.ht_capa_mask.ampdu_params_info & 90 if (ht_capa_mask->ampdu_params_info &
69 IEEE80211_HT_AMPDU_PARM_FACTOR) { 91 IEEE80211_HT_AMPDU_PARM_FACTOR) {
70 u8 n = sdata->u.mgd.ht_capa.ampdu_params_info 92 u8 n = ht_capa->ampdu_params_info &
71 & IEEE80211_HT_AMPDU_PARM_FACTOR; 93 IEEE80211_HT_AMPDU_PARM_FACTOR;
72 if (n < ht_cap->ampdu_factor) 94 if (n < ht_cap->ampdu_factor)
73 ht_cap->ampdu_factor = n; 95 ht_cap->ampdu_factor = n;
74 } 96 }
75 97
76 /* Allow the user to increase AMPDU density. */ 98 /* Allow the user to increase AMPDU density. */
77 if (sdata->u.mgd.ht_capa_mask.ampdu_params_info & 99 if (ht_capa_mask->ampdu_params_info &
78 IEEE80211_HT_AMPDU_PARM_DENSITY) { 100 IEEE80211_HT_AMPDU_PARM_DENSITY) {
79 u8 n = (sdata->u.mgd.ht_capa.ampdu_params_info & 101 u8 n = (ht_capa->ampdu_params_info &
80 IEEE80211_HT_AMPDU_PARM_DENSITY) 102 IEEE80211_HT_AMPDU_PARM_DENSITY)
81 >> IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT; 103 >> IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT;
82 if (n > ht_cap->ampdu_density) 104 if (n > ht_cap->ampdu_density)
@@ -112,7 +134,8 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
112 * we advertised a restricted capability set to. Override 134 * we advertised a restricted capability set to. Override
113 * our own capabilities and then use those below. 135 * our own capabilities and then use those below.
114 */ 136 */
115 if (sdata->vif.type == NL80211_IFTYPE_STATION && 137 if ((sdata->vif.type == NL80211_IFTYPE_STATION ||
138 sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
116 !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) 139 !test_sta_flag(sta, WLAN_STA_TDLS_PEER))
117 ieee80211_apply_htcap_overrides(sdata, &own_cap); 140 ieee80211_apply_htcap_overrides(sdata, &own_cap);
118 141
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 2d45643c964e..a12afe77bb26 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -30,78 +30,27 @@
30 30
31#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) 31#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
32#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) 32#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
33#define IEEE80211_IBSS_RSN_INACTIVITY_LIMIT (10 * HZ)
33 34
34#define IEEE80211_IBSS_MAX_STA_ENTRIES 128 35#define IEEE80211_IBSS_MAX_STA_ENTRIES 128
35 36
36 37static struct beacon_data *
37static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, 38ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
38 const u8 *bssid, const int beacon_int, 39 const int beacon_int, const u32 basic_rates,
39 struct cfg80211_chan_def *req_chandef, 40 const u16 capability, u64 tsf,
40 const u32 basic_rates, 41 struct cfg80211_chan_def *chandef,
41 const u16 capability, u64 tsf, 42 bool *have_higher_than_11mbit)
42 bool creator)
43{ 43{
44 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 44 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
45 struct ieee80211_local *local = sdata->local; 45 struct ieee80211_local *local = sdata->local;
46 int rates, i; 46 int rates_n = 0, i, ri;
47 struct ieee80211_mgmt *mgmt; 47 struct ieee80211_mgmt *mgmt;
48 u8 *pos; 48 u8 *pos;
49 struct ieee80211_supported_band *sband; 49 struct ieee80211_supported_band *sband;
50 struct cfg80211_bss *bss; 50 u32 rate_flags, rates = 0, rates_added = 0;
51 u32 bss_change;
52 u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
53 struct cfg80211_chan_def chandef;
54 struct ieee80211_channel *chan;
55 struct beacon_data *presp; 51 struct beacon_data *presp;
56 int frame_len; 52 int frame_len;
57 53 int shift;
58 sdata_assert_lock(sdata);
59
60 /* Reset own TSF to allow time synchronization work. */
61 drv_reset_tsf(local, sdata);
62
63 if (!ether_addr_equal(ifibss->bssid, bssid))
64 sta_info_flush(sdata);
65
66 /* if merging, indicate to driver that we leave the old IBSS */
67 if (sdata->vif.bss_conf.ibss_joined) {
68 sdata->vif.bss_conf.ibss_joined = false;
69 sdata->vif.bss_conf.ibss_creator = false;
70 sdata->vif.bss_conf.enable_beacon = false;
71 netif_carrier_off(sdata->dev);
72 ieee80211_bss_info_change_notify(sdata,
73 BSS_CHANGED_IBSS |
74 BSS_CHANGED_BEACON_ENABLED);
75 }
76
77 presp = rcu_dereference_protected(ifibss->presp,
78 lockdep_is_held(&sdata->wdev.mtx));
79 rcu_assign_pointer(ifibss->presp, NULL);
80 if (presp)
81 kfree_rcu(presp, rcu_head);
82
83 sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
84
85 /* make a copy of the chandef, it could be modified below. */
86 chandef = *req_chandef;
87 chan = chandef.chan;
88 if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
89 chandef.width = NL80211_CHAN_WIDTH_20;
90 chandef.center_freq1 = chan->center_freq;
91 }
92
93 ieee80211_vif_release_channel(sdata);
94 if (ieee80211_vif_use_channel(sdata, &chandef,
95 ifibss->fixed_channel ?
96 IEEE80211_CHANCTX_SHARED :
97 IEEE80211_CHANCTX_EXCLUSIVE)) {
98 sdata_info(sdata, "Failed to join IBSS, no channel context\n");
99 return;
100 }
101
102 memcpy(ifibss->bssid, bssid, ETH_ALEN);
103
104 sband = local->hw.wiphy->bands[chan->band];
105 54
106 /* Build IBSS probe response */ 55 /* Build IBSS probe response */
107 frame_len = sizeof(struct ieee80211_hdr_3addr) + 56 frame_len = sizeof(struct ieee80211_hdr_3addr) +
@@ -116,7 +65,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
116 ifibss->ie_len; 65 ifibss->ie_len;
117 presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL); 66 presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL);
118 if (!presp) 67 if (!presp)
119 return; 68 return NULL;
120 69
121 presp->head = (void *)(presp + 1); 70 presp->head = (void *)(presp + 1);
122 71
@@ -137,21 +86,47 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
137 memcpy(pos, ifibss->ssid, ifibss->ssid_len); 86 memcpy(pos, ifibss->ssid, ifibss->ssid_len);
138 pos += ifibss->ssid_len; 87 pos += ifibss->ssid_len;
139 88
140 rates = min_t(int, 8, sband->n_bitrates); 89 sband = local->hw.wiphy->bands[chandef->chan->band];
90 rate_flags = ieee80211_chandef_rate_flags(chandef);
91 shift = ieee80211_chandef_get_shift(chandef);
92 rates_n = 0;
93 if (have_higher_than_11mbit)
94 *have_higher_than_11mbit = false;
95
96 for (i = 0; i < sband->n_bitrates; i++) {
97 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
98 continue;
99 if (sband->bitrates[i].bitrate > 110 &&
100 have_higher_than_11mbit)
101 *have_higher_than_11mbit = true;
102
103 rates |= BIT(i);
104 rates_n++;
105 }
106
141 *pos++ = WLAN_EID_SUPP_RATES; 107 *pos++ = WLAN_EID_SUPP_RATES;
142 *pos++ = rates; 108 *pos++ = min_t(int, 8, rates_n);
143 for (i = 0; i < rates; i++) { 109 for (ri = 0; ri < sband->n_bitrates; ri++) {
144 int rate = sband->bitrates[i].bitrate; 110 int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate,
111 5 * (1 << shift));
145 u8 basic = 0; 112 u8 basic = 0;
146 if (basic_rates & BIT(i)) 113 if (!(rates & BIT(ri)))
114 continue;
115
116 if (basic_rates & BIT(ri))
147 basic = 0x80; 117 basic = 0x80;
148 *pos++ = basic | (u8) (rate / 5); 118 *pos++ = basic | (u8) rate;
119 if (++rates_added == 8) {
120 ri++; /* continue at next rate for EXT_SUPP_RATES */
121 break;
122 }
149 } 123 }
150 124
151 if (sband->band == IEEE80211_BAND_2GHZ) { 125 if (sband->band == IEEE80211_BAND_2GHZ) {
152 *pos++ = WLAN_EID_DS_PARAMS; 126 *pos++ = WLAN_EID_DS_PARAMS;
153 *pos++ = 1; 127 *pos++ = 1;
154 *pos++ = ieee80211_frequency_to_channel(chan->center_freq); 128 *pos++ = ieee80211_frequency_to_channel(
129 chandef->chan->center_freq);
155 } 130 }
156 131
157 *pos++ = WLAN_EID_IBSS_PARAMS; 132 *pos++ = WLAN_EID_IBSS_PARAMS;
@@ -160,15 +135,20 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
160 *pos++ = 0; 135 *pos++ = 0;
161 *pos++ = 0; 136 *pos++ = 0;
162 137
163 if (sband->n_bitrates > 8) { 138 /* put the remaining rates in WLAN_EID_EXT_SUPP_RATES */
139 if (rates_n > 8) {
164 *pos++ = WLAN_EID_EXT_SUPP_RATES; 140 *pos++ = WLAN_EID_EXT_SUPP_RATES;
165 *pos++ = sband->n_bitrates - 8; 141 *pos++ = rates_n - 8;
166 for (i = 8; i < sband->n_bitrates; i++) { 142 for (; ri < sband->n_bitrates; ri++) {
167 int rate = sband->bitrates[i].bitrate; 143 int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate,
144 5 * (1 << shift));
168 u8 basic = 0; 145 u8 basic = 0;
169 if (basic_rates & BIT(i)) 146 if (!(rates & BIT(ri)))
147 continue;
148
149 if (basic_rates & BIT(ri))
170 basic = 0x80; 150 basic = 0x80;
171 *pos++ = basic | (u8) (rate / 5); 151 *pos++ = basic | (u8) rate;
172 } 152 }
173 } 153 }
174 154
@@ -178,19 +158,23 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
178 } 158 }
179 159
180 /* add HT capability and information IEs */ 160 /* add HT capability and information IEs */
181 if (chandef.width != NL80211_CHAN_WIDTH_20_NOHT && 161 if (chandef->width != NL80211_CHAN_WIDTH_20_NOHT &&
182 chandef.width != NL80211_CHAN_WIDTH_5 && 162 chandef->width != NL80211_CHAN_WIDTH_5 &&
183 chandef.width != NL80211_CHAN_WIDTH_10 && 163 chandef->width != NL80211_CHAN_WIDTH_10 &&
184 sband->ht_cap.ht_supported) { 164 sband->ht_cap.ht_supported) {
185 pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, 165 struct ieee80211_sta_ht_cap ht_cap;
186 sband->ht_cap.cap); 166
167 memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap));
168 ieee80211_apply_htcap_overrides(sdata, &ht_cap);
169
170 pos = ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap);
187 /* 171 /*
188 * Note: According to 802.11n-2009 9.13.3.1, HT Protection 172 * Note: According to 802.11n-2009 9.13.3.1, HT Protection
189 * field and RIFS Mode are reserved in IBSS mode, therefore 173 * field and RIFS Mode are reserved in IBSS mode, therefore
190 * keep them at 0 174 * keep them at 0
191 */ 175 */
192 pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap, 176 pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap,
193 &chandef, 0); 177 chandef, 0);
194 } 178 }
195 179
196 if (local->hw.queues >= IEEE80211_NUM_ACS) { 180 if (local->hw.queues >= IEEE80211_NUM_ACS) {
@@ -207,9 +191,97 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
207 191
208 presp->head_len = pos - presp->head; 192 presp->head_len = pos - presp->head;
209 if (WARN_ON(presp->head_len > frame_len)) 193 if (WARN_ON(presp->head_len > frame_len))
194 goto error;
195
196 return presp;
197error:
198 kfree(presp);
199 return NULL;
200}
201
202static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
203 const u8 *bssid, const int beacon_int,
204 struct cfg80211_chan_def *req_chandef,
205 const u32 basic_rates,
206 const u16 capability, u64 tsf,
207 bool creator)
208{
209 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
210 struct ieee80211_local *local = sdata->local;
211 struct ieee80211_supported_band *sband;
212 struct ieee80211_mgmt *mgmt;
213 struct cfg80211_bss *bss;
214 u32 bss_change;
215 struct cfg80211_chan_def chandef;
216 struct ieee80211_channel *chan;
217 struct beacon_data *presp;
218 enum nl80211_bss_scan_width scan_width;
219 bool have_higher_than_11mbit;
220
221 sdata_assert_lock(sdata);
222
223 /* Reset own TSF to allow time synchronization work. */
224 drv_reset_tsf(local, sdata);
225
226 if (!ether_addr_equal(ifibss->bssid, bssid))
227 sta_info_flush(sdata);
228
229 /* if merging, indicate to driver that we leave the old IBSS */
230 if (sdata->vif.bss_conf.ibss_joined) {
231 sdata->vif.bss_conf.ibss_joined = false;
232 sdata->vif.bss_conf.ibss_creator = false;
233 sdata->vif.bss_conf.enable_beacon = false;
234 netif_carrier_off(sdata->dev);
235 ieee80211_bss_info_change_notify(sdata,
236 BSS_CHANGED_IBSS |
237 BSS_CHANGED_BEACON_ENABLED);
238 }
239
240 presp = rcu_dereference_protected(ifibss->presp,
241 lockdep_is_held(&sdata->wdev.mtx));
242 rcu_assign_pointer(ifibss->presp, NULL);
243 if (presp)
244 kfree_rcu(presp, rcu_head);
245
246 sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
247
248 /* make a copy of the chandef, it could be modified below. */
249 chandef = *req_chandef;
250 chan = chandef.chan;
251 if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
252 if (chandef.width == NL80211_CHAN_WIDTH_5 ||
253 chandef.width == NL80211_CHAN_WIDTH_10 ||
254 chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
255 chandef.width == NL80211_CHAN_WIDTH_20) {
256 sdata_info(sdata,
257 "Failed to join IBSS, beacons forbidden\n");
258 return;
259 }
260 chandef.width = NL80211_CHAN_WIDTH_20;
261 chandef.center_freq1 = chan->center_freq;
262 }
263
264 ieee80211_vif_release_channel(sdata);
265 if (ieee80211_vif_use_channel(sdata, &chandef,
266 ifibss->fixed_channel ?
267 IEEE80211_CHANCTX_SHARED :
268 IEEE80211_CHANCTX_EXCLUSIVE)) {
269 sdata_info(sdata, "Failed to join IBSS, no channel context\n");
270 return;
271 }
272
273 memcpy(ifibss->bssid, bssid, ETH_ALEN);
274
275 sband = local->hw.wiphy->bands[chan->band];
276
277 presp = ieee80211_ibss_build_presp(sdata, beacon_int, basic_rates,
278 capability, tsf, &chandef,
279 &have_higher_than_11mbit);
280 if (!presp)
210 return; 281 return;
211 282
212 rcu_assign_pointer(ifibss->presp, presp); 283 rcu_assign_pointer(ifibss->presp, presp);
284 mgmt = (void *)presp->head;
213 285
214 sdata->vif.bss_conf.enable_beacon = true; 286 sdata->vif.bss_conf.enable_beacon = true;
215 sdata->vif.bss_conf.beacon_int = beacon_int; 287 sdata->vif.bss_conf.beacon_int = beacon_int;
@@ -239,18 +311,26 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
239 sdata->vif.bss_conf.use_short_slot = chan->band == IEEE80211_BAND_5GHZ; 311 sdata->vif.bss_conf.use_short_slot = chan->band == IEEE80211_BAND_5GHZ;
240 bss_change |= BSS_CHANGED_ERP_SLOT; 312 bss_change |= BSS_CHANGED_ERP_SLOT;
241 313
314 /* cf. IEEE 802.11 9.2.12 */
315 if (chan->band == IEEE80211_BAND_2GHZ && have_higher_than_11mbit)
316 sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
317 else
318 sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
319
242 sdata->vif.bss_conf.ibss_joined = true; 320 sdata->vif.bss_conf.ibss_joined = true;
243 sdata->vif.bss_conf.ibss_creator = creator; 321 sdata->vif.bss_conf.ibss_creator = creator;
244 ieee80211_bss_info_change_notify(sdata, bss_change); 322 ieee80211_bss_info_change_notify(sdata, bss_change);
245 323
246 ieee80211_sta_def_wmm_params(sdata, sband->n_bitrates, supp_rates); 324 ieee80211_set_wmm_default(sdata, true);
247 325
248 ifibss->state = IEEE80211_IBSS_MLME_JOINED; 326 ifibss->state = IEEE80211_IBSS_MLME_JOINED;
249 mod_timer(&ifibss->timer, 327 mod_timer(&ifibss->timer,
250 round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); 328 round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
251 329
252 bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan, 330 scan_width = cfg80211_chandef_to_scan_width(&chandef);
253 mgmt, presp->head_len, 0, GFP_KERNEL); 331 bss = cfg80211_inform_bss_width_frame(local->hw.wiphy, chan,
332 scan_width, mgmt,
333 presp->head_len, 0, GFP_KERNEL);
254 cfg80211_put_bss(local->hw.wiphy, bss); 334 cfg80211_put_bss(local->hw.wiphy, bss);
255 netif_carrier_on(sdata->dev); 335 netif_carrier_on(sdata->dev);
256 cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL); 336 cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL);
@@ -269,6 +349,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
269 const struct cfg80211_bss_ies *ies; 349 const struct cfg80211_bss_ies *ies;
270 enum nl80211_channel_type chan_type; 350 enum nl80211_channel_type chan_type;
271 u64 tsf; 351 u64 tsf;
352 u32 rate_flags;
353 int shift;
272 354
273 sdata_assert_lock(sdata); 355 sdata_assert_lock(sdata);
274 356
@@ -296,15 +378,24 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
296 } 378 }
297 379
298 sband = sdata->local->hw.wiphy->bands[cbss->channel->band]; 380 sband = sdata->local->hw.wiphy->bands[cbss->channel->band];
381 rate_flags = ieee80211_chandef_rate_flags(&sdata->u.ibss.chandef);
382 shift = ieee80211_vif_get_shift(&sdata->vif);
299 383
300 basic_rates = 0; 384 basic_rates = 0;
301 385
302 for (i = 0; i < bss->supp_rates_len; i++) { 386 for (i = 0; i < bss->supp_rates_len; i++) {
303 int rate = (bss->supp_rates[i] & 0x7f) * 5; 387 int rate = bss->supp_rates[i] & 0x7f;
304 bool is_basic = !!(bss->supp_rates[i] & 0x80); 388 bool is_basic = !!(bss->supp_rates[i] & 0x80);
305 389
306 for (j = 0; j < sband->n_bitrates; j++) { 390 for (j = 0; j < sband->n_bitrates; j++) {
307 if (sband->bitrates[j].bitrate == rate) { 391 int brate;
392 if ((rate_flags & sband->bitrates[j].flags)
393 != rate_flags)
394 continue;
395
396 brate = DIV_ROUND_UP(sband->bitrates[j].bitrate,
397 5 * (1 << shift));
398 if (brate == rate) {
308 if (is_basic) 399 if (is_basic)
309 basic_rates |= BIT(j); 400 basic_rates |= BIT(j);
310 break; 401 break;
@@ -360,6 +451,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
360 struct sta_info *sta; 451 struct sta_info *sta;
361 struct ieee80211_chanctx_conf *chanctx_conf; 452 struct ieee80211_chanctx_conf *chanctx_conf;
362 struct ieee80211_supported_band *sband; 453 struct ieee80211_supported_band *sband;
454 enum nl80211_bss_scan_width scan_width;
363 int band; 455 int band;
364 456
365 /* 457 /*
@@ -388,6 +480,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
388 if (WARN_ON_ONCE(!chanctx_conf)) 480 if (WARN_ON_ONCE(!chanctx_conf))
389 return NULL; 481 return NULL;
390 band = chanctx_conf->def.chan->band; 482 band = chanctx_conf->def.chan->band;
483 scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def);
391 rcu_read_unlock(); 484 rcu_read_unlock();
392 485
393 sta = sta_info_alloc(sdata, addr, GFP_KERNEL); 486 sta = sta_info_alloc(sdata, addr, GFP_KERNEL);
@@ -401,7 +494,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
401 /* make sure mandatory rates are always added */ 494 /* make sure mandatory rates are always added */
402 sband = local->hw.wiphy->bands[band]; 495 sband = local->hw.wiphy->bands[band];
403 sta->sta.supp_rates[band] = supp_rates | 496 sta->sta.supp_rates[band] = supp_rates |
404 ieee80211_mandatory_rates(sband); 497 ieee80211_mandatory_rates(sband, scan_width);
405 498
406 return ieee80211_ibss_finish_sta(sta); 499 return ieee80211_ibss_finish_sta(sta);
407} 500}
@@ -465,6 +558,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
465 u64 beacon_timestamp, rx_timestamp; 558 u64 beacon_timestamp, rx_timestamp;
466 u32 supp_rates = 0; 559 u32 supp_rates = 0;
467 enum ieee80211_band band = rx_status->band; 560 enum ieee80211_band band = rx_status->band;
561 enum nl80211_bss_scan_width scan_width;
468 struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; 562 struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
469 bool rates_updated = false; 563 bool rates_updated = false;
470 564
@@ -486,16 +580,22 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
486 sta = sta_info_get(sdata, mgmt->sa); 580 sta = sta_info_get(sdata, mgmt->sa);
487 581
488 if (elems->supp_rates) { 582 if (elems->supp_rates) {
489 supp_rates = ieee80211_sta_get_rates(local, elems, 583 supp_rates = ieee80211_sta_get_rates(sdata, elems,
490 band, NULL); 584 band, NULL);
491 if (sta) { 585 if (sta) {
492 u32 prev_rates; 586 u32 prev_rates;
493 587
494 prev_rates = sta->sta.supp_rates[band]; 588 prev_rates = sta->sta.supp_rates[band];
495 /* make sure mandatory rates are always added */ 589 /* make sure mandatory rates are always added */
496 sta->sta.supp_rates[band] = supp_rates | 590 scan_width = NL80211_BSS_CHAN_WIDTH_20;
497 ieee80211_mandatory_rates(sband); 591 if (rx_status->flag & RX_FLAG_5MHZ)
592 scan_width = NL80211_BSS_CHAN_WIDTH_5;
593 if (rx_status->flag & RX_FLAG_10MHZ)
594 scan_width = NL80211_BSS_CHAN_WIDTH_10;
498 595
596 sta->sta.supp_rates[band] = supp_rates |
597 ieee80211_mandatory_rates(sband,
598 scan_width);
499 if (sta->sta.supp_rates[band] != prev_rates) { 599 if (sta->sta.supp_rates[band] != prev_rates) {
500 ibss_dbg(sdata, 600 ibss_dbg(sdata,
501 "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", 601 "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n",
@@ -610,7 +710,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
610 "beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", 710 "beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n",
611 mgmt->bssid); 711 mgmt->bssid);
612 ieee80211_sta_join_ibss(sdata, bss); 712 ieee80211_sta_join_ibss(sdata, bss);
613 supp_rates = ieee80211_sta_get_rates(local, elems, band, NULL); 713 supp_rates = ieee80211_sta_get_rates(sdata, elems, band, NULL);
614 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 714 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
615 supp_rates); 715 supp_rates);
616 rcu_read_unlock(); 716 rcu_read_unlock();
@@ -629,6 +729,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
629 struct sta_info *sta; 729 struct sta_info *sta;
630 struct ieee80211_chanctx_conf *chanctx_conf; 730 struct ieee80211_chanctx_conf *chanctx_conf;
631 struct ieee80211_supported_band *sband; 731 struct ieee80211_supported_band *sband;
732 enum nl80211_bss_scan_width scan_width;
632 int band; 733 int band;
633 734
634 /* 735 /*
@@ -654,6 +755,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
654 return; 755 return;
655 } 756 }
656 band = chanctx_conf->def.chan->band; 757 band = chanctx_conf->def.chan->band;
758 scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def);
657 rcu_read_unlock(); 759 rcu_read_unlock();
658 760
659 sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); 761 sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
@@ -665,7 +767,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
665 /* make sure mandatory rates are always added */ 767 /* make sure mandatory rates are always added */
666 sband = local->hw.wiphy->bands[band]; 768 sband = local->hw.wiphy->bands[band];
667 sta->sta.supp_rates[band] = supp_rates | 769 sta->sta.supp_rates[band] = supp_rates |
668 ieee80211_mandatory_rates(sband); 770 ieee80211_mandatory_rates(sband, scan_width);
669 771
670 spin_lock(&ifibss->incomplete_lock); 772 spin_lock(&ifibss->incomplete_lock);
671 list_add(&sta->list, &ifibss->incomplete_stations); 773 list_add(&sta->list, &ifibss->incomplete_stations);
@@ -697,6 +799,33 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
697 return active; 799 return active;
698} 800}
699 801
802static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
803{
804 struct ieee80211_local *local = sdata->local;
805 struct sta_info *sta, *tmp;
806 unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT;
807 unsigned long exp_rsn_time = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT;
808
809 mutex_lock(&local->sta_mtx);
810
811 list_for_each_entry_safe(sta, tmp, &local->sta_list, list) {
812 if (sdata != sta->sdata)
813 continue;
814
815 if (time_after(jiffies, sta->last_rx + exp_time) ||
816 (time_after(jiffies, sta->last_rx + exp_rsn_time) &&
817 sta->sta_state != IEEE80211_STA_AUTHORIZED)) {
818 sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n",
819 sta->sta_state != IEEE80211_STA_AUTHORIZED ?
820 "not authorized " : "", sta->sta.addr);
821
822 WARN_ON(__sta_info_destroy(sta));
823 }
824 }
825
826 mutex_unlock(&local->sta_mtx);
827}
828
700/* 829/*
701 * This function is called with state == IEEE80211_IBSS_MLME_JOINED 830 * This function is called with state == IEEE80211_IBSS_MLME_JOINED
702 */ 831 */
@@ -704,13 +833,14 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
704static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) 833static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
705{ 834{
706 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 835 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
836 enum nl80211_bss_scan_width scan_width;
707 837
708 sdata_assert_lock(sdata); 838 sdata_assert_lock(sdata);
709 839
710 mod_timer(&ifibss->timer, 840 mod_timer(&ifibss->timer,
711 round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); 841 round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
712 842
713 ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT); 843 ieee80211_ibss_sta_expire(sdata);
714 844
715 if (time_before(jiffies, ifibss->last_scan_completed + 845 if (time_before(jiffies, ifibss->last_scan_completed +
716 IEEE80211_IBSS_MERGE_INTERVAL)) 846 IEEE80211_IBSS_MERGE_INTERVAL))
@@ -725,8 +855,9 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
725 sdata_info(sdata, 855 sdata_info(sdata,
726 "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n"); 856 "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n");
727 857
858 scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
728 ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, 859 ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len,
729 NULL); 860 NULL, scan_width);
730} 861}
731 862
732static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) 863static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
@@ -776,6 +907,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
776 struct cfg80211_bss *cbss; 907 struct cfg80211_bss *cbss;
777 struct ieee80211_channel *chan = NULL; 908 struct ieee80211_channel *chan = NULL;
778 const u8 *bssid = NULL; 909 const u8 *bssid = NULL;
910 enum nl80211_bss_scan_width scan_width;
779 int active_ibss; 911 int active_ibss;
780 u16 capability; 912 u16 capability;
781 913
@@ -817,6 +949,17 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
817 return; 949 return;
818 } 950 }
819 951
952 /* if a fixed bssid and a fixed freq have been provided create the IBSS
953 * directly and do not waste time scanning
954 */
955 if (ifibss->fixed_bssid && ifibss->fixed_channel) {
956 sdata_info(sdata, "Created IBSS using preconfigured BSSID %pM\n",
957 bssid);
958 ieee80211_sta_create_ibss(sdata);
959 return;
960 }
961
962
820 ibss_dbg(sdata, "sta_find_ibss: did not try to join ibss\n"); 963 ibss_dbg(sdata, "sta_find_ibss: did not try to join ibss\n");
821 964
822 /* Selected IBSS not found in current scan results - try to scan */ 965 /* Selected IBSS not found in current scan results - try to scan */
@@ -824,8 +967,10 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
824 IEEE80211_SCAN_INTERVAL)) { 967 IEEE80211_SCAN_INTERVAL)) {
825 sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); 968 sdata_info(sdata, "Trigger new scan to find an IBSS to join\n");
826 969
970 scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
827 ieee80211_request_ibss_scan(sdata, ifibss->ssid, 971 ieee80211_request_ibss_scan(sdata, ifibss->ssid,
828 ifibss->ssid_len, chan); 972 ifibss->ssid_len, chan,
973 scan_width);
829 } else { 974 } else {
830 int interval = IEEE80211_SCAN_INTERVAL; 975 int interval = IEEE80211_SCAN_INTERVAL;
831 976
@@ -1045,6 +1190,9 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
1045 struct cfg80211_ibss_params *params) 1190 struct cfg80211_ibss_params *params)
1046{ 1191{
1047 u32 changed = 0; 1192 u32 changed = 0;
1193 u32 rate_flags;
1194 struct ieee80211_supported_band *sband;
1195 int i;
1048 1196
1049 if (params->bssid) { 1197 if (params->bssid) {
1050 memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); 1198 memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN);
@@ -1055,6 +1203,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
1055 sdata->u.ibss.privacy = params->privacy; 1203 sdata->u.ibss.privacy = params->privacy;
1056 sdata->u.ibss.control_port = params->control_port; 1204 sdata->u.ibss.control_port = params->control_port;
1057 sdata->u.ibss.basic_rates = params->basic_rates; 1205 sdata->u.ibss.basic_rates = params->basic_rates;
1206
1207 /* fix basic_rates if channel does not support these rates */
1208 rate_flags = ieee80211_chandef_rate_flags(&params->chandef);
1209 sband = sdata->local->hw.wiphy->bands[params->chandef.chan->band];
1210 for (i = 0; i < sband->n_bitrates; i++) {
1211 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
1212 sdata->u.ibss.basic_rates &= ~BIT(i);
1213 }
1058 memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate, 1214 memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate,
1059 sizeof(params->mcast_rate)); 1215 sizeof(params->mcast_rate));
1060 1216
@@ -1076,6 +1232,11 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
1076 memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len); 1232 memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len);
1077 sdata->u.ibss.ssid_len = params->ssid_len; 1233 sdata->u.ibss.ssid_len = params->ssid_len;
1078 1234
1235 memcpy(&sdata->u.ibss.ht_capa, &params->ht_capa,
1236 sizeof(sdata->u.ibss.ht_capa));
1237 memcpy(&sdata->u.ibss.ht_capa_mask, &params->ht_capa_mask,
1238 sizeof(sdata->u.ibss.ht_capa_mask));
1239
1079 /* 1240 /*
1080 * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is 1241 * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is
1081 * reserved, but an HT STA shall protect HT transmissions as though 1242 * reserved, but an HT STA shall protect HT transmissions as though
@@ -1156,6 +1317,11 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
1156 presp = rcu_dereference_protected(ifibss->presp, 1317 presp = rcu_dereference_protected(ifibss->presp,
1157 lockdep_is_held(&sdata->wdev.mtx)); 1318 lockdep_is_held(&sdata->wdev.mtx));
1158 RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); 1319 RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);
1320
1321 /* on the next join, re-program HT parameters */
1322 memset(&ifibss->ht_capa, 0, sizeof(ifibss->ht_capa));
1323 memset(&ifibss->ht_capa_mask, 0, sizeof(ifibss->ht_capa_mask));
1324
1159 sdata->vif.bss_conf.ibss_joined = false; 1325 sdata->vif.bss_conf.ibss_joined = false;
1160 sdata->vif.bss_conf.ibss_creator = false; 1326 sdata->vif.bss_conf.ibss_creator = false;
1161 sdata->vif.bss_conf.enable_beacon = false; 1327 sdata->vif.bss_conf.enable_beacon = false;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8412a303993a..b6186517ec56 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -53,9 +53,6 @@ struct ieee80211_local;
53 * increased memory use (about 2 kB of RAM per entry). */ 53 * increased memory use (about 2 kB of RAM per entry). */
54#define IEEE80211_FRAGMENT_MAX 4 54#define IEEE80211_FRAGMENT_MAX 4
55 55
56#define TU_TO_JIFFIES(x) (usecs_to_jiffies((x) * 1024))
57#define TU_TO_EXP_TIME(x) (jiffies + TU_TO_JIFFIES(x))
58
59/* power level hasn't been configured (or set to automatic) */ 56/* power level hasn't been configured (or set to automatic) */
60#define IEEE80211_UNSET_POWER_LEVEL INT_MIN 57#define IEEE80211_UNSET_POWER_LEVEL INT_MIN
61 58
@@ -259,6 +256,8 @@ struct ieee80211_if_ap {
259 struct beacon_data __rcu *beacon; 256 struct beacon_data __rcu *beacon;
260 struct probe_resp __rcu *probe_resp; 257 struct probe_resp __rcu *probe_resp;
261 258
259 /* to be used after channel switch. */
260 struct cfg80211_beacon_data *next_beacon;
262 struct list_head vlans; 261 struct list_head vlans;
263 262
264 struct ps_data ps; 263 struct ps_data ps;
@@ -509,6 +508,9 @@ struct ieee80211_if_ibss {
509 /* probe response/beacon for IBSS */ 508 /* probe response/beacon for IBSS */
510 struct beacon_data __rcu *presp; 509 struct beacon_data __rcu *presp;
511 510
511 struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */
512 struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */
513
512 spinlock_t incomplete_lock; 514 spinlock_t incomplete_lock;
513 struct list_head incomplete_stations; 515 struct list_head incomplete_stations;
514 516
@@ -713,6 +715,11 @@ struct ieee80211_sub_if_data {
713 715
714 struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; 716 struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
715 717
718 struct work_struct csa_finalize_work;
719 int csa_counter_offset_beacon;
720 int csa_counter_offset_presp;
721 bool csa_radar_required;
722
716 /* used to reconfigure hardware SM PS */ 723 /* used to reconfigure hardware SM PS */
717 struct work_struct recalc_smps; 724 struct work_struct recalc_smps;
718 725
@@ -809,6 +816,34 @@ ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata)
809 return band; 816 return band;
810} 817}
811 818
819static inline int
820ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef)
821{
822 switch (chandef->width) {
823 case NL80211_CHAN_WIDTH_5:
824 return 2;
825 case NL80211_CHAN_WIDTH_10:
826 return 1;
827 default:
828 return 0;
829 }
830}
831
832static inline int
833ieee80211_vif_get_shift(struct ieee80211_vif *vif)
834{
835 struct ieee80211_chanctx_conf *chanctx_conf;
836 int shift = 0;
837
838 rcu_read_lock();
839 chanctx_conf = rcu_dereference(vif->chanctx_conf);
840 if (chanctx_conf)
841 shift = ieee80211_chandef_get_shift(&chanctx_conf->def);
842 rcu_read_unlock();
843
844 return shift;
845}
846
812enum sdata_queue_type { 847enum sdata_queue_type {
813 IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, 848 IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0,
814 IEEE80211_SDATA_QUEUE_AGG_START = 1, 849 IEEE80211_SDATA_QUEUE_AGG_START = 1,
@@ -1026,7 +1061,7 @@ struct ieee80211_local {
1026 struct cfg80211_ssid scan_ssid; 1061 struct cfg80211_ssid scan_ssid;
1027 struct cfg80211_scan_request *int_scan_req; 1062 struct cfg80211_scan_request *int_scan_req;
1028 struct cfg80211_scan_request *scan_req, *hw_scan_req; 1063 struct cfg80211_scan_request *scan_req, *hw_scan_req;
1029 struct ieee80211_channel *scan_channel; 1064 struct cfg80211_chan_def scan_chandef;
1030 enum ieee80211_band hw_scan_band; 1065 enum ieee80211_band hw_scan_band;
1031 int scan_channel_idx; 1066 int scan_channel_idx;
1032 int scan_ies_len; 1067 int scan_ies_len;
@@ -1063,7 +1098,6 @@ struct ieee80211_local {
1063 u32 dot11TransmittedFrameCount; 1098 u32 dot11TransmittedFrameCount;
1064 1099
1065#ifdef CONFIG_MAC80211_LEDS 1100#ifdef CONFIG_MAC80211_LEDS
1066 int tx_led_counter, rx_led_counter;
1067 struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led; 1101 struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led;
1068 struct tpt_led_trigger *tpt_led_trigger; 1102 struct tpt_led_trigger *tpt_led_trigger;
1069 char tx_led_name[32], rx_led_name[32], 1103 char tx_led_name[32], rx_led_name[32],
@@ -1306,7 +1340,8 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1306void ieee80211_scan_work(struct work_struct *work); 1340void ieee80211_scan_work(struct work_struct *work);
1307int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, 1341int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
1308 const u8 *ssid, u8 ssid_len, 1342 const u8 *ssid, u8 ssid_len,
1309 struct ieee80211_channel *chan); 1343 struct ieee80211_channel *chan,
1344 enum nl80211_bss_scan_width scan_width);
1310int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, 1345int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
1311 struct cfg80211_scan_request *req); 1346 struct cfg80211_scan_request *req);
1312void ieee80211_scan_cancel(struct ieee80211_local *local); 1347void ieee80211_scan_cancel(struct ieee80211_local *local);
@@ -1341,6 +1376,9 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free);
1341void ieee80211_sw_roc_work(struct work_struct *work); 1376void ieee80211_sw_roc_work(struct work_struct *work);
1342void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); 1377void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
1343 1378
1379/* channel switch handling */
1380void ieee80211_csa_finalize_work(struct work_struct *work);
1381
1344/* interface handling */ 1382/* interface handling */
1345int ieee80211_iface_init(void); 1383int ieee80211_iface_init(void);
1346void ieee80211_iface_exit(void); 1384void ieee80211_iface_exit(void);
@@ -1362,6 +1400,8 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local);
1362 1400
1363bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); 1401bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
1364void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); 1402void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
1403int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
1404 struct cfg80211_beacon_data *params);
1365 1405
1366static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) 1406static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata)
1367{ 1407{
@@ -1465,7 +1505,8 @@ extern void *mac80211_wiphy_privid; /* for wiphy privid */
1465u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, 1505u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
1466 enum nl80211_iftype type); 1506 enum nl80211_iftype type);
1467int ieee80211_frame_duration(enum ieee80211_band band, size_t len, 1507int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
1468 int rate, int erp, int short_preamble); 1508 int rate, int erp, int short_preamble,
1509 int shift);
1469void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, 1510void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
1470 struct ieee80211_hdr *hdr, const u8 *tsc, 1511 struct ieee80211_hdr *hdr, const u8 *tsc,
1471 gfp_t gfp); 1512 gfp_t gfp);
@@ -1569,7 +1610,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
1569int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, 1610int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1570 size_t buffer_len, const u8 *ie, size_t ie_len, 1611 size_t buffer_len, const u8 *ie, size_t ie_len,
1571 enum ieee80211_band band, u32 rate_mask, 1612 enum ieee80211_band band, u32 rate_mask,
1572 u8 channel); 1613 struct cfg80211_chan_def *chandef);
1573struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, 1614struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
1574 u8 *dst, u32 ratemask, 1615 u8 *dst, u32 ratemask,
1575 struct ieee80211_channel *chan, 1616 struct ieee80211_channel *chan,
@@ -1582,10 +1623,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1582 u32 ratemask, bool directed, u32 tx_flags, 1623 u32 ratemask, bool directed, u32 tx_flags,
1583 struct ieee80211_channel *channel, bool scan); 1624 struct ieee80211_channel *channel, bool scan);
1584 1625
1585void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, 1626u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
1586 const size_t supp_rates_len,
1587 const u8 *supp_rates);
1588u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
1589 struct ieee802_11_elems *elems, 1627 struct ieee802_11_elems *elems,
1590 enum ieee80211_band band, u32 *basic_rates); 1628 enum ieee80211_band band, u32 *basic_rates);
1591int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, 1629int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
@@ -1602,6 +1640,9 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
1602 u16 prot_mode); 1640 u16 prot_mode);
1603u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, 1641u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
1604 u32 cap); 1642 u32 cap);
1643int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
1644 const struct ieee80211_supported_band *sband,
1645 const u8 *srates, int srates_len, u32 *rates);
1605int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, 1646int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
1606 struct sk_buff *skb, bool need_basic, 1647 struct sk_buff *skb, bool need_basic,
1607 enum ieee80211_band band); 1648 enum ieee80211_band band);
@@ -1622,6 +1663,11 @@ int __must_check
1622ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, 1663ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
1623 const struct cfg80211_chan_def *chandef, 1664 const struct cfg80211_chan_def *chandef,
1624 u32 *changed); 1665 u32 *changed);
1666/* NOTE: only use ieee80211_vif_change_channel() for channel switch */
1667int __must_check
1668ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
1669 const struct cfg80211_chan_def *chandef,
1670 u32 *changed);
1625void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata); 1671void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata);
1626void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata); 1672void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata);
1627void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, 1673void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index cc117591f678..fcecd633514e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -54,7 +54,7 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
54 return false; 54 return false;
55 } 55 }
56 56
57 power = chanctx_conf->def.chan->max_power; 57 power = ieee80211_chandef_max_power(&chanctx_conf->def);
58 rcu_read_unlock(); 58 rcu_read_unlock();
59 59
60 if (sdata->user_power_level != IEEE80211_UNSET_POWER_LEVEL) 60 if (sdata->user_power_level != IEEE80211_UNSET_POWER_LEVEL)
@@ -274,6 +274,12 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
274 if (iftype == NL80211_IFTYPE_ADHOC && 274 if (iftype == NL80211_IFTYPE_ADHOC &&
275 nsdata->vif.type == NL80211_IFTYPE_ADHOC) 275 nsdata->vif.type == NL80211_IFTYPE_ADHOC)
276 return -EBUSY; 276 return -EBUSY;
277 /*
278 * will not add another interface while any channel
279 * switch is active.
280 */
281 if (nsdata->vif.csa_active)
282 return -EBUSY;
277 283
278 /* 284 /*
279 * The remaining checks are only performed for interfaces 285 * The remaining checks are only performed for interfaces
@@ -302,12 +308,13 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
302 return 0; 308 return 0;
303} 309}
304 310
305static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata) 311static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
312 enum nl80211_iftype iftype)
306{ 313{
307 int n_queues = sdata->local->hw.queues; 314 int n_queues = sdata->local->hw.queues;
308 int i; 315 int i;
309 316
310 if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) { 317 if (iftype != NL80211_IFTYPE_P2P_DEVICE) {
311 for (i = 0; i < IEEE80211_NUM_ACS; i++) { 318 for (i = 0; i < IEEE80211_NUM_ACS; i++) {
312 if (WARN_ON_ONCE(sdata->vif.hw_queue[i] == 319 if (WARN_ON_ONCE(sdata->vif.hw_queue[i] ==
313 IEEE80211_INVAL_HW_QUEUE)) 320 IEEE80211_INVAL_HW_QUEUE))
@@ -318,8 +325,9 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata)
318 } 325 }
319 } 326 }
320 327
321 if ((sdata->vif.type != NL80211_IFTYPE_AP && 328 if ((iftype != NL80211_IFTYPE_AP &&
322 sdata->vif.type != NL80211_IFTYPE_MESH_POINT) || 329 iftype != NL80211_IFTYPE_P2P_GO &&
330 iftype != NL80211_IFTYPE_MESH_POINT) ||
323 !(sdata->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) { 331 !(sdata->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) {
324 sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE; 332 sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;
325 return 0; 333 return 0;
@@ -402,7 +410,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
402 return ret; 410 return ret;
403 } 411 }
404 412
405 ret = ieee80211_check_queues(sdata); 413 ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR);
406 if (ret) { 414 if (ret) {
407 kfree(sdata); 415 kfree(sdata);
408 return ret; 416 return ret;
@@ -586,7 +594,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
586 res = drv_add_interface(local, sdata); 594 res = drv_add_interface(local, sdata);
587 if (res) 595 if (res)
588 goto err_stop; 596 goto err_stop;
589 res = ieee80211_check_queues(sdata); 597 res = ieee80211_check_queues(sdata,
598 ieee80211_vif_type_p2p(&sdata->vif));
590 if (res) 599 if (res)
591 goto err_del_interface; 600 goto err_del_interface;
592 } 601 }
@@ -804,6 +813,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
804 cancel_work_sync(&local->dynamic_ps_enable_work); 813 cancel_work_sync(&local->dynamic_ps_enable_work);
805 814
806 cancel_work_sync(&sdata->recalc_smps); 815 cancel_work_sync(&sdata->recalc_smps);
816 sdata->vif.csa_active = false;
817 cancel_work_sync(&sdata->csa_finalize_work);
807 818
808 cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); 819 cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
809 820
@@ -1267,6 +1278,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
1267 skb_queue_head_init(&sdata->skb_queue); 1278 skb_queue_head_init(&sdata->skb_queue);
1268 INIT_WORK(&sdata->work, ieee80211_iface_work); 1279 INIT_WORK(&sdata->work, ieee80211_iface_work);
1269 INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); 1280 INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
1281 INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work);
1270 1282
1271 switch (type) { 1283 switch (type) {
1272 case NL80211_IFTYPE_P2P_GO: 1284 case NL80211_IFTYPE_P2P_GO:
@@ -1380,14 +1392,14 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
1380 1392
1381 ret = drv_change_interface(local, sdata, internal_type, p2p); 1393 ret = drv_change_interface(local, sdata, internal_type, p2p);
1382 if (ret) 1394 if (ret)
1383 type = sdata->vif.type; 1395 type = ieee80211_vif_type_p2p(&sdata->vif);
1384 1396
1385 /* 1397 /*
1386 * Ignore return value here, there's not much we can do since 1398 * Ignore return value here, there's not much we can do since
1387 * the driver changed the interface type internally already. 1399 * the driver changed the interface type internally already.
1388 * The warnings will hopefully make driver authors fix it :-) 1400 * The warnings will hopefully make driver authors fix it :-)
1389 */ 1401 */
1390 ieee80211_check_queues(sdata); 1402 ieee80211_check_queues(sdata, type);
1391 1403
1392 ieee80211_setup_sdata(sdata, type); 1404 ieee80211_setup_sdata(sdata, type);
1393 1405
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index e39cc91d0cf1..620677e897bd 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -93,6 +93,9 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
93 93
94 might_sleep(); 94 might_sleep();
95 95
96 if (key->flags & KEY_FLAG_TAINTED)
97 return -EINVAL;
98
96 if (!key->local->ops->set_key) 99 if (!key->local->ops->set_key)
97 goto out_unsupported; 100 goto out_unsupported;
98 101
@@ -455,6 +458,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
455 struct ieee80211_sub_if_data *sdata, 458 struct ieee80211_sub_if_data *sdata,
456 struct sta_info *sta) 459 struct sta_info *sta)
457{ 460{
461 struct ieee80211_local *local = sdata->local;
458 struct ieee80211_key *old_key; 462 struct ieee80211_key *old_key;
459 int idx, ret; 463 int idx, ret;
460 bool pairwise; 464 bool pairwise;
@@ -484,10 +488,13 @@ int ieee80211_key_link(struct ieee80211_key *key,
484 488
485 ieee80211_debugfs_key_add(key); 489 ieee80211_debugfs_key_add(key);
486 490
487 ret = ieee80211_key_enable_hw_accel(key); 491 if (!local->wowlan) {
488 492 ret = ieee80211_key_enable_hw_accel(key);
489 if (ret) 493 if (ret)
490 ieee80211_key_free(key, true); 494 ieee80211_key_free(key, true);
495 } else {
496 ret = 0;
497 }
491 498
492 mutex_unlock(&sdata->local->key_mtx); 499 mutex_unlock(&sdata->local->key_mtx);
493 500
@@ -540,7 +547,7 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
540 void *iter_data) 547 void *iter_data)
541{ 548{
542 struct ieee80211_local *local = hw_to_local(hw); 549 struct ieee80211_local *local = hw_to_local(hw);
543 struct ieee80211_key *key; 550 struct ieee80211_key *key, *tmp;
544 struct ieee80211_sub_if_data *sdata; 551 struct ieee80211_sub_if_data *sdata;
545 552
546 ASSERT_RTNL(); 553 ASSERT_RTNL();
@@ -548,13 +555,14 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
548 mutex_lock(&local->key_mtx); 555 mutex_lock(&local->key_mtx);
549 if (vif) { 556 if (vif) {
550 sdata = vif_to_sdata(vif); 557 sdata = vif_to_sdata(vif);
551 list_for_each_entry(key, &sdata->key_list, list) 558 list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
552 iter(hw, &sdata->vif, 559 iter(hw, &sdata->vif,
553 key->sta ? &key->sta->sta : NULL, 560 key->sta ? &key->sta->sta : NULL,
554 &key->conf, iter_data); 561 &key->conf, iter_data);
555 } else { 562 } else {
556 list_for_each_entry(sdata, &local->interfaces, list) 563 list_for_each_entry(sdata, &local->interfaces, list)
557 list_for_each_entry(key, &sdata->key_list, list) 564 list_for_each_entry_safe(key, tmp,
565 &sdata->key_list, list)
558 iter(hw, &sdata->vif, 566 iter(hw, &sdata->vif,
559 key->sta ? &key->sta->sta : NULL, 567 key->sta ? &key->sta->sta : NULL,
560 &key->conf, iter_data); 568 &key->conf, iter_data);
@@ -751,3 +759,135 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf,
751 } 759 }
752} 760}
753EXPORT_SYMBOL(ieee80211_get_key_rx_seq); 761EXPORT_SYMBOL(ieee80211_get_key_rx_seq);
762
763void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf,
764 struct ieee80211_key_seq *seq)
765{
766 struct ieee80211_key *key;
767 u64 pn64;
768
769 key = container_of(keyconf, struct ieee80211_key, conf);
770
771 switch (key->conf.cipher) {
772 case WLAN_CIPHER_SUITE_TKIP:
773 key->u.tkip.tx.iv32 = seq->tkip.iv32;
774 key->u.tkip.tx.iv16 = seq->tkip.iv16;
775 break;
776 case WLAN_CIPHER_SUITE_CCMP:
777 pn64 = (u64)seq->ccmp.pn[5] |
778 ((u64)seq->ccmp.pn[4] << 8) |
779 ((u64)seq->ccmp.pn[3] << 16) |
780 ((u64)seq->ccmp.pn[2] << 24) |
781 ((u64)seq->ccmp.pn[1] << 32) |
782 ((u64)seq->ccmp.pn[0] << 40);
783 atomic64_set(&key->u.ccmp.tx_pn, pn64);
784 break;
785 case WLAN_CIPHER_SUITE_AES_CMAC:
786 pn64 = (u64)seq->aes_cmac.pn[5] |
787 ((u64)seq->aes_cmac.pn[4] << 8) |
788 ((u64)seq->aes_cmac.pn[3] << 16) |
789 ((u64)seq->aes_cmac.pn[2] << 24) |
790 ((u64)seq->aes_cmac.pn[1] << 32) |
791 ((u64)seq->aes_cmac.pn[0] << 40);
792 atomic64_set(&key->u.aes_cmac.tx_pn, pn64);
793 break;
794 default:
795 WARN_ON(1);
796 break;
797 }
798}
799EXPORT_SYMBOL_GPL(ieee80211_set_key_tx_seq);
800
801void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf,
802 int tid, struct ieee80211_key_seq *seq)
803{
804 struct ieee80211_key *key;
805 u8 *pn;
806
807 key = container_of(keyconf, struct ieee80211_key, conf);
808
809 switch (key->conf.cipher) {
810 case WLAN_CIPHER_SUITE_TKIP:
811 if (WARN_ON(tid < 0 || tid >= IEEE80211_NUM_TIDS))
812 return;
813 key->u.tkip.rx[tid].iv32 = seq->tkip.iv32;
814 key->u.tkip.rx[tid].iv16 = seq->tkip.iv16;
815 break;
816 case WLAN_CIPHER_SUITE_CCMP:
817 if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS))
818 return;
819 if (tid < 0)
820 pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS];
821 else
822 pn = key->u.ccmp.rx_pn[tid];
823 memcpy(pn, seq->ccmp.pn, IEEE80211_CCMP_PN_LEN);
824 break;
825 case WLAN_CIPHER_SUITE_AES_CMAC:
826 if (WARN_ON(tid != 0))
827 return;
828 pn = key->u.aes_cmac.rx_pn;
829 memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN);
830 break;
831 default:
832 WARN_ON(1);
833 break;
834 }
835}
836EXPORT_SYMBOL_GPL(ieee80211_set_key_rx_seq);
837
838void ieee80211_remove_key(struct ieee80211_key_conf *keyconf)
839{
840 struct ieee80211_key *key;
841
842 key = container_of(keyconf, struct ieee80211_key, conf);
843
844 assert_key_lock(key->local);
845
846 /*
847 * if key was uploaded, we assume the driver will/has remove(d)
848 * it, so adjust bookkeeping accordingly
849 */
850 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
851 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
852
853 if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
854 (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) ||
855 (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)))
856 increment_tailroom_need_count(key->sdata);
857 }
858
859 ieee80211_key_free(key, false);
860}
861EXPORT_SYMBOL_GPL(ieee80211_remove_key);
862
863struct ieee80211_key_conf *
864ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
865 struct ieee80211_key_conf *keyconf)
866{
867 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
868 struct ieee80211_local *local = sdata->local;
869 struct ieee80211_key *key;
870 int err;
871
872 if (WARN_ON(!local->wowlan))
873 return ERR_PTR(-EINVAL);
874
875 if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
876 return ERR_PTR(-EINVAL);
877
878 key = ieee80211_key_alloc(keyconf->cipher, keyconf->keyidx,
879 keyconf->keylen, keyconf->key,
880 0, NULL);
881 if (IS_ERR(key))
882 return ERR_PTR(PTR_ERR(key));
883
884 if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED)
885 key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
886
887 err = ieee80211_key_link(key, sdata, NULL);
888 if (err)
889 return ERR_PTR(err);
890
891 return &key->conf;
892}
893EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_add);
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index bcffa6903129..e2b836446af3 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -12,27 +12,22 @@
12#include <linux/export.h> 12#include <linux/export.h>
13#include "led.h" 13#include "led.h"
14 14
15#define MAC80211_BLINK_DELAY 50 /* ms */
16
15void ieee80211_led_rx(struct ieee80211_local *local) 17void ieee80211_led_rx(struct ieee80211_local *local)
16{ 18{
19 unsigned long led_delay = MAC80211_BLINK_DELAY;
17 if (unlikely(!local->rx_led)) 20 if (unlikely(!local->rx_led))
18 return; 21 return;
19 if (local->rx_led_counter++ % 2 == 0) 22 led_trigger_blink_oneshot(local->rx_led, &led_delay, &led_delay, 0);
20 led_trigger_event(local->rx_led, LED_OFF);
21 else
22 led_trigger_event(local->rx_led, LED_FULL);
23} 23}
24 24
25/* q is 1 if a packet was enqueued, 0 if it has been transmitted */ 25void ieee80211_led_tx(struct ieee80211_local *local)
26void ieee80211_led_tx(struct ieee80211_local *local, int q)
27{ 26{
27 unsigned long led_delay = MAC80211_BLINK_DELAY;
28 if (unlikely(!local->tx_led)) 28 if (unlikely(!local->tx_led))
29 return; 29 return;
30 /* not sure how this is supposed to work ... */ 30 led_trigger_blink_oneshot(local->tx_led, &led_delay, &led_delay, 0);
31 local->tx_led_counter += 2*q-1;
32 if (local->tx_led_counter % 2 == 0)
33 led_trigger_event(local->tx_led, LED_OFF);
34 else
35 led_trigger_event(local->tx_led, LED_FULL);
36} 31}
37 32
38void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) 33void ieee80211_led_assoc(struct ieee80211_local *local, bool associated)
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index e0275d9befa8..89f4344f13b9 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -13,7 +13,7 @@
13 13
14#ifdef CONFIG_MAC80211_LEDS 14#ifdef CONFIG_MAC80211_LEDS
15void ieee80211_led_rx(struct ieee80211_local *local); 15void ieee80211_led_rx(struct ieee80211_local *local);
16void ieee80211_led_tx(struct ieee80211_local *local, int q); 16void ieee80211_led_tx(struct ieee80211_local *local);
17void ieee80211_led_assoc(struct ieee80211_local *local, 17void ieee80211_led_assoc(struct ieee80211_local *local,
18 bool associated); 18 bool associated);
19void ieee80211_led_radio(struct ieee80211_local *local, 19void ieee80211_led_radio(struct ieee80211_local *local,
@@ -27,7 +27,7 @@ void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
27static inline void ieee80211_led_rx(struct ieee80211_local *local) 27static inline void ieee80211_led_rx(struct ieee80211_local *local)
28{ 28{
29} 29}
30static inline void ieee80211_led_tx(struct ieee80211_local *local, int q) 30static inline void ieee80211_led_tx(struct ieee80211_local *local)
31{ 31{
32} 32}
33static inline void ieee80211_led_assoc(struct ieee80211_local *local, 33static inline void ieee80211_led_assoc(struct ieee80211_local *local,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 091088ac7890..21d5d44444d0 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -102,17 +102,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
102 102
103 offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; 103 offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
104 104
105 if (local->scan_channel) { 105 if (local->scan_chandef.chan) {
106 chandef.chan = local->scan_channel; 106 chandef = local->scan_chandef;
107 /* If scanning on oper channel, use whatever channel-type
108 * is currently in use.
109 */
110 if (chandef.chan == local->_oper_chandef.chan) {
111 chandef = local->_oper_chandef;
112 } else {
113 chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
114 chandef.center_freq1 = chandef.chan->center_freq;
115 }
116 } else if (local->tmp_channel) { 107 } else if (local->tmp_channel) {
117 chandef.chan = local->tmp_channel; 108 chandef.chan = local->tmp_channel;
118 chandef.width = NL80211_CHAN_WIDTH_20_NOHT; 109 chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
@@ -151,7 +142,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
151 changed |= IEEE80211_CONF_CHANGE_SMPS; 142 changed |= IEEE80211_CONF_CHANGE_SMPS;
152 } 143 }
153 144
154 power = chandef.chan->max_power; 145 power = ieee80211_chandef_max_power(&chandef);
155 146
156 rcu_read_lock(); 147 rcu_read_lock();
157 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 148 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -901,9 +892,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
901 if (!local->ops->remain_on_channel) 892 if (!local->ops->remain_on_channel)
902 local->hw.wiphy->max_remain_on_channel_duration = 5000; 893 local->hw.wiphy->max_remain_on_channel_duration = 5000;
903 894
904 if (local->ops->sched_scan_start)
905 local->hw.wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN;
906
907 /* mac80211 based drivers don't support internal TDLS setup */ 895 /* mac80211 based drivers don't support internal TDLS setup */
908 if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) 896 if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)
909 local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP; 897 local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 447f41bbe744..707ac61d63e5 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -62,7 +62,6 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
62 struct ieee802_11_elems *ie) 62 struct ieee802_11_elems *ie)
63{ 63{
64 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 64 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
65 struct ieee80211_local *local = sdata->local;
66 u32 basic_rates = 0; 65 u32 basic_rates = 0;
67 struct cfg80211_chan_def sta_chan_def; 66 struct cfg80211_chan_def sta_chan_def;
68 67
@@ -85,7 +84,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
85 (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth))) 84 (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth)))
86 return false; 85 return false;
87 86
88 ieee80211_sta_get_rates(local, ie, ieee80211_get_sdata_band(sdata), 87 ieee80211_sta_get_rates(sdata, ie, ieee80211_get_sdata_band(sdata),
89 &basic_rates); 88 &basic_rates);
90 89
91 if (sdata->vif.bss_conf.basic_rates != basic_rates) 90 if (sdata->vif.bss_conf.basic_rates != basic_rates)
@@ -274,7 +273,9 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
274 neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS); 273 neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS);
275 *pos++ = neighbors << 1; 274 *pos++ = neighbors << 1;
276 /* Mesh capability */ 275 /* Mesh capability */
277 *pos = IEEE80211_MESHCONF_CAPAB_FORWARDING; 276 *pos = 0x00;
277 *pos |= ifmsh->mshcfg.dot11MeshForwarding ?
278 IEEE80211_MESHCONF_CAPAB_FORWARDING : 0x00;
278 *pos |= ifmsh->accepting_plinks ? 279 *pos |= ifmsh->accepting_plinks ?
279 IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; 280 IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
280 /* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */ 281 /* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */
@@ -831,6 +832,9 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
831 832
832 ieee802_11_parse_elems(pos, len - baselen, false, &elems); 833 ieee802_11_parse_elems(pos, len - baselen, false, &elems);
833 834
835 if (!elems.mesh_id)
836 return;
837
834 /* 802.11-2012 10.1.4.3.2 */ 838 /* 802.11-2012 10.1.4.3.2 */
835 if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) && 839 if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) &&
836 !is_broadcast_ether_addr(mgmt->da)) || 840 !is_broadcast_ether_addr(mgmt->da)) ||
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 02c05fa15c20..6b65d5055f5b 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -379,7 +379,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
379 u32 rates, basic_rates = 0, changed = 0; 379 u32 rates, basic_rates = 0, changed = 0;
380 380
381 sband = local->hw.wiphy->bands[band]; 381 sband = local->hw.wiphy->bands[band];
382 rates = ieee80211_sta_get_rates(local, elems, band, &basic_rates); 382 rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates);
383 383
384 spin_lock_bh(&sta->lock); 384 spin_lock_bh(&sta->lock);
385 sta->last_rx = jiffies; 385 sta->last_rx = jiffies;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cc9e02d79b55..86e4ad56b573 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -489,27 +489,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
489 489
490/* frame sending functions */ 490/* frame sending functions */
491 491
492static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len,
493 struct ieee80211_supported_band *sband,
494 u32 *rates)
495{
496 int i, j, count;
497 *rates = 0;
498 count = 0;
499 for (i = 0; i < supp_rates_len; i++) {
500 int rate = (supp_rates[i] & 0x7F) * 5;
501
502 for (j = 0; j < sband->n_bitrates; j++)
503 if (sband->bitrates[j].bitrate == rate) {
504 *rates |= BIT(j);
505 count++;
506 break;
507 }
508 }
509
510 return count;
511}
512
513static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, 492static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
514 struct sk_buff *skb, u8 ap_ht_param, 493 struct sk_buff *skb, u8 ap_ht_param,
515 struct ieee80211_supported_band *sband, 494 struct ieee80211_supported_band *sband,
@@ -628,12 +607,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
628 struct ieee80211_mgmt *mgmt; 607 struct ieee80211_mgmt *mgmt;
629 u8 *pos, qos_info; 608 u8 *pos, qos_info;
630 size_t offset = 0, noffset; 609 size_t offset = 0, noffset;
631 int i, count, rates_len, supp_rates_len; 610 int i, count, rates_len, supp_rates_len, shift;
632 u16 capab; 611 u16 capab;
633 struct ieee80211_supported_band *sband; 612 struct ieee80211_supported_band *sband;
634 struct ieee80211_chanctx_conf *chanctx_conf; 613 struct ieee80211_chanctx_conf *chanctx_conf;
635 struct ieee80211_channel *chan; 614 struct ieee80211_channel *chan;
636 u32 rates = 0; 615 u32 rate_flags, rates = 0;
637 616
638 sdata_assert_lock(sdata); 617 sdata_assert_lock(sdata);
639 618
@@ -644,8 +623,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
644 return; 623 return;
645 } 624 }
646 chan = chanctx_conf->def.chan; 625 chan = chanctx_conf->def.chan;
626 rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
647 rcu_read_unlock(); 627 rcu_read_unlock();
648 sband = local->hw.wiphy->bands[chan->band]; 628 sband = local->hw.wiphy->bands[chan->band];
629 shift = ieee80211_vif_get_shift(&sdata->vif);
649 630
650 if (assoc_data->supp_rates_len) { 631 if (assoc_data->supp_rates_len) {
651 /* 632 /*
@@ -654,17 +635,24 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
654 * in the association request (e.g. D-Link DAP 1353 in 635 * in the association request (e.g. D-Link DAP 1353 in
655 * b-only mode)... 636 * b-only mode)...
656 */ 637 */
657 rates_len = ieee80211_compatible_rates(assoc_data->supp_rates, 638 rates_len = ieee80211_parse_bitrates(&chanctx_conf->def, sband,
658 assoc_data->supp_rates_len, 639 assoc_data->supp_rates,
659 sband, &rates); 640 assoc_data->supp_rates_len,
641 &rates);
660 } else { 642 } else {
661 /* 643 /*
662 * In case AP not provide any supported rates information 644 * In case AP not provide any supported rates information
663 * before association, we send information element(s) with 645 * before association, we send information element(s) with
664 * all rates that we support. 646 * all rates that we support.
665 */ 647 */
666 rates = ~0; 648 rates_len = 0;
667 rates_len = sband->n_bitrates; 649 for (i = 0; i < sband->n_bitrates; i++) {
650 if ((rate_flags & sband->bitrates[i].flags)
651 != rate_flags)
652 continue;
653 rates |= BIT(i);
654 rates_len++;
655 }
668 } 656 }
669 657
670 skb = alloc_skb(local->hw.extra_tx_headroom + 658 skb = alloc_skb(local->hw.extra_tx_headroom +
@@ -741,8 +729,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
741 count = 0; 729 count = 0;
742 for (i = 0; i < sband->n_bitrates; i++) { 730 for (i = 0; i < sband->n_bitrates; i++) {
743 if (BIT(i) & rates) { 731 if (BIT(i) & rates) {
744 int rate = sband->bitrates[i].bitrate; 732 int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
745 *pos++ = (u8) (rate / 5); 733 5 * (1 << shift));
734 *pos++ = (u8) rate;
746 if (++count == 8) 735 if (++count == 8)
747 break; 736 break;
748 } 737 }
@@ -755,8 +744,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
755 744
756 for (i++; i < sband->n_bitrates; i++) { 745 for (i++; i < sband->n_bitrates; i++) {
757 if (BIT(i) & rates) { 746 if (BIT(i) & rates) {
758 int rate = sband->bitrates[i].bitrate; 747 int rate;
759 *pos++ = (u8) (rate / 5); 748 rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
749 5 * (1 << shift));
750 *pos++ = (u8) rate;
760 } 751 }
761 } 752 }
762 } 753 }
@@ -767,7 +758,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
767 *pos++ = WLAN_EID_PWR_CAPABILITY; 758 *pos++ = WLAN_EID_PWR_CAPABILITY;
768 *pos++ = 2; 759 *pos++ = 2;
769 *pos++ = 0; /* min tx power */ 760 *pos++ = 0; /* min tx power */
770 *pos++ = chan->max_power; /* max tx power */ 761 /* max tx power */
762 *pos++ = ieee80211_chandef_max_power(&chanctx_conf->def);
771 763
772 /* 2. supported channels */ 764 /* 2. supported channels */
773 /* TODO: get this in reg domain format */ 765 /* TODO: get this in reg domain format */
@@ -1121,6 +1113,15 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
1121 case -1: 1113 case -1:
1122 cfg80211_chandef_create(&new_chandef, new_chan, 1114 cfg80211_chandef_create(&new_chandef, new_chan,
1123 NL80211_CHAN_NO_HT); 1115 NL80211_CHAN_NO_HT);
1116 /* keep width for 5/10 MHz channels */
1117 switch (sdata->vif.bss_conf.chandef.width) {
1118 case NL80211_CHAN_WIDTH_5:
1119 case NL80211_CHAN_WIDTH_10:
1120 new_chandef.width = sdata->vif.bss_conf.chandef.width;
1121 break;
1122 default:
1123 break;
1124 }
1124 break; 1125 break;
1125 } 1126 }
1126 1127
@@ -2443,15 +2444,16 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
2443 u8 *supp_rates, unsigned int supp_rates_len, 2444 u8 *supp_rates, unsigned int supp_rates_len,
2444 u32 *rates, u32 *basic_rates, 2445 u32 *rates, u32 *basic_rates,
2445 bool *have_higher_than_11mbit, 2446 bool *have_higher_than_11mbit,
2446 int *min_rate, int *min_rate_index) 2447 int *min_rate, int *min_rate_index,
2448 int shift, u32 rate_flags)
2447{ 2449{
2448 int i, j; 2450 int i, j;
2449 2451
2450 for (i = 0; i < supp_rates_len; i++) { 2452 for (i = 0; i < supp_rates_len; i++) {
2451 int rate = (supp_rates[i] & 0x7f) * 5; 2453 int rate = supp_rates[i] & 0x7f;
2452 bool is_basic = !!(supp_rates[i] & 0x80); 2454 bool is_basic = !!(supp_rates[i] & 0x80);
2453 2455
2454 if (rate > 110) 2456 if ((rate * 5 * (1 << shift)) > 110)
2455 *have_higher_than_11mbit = true; 2457 *have_higher_than_11mbit = true;
2456 2458
2457 /* 2459 /*
@@ -2467,12 +2469,20 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
2467 continue; 2469 continue;
2468 2470
2469 for (j = 0; j < sband->n_bitrates; j++) { 2471 for (j = 0; j < sband->n_bitrates; j++) {
2470 if (sband->bitrates[j].bitrate == rate) { 2472 struct ieee80211_rate *br;
2473 int brate;
2474
2475 br = &sband->bitrates[j];
2476 if ((rate_flags & br->flags) != rate_flags)
2477 continue;
2478
2479 brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
2480 if (brate == rate) {
2471 *rates |= BIT(j); 2481 *rates |= BIT(j);
2472 if (is_basic) 2482 if (is_basic)
2473 *basic_rates |= BIT(j); 2483 *basic_rates |= BIT(j);
2474 if (rate < *min_rate) { 2484 if ((rate * 5) < *min_rate) {
2475 *min_rate = rate; 2485 *min_rate = rate * 5;
2476 *min_rate_index = j; 2486 *min_rate_index = j;
2477 } 2487 }
2478 break; 2488 break;
@@ -2851,14 +2861,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
2851 ieee80211_rx_bss_put(local, bss); 2861 ieee80211_rx_bss_put(local, bss);
2852 sdata->vif.bss_conf.beacon_rate = bss->beacon_rate; 2862 sdata->vif.bss_conf.beacon_rate = bss->beacon_rate;
2853 } 2863 }
2854
2855 if (!sdata->u.mgd.associated ||
2856 !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid))
2857 return;
2858
2859 ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
2860 elems, true);
2861
2862} 2864}
2863 2865
2864 2866
@@ -3147,6 +3149,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3147 3149
3148 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); 3150 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
3149 3151
3152 ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
3153 &elems, true);
3154
3150 if (ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, 3155 if (ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
3151 elems.wmm_param_len)) 3156 elems.wmm_param_len))
3152 changed |= BSS_CHANGED_QOS; 3157 changed |= BSS_CHANGED_QOS;
@@ -3902,27 +3907,40 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
3902 if (!new_sta) 3907 if (!new_sta)
3903 return -ENOMEM; 3908 return -ENOMEM;
3904 } 3909 }
3905
3906 if (new_sta) { 3910 if (new_sta) {
3907 u32 rates = 0, basic_rates = 0; 3911 u32 rates = 0, basic_rates = 0;
3908 bool have_higher_than_11mbit; 3912 bool have_higher_than_11mbit;
3909 int min_rate = INT_MAX, min_rate_index = -1; 3913 int min_rate = INT_MAX, min_rate_index = -1;
3914 struct ieee80211_chanctx_conf *chanctx_conf;
3910 struct ieee80211_supported_band *sband; 3915 struct ieee80211_supported_band *sband;
3911 const struct cfg80211_bss_ies *ies; 3916 const struct cfg80211_bss_ies *ies;
3917 int shift;
3918 u32 rate_flags;
3912 3919
3913 sband = local->hw.wiphy->bands[cbss->channel->band]; 3920 sband = local->hw.wiphy->bands[cbss->channel->band];
3914 3921
3915 err = ieee80211_prep_channel(sdata, cbss); 3922 err = ieee80211_prep_channel(sdata, cbss);
3916 if (err) { 3923 if (err) {
3917 sta_info_free(local, new_sta); 3924 sta_info_free(local, new_sta);
3918 return err; 3925 return -EINVAL;
3926 }
3927 shift = ieee80211_vif_get_shift(&sdata->vif);
3928
3929 rcu_read_lock();
3930 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
3931 if (WARN_ON(!chanctx_conf)) {
3932 rcu_read_unlock();
3933 return -EINVAL;
3919 } 3934 }
3935 rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
3936 rcu_read_unlock();
3920 3937
3921 ieee80211_get_rates(sband, bss->supp_rates, 3938 ieee80211_get_rates(sband, bss->supp_rates,
3922 bss->supp_rates_len, 3939 bss->supp_rates_len,
3923 &rates, &basic_rates, 3940 &rates, &basic_rates,
3924 &have_higher_than_11mbit, 3941 &have_higher_than_11mbit,
3925 &min_rate, &min_rate_index); 3942 &min_rate, &min_rate_index,
3943 shift, rate_flags);
3926 3944
3927 /* 3945 /*
3928 * This used to be a workaround for basic rates missing 3946 * This used to be a workaround for basic rates missing
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 30d58d2d13e2..e126605cec66 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -210,7 +210,7 @@ static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc)
210 !ieee80211_is_data(fc); 210 !ieee80211_is_data(fc);
211} 211}
212 212
213static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, 213static void rc_send_low_basicrate(s8 *idx, u32 basic_rates,
214 struct ieee80211_supported_band *sband) 214 struct ieee80211_supported_band *sband)
215{ 215{
216 u8 i; 216 u8 i;
@@ -232,37 +232,28 @@ static void rc_send_low_broadcast(s8 *idx, u32 basic_rates,
232 /* could not find a basic rate; use original selection */ 232 /* could not find a basic rate; use original selection */
233} 233}
234 234
235static inline s8 235static void __rate_control_send_low(struct ieee80211_hw *hw,
236rate_lowest_non_cck_index(struct ieee80211_supported_band *sband, 236 struct ieee80211_supported_band *sband,
237 struct ieee80211_sta *sta) 237 struct ieee80211_sta *sta,
238 struct ieee80211_tx_info *info)
238{ 239{
239 int i; 240 int i;
241 u32 rate_flags =
242 ieee80211_chandef_rate_flags(&hw->conf.chandef);
243
244 if ((sband->band == IEEE80211_BAND_2GHZ) &&
245 (info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
246 rate_flags |= IEEE80211_RATE_ERP_G;
240 247
248 info->control.rates[0].idx = 0;
241 for (i = 0; i < sband->n_bitrates; i++) { 249 for (i = 0; i < sband->n_bitrates; i++) {
242 struct ieee80211_rate *srate = &sband->bitrates[i]; 250 if (!rate_supported(sta, sband->band, i))
243 if ((srate->bitrate == 10) || (srate->bitrate == 20) ||
244 (srate->bitrate == 55) || (srate->bitrate == 110))
245 continue; 251 continue;
246 252
247 if (rate_supported(sta, sband->band, i)) 253 info->control.rates[0].idx = i;
248 return i; 254 break;
249 } 255 }
250 256 WARN_ON_ONCE(i == sband->n_bitrates);
251 /* No matching rate found */
252 return 0;
253}
254
255static void __rate_control_send_low(struct ieee80211_hw *hw,
256 struct ieee80211_supported_band *sband,
257 struct ieee80211_sta *sta,
258 struct ieee80211_tx_info *info)
259{
260 if ((sband->band != IEEE80211_BAND_2GHZ) ||
261 !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
262 info->control.rates[0].idx = rate_lowest_index(sband, sta);
263 else
264 info->control.rates[0].idx =
265 rate_lowest_non_cck_index(sband, sta);
266 257
267 info->control.rates[0].count = 258 info->control.rates[0].count =
268 (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 259 (info->flags & IEEE80211_TX_CTL_NO_ACK) ?
@@ -272,28 +263,37 @@ static void __rate_control_send_low(struct ieee80211_hw *hw,
272} 263}
273 264
274 265
275bool rate_control_send_low(struct ieee80211_sta *sta, 266bool rate_control_send_low(struct ieee80211_sta *pubsta,
276 void *priv_sta, 267 void *priv_sta,
277 struct ieee80211_tx_rate_control *txrc) 268 struct ieee80211_tx_rate_control *txrc)
278{ 269{
279 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); 270 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
280 struct ieee80211_supported_band *sband = txrc->sband; 271 struct ieee80211_supported_band *sband = txrc->sband;
272 struct sta_info *sta;
281 int mcast_rate; 273 int mcast_rate;
274 bool use_basicrate = false;
282 275
283 if (!sta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) { 276 if (!pubsta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) {
284 __rate_control_send_low(txrc->hw, sband, sta, info); 277 __rate_control_send_low(txrc->hw, sband, pubsta, info);
285 278
286 if (!sta && txrc->bss) { 279 if (!pubsta && txrc->bss) {
287 mcast_rate = txrc->bss_conf->mcast_rate[sband->band]; 280 mcast_rate = txrc->bss_conf->mcast_rate[sband->band];
288 if (mcast_rate > 0) { 281 if (mcast_rate > 0) {
289 info->control.rates[0].idx = mcast_rate - 1; 282 info->control.rates[0].idx = mcast_rate - 1;
290 return true; 283 return true;
291 } 284 }
285 use_basicrate = true;
286 } else if (pubsta) {
287 sta = container_of(pubsta, struct sta_info, sta);
288 if (ieee80211_vif_is_mesh(&sta->sdata->vif))
289 use_basicrate = true;
290 }
292 291
293 rc_send_low_broadcast(&info->control.rates[0].idx, 292 if (use_basicrate)
293 rc_send_low_basicrate(&info->control.rates[0].idx,
294 txrc->bss_conf->basic_rates, 294 txrc->bss_conf->basic_rates,
295 sband); 295 sband);
296 } 296
297 return true; 297 return true;
298 } 298 }
299 return false; 299 return false;
@@ -585,6 +585,7 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
585 u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; 585 u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN];
586 bool has_mcs_mask; 586 bool has_mcs_mask;
587 u32 mask; 587 u32 mask;
588 u32 rate_flags;
588 int i; 589 int i;
589 590
590 /* 591 /*
@@ -594,6 +595,12 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
594 */ 595 */
595 mask = sdata->rc_rateidx_mask[info->band]; 596 mask = sdata->rc_rateidx_mask[info->band];
596 has_mcs_mask = sdata->rc_has_mcs_mask[info->band]; 597 has_mcs_mask = sdata->rc_has_mcs_mask[info->band];
598 rate_flags =
599 ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
600 for (i = 0; i < sband->n_bitrates; i++)
601 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
602 mask &= ~BIT(i);
603
597 if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask) 604 if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask)
598 return; 605 return;
599 606
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index d35a5dd3fb13..5dedc56c94db 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -66,11 +66,12 @@ static inline void rate_control_rate_init(struct sta_info *sta)
66 } 66 }
67 67
68 sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; 68 sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band];
69 rcu_read_unlock();
70 69
71 ieee80211_sta_set_rx_nss(sta); 70 ieee80211_sta_set_rx_nss(sta);
72 71
73 ref->ops->rate_init(ref->priv, sband, ista, priv_sta); 72 ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista,
73 priv_sta);
74 rcu_read_unlock();
74 set_sta_flag(sta, WLAN_STA_RATE_CONTROL); 75 set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
75} 76}
76 77
@@ -81,10 +82,21 @@ static inline void rate_control_rate_update(struct ieee80211_local *local,
81 struct rate_control_ref *ref = local->rate_ctrl; 82 struct rate_control_ref *ref = local->rate_ctrl;
82 struct ieee80211_sta *ista = &sta->sta; 83 struct ieee80211_sta *ista = &sta->sta;
83 void *priv_sta = sta->rate_ctrl_priv; 84 void *priv_sta = sta->rate_ctrl_priv;
85 struct ieee80211_chanctx_conf *chanctx_conf;
86
87 if (ref && ref->ops->rate_update) {
88 rcu_read_lock();
84 89
85 if (ref && ref->ops->rate_update) 90 chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf);
86 ref->ops->rate_update(ref->priv, sband, ista, 91 if (WARN_ON(!chanctx_conf)) {
87 priv_sta, changed); 92 rcu_read_unlock();
93 return;
94 }
95
96 ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def,
97 ista, priv_sta, changed);
98 rcu_read_unlock();
99 }
88 drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); 100 drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
89} 101}
90 102
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index e6512e2ffd20..8b5f7ef7c0c9 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -383,14 +383,18 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
383static void 383static void
384calc_rate_durations(enum ieee80211_band band, 384calc_rate_durations(enum ieee80211_band band,
385 struct minstrel_rate *d, 385 struct minstrel_rate *d,
386 struct ieee80211_rate *rate) 386 struct ieee80211_rate *rate,
387 struct cfg80211_chan_def *chandef)
387{ 388{
388 int erp = !!(rate->flags & IEEE80211_RATE_ERP_G); 389 int erp = !!(rate->flags & IEEE80211_RATE_ERP_G);
390 int shift = ieee80211_chandef_get_shift(chandef);
389 391
390 d->perfect_tx_time = ieee80211_frame_duration(band, 1200, 392 d->perfect_tx_time = ieee80211_frame_duration(band, 1200,
391 rate->bitrate, erp, 1); 393 DIV_ROUND_UP(rate->bitrate, 1 << shift), erp, 1,
394 shift);
392 d->ack_time = ieee80211_frame_duration(band, 10, 395 d->ack_time = ieee80211_frame_duration(band, 10,
393 rate->bitrate, erp, 1); 396 DIV_ROUND_UP(rate->bitrate, 1 << shift), erp, 1,
397 shift);
394} 398}
395 399
396static void 400static void
@@ -418,21 +422,25 @@ init_sample_table(struct minstrel_sta_info *mi)
418 422
419static void 423static void
420minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, 424minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
421 struct ieee80211_sta *sta, void *priv_sta) 425 struct cfg80211_chan_def *chandef,
426 struct ieee80211_sta *sta, void *priv_sta)
422{ 427{
423 struct minstrel_sta_info *mi = priv_sta; 428 struct minstrel_sta_info *mi = priv_sta;
424 struct minstrel_priv *mp = priv; 429 struct minstrel_priv *mp = priv;
425 struct ieee80211_rate *ctl_rate; 430 struct ieee80211_rate *ctl_rate;
426 unsigned int i, n = 0; 431 unsigned int i, n = 0;
427 unsigned int t_slot = 9; /* FIXME: get real slot time */ 432 unsigned int t_slot = 9; /* FIXME: get real slot time */
433 u32 rate_flags;
428 434
429 mi->sta = sta; 435 mi->sta = sta;
430 mi->lowest_rix = rate_lowest_index(sband, sta); 436 mi->lowest_rix = rate_lowest_index(sband, sta);
431 ctl_rate = &sband->bitrates[mi->lowest_rix]; 437 ctl_rate = &sband->bitrates[mi->lowest_rix];
432 mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10, 438 mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10,
433 ctl_rate->bitrate, 439 ctl_rate->bitrate,
434 !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1); 440 !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1,
441 ieee80211_chandef_get_shift(chandef));
435 442
443 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
436 memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate)); 444 memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate));
437 mi->max_prob_rate = 0; 445 mi->max_prob_rate = 0;
438 446
@@ -441,15 +449,22 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
441 unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0; 449 unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0;
442 unsigned int tx_time_single; 450 unsigned int tx_time_single;
443 unsigned int cw = mp->cw_min; 451 unsigned int cw = mp->cw_min;
452 int shift;
444 453
445 if (!rate_supported(sta, sband->band, i)) 454 if (!rate_supported(sta, sband->band, i))
446 continue; 455 continue;
456 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
457 continue;
458
447 n++; 459 n++;
448 memset(mr, 0, sizeof(*mr)); 460 memset(mr, 0, sizeof(*mr));
449 461
450 mr->rix = i; 462 mr->rix = i;
451 mr->bitrate = sband->bitrates[i].bitrate / 5; 463 shift = ieee80211_chandef_get_shift(chandef);
452 calc_rate_durations(sband->band, mr, &sband->bitrates[i]); 464 mr->bitrate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
465 (1 << shift) * 5);
466 calc_rate_durations(sband->band, mr, &sband->bitrates[i],
467 chandef);
453 468
454 /* calculate maximum number of retransmissions before 469 /* calculate maximum number of retransmissions before
455 * fallback (based on maximum segment size) */ 470 * fallback (based on maximum segment size) */
@@ -547,6 +562,7 @@ minstrel_init_cck_rates(struct minstrel_priv *mp)
547{ 562{
548 static const int bitrates[4] = { 10, 20, 55, 110 }; 563 static const int bitrates[4] = { 10, 20, 55, 110 };
549 struct ieee80211_supported_band *sband; 564 struct ieee80211_supported_band *sband;
565 u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
550 int i, j; 566 int i, j;
551 567
552 sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ]; 568 sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ];
@@ -559,6 +575,9 @@ minstrel_init_cck_rates(struct minstrel_priv *mp)
559 if (rate->flags & IEEE80211_RATE_ERP_G) 575 if (rate->flags & IEEE80211_RATE_ERP_G)
560 continue; 576 continue;
561 577
578 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
579 continue;
580
562 for (j = 0; j < ARRAY_SIZE(bitrates); j++) { 581 for (j = 0; j < ARRAY_SIZE(bitrates); j++) {
563 if (rate->bitrate != bitrates[j]) 582 if (rate->bitrate != bitrates[j])
564 continue; 583 continue;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index f3bbea1eb9e7..7c323f27ba23 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -776,7 +776,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
776 776
777 /* Don't use EAPOL frames for sampling on non-mrr hw */ 777 /* Don't use EAPOL frames for sampling on non-mrr hw */
778 if (mp->hw->max_rates == 1 && 778 if (mp->hw->max_rates == 1 &&
779 txrc->skb->protocol == cpu_to_be16(ETH_P_PAE)) 779 (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
780 sample_idx = -1; 780 sample_idx = -1;
781 else 781 else
782 sample_idx = minstrel_get_sample_rate(mp, mi); 782 sample_idx = minstrel_get_sample_rate(mp, mi);
@@ -847,6 +847,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
847 847
848static void 848static void
849minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, 849minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
850 struct cfg80211_chan_def *chandef,
850 struct ieee80211_sta *sta, void *priv_sta) 851 struct ieee80211_sta *sta, void *priv_sta)
851{ 852{
852 struct minstrel_priv *mp = priv; 853 struct minstrel_priv *mp = priv;
@@ -872,8 +873,9 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
872 mi->sta = sta; 873 mi->sta = sta;
873 mi->stats_update = jiffies; 874 mi->stats_update = jiffies;
874 875
875 ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1); 876 ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0);
876 mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1) + ack_dur; 877 mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1, 0);
878 mi->overhead += ack_dur;
877 mi->overhead_rtscts = mi->overhead + 2 * ack_dur; 879 mi->overhead_rtscts = mi->overhead + 2 * ack_dur;
878 880
879 mi->avg_ampdu_len = MINSTREL_FRAC(1, 1); 881 mi->avg_ampdu_len = MINSTREL_FRAC(1, 1);
@@ -942,22 +944,25 @@ use_legacy:
942 memset(&msp->legacy, 0, sizeof(msp->legacy)); 944 memset(&msp->legacy, 0, sizeof(msp->legacy));
943 msp->legacy.r = msp->ratelist; 945 msp->legacy.r = msp->ratelist;
944 msp->legacy.sample_table = msp->sample_table; 946 msp->legacy.sample_table = msp->sample_table;
945 return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy); 947 return mac80211_minstrel.rate_init(priv, sband, chandef, sta,
948 &msp->legacy);
946} 949}
947 950
948static void 951static void
949minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband, 952minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband,
953 struct cfg80211_chan_def *chandef,
950 struct ieee80211_sta *sta, void *priv_sta) 954 struct ieee80211_sta *sta, void *priv_sta)
951{ 955{
952 minstrel_ht_update_caps(priv, sband, sta, priv_sta); 956 minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta);
953} 957}
954 958
955static void 959static void
956minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, 960minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband,
961 struct cfg80211_chan_def *chandef,
957 struct ieee80211_sta *sta, void *priv_sta, 962 struct ieee80211_sta *sta, void *priv_sta,
958 u32 changed) 963 u32 changed)
959{ 964{
960 minstrel_ht_update_caps(priv, sband, sta, priv_sta); 965 minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta);
961} 966}
962 967
963static void * 968static void *
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 502d3ecc4a79..958fad07b54c 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -293,6 +293,7 @@ rate_control_pid_get_rate(void *priv, struct ieee80211_sta *sta,
293 293
294static void 294static void
295rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, 295rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband,
296 struct cfg80211_chan_def *chandef,
296 struct ieee80211_sta *sta, void *priv_sta) 297 struct ieee80211_sta *sta, void *priv_sta)
297{ 298{
298 struct rc_pid_sta_info *spinfo = priv_sta; 299 struct rc_pid_sta_info *spinfo = priv_sta;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2c5a79bd3777..54395d7583ba 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -87,11 +87,13 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
87 int len; 87 int len;
88 88
89 /* always present fields */ 89 /* always present fields */
90 len = sizeof(struct ieee80211_radiotap_header) + 9; 90 len = sizeof(struct ieee80211_radiotap_header) + 8;
91 91
92 /* allocate extra bitmap */ 92 /* allocate extra bitmaps */
93 if (status->vendor_radiotap_len) 93 if (status->vendor_radiotap_len)
94 len += 4; 94 len += 4;
95 if (status->chains)
96 len += 4 * hweight8(status->chains);
95 97
96 if (ieee80211_have_rx_timestamp(status)) { 98 if (ieee80211_have_rx_timestamp(status)) {
97 len = ALIGN(len, 8); 99 len = ALIGN(len, 8);
@@ -100,6 +102,10 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
100 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 102 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
101 len += 1; 103 len += 1;
102 104
105 /* antenna field, if we don't have per-chain info */
106 if (!status->chains)
107 len += 1;
108
103 /* padding for RX_FLAGS if necessary */ 109 /* padding for RX_FLAGS if necessary */
104 len = ALIGN(len, 2); 110 len = ALIGN(len, 2);
105 111
@@ -116,6 +122,11 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
116 len += 12; 122 len += 12;
117 } 123 }
118 124
125 if (status->chains) {
126 /* antenna and antenna signal fields */
127 len += 2 * hweight8(status->chains);
128 }
129
119 if (status->vendor_radiotap_len) { 130 if (status->vendor_radiotap_len) {
120 if (WARN_ON_ONCE(status->vendor_radiotap_align == 0)) 131 if (WARN_ON_ONCE(status->vendor_radiotap_align == 0))
121 status->vendor_radiotap_align = 1; 132 status->vendor_radiotap_align = 1;
@@ -145,8 +156,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
145 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 156 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
146 struct ieee80211_radiotap_header *rthdr; 157 struct ieee80211_radiotap_header *rthdr;
147 unsigned char *pos; 158 unsigned char *pos;
159 __le32 *it_present;
160 u32 it_present_val;
148 u16 rx_flags = 0; 161 u16 rx_flags = 0;
149 int mpdulen; 162 u16 channel_flags = 0;
163 int mpdulen, chain;
164 unsigned long chains = status->chains;
150 165
151 mpdulen = skb->len; 166 mpdulen = skb->len;
152 if (!(has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS))) 167 if (!(has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)))
@@ -154,25 +169,39 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
154 169
155 rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len); 170 rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len);
156 memset(rthdr, 0, rtap_len); 171 memset(rthdr, 0, rtap_len);
172 it_present = &rthdr->it_present;
157 173
158 /* radiotap header, set always present flags */ 174 /* radiotap header, set always present flags */
159 rthdr->it_present =
160 cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) |
161 (1 << IEEE80211_RADIOTAP_CHANNEL) |
162 (1 << IEEE80211_RADIOTAP_ANTENNA) |
163 (1 << IEEE80211_RADIOTAP_RX_FLAGS));
164 rthdr->it_len = cpu_to_le16(rtap_len + status->vendor_radiotap_len); 175 rthdr->it_len = cpu_to_le16(rtap_len + status->vendor_radiotap_len);
176 it_present_val = BIT(IEEE80211_RADIOTAP_FLAGS) |
177 BIT(IEEE80211_RADIOTAP_CHANNEL) |
178 BIT(IEEE80211_RADIOTAP_RX_FLAGS);
179
180 if (!status->chains)
181 it_present_val |= BIT(IEEE80211_RADIOTAP_ANTENNA);
165 182
166 pos = (unsigned char *)(rthdr + 1); 183 for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
184 it_present_val |=
185 BIT(IEEE80211_RADIOTAP_EXT) |
186 BIT(IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE);
187 put_unaligned_le32(it_present_val, it_present);
188 it_present++;
189 it_present_val = BIT(IEEE80211_RADIOTAP_ANTENNA) |
190 BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL);
191 }
167 192
168 if (status->vendor_radiotap_len) { 193 if (status->vendor_radiotap_len) {
169 rthdr->it_present |= 194 it_present_val |= BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE) |
170 cpu_to_le32(BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE)) | 195 BIT(IEEE80211_RADIOTAP_EXT);
171 cpu_to_le32(BIT(IEEE80211_RADIOTAP_EXT)); 196 put_unaligned_le32(it_present_val, it_present);
172 put_unaligned_le32(status->vendor_radiotap_bitmap, pos); 197 it_present++;
173 pos += 4; 198 it_present_val = status->vendor_radiotap_bitmap;
174 } 199 }
175 200
201 put_unaligned_le32(it_present_val, it_present);
202
203 pos = (void *)(it_present + 1);
204
176 /* the order of the following fields is important */ 205 /* the order of the following fields is important */
177 206
178 /* IEEE80211_RADIOTAP_TSFT */ 207 /* IEEE80211_RADIOTAP_TSFT */
@@ -207,28 +236,35 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
207 */ 236 */
208 *pos = 0; 237 *pos = 0;
209 } else { 238 } else {
239 int shift = 0;
210 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); 240 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
211 *pos = rate->bitrate / 5; 241 if (status->flag & RX_FLAG_10MHZ)
242 shift = 1;
243 else if (status->flag & RX_FLAG_5MHZ)
244 shift = 2;
245 *pos = DIV_ROUND_UP(rate->bitrate, 5 * (1 << shift));
212 } 246 }
213 pos++; 247 pos++;
214 248
215 /* IEEE80211_RADIOTAP_CHANNEL */ 249 /* IEEE80211_RADIOTAP_CHANNEL */
216 put_unaligned_le16(status->freq, pos); 250 put_unaligned_le16(status->freq, pos);
217 pos += 2; 251 pos += 2;
252 if (status->flag & RX_FLAG_10MHZ)
253 channel_flags |= IEEE80211_CHAN_HALF;
254 else if (status->flag & RX_FLAG_5MHZ)
255 channel_flags |= IEEE80211_CHAN_QUARTER;
256
218 if (status->band == IEEE80211_BAND_5GHZ) 257 if (status->band == IEEE80211_BAND_5GHZ)
219 put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ, 258 channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ;
220 pos);
221 else if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) 259 else if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT))
222 put_unaligned_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ, 260 channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ;
223 pos);
224 else if (rate && rate->flags & IEEE80211_RATE_ERP_G) 261 else if (rate && rate->flags & IEEE80211_RATE_ERP_G)
225 put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ, 262 channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ;
226 pos);
227 else if (rate) 263 else if (rate)
228 put_unaligned_le16(IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ, 264 channel_flags |= IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ;
229 pos);
230 else 265 else
231 put_unaligned_le16(IEEE80211_CHAN_2GHZ, pos); 266 channel_flags |= IEEE80211_CHAN_2GHZ;
267 put_unaligned_le16(channel_flags, pos);
232 pos += 2; 268 pos += 2;
233 269
234 /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */ 270 /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */
@@ -242,9 +278,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
242 278
243 /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */ 279 /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */
244 280
245 /* IEEE80211_RADIOTAP_ANTENNA */ 281 if (!status->chains) {
246 *pos = status->antenna; 282 /* IEEE80211_RADIOTAP_ANTENNA */
247 pos++; 283 *pos = status->antenna;
284 pos++;
285 }
248 286
249 /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */ 287 /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */
250 288
@@ -341,6 +379,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
341 pos += 2; 379 pos += 2;
342 } 380 }
343 381
382 for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
383 *pos++ = status->chain_signal[chain];
384 *pos++ = chain;
385 }
386
344 if (status->vendor_radiotap_len) { 387 if (status->vendor_radiotap_len) {
345 /* ensure 2 byte alignment for the vendor field as required */ 388 /* ensure 2 byte alignment for the vendor field as required */
346 if ((pos - (u8 *)rthdr) & 1) 389 if ((pos - (u8 *)rthdr) & 1)
@@ -1012,207 +1055,6 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
1012 1055
1013 1056
1014static ieee80211_rx_result debug_noinline 1057static ieee80211_rx_result debug_noinline
1015ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
1016{
1017 struct sk_buff *skb = rx->skb;
1018 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
1019 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
1020 int keyidx;
1021 int hdrlen;
1022 ieee80211_rx_result result = RX_DROP_UNUSABLE;
1023 struct ieee80211_key *sta_ptk = NULL;
1024 int mmie_keyidx = -1;
1025 __le16 fc;
1026
1027 /*
1028 * Key selection 101
1029 *
1030 * There are four types of keys:
1031 * - GTK (group keys)
1032 * - IGTK (group keys for management frames)
1033 * - PTK (pairwise keys)
1034 * - STK (station-to-station pairwise keys)
1035 *
1036 * When selecting a key, we have to distinguish between multicast
1037 * (including broadcast) and unicast frames, the latter can only
1038 * use PTKs and STKs while the former always use GTKs and IGTKs.
1039 * Unless, of course, actual WEP keys ("pre-RSNA") are used, then
1040 * unicast frames can also use key indices like GTKs. Hence, if we
1041 * don't have a PTK/STK we check the key index for a WEP key.
1042 *
1043 * Note that in a regular BSS, multicast frames are sent by the
1044 * AP only, associated stations unicast the frame to the AP first
1045 * which then multicasts it on their behalf.
1046 *
1047 * There is also a slight problem in IBSS mode: GTKs are negotiated
1048 * with each station, that is something we don't currently handle.
1049 * The spec seems to expect that one negotiates the same key with
1050 * every station but there's no such requirement; VLANs could be
1051 * possible.
1052 */
1053
1054 /*
1055 * No point in finding a key and decrypting if the frame is neither
1056 * addressed to us nor a multicast frame.
1057 */
1058 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1059 return RX_CONTINUE;
1060
1061 /* start without a key */
1062 rx->key = NULL;
1063
1064 if (rx->sta)
1065 sta_ptk = rcu_dereference(rx->sta->ptk);
1066
1067 fc = hdr->frame_control;
1068
1069 if (!ieee80211_has_protected(fc))
1070 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
1071
1072 if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) {
1073 rx->key = sta_ptk;
1074 if ((status->flag & RX_FLAG_DECRYPTED) &&
1075 (status->flag & RX_FLAG_IV_STRIPPED))
1076 return RX_CONTINUE;
1077 /* Skip decryption if the frame is not protected. */
1078 if (!ieee80211_has_protected(fc))
1079 return RX_CONTINUE;
1080 } else if (mmie_keyidx >= 0) {
1081 /* Broadcast/multicast robust management frame / BIP */
1082 if ((status->flag & RX_FLAG_DECRYPTED) &&
1083 (status->flag & RX_FLAG_IV_STRIPPED))
1084 return RX_CONTINUE;
1085
1086 if (mmie_keyidx < NUM_DEFAULT_KEYS ||
1087 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
1088 return RX_DROP_MONITOR; /* unexpected BIP keyidx */
1089 if (rx->sta)
1090 rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
1091 if (!rx->key)
1092 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
1093 } else if (!ieee80211_has_protected(fc)) {
1094 /*
1095 * The frame was not protected, so skip decryption. However, we
1096 * need to set rx->key if there is a key that could have been
1097 * used so that the frame may be dropped if encryption would
1098 * have been expected.
1099 */
1100 struct ieee80211_key *key = NULL;
1101 struct ieee80211_sub_if_data *sdata = rx->sdata;
1102 int i;
1103
1104 if (ieee80211_is_mgmt(fc) &&
1105 is_multicast_ether_addr(hdr->addr1) &&
1106 (key = rcu_dereference(rx->sdata->default_mgmt_key)))
1107 rx->key = key;
1108 else {
1109 if (rx->sta) {
1110 for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
1111 key = rcu_dereference(rx->sta->gtk[i]);
1112 if (key)
1113 break;
1114 }
1115 }
1116 if (!key) {
1117 for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
1118 key = rcu_dereference(sdata->keys[i]);
1119 if (key)
1120 break;
1121 }
1122 }
1123 if (key)
1124 rx->key = key;
1125 }
1126 return RX_CONTINUE;
1127 } else {
1128 u8 keyid;
1129 /*
1130 * The device doesn't give us the IV so we won't be
1131 * able to look up the key. That's ok though, we
1132 * don't need to decrypt the frame, we just won't
1133 * be able to keep statistics accurate.
1134 * Except for key threshold notifications, should
1135 * we somehow allow the driver to tell us which key
1136 * the hardware used if this flag is set?
1137 */
1138 if ((status->flag & RX_FLAG_DECRYPTED) &&
1139 (status->flag & RX_FLAG_IV_STRIPPED))
1140 return RX_CONTINUE;
1141
1142 hdrlen = ieee80211_hdrlen(fc);
1143
1144 if (rx->skb->len < 8 + hdrlen)
1145 return RX_DROP_UNUSABLE; /* TODO: count this? */
1146
1147 /*
1148 * no need to call ieee80211_wep_get_keyidx,
1149 * it verifies a bunch of things we've done already
1150 */
1151 skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
1152 keyidx = keyid >> 6;
1153
1154 /* check per-station GTK first, if multicast packet */
1155 if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
1156 rx->key = rcu_dereference(rx->sta->gtk[keyidx]);
1157
1158 /* if not found, try default key */
1159 if (!rx->key) {
1160 rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
1161
1162 /*
1163 * RSNA-protected unicast frames should always be
1164 * sent with pairwise or station-to-station keys,
1165 * but for WEP we allow using a key index as well.
1166 */
1167 if (rx->key &&
1168 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
1169 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
1170 !is_multicast_ether_addr(hdr->addr1))
1171 rx->key = NULL;
1172 }
1173 }
1174
1175 if (rx->key) {
1176 if (unlikely(rx->key->flags & KEY_FLAG_TAINTED))
1177 return RX_DROP_MONITOR;
1178
1179 rx->key->tx_rx_count++;
1180 /* TODO: add threshold stuff again */
1181 } else {
1182 return RX_DROP_MONITOR;
1183 }
1184
1185 switch (rx->key->conf.cipher) {
1186 case WLAN_CIPHER_SUITE_WEP40:
1187 case WLAN_CIPHER_SUITE_WEP104:
1188 result = ieee80211_crypto_wep_decrypt(rx);
1189 break;
1190 case WLAN_CIPHER_SUITE_TKIP:
1191 result = ieee80211_crypto_tkip_decrypt(rx);
1192 break;
1193 case WLAN_CIPHER_SUITE_CCMP:
1194 result = ieee80211_crypto_ccmp_decrypt(rx);
1195 break;
1196 case WLAN_CIPHER_SUITE_AES_CMAC:
1197 result = ieee80211_crypto_aes_cmac_decrypt(rx);
1198 break;
1199 default:
1200 /*
1201 * We can reach here only with HW-only algorithms
1202 * but why didn't it decrypt the frame?!
1203 */
1204 return RX_DROP_UNUSABLE;
1205 }
1206
1207 /* the hdr variable is invalid after the decrypt handlers */
1208
1209 /* either the frame has been decrypted or will be dropped */
1210 status->flag |= RX_FLAG_DECRYPTED;
1211
1212 return result;
1213}
1214
1215static ieee80211_rx_result debug_noinline
1216ieee80211_rx_h_check_more_data(struct ieee80211_rx_data *rx) 1058ieee80211_rx_h_check_more_data(struct ieee80211_rx_data *rx)
1217{ 1059{
1218 struct ieee80211_local *local; 1060 struct ieee80211_local *local;
@@ -1513,6 +1355,207 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1513 return RX_CONTINUE; 1355 return RX_CONTINUE;
1514} /* ieee80211_rx_h_sta_process */ 1356} /* ieee80211_rx_h_sta_process */
1515 1357
1358static ieee80211_rx_result debug_noinline
1359ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
1360{
1361 struct sk_buff *skb = rx->skb;
1362 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
1363 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
1364 int keyidx;
1365 int hdrlen;
1366 ieee80211_rx_result result = RX_DROP_UNUSABLE;
1367 struct ieee80211_key *sta_ptk = NULL;
1368 int mmie_keyidx = -1;
1369 __le16 fc;
1370
1371 /*
1372 * Key selection 101
1373 *
1374 * There are four types of keys:
1375 * - GTK (group keys)
1376 * - IGTK (group keys for management frames)
1377 * - PTK (pairwise keys)
1378 * - STK (station-to-station pairwise keys)
1379 *
1380 * When selecting a key, we have to distinguish between multicast
1381 * (including broadcast) and unicast frames, the latter can only
1382 * use PTKs and STKs while the former always use GTKs and IGTKs.
1383 * Unless, of course, actual WEP keys ("pre-RSNA") are used, then
1384 * unicast frames can also use key indices like GTKs. Hence, if we
1385 * don't have a PTK/STK we check the key index for a WEP key.
1386 *
1387 * Note that in a regular BSS, multicast frames are sent by the
1388 * AP only, associated stations unicast the frame to the AP first
1389 * which then multicasts it on their behalf.
1390 *
1391 * There is also a slight problem in IBSS mode: GTKs are negotiated
1392 * with each station, that is something we don't currently handle.
1393 * The spec seems to expect that one negotiates the same key with
1394 * every station but there's no such requirement; VLANs could be
1395 * possible.
1396 */
1397
1398 /*
1399 * No point in finding a key and decrypting if the frame is neither
1400 * addressed to us nor a multicast frame.
1401 */
1402 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1403 return RX_CONTINUE;
1404
1405 /* start without a key */
1406 rx->key = NULL;
1407
1408 if (rx->sta)
1409 sta_ptk = rcu_dereference(rx->sta->ptk);
1410
1411 fc = hdr->frame_control;
1412
1413 if (!ieee80211_has_protected(fc))
1414 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
1415
1416 if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) {
1417 rx->key = sta_ptk;
1418 if ((status->flag & RX_FLAG_DECRYPTED) &&
1419 (status->flag & RX_FLAG_IV_STRIPPED))
1420 return RX_CONTINUE;
1421 /* Skip decryption if the frame is not protected. */
1422 if (!ieee80211_has_protected(fc))
1423 return RX_CONTINUE;
1424 } else if (mmie_keyidx >= 0) {
1425 /* Broadcast/multicast robust management frame / BIP */
1426 if ((status->flag & RX_FLAG_DECRYPTED) &&
1427 (status->flag & RX_FLAG_IV_STRIPPED))
1428 return RX_CONTINUE;
1429
1430 if (mmie_keyidx < NUM_DEFAULT_KEYS ||
1431 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
1432 return RX_DROP_MONITOR; /* unexpected BIP keyidx */
1433 if (rx->sta)
1434 rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
1435 if (!rx->key)
1436 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
1437 } else if (!ieee80211_has_protected(fc)) {
1438 /*
1439 * The frame was not protected, so skip decryption. However, we
1440 * need to set rx->key if there is a key that could have been
1441 * used so that the frame may be dropped if encryption would
1442 * have been expected.
1443 */
1444 struct ieee80211_key *key = NULL;
1445 struct ieee80211_sub_if_data *sdata = rx->sdata;
1446 int i;
1447
1448 if (ieee80211_is_mgmt(fc) &&
1449 is_multicast_ether_addr(hdr->addr1) &&
1450 (key = rcu_dereference(rx->sdata->default_mgmt_key)))
1451 rx->key = key;
1452 else {
1453 if (rx->sta) {
1454 for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
1455 key = rcu_dereference(rx->sta->gtk[i]);
1456 if (key)
1457 break;
1458 }
1459 }
1460 if (!key) {
1461 for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
1462 key = rcu_dereference(sdata->keys[i]);
1463 if (key)
1464 break;
1465 }
1466 }
1467 if (key)
1468 rx->key = key;
1469 }
1470 return RX_CONTINUE;
1471 } else {
1472 u8 keyid;
1473 /*
1474 * The device doesn't give us the IV so we won't be
1475 * able to look up the key. That's ok though, we
1476 * don't need to decrypt the frame, we just won't
1477 * be able to keep statistics accurate.
1478 * Except for key threshold notifications, should
1479 * we somehow allow the driver to tell us which key
1480 * the hardware used if this flag is set?
1481 */
1482 if ((status->flag & RX_FLAG_DECRYPTED) &&
1483 (status->flag & RX_FLAG_IV_STRIPPED))
1484 return RX_CONTINUE;
1485
1486 hdrlen = ieee80211_hdrlen(fc);
1487
1488 if (rx->skb->len < 8 + hdrlen)
1489 return RX_DROP_UNUSABLE; /* TODO: count this? */
1490
1491 /*
1492 * no need to call ieee80211_wep_get_keyidx,
1493 * it verifies a bunch of things we've done already
1494 */
1495 skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
1496 keyidx = keyid >> 6;
1497
1498 /* check per-station GTK first, if multicast packet */
1499 if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
1500 rx->key = rcu_dereference(rx->sta->gtk[keyidx]);
1501
1502 /* if not found, try default key */
1503 if (!rx->key) {
1504 rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
1505
1506 /*
1507 * RSNA-protected unicast frames should always be
1508 * sent with pairwise or station-to-station keys,
1509 * but for WEP we allow using a key index as well.
1510 */
1511 if (rx->key &&
1512 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
1513 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
1514 !is_multicast_ether_addr(hdr->addr1))
1515 rx->key = NULL;
1516 }
1517 }
1518
1519 if (rx->key) {
1520 if (unlikely(rx->key->flags & KEY_FLAG_TAINTED))
1521 return RX_DROP_MONITOR;
1522
1523 rx->key->tx_rx_count++;
1524 /* TODO: add threshold stuff again */
1525 } else {
1526 return RX_DROP_MONITOR;
1527 }
1528
1529 switch (rx->key->conf.cipher) {
1530 case WLAN_CIPHER_SUITE_WEP40:
1531 case WLAN_CIPHER_SUITE_WEP104:
1532 result = ieee80211_crypto_wep_decrypt(rx);
1533 break;
1534 case WLAN_CIPHER_SUITE_TKIP:
1535 result = ieee80211_crypto_tkip_decrypt(rx);
1536 break;
1537 case WLAN_CIPHER_SUITE_CCMP:
1538 result = ieee80211_crypto_ccmp_decrypt(rx);
1539 break;
1540 case WLAN_CIPHER_SUITE_AES_CMAC:
1541 result = ieee80211_crypto_aes_cmac_decrypt(rx);
1542 break;
1543 default:
1544 /*
1545 * We can reach here only with HW-only algorithms
1546 * but why didn't it decrypt the frame?!
1547 */
1548 return RX_DROP_UNUSABLE;
1549 }
1550
1551 /* the hdr variable is invalid after the decrypt handlers */
1552
1553 /* either the frame has been decrypted or will be dropped */
1554 status->flag |= RX_FLAG_DECRYPTED;
1555
1556 return result;
1557}
1558
1516static inline struct ieee80211_fragment_entry * 1559static inline struct ieee80211_fragment_entry *
1517ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, 1560ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
1518 unsigned int frag, unsigned int seq, int rx_queue, 1561 unsigned int frag, unsigned int seq, int rx_queue,
@@ -2641,8 +2684,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
2641 sig = status->signal; 2684 sig = status->signal;
2642 2685
2643 if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig, 2686 if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
2644 rx->skb->data, rx->skb->len, 2687 rx->skb->data, rx->skb->len, 0, GFP_ATOMIC)) {
2645 GFP_ATOMIC)) {
2646 if (rx->sta) 2688 if (rx->sta)
2647 rx->sta->rx_packets++; 2689 rx->sta->rx_packets++;
2648 dev_kfree_skb(rx->skb); 2690 dev_kfree_skb(rx->skb);
@@ -2896,10 +2938,10 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
2896 */ 2938 */
2897 rx->skb = skb; 2939 rx->skb = skb;
2898 2940
2899 CALL_RXH(ieee80211_rx_h_decrypt)
2900 CALL_RXH(ieee80211_rx_h_check_more_data) 2941 CALL_RXH(ieee80211_rx_h_check_more_data)
2901 CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll) 2942 CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll)
2902 CALL_RXH(ieee80211_rx_h_sta_process) 2943 CALL_RXH(ieee80211_rx_h_sta_process)
2944 CALL_RXH(ieee80211_rx_h_decrypt)
2903 CALL_RXH(ieee80211_rx_h_defragment) 2945 CALL_RXH(ieee80211_rx_h_defragment)
2904 CALL_RXH(ieee80211_rx_h_michael_mic_verify) 2946 CALL_RXH(ieee80211_rx_h_michael_mic_verify)
2905 /* must be after MMIC verify so header is counted in MPDU mic */ 2947 /* must be after MMIC verify so header is counted in MPDU mic */
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 1b122a79b0d8..08afe74b98f4 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -66,6 +66,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
66 struct cfg80211_bss *cbss; 66 struct cfg80211_bss *cbss;
67 struct ieee80211_bss *bss; 67 struct ieee80211_bss *bss;
68 int clen, srlen; 68 int clen, srlen;
69 enum nl80211_bss_scan_width scan_width;
69 s32 signal = 0; 70 s32 signal = 0;
70 71
71 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 72 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
@@ -73,8 +74,15 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
73 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) 74 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
74 signal = (rx_status->signal * 100) / local->hw.max_signal; 75 signal = (rx_status->signal * 100) / local->hw.max_signal;
75 76
76 cbss = cfg80211_inform_bss_frame(local->hw.wiphy, channel, 77 scan_width = NL80211_BSS_CHAN_WIDTH_20;
77 mgmt, len, signal, GFP_ATOMIC); 78 if (rx_status->flag & RX_FLAG_5MHZ)
79 scan_width = NL80211_BSS_CHAN_WIDTH_5;
80 if (rx_status->flag & RX_FLAG_10MHZ)
81 scan_width = NL80211_BSS_CHAN_WIDTH_10;
82
83 cbss = cfg80211_inform_bss_width_frame(local->hw.wiphy, channel,
84 scan_width, mgmt, len, signal,
85 GFP_ATOMIC);
78 if (!cbss) 86 if (!cbss)
79 return NULL; 87 return NULL;
80 88
@@ -204,10 +212,29 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
204 ieee80211_rx_bss_put(local, bss); 212 ieee80211_rx_bss_put(local, bss);
205} 213}
206 214
215static void
216ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef,
217 enum nl80211_bss_scan_width scan_width)
218{
219 memset(chandef, 0, sizeof(*chandef));
220 switch (scan_width) {
221 case NL80211_BSS_CHAN_WIDTH_5:
222 chandef->width = NL80211_CHAN_WIDTH_5;
223 break;
224 case NL80211_BSS_CHAN_WIDTH_10:
225 chandef->width = NL80211_CHAN_WIDTH_10;
226 break;
227 default:
228 chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
229 break;
230 }
231}
232
207/* return false if no more work */ 233/* return false if no more work */
208static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) 234static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
209{ 235{
210 struct cfg80211_scan_request *req = local->scan_req; 236 struct cfg80211_scan_request *req = local->scan_req;
237 struct cfg80211_chan_def chandef;
211 enum ieee80211_band band; 238 enum ieee80211_band band;
212 int i, ielen, n_chans; 239 int i, ielen, n_chans;
213 240
@@ -229,11 +256,12 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
229 } while (!n_chans); 256 } while (!n_chans);
230 257
231 local->hw_scan_req->n_channels = n_chans; 258 local->hw_scan_req->n_channels = n_chans;
259 ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
232 260
233 ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie, 261 ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie,
234 local->hw_scan_ies_bufsize, 262 local->hw_scan_ies_bufsize,
235 req->ie, req->ie_len, band, 263 req->ie, req->ie_len, band,
236 req->rates[band], 0); 264 req->rates[band], &chandef);
237 local->hw_scan_req->ie_len = ielen; 265 local->hw_scan_req->ie_len = ielen;
238 local->hw_scan_req->no_cck = req->no_cck; 266 local->hw_scan_req->no_cck = req->no_cck;
239 267
@@ -280,7 +308,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
280 rcu_assign_pointer(local->scan_sdata, NULL); 308 rcu_assign_pointer(local->scan_sdata, NULL);
281 309
282 local->scanning = 0; 310 local->scanning = 0;
283 local->scan_channel = NULL; 311 local->scan_chandef.chan = NULL;
284 312
285 /* Set power back to normal operating levels. */ 313 /* Set power back to normal operating levels. */
286 ieee80211_hw_config(local, 0); 314 ieee80211_hw_config(local, 0);
@@ -615,11 +643,34 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
615{ 643{
616 int skip; 644 int skip;
617 struct ieee80211_channel *chan; 645 struct ieee80211_channel *chan;
646 enum nl80211_bss_scan_width oper_scan_width;
618 647
619 skip = 0; 648 skip = 0;
620 chan = local->scan_req->channels[local->scan_channel_idx]; 649 chan = local->scan_req->channels[local->scan_channel_idx];
621 650
622 local->scan_channel = chan; 651 local->scan_chandef.chan = chan;
652 local->scan_chandef.center_freq1 = chan->center_freq;
653 local->scan_chandef.center_freq2 = 0;
654 switch (local->scan_req->scan_width) {
655 case NL80211_BSS_CHAN_WIDTH_5:
656 local->scan_chandef.width = NL80211_CHAN_WIDTH_5;
657 break;
658 case NL80211_BSS_CHAN_WIDTH_10:
659 local->scan_chandef.width = NL80211_CHAN_WIDTH_10;
660 break;
661 case NL80211_BSS_CHAN_WIDTH_20:
662 /* If scanning on oper channel, use whatever channel-type
663 * is currently in use.
664 */
665 oper_scan_width = cfg80211_chandef_to_scan_width(
666 &local->_oper_chandef);
667 if (chan == local->_oper_chandef.chan &&
668 oper_scan_width == local->scan_req->scan_width)
669 local->scan_chandef = local->_oper_chandef;
670 else
671 local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
672 break;
673 }
623 674
624 if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) 675 if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL))
625 skip = 1; 676 skip = 1;
@@ -659,7 +710,7 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local,
659 unsigned long *next_delay) 710 unsigned long *next_delay)
660{ 711{
661 /* switch back to the operating channel */ 712 /* switch back to the operating channel */
662 local->scan_channel = NULL; 713 local->scan_chandef.chan = NULL;
663 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 714 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
664 715
665 /* disable PS */ 716 /* disable PS */
@@ -801,7 +852,8 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
801 852
802int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, 853int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
803 const u8 *ssid, u8 ssid_len, 854 const u8 *ssid, u8 ssid_len,
804 struct ieee80211_channel *chan) 855 struct ieee80211_channel *chan,
856 enum nl80211_bss_scan_width scan_width)
805{ 857{
806 struct ieee80211_local *local = sdata->local; 858 struct ieee80211_local *local = sdata->local;
807 int ret = -EBUSY; 859 int ret = -EBUSY;
@@ -851,6 +903,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
851 903
852 local->int_scan_req->ssids = &local->scan_ssid; 904 local->int_scan_req->ssids = &local->scan_ssid;
853 local->int_scan_req->n_ssids = 1; 905 local->int_scan_req->n_ssids = 1;
906 local->int_scan_req->scan_width = scan_width;
854 memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); 907 memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN);
855 local->int_scan_req->ssids[0].ssid_len = ssid_len; 908 local->int_scan_req->ssids[0].ssid_len = ssid_len;
856 909
@@ -912,6 +965,7 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
912{ 965{
913 struct ieee80211_local *local = sdata->local; 966 struct ieee80211_local *local = sdata->local;
914 struct ieee80211_sched_scan_ies sched_scan_ies = {}; 967 struct ieee80211_sched_scan_ies sched_scan_ies = {};
968 struct cfg80211_chan_def chandef;
915 int ret, i, iebufsz; 969 int ret, i, iebufsz;
916 970
917 iebufsz = 2 + IEEE80211_MAX_SSID_LEN + 971 iebufsz = 2 + IEEE80211_MAX_SSID_LEN +
@@ -939,10 +993,12 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
939 goto out_free; 993 goto out_free;
940 } 994 }
941 995
996 ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
997
942 sched_scan_ies.len[i] = 998 sched_scan_ies.len[i] =
943 ieee80211_build_preq_ies(local, sched_scan_ies.ie[i], 999 ieee80211_build_preq_ies(local, sched_scan_ies.ie[i],
944 iebufsz, req->ie, req->ie_len, 1000 iebufsz, req->ie, req->ie_len,
945 i, (u32) -1, 0); 1001 i, (u32) -1, &chandef);
946 } 1002 }
947 1003
948 ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); 1004 ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies);
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 43439203f4e4..368837fe3b80 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -235,7 +235,8 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info)
235 235
236 /* IEEE80211_RADIOTAP_RATE rate */ 236 /* IEEE80211_RADIOTAP_RATE rate */
237 if (info->status.rates[0].idx >= 0 && 237 if (info->status.rates[0].idx >= 0 &&
238 !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) 238 !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
239 IEEE80211_TX_RC_VHT_MCS)))
239 len += 2; 240 len += 2;
240 241
241 /* IEEE80211_RADIOTAP_TX_FLAGS */ 242 /* IEEE80211_RADIOTAP_TX_FLAGS */
@@ -244,17 +245,23 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info)
244 /* IEEE80211_RADIOTAP_DATA_RETRIES */ 245 /* IEEE80211_RADIOTAP_DATA_RETRIES */
245 len += 1; 246 len += 1;
246 247
247 /* IEEE80211_TX_RC_MCS */ 248 /* IEEE80211_RADIOTAP_MCS
248 if (info->status.rates[0].idx >= 0 && 249 * IEEE80211_RADIOTAP_VHT */
249 info->status.rates[0].flags & IEEE80211_TX_RC_MCS) 250 if (info->status.rates[0].idx >= 0) {
250 len += 3; 251 if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS)
252 len += 3;
253 else if (info->status.rates[0].flags & IEEE80211_TX_RC_VHT_MCS)
254 len = ALIGN(len, 2) + 12;
255 }
251 256
252 return len; 257 return len;
253} 258}
254 259
255static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band 260static void
256 *sband, struct sk_buff *skb, 261ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
257 int retry_count, int rtap_len) 262 struct ieee80211_supported_band *sband,
263 struct sk_buff *skb, int retry_count,
264 int rtap_len, int shift)
258{ 265{
259 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 266 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
260 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 267 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -279,9 +286,13 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
279 286
280 /* IEEE80211_RADIOTAP_RATE */ 287 /* IEEE80211_RADIOTAP_RATE */
281 if (info->status.rates[0].idx >= 0 && 288 if (info->status.rates[0].idx >= 0 &&
282 !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) { 289 !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
290 IEEE80211_TX_RC_VHT_MCS))) {
291 u16 rate;
292
283 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); 293 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
284 *pos = sband->bitrates[info->status.rates[0].idx].bitrate / 5; 294 rate = sband->bitrates[info->status.rates[0].idx].bitrate;
295 *pos = DIV_ROUND_UP(rate, 5 * (1 << shift));
285 /* padding for tx flags */ 296 /* padding for tx flags */
286 pos += 2; 297 pos += 2;
287 } 298 }
@@ -306,9 +317,12 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
306 *pos = retry_count; 317 *pos = retry_count;
307 pos++; 318 pos++;
308 319
309 /* IEEE80211_TX_RC_MCS */ 320 if (info->status.rates[0].idx < 0)
310 if (info->status.rates[0].idx >= 0 && 321 return;
311 info->status.rates[0].flags & IEEE80211_TX_RC_MCS) { 322
323 /* IEEE80211_RADIOTAP_MCS
324 * IEEE80211_RADIOTAP_VHT */
325 if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS) {
312 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); 326 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
313 pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS | 327 pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
314 IEEE80211_RADIOTAP_MCS_HAVE_GI | 328 IEEE80211_RADIOTAP_MCS_HAVE_GI |
@@ -321,8 +335,48 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
321 pos[1] |= IEEE80211_RADIOTAP_MCS_FMT_GF; 335 pos[1] |= IEEE80211_RADIOTAP_MCS_FMT_GF;
322 pos[2] = info->status.rates[0].idx; 336 pos[2] = info->status.rates[0].idx;
323 pos += 3; 337 pos += 3;
324 } 338 } else if (info->status.rates[0].flags & IEEE80211_TX_RC_VHT_MCS) {
339 u16 known = local->hw.radiotap_vht_details &
340 (IEEE80211_RADIOTAP_VHT_KNOWN_GI |
341 IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
342
343 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
344
345 /* required alignment from rthdr */
346 pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
325 347
348 /* u16 known - IEEE80211_RADIOTAP_VHT_KNOWN_* */
349 put_unaligned_le16(known, pos);
350 pos += 2;
351
352 /* u8 flags - IEEE80211_RADIOTAP_VHT_FLAG_* */
353 if (info->status.rates[0].flags & IEEE80211_TX_RC_SHORT_GI)
354 *pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI;
355 pos++;
356
357 /* u8 bandwidth */
358 if (info->status.rates[0].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
359 *pos = 1;
360 else if (info->status.rates[0].flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
361 *pos = 4;
362 else if (info->status.rates[0].flags & IEEE80211_TX_RC_160_MHZ_WIDTH)
363 *pos = 11;
364 else /* IEEE80211_TX_RC_{20_MHZ_WIDTH,FIXME:DUP_DATA} */
365 *pos = 0;
366 pos++;
367
368 /* u8 mcs_nss[4] */
369 *pos = (ieee80211_rate_get_vht_mcs(&info->status.rates[0]) << 4) |
370 ieee80211_rate_get_vht_nss(&info->status.rates[0]);
371 pos += 4;
372
373 /* u8 coding */
374 pos++;
375 /* u8 group_id */
376 pos++;
377 /* u16 partial_aid */
378 pos += 2;
379 }
326} 380}
327 381
328static void ieee80211_report_used_skb(struct ieee80211_local *local, 382static void ieee80211_report_used_skb(struct ieee80211_local *local,
@@ -424,6 +478,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
424 bool acked; 478 bool acked;
425 struct ieee80211_bar *bar; 479 struct ieee80211_bar *bar;
426 int rtap_len; 480 int rtap_len;
481 int shift = 0;
427 482
428 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { 483 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
429 if ((info->flags & IEEE80211_TX_CTL_AMPDU) && 484 if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
@@ -458,6 +513,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
458 if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr)) 513 if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr))
459 continue; 514 continue;
460 515
516 shift = ieee80211_vif_get_shift(&sta->sdata->vif);
517
461 if (info->flags & IEEE80211_TX_STATUS_EOSP) 518 if (info->flags & IEEE80211_TX_STATUS_EOSP)
462 clear_sta_flag(sta, WLAN_STA_SP); 519 clear_sta_flag(sta, WLAN_STA_SP);
463 520
@@ -557,7 +614,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
557 614
558 rcu_read_unlock(); 615 rcu_read_unlock();
559 616
560 ieee80211_led_tx(local, 0); 617 ieee80211_led_tx(local);
561 618
562 /* SNMP counters 619 /* SNMP counters
563 * Fragments are passed to low-level drivers as separate skbs, so these 620 * Fragments are passed to low-level drivers as separate skbs, so these
@@ -624,7 +681,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
624 dev_kfree_skb(skb); 681 dev_kfree_skb(skb);
625 return; 682 return;
626 } 683 }
627 ieee80211_add_tx_radiotap_header(sband, skb, retry_count, rtap_len); 684 ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count,
685 rtap_len, shift);
628 686
629 /* XXX: is this sufficient for BPF? */ 687 /* XXX: is this sufficient for BPF? */
630 skb_set_mac_header(skb, 0); 688 skb_set_mac_header(skb, 0);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index c215fafd7a2f..1aba645882bd 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1906,6 +1906,32 @@ TRACE_EVENT(api_radar_detected,
1906 ) 1906 )
1907); 1907);
1908 1908
1909TRACE_EVENT(drv_channel_switch_beacon,
1910 TP_PROTO(struct ieee80211_local *local,
1911 struct ieee80211_sub_if_data *sdata,
1912 struct cfg80211_chan_def *chandef),
1913
1914 TP_ARGS(local, sdata, chandef),
1915
1916 TP_STRUCT__entry(
1917 LOCAL_ENTRY
1918 VIF_ENTRY
1919 CHANDEF_ENTRY
1920 ),
1921
1922 TP_fast_assign(
1923 LOCAL_ASSIGN;
1924 VIF_ASSIGN;
1925 CHANDEF_ASSIGN(chandef);
1926 ),
1927
1928 TP_printk(
1929 LOCAL_PR_FMT VIF_PR_FMT " channel switch to " CHANDEF_PR_FMT,
1930 LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG
1931 )
1932);
1933
1934
1909#ifdef CONFIG_MAC80211_MESSAGE_TRACING 1935#ifdef CONFIG_MAC80211_MESSAGE_TRACING
1910#undef TRACE_SYSTEM 1936#undef TRACE_SYSTEM
1911#define TRACE_SYSTEM mac80211_msg 1937#define TRACE_SYSTEM mac80211_msg
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4105d0ca963e..3456c0486b48 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -40,12 +40,22 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
40 struct sk_buff *skb, int group_addr, 40 struct sk_buff *skb, int group_addr,
41 int next_frag_len) 41 int next_frag_len)
42{ 42{
43 int rate, mrate, erp, dur, i; 43 int rate, mrate, erp, dur, i, shift = 0;
44 struct ieee80211_rate *txrate; 44 struct ieee80211_rate *txrate;
45 struct ieee80211_local *local = tx->local; 45 struct ieee80211_local *local = tx->local;
46 struct ieee80211_supported_band *sband; 46 struct ieee80211_supported_band *sband;
47 struct ieee80211_hdr *hdr; 47 struct ieee80211_hdr *hdr;
48 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 48 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
49 struct ieee80211_chanctx_conf *chanctx_conf;
50 u32 rate_flags = 0;
51
52 rcu_read_lock();
53 chanctx_conf = rcu_dereference(tx->sdata->vif.chanctx_conf);
54 if (chanctx_conf) {
55 shift = ieee80211_chandef_get_shift(&chanctx_conf->def);
56 rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
57 }
58 rcu_read_unlock();
49 59
50 /* assume HW handles this */ 60 /* assume HW handles this */
51 if (tx->rate.flags & IEEE80211_TX_RC_MCS) 61 if (tx->rate.flags & IEEE80211_TX_RC_MCS)
@@ -122,8 +132,11 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
122 if (r->bitrate > txrate->bitrate) 132 if (r->bitrate > txrate->bitrate)
123 break; 133 break;
124 134
135 if ((rate_flags & r->flags) != rate_flags)
136 continue;
137
125 if (tx->sdata->vif.bss_conf.basic_rates & BIT(i)) 138 if (tx->sdata->vif.bss_conf.basic_rates & BIT(i))
126 rate = r->bitrate; 139 rate = DIV_ROUND_UP(r->bitrate, 1 << shift);
127 140
128 switch (sband->band) { 141 switch (sband->band) {
129 case IEEE80211_BAND_2GHZ: { 142 case IEEE80211_BAND_2GHZ: {
@@ -150,7 +163,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
150 if (rate == -1) { 163 if (rate == -1) {
151 /* No matching basic rate found; use highest suitable mandatory 164 /* No matching basic rate found; use highest suitable mandatory
152 * PHY rate */ 165 * PHY rate */
153 rate = mrate; 166 rate = DIV_ROUND_UP(mrate, 1 << shift);
154 } 167 }
155 168
156 /* Don't calculate ACKs for QoS Frames with NoAck Policy set */ 169 /* Don't calculate ACKs for QoS Frames with NoAck Policy set */
@@ -162,7 +175,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
162 * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up 175 * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up
163 * to closest integer */ 176 * to closest integer */
164 dur = ieee80211_frame_duration(sband->band, 10, rate, erp, 177 dur = ieee80211_frame_duration(sband->band, 10, rate, erp,
165 tx->sdata->vif.bss_conf.use_short_preamble); 178 tx->sdata->vif.bss_conf.use_short_preamble,
179 shift);
166 180
167 if (next_frag_len) { 181 if (next_frag_len) {
168 /* Frame is fragmented: duration increases with time needed to 182 /* Frame is fragmented: duration increases with time needed to
@@ -171,7 +185,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
171 /* next fragment */ 185 /* next fragment */
172 dur += ieee80211_frame_duration(sband->band, next_frag_len, 186 dur += ieee80211_frame_duration(sband->band, next_frag_len,
173 txrate->bitrate, erp, 187 txrate->bitrate, erp,
174 tx->sdata->vif.bss_conf.use_short_preamble); 188 tx->sdata->vif.bss_conf.use_short_preamble,
189 shift);
175 } 190 }
176 191
177 return cpu_to_le16(dur); 192 return cpu_to_le16(dur);
@@ -524,9 +539,11 @@ ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx)
524{ 539{
525 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); 540 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
526 541
527 if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol && 542 if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol)) {
528 tx->sdata->control_port_no_encrypt)) 543 if (tx->sdata->control_port_no_encrypt)
529 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; 544 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
545 info->control.flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
546 }
530 547
531 return TX_CONTINUE; 548 return TX_CONTINUE;
532} 549}
@@ -764,9 +781,11 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
764 /* 781 /*
765 * Anything but QoS data that has a sequence number field 782 * Anything but QoS data that has a sequence number field
766 * (is long enough) gets a sequence number from the global 783 * (is long enough) gets a sequence number from the global
767 * counter. 784 * counter. QoS data frames with a multicast destination
785 * also use the global counter (802.11-2012 9.3.2.10).
768 */ 786 */
769 if (!ieee80211_is_data_qos(hdr->frame_control)) { 787 if (!ieee80211_is_data_qos(hdr->frame_control) ||
788 is_multicast_ether_addr(hdr->addr1)) {
770 /* driver should assign sequence number */ 789 /* driver should assign sequence number */
771 info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; 790 info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
772 /* for pure STA mode without beacons, we can do it */ 791 /* for pure STA mode without beacons, we can do it */
@@ -1257,6 +1276,10 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
1257 1276
1258 switch (sdata->vif.type) { 1277 switch (sdata->vif.type) {
1259 case NL80211_IFTYPE_MONITOR: 1278 case NL80211_IFTYPE_MONITOR:
1279 if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) {
1280 vif = &sdata->vif;
1281 break;
1282 }
1260 sdata = rcu_dereference(local->monitor_sdata); 1283 sdata = rcu_dereference(local->monitor_sdata);
1261 if (sdata) { 1284 if (sdata) {
1262 vif = &sdata->vif; 1285 vif = &sdata->vif;
@@ -1281,7 +1304,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
1281 txpending); 1304 txpending);
1282 1305
1283 ieee80211_tpt_led_trig_tx(local, fc, led_len); 1306 ieee80211_tpt_led_trig_tx(local, fc, led_len);
1284 ieee80211_led_tx(local, 1);
1285 1307
1286 WARN_ON_ONCE(!skb_queue_empty(skbs)); 1308 WARN_ON_ONCE(!skb_queue_empty(skbs));
1287 1309
@@ -2320,6 +2342,81 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
2320 return 0; 2342 return 0;
2321} 2343}
2322 2344
2345void ieee80211_csa_finish(struct ieee80211_vif *vif)
2346{
2347 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
2348
2349 ieee80211_queue_work(&sdata->local->hw,
2350 &sdata->csa_finalize_work);
2351}
2352EXPORT_SYMBOL(ieee80211_csa_finish);
2353
2354static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
2355 struct beacon_data *beacon)
2356{
2357 struct probe_resp *resp;
2358 int counter_offset_beacon = sdata->csa_counter_offset_beacon;
2359 int counter_offset_presp = sdata->csa_counter_offset_presp;
2360
2361 /* warn if the driver did not check for/react to csa completeness */
2362 if (WARN_ON(((u8 *)beacon->tail)[counter_offset_beacon] == 0))
2363 return;
2364
2365 ((u8 *)beacon->tail)[counter_offset_beacon]--;
2366
2367 if (sdata->vif.type == NL80211_IFTYPE_AP &&
2368 counter_offset_presp) {
2369 rcu_read_lock();
2370 resp = rcu_dereference(sdata->u.ap.probe_resp);
2371
2372 /* if nl80211 accepted the offset, this should not happen. */
2373 if (WARN_ON(!resp)) {
2374 rcu_read_unlock();
2375 return;
2376 }
2377 resp->data[counter_offset_presp]--;
2378 rcu_read_unlock();
2379 }
2380}
2381
2382bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
2383{
2384 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
2385 struct beacon_data *beacon = NULL;
2386 u8 *beacon_data;
2387 size_t beacon_data_len;
2388 int counter_beacon = sdata->csa_counter_offset_beacon;
2389 int ret = false;
2390
2391 if (!ieee80211_sdata_running(sdata))
2392 return false;
2393
2394 rcu_read_lock();
2395 if (vif->type == NL80211_IFTYPE_AP) {
2396 struct ieee80211_if_ap *ap = &sdata->u.ap;
2397
2398 beacon = rcu_dereference(ap->beacon);
2399 if (WARN_ON(!beacon || !beacon->tail))
2400 goto out;
2401 beacon_data = beacon->tail;
2402 beacon_data_len = beacon->tail_len;
2403 } else {
2404 WARN_ON(1);
2405 goto out;
2406 }
2407
2408 if (WARN_ON(counter_beacon > beacon_data_len))
2409 goto out;
2410
2411 if (beacon_data[counter_beacon] == 0)
2412 ret = true;
2413 out:
2414 rcu_read_unlock();
2415
2416 return ret;
2417}
2418EXPORT_SYMBOL(ieee80211_csa_is_complete);
2419
2323struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, 2420struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
2324 struct ieee80211_vif *vif, 2421 struct ieee80211_vif *vif,
2325 u16 *tim_offset, u16 *tim_length) 2422 u16 *tim_offset, u16 *tim_length)
@@ -2350,6 +2447,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
2350 struct beacon_data *beacon = rcu_dereference(ap->beacon); 2447 struct beacon_data *beacon = rcu_dereference(ap->beacon);
2351 2448
2352 if (beacon) { 2449 if (beacon) {
2450 if (sdata->vif.csa_active)
2451 ieee80211_update_csa(sdata, beacon);
2452
2353 /* 2453 /*
2354 * headroom, head length, 2454 * headroom, head length,
2355 * tail length and maximum TIM length 2455 * tail length and maximum TIM length
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 22654452a561..e1b34a18b243 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -107,7 +107,8 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx)
107} 107}
108 108
109int ieee80211_frame_duration(enum ieee80211_band band, size_t len, 109int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
110 int rate, int erp, int short_preamble) 110 int rate, int erp, int short_preamble,
111 int shift)
111{ 112{
112 int dur; 113 int dur;
113 114
@@ -118,6 +119,9 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
118 * 119 *
119 * rate is in 100 kbps, so divident is multiplied by 10 in the 120 * rate is in 100 kbps, so divident is multiplied by 10 in the
120 * DIV_ROUND_UP() operations. 121 * DIV_ROUND_UP() operations.
122 *
123 * shift may be 2 for 5 MHz channels or 1 for 10 MHz channels, and
124 * is assumed to be 0 otherwise.
121 */ 125 */
122 126
123 if (band == IEEE80211_BAND_5GHZ || erp) { 127 if (band == IEEE80211_BAND_5GHZ || erp) {
@@ -130,13 +134,23 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
130 * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext 134 * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext
131 * 135 *
132 * T_SYM = 4 usec 136 * T_SYM = 4 usec
133 * 802.11a - 17.5.2: aSIFSTime = 16 usec 137 * 802.11a - 18.5.2: aSIFSTime = 16 usec
134 * 802.11g - 19.8.4: aSIFSTime = 10 usec + 138 * 802.11g - 19.8.4: aSIFSTime = 10 usec +
135 * signal ext = 6 usec 139 * signal ext = 6 usec
136 */ 140 */
137 dur = 16; /* SIFS + signal ext */ 141 dur = 16; /* SIFS + signal ext */
138 dur += 16; /* 17.3.2.3: T_PREAMBLE = 16 usec */ 142 dur += 16; /* IEEE 802.11-2012 18.3.2.4: T_PREAMBLE = 16 usec */
139 dur += 4; /* 17.3.2.3: T_SIGNAL = 4 usec */ 143 dur += 4; /* IEEE 802.11-2012 18.3.2.4: T_SIGNAL = 4 usec */
144
145 /* IEEE 802.11-2012 18.3.2.4: all values above are:
146 * * times 4 for 5 MHz
147 * * times 2 for 10 MHz
148 */
149 dur *= 1 << shift;
150
151 /* rates should already consider the channel bandwidth,
152 * don't apply divisor again.
153 */
140 dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10, 154 dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10,
141 4 * rate); /* T_SYM x N_SYM */ 155 4 * rate); /* T_SYM x N_SYM */
142 } else { 156 } else {
@@ -168,7 +182,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
168{ 182{
169 struct ieee80211_sub_if_data *sdata; 183 struct ieee80211_sub_if_data *sdata;
170 u16 dur; 184 u16 dur;
171 int erp; 185 int erp, shift = 0;
172 bool short_preamble = false; 186 bool short_preamble = false;
173 187
174 erp = 0; 188 erp = 0;
@@ -177,10 +191,11 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
177 short_preamble = sdata->vif.bss_conf.use_short_preamble; 191 short_preamble = sdata->vif.bss_conf.use_short_preamble;
178 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) 192 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
179 erp = rate->flags & IEEE80211_RATE_ERP_G; 193 erp = rate->flags & IEEE80211_RATE_ERP_G;
194 shift = ieee80211_vif_get_shift(vif);
180 } 195 }
181 196
182 dur = ieee80211_frame_duration(band, frame_len, rate->bitrate, erp, 197 dur = ieee80211_frame_duration(band, frame_len, rate->bitrate, erp,
183 short_preamble); 198 short_preamble, shift);
184 199
185 return cpu_to_le16(dur); 200 return cpu_to_le16(dur);
186} 201}
@@ -194,7 +209,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
194 struct ieee80211_rate *rate; 209 struct ieee80211_rate *rate;
195 struct ieee80211_sub_if_data *sdata; 210 struct ieee80211_sub_if_data *sdata;
196 bool short_preamble; 211 bool short_preamble;
197 int erp; 212 int erp, shift = 0, bitrate;
198 u16 dur; 213 u16 dur;
199 struct ieee80211_supported_band *sband; 214 struct ieee80211_supported_band *sband;
200 215
@@ -210,17 +225,20 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
210 short_preamble = sdata->vif.bss_conf.use_short_preamble; 225 short_preamble = sdata->vif.bss_conf.use_short_preamble;
211 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) 226 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
212 erp = rate->flags & IEEE80211_RATE_ERP_G; 227 erp = rate->flags & IEEE80211_RATE_ERP_G;
228 shift = ieee80211_vif_get_shift(vif);
213 } 229 }
214 230
231 bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift);
232
215 /* CTS duration */ 233 /* CTS duration */
216 dur = ieee80211_frame_duration(sband->band, 10, rate->bitrate, 234 dur = ieee80211_frame_duration(sband->band, 10, bitrate,
217 erp, short_preamble); 235 erp, short_preamble, shift);
218 /* Data frame duration */ 236 /* Data frame duration */
219 dur += ieee80211_frame_duration(sband->band, frame_len, rate->bitrate, 237 dur += ieee80211_frame_duration(sband->band, frame_len, bitrate,
220 erp, short_preamble); 238 erp, short_preamble, shift);
221 /* ACK duration */ 239 /* ACK duration */
222 dur += ieee80211_frame_duration(sband->band, 10, rate->bitrate, 240 dur += ieee80211_frame_duration(sband->band, 10, bitrate,
223 erp, short_preamble); 241 erp, short_preamble, shift);
224 242
225 return cpu_to_le16(dur); 243 return cpu_to_le16(dur);
226} 244}
@@ -235,7 +253,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
235 struct ieee80211_rate *rate; 253 struct ieee80211_rate *rate;
236 struct ieee80211_sub_if_data *sdata; 254 struct ieee80211_sub_if_data *sdata;
237 bool short_preamble; 255 bool short_preamble;
238 int erp; 256 int erp, shift = 0, bitrate;
239 u16 dur; 257 u16 dur;
240 struct ieee80211_supported_band *sband; 258 struct ieee80211_supported_band *sband;
241 259
@@ -250,15 +268,18 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
250 short_preamble = sdata->vif.bss_conf.use_short_preamble; 268 short_preamble = sdata->vif.bss_conf.use_short_preamble;
251 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) 269 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
252 erp = rate->flags & IEEE80211_RATE_ERP_G; 270 erp = rate->flags & IEEE80211_RATE_ERP_G;
271 shift = ieee80211_vif_get_shift(vif);
253 } 272 }
254 273
274 bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift);
275
255 /* Data frame duration */ 276 /* Data frame duration */
256 dur = ieee80211_frame_duration(sband->band, frame_len, rate->bitrate, 277 dur = ieee80211_frame_duration(sband->band, frame_len, bitrate,
257 erp, short_preamble); 278 erp, short_preamble, shift);
258 if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) { 279 if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) {
259 /* ACK duration */ 280 /* ACK duration */
260 dur += ieee80211_frame_duration(sband->band, 10, rate->bitrate, 281 dur += ieee80211_frame_duration(sband->band, 10, bitrate,
261 erp, short_preamble); 282 erp, short_preamble, shift);
262 } 283 }
263 284
264 return cpu_to_le16(dur); 285 return cpu_to_le16(dur);
@@ -1052,32 +1073,6 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
1052 } 1073 }
1053} 1074}
1054 1075
1055void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
1056 const size_t supp_rates_len,
1057 const u8 *supp_rates)
1058{
1059 struct ieee80211_chanctx_conf *chanctx_conf;
1060 int i, have_higher_than_11mbit = 0;
1061
1062 /* cf. IEEE 802.11 9.2.12 */
1063 for (i = 0; i < supp_rates_len; i++)
1064 if ((supp_rates[i] & 0x7f) * 5 > 110)
1065 have_higher_than_11mbit = 1;
1066
1067 rcu_read_lock();
1068 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
1069
1070 if (chanctx_conf &&
1071 chanctx_conf->def.chan->band == IEEE80211_BAND_2GHZ &&
1072 have_higher_than_11mbit)
1073 sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
1074 else
1075 sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
1076 rcu_read_unlock();
1077
1078 ieee80211_set_wmm_default(sdata, true);
1079}
1080
1081void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, 1076void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
1082 u16 transaction, u16 auth_alg, u16 status, 1077 u16 transaction, u16 auth_alg, u16 status,
1083 const u8 *extra, size_t extra_len, const u8 *da, 1078 const u8 *extra, size_t extra_len, const u8 *da,
@@ -1162,7 +1157,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
1162int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, 1157int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1163 size_t buffer_len, const u8 *ie, size_t ie_len, 1158 size_t buffer_len, const u8 *ie, size_t ie_len,
1164 enum ieee80211_band band, u32 rate_mask, 1159 enum ieee80211_band band, u32 rate_mask,
1165 u8 channel) 1160 struct cfg80211_chan_def *chandef)
1166{ 1161{
1167 struct ieee80211_supported_band *sband; 1162 struct ieee80211_supported_band *sband;
1168 u8 *pos = buffer, *end = buffer + buffer_len; 1163 u8 *pos = buffer, *end = buffer + buffer_len;
@@ -1171,16 +1166,26 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1171 u8 rates[32]; 1166 u8 rates[32];
1172 int num_rates; 1167 int num_rates;
1173 int ext_rates_len; 1168 int ext_rates_len;
1169 int shift;
1170 u32 rate_flags;
1174 1171
1175 sband = local->hw.wiphy->bands[band]; 1172 sband = local->hw.wiphy->bands[band];
1176 if (WARN_ON_ONCE(!sband)) 1173 if (WARN_ON_ONCE(!sband))
1177 return 0; 1174 return 0;
1178 1175
1176 rate_flags = ieee80211_chandef_rate_flags(chandef);
1177 shift = ieee80211_chandef_get_shift(chandef);
1178
1179 num_rates = 0; 1179 num_rates = 0;
1180 for (i = 0; i < sband->n_bitrates; i++) { 1180 for (i = 0; i < sband->n_bitrates; i++) {
1181 if ((BIT(i) & rate_mask) == 0) 1181 if ((BIT(i) & rate_mask) == 0)
1182 continue; /* skip rate */ 1182 continue; /* skip rate */
1183 rates[num_rates++] = (u8) (sband->bitrates[i].bitrate / 5); 1183 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
1184 continue;
1185
1186 rates[num_rates++] =
1187 (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate,
1188 (1 << shift) * 5);
1184 } 1189 }
1185 1190
1186 supp_rates_len = min_t(int, num_rates, 8); 1191 supp_rates_len = min_t(int, num_rates, 8);
@@ -1220,12 +1225,13 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1220 pos += ext_rates_len; 1225 pos += ext_rates_len;
1221 } 1226 }
1222 1227
1223 if (channel && sband->band == IEEE80211_BAND_2GHZ) { 1228 if (chandef->chan && sband->band == IEEE80211_BAND_2GHZ) {
1224 if (end - pos < 3) 1229 if (end - pos < 3)
1225 goto out_err; 1230 goto out_err;
1226 *pos++ = WLAN_EID_DS_PARAMS; 1231 *pos++ = WLAN_EID_DS_PARAMS;
1227 *pos++ = 1; 1232 *pos++ = 1;
1228 *pos++ = channel; 1233 *pos++ = ieee80211_frequency_to_channel(
1234 chandef->chan->center_freq);
1229 } 1235 }
1230 1236
1231 /* insert custom IEs that go before HT */ 1237 /* insert custom IEs that go before HT */
@@ -1290,9 +1296,9 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
1290 bool directed) 1296 bool directed)
1291{ 1297{
1292 struct ieee80211_local *local = sdata->local; 1298 struct ieee80211_local *local = sdata->local;
1299 struct cfg80211_chan_def chandef;
1293 struct sk_buff *skb; 1300 struct sk_buff *skb;
1294 struct ieee80211_mgmt *mgmt; 1301 struct ieee80211_mgmt *mgmt;
1295 u8 chan_no;
1296 int ies_len; 1302 int ies_len;
1297 1303
1298 /* 1304 /*
@@ -1300,10 +1306,11 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
1300 * in order to maximize the chance that we get a response. Some 1306 * in order to maximize the chance that we get a response. Some
1301 * badly-behaved APs don't respond when this parameter is included. 1307 * badly-behaved APs don't respond when this parameter is included.
1302 */ 1308 */
1309 chandef.width = sdata->vif.bss_conf.chandef.width;
1303 if (directed) 1310 if (directed)
1304 chan_no = 0; 1311 chandef.chan = NULL;
1305 else 1312 else
1306 chan_no = ieee80211_frequency_to_channel(chan->center_freq); 1313 chandef.chan = chan;
1307 1314
1308 skb = ieee80211_probereq_get(&local->hw, &sdata->vif, 1315 skb = ieee80211_probereq_get(&local->hw, &sdata->vif,
1309 ssid, ssid_len, 100 + ie_len); 1316 ssid, ssid_len, 100 + ie_len);
@@ -1313,7 +1320,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
1313 ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb), 1320 ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb),
1314 skb_tailroom(skb), 1321 skb_tailroom(skb),
1315 ie, ie_len, chan->band, 1322 ie, ie_len, chan->band,
1316 ratemask, chan_no); 1323 ratemask, &chandef);
1317 skb_put(skb, ies_len); 1324 skb_put(skb, ies_len);
1318 1325
1319 if (dst) { 1326 if (dst) {
@@ -1347,16 +1354,19 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1347 } 1354 }
1348} 1355}
1349 1356
1350u32 ieee80211_sta_get_rates(struct ieee80211_local *local, 1357u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
1351 struct ieee802_11_elems *elems, 1358 struct ieee802_11_elems *elems,
1352 enum ieee80211_band band, u32 *basic_rates) 1359 enum ieee80211_band band, u32 *basic_rates)
1353{ 1360{
1354 struct ieee80211_supported_band *sband; 1361 struct ieee80211_supported_band *sband;
1355 struct ieee80211_rate *bitrates; 1362 struct ieee80211_rate *bitrates;
1356 size_t num_rates; 1363 size_t num_rates;
1357 u32 supp_rates; 1364 u32 supp_rates, rate_flags;
1358 int i, j; 1365 int i, j, shift;
1359 sband = local->hw.wiphy->bands[band]; 1366 sband = sdata->local->hw.wiphy->bands[band];
1367
1368 rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
1369 shift = ieee80211_vif_get_shift(&sdata->vif);
1360 1370
1361 if (WARN_ON(!sband)) 1371 if (WARN_ON(!sband))
1362 return 1; 1372 return 1;
@@ -1381,7 +1391,15 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
1381 continue; 1391 continue;
1382 1392
1383 for (j = 0; j < num_rates; j++) { 1393 for (j = 0; j < num_rates; j++) {
1384 if (bitrates[j].bitrate == own_rate) { 1394 int brate;
1395 if ((rate_flags & sband->bitrates[j].flags)
1396 != rate_flags)
1397 continue;
1398
1399 brate = DIV_ROUND_UP(sband->bitrates[j].bitrate,
1400 1 << shift);
1401
1402 if (brate == own_rate) {
1385 supp_rates |= BIT(j); 1403 supp_rates |= BIT(j);
1386 if (basic_rates && is_basic) 1404 if (basic_rates && is_basic)
1387 *basic_rates |= BIT(j); 1405 *basic_rates |= BIT(j);
@@ -1435,8 +1453,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1435 local->resuming = true; 1453 local->resuming = true;
1436 1454
1437 if (local->wowlan) { 1455 if (local->wowlan) {
1438 local->wowlan = false;
1439 res = drv_resume(local); 1456 res = drv_resume(local);
1457 local->wowlan = false;
1440 if (res < 0) { 1458 if (res < 0) {
1441 local->resuming = false; 1459 local->resuming = false;
1442 return res; 1460 return res;
@@ -2004,18 +2022,56 @@ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
2004 cfg80211_chandef_create(chandef, control_chan, channel_type); 2022 cfg80211_chandef_create(chandef, control_chan, channel_type);
2005} 2023}
2006 2024
2025int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
2026 const struct ieee80211_supported_band *sband,
2027 const u8 *srates, int srates_len, u32 *rates)
2028{
2029 u32 rate_flags = ieee80211_chandef_rate_flags(chandef);
2030 int shift = ieee80211_chandef_get_shift(chandef);
2031 struct ieee80211_rate *br;
2032 int brate, rate, i, j, count = 0;
2033
2034 *rates = 0;
2035
2036 for (i = 0; i < srates_len; i++) {
2037 rate = srates[i] & 0x7f;
2038
2039 for (j = 0; j < sband->n_bitrates; j++) {
2040 br = &sband->bitrates[j];
2041 if ((rate_flags & br->flags) != rate_flags)
2042 continue;
2043
2044 brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
2045 if (brate == rate) {
2046 *rates |= BIT(j);
2047 count++;
2048 break;
2049 }
2050 }
2051 }
2052 return count;
2053}
2054
2007int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, 2055int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
2008 struct sk_buff *skb, bool need_basic, 2056 struct sk_buff *skb, bool need_basic,
2009 enum ieee80211_band band) 2057 enum ieee80211_band band)
2010{ 2058{
2011 struct ieee80211_local *local = sdata->local; 2059 struct ieee80211_local *local = sdata->local;
2012 struct ieee80211_supported_band *sband; 2060 struct ieee80211_supported_band *sband;
2013 int rate; 2061 int rate, shift;
2014 u8 i, rates, *pos; 2062 u8 i, rates, *pos;
2015 u32 basic_rates = sdata->vif.bss_conf.basic_rates; 2063 u32 basic_rates = sdata->vif.bss_conf.basic_rates;
2064 u32 rate_flags;
2016 2065
2066 shift = ieee80211_vif_get_shift(&sdata->vif);
2067 rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
2017 sband = local->hw.wiphy->bands[band]; 2068 sband = local->hw.wiphy->bands[band];
2018 rates = sband->n_bitrates; 2069 rates = 0;
2070 for (i = 0; i < sband->n_bitrates; i++) {
2071 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
2072 continue;
2073 rates++;
2074 }
2019 if (rates > 8) 2075 if (rates > 8)
2020 rates = 8; 2076 rates = 8;
2021 2077
@@ -2027,10 +2083,15 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
2027 *pos++ = rates; 2083 *pos++ = rates;
2028 for (i = 0; i < rates; i++) { 2084 for (i = 0; i < rates; i++) {
2029 u8 basic = 0; 2085 u8 basic = 0;
2086 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
2087 continue;
2088
2030 if (need_basic && basic_rates & BIT(i)) 2089 if (need_basic && basic_rates & BIT(i))
2031 basic = 0x80; 2090 basic = 0x80;
2032 rate = sband->bitrates[i].bitrate; 2091 rate = sband->bitrates[i].bitrate;
2033 *pos++ = basic | (u8) (rate / 5); 2092 rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
2093 5 * (1 << shift));
2094 *pos++ = basic | (u8) rate;
2034 } 2095 }
2035 2096
2036 return 0; 2097 return 0;
@@ -2042,12 +2103,22 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
2042{ 2103{
2043 struct ieee80211_local *local = sdata->local; 2104 struct ieee80211_local *local = sdata->local;
2044 struct ieee80211_supported_band *sband; 2105 struct ieee80211_supported_band *sband;
2045 int rate; 2106 int rate, skip, shift;
2046 u8 i, exrates, *pos; 2107 u8 i, exrates, *pos;
2047 u32 basic_rates = sdata->vif.bss_conf.basic_rates; 2108 u32 basic_rates = sdata->vif.bss_conf.basic_rates;
2109 u32 rate_flags;
2110
2111 rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
2112 shift = ieee80211_vif_get_shift(&sdata->vif);
2048 2113
2049 sband = local->hw.wiphy->bands[band]; 2114 sband = local->hw.wiphy->bands[band];
2050 exrates = sband->n_bitrates; 2115 exrates = 0;
2116 for (i = 0; i < sband->n_bitrates; i++) {
2117 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
2118 continue;
2119 exrates++;
2120 }
2121
2051 if (exrates > 8) 2122 if (exrates > 8)
2052 exrates -= 8; 2123 exrates -= 8;
2053 else 2124 else
@@ -2060,12 +2131,19 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
2060 pos = skb_put(skb, exrates + 2); 2131 pos = skb_put(skb, exrates + 2);
2061 *pos++ = WLAN_EID_EXT_SUPP_RATES; 2132 *pos++ = WLAN_EID_EXT_SUPP_RATES;
2062 *pos++ = exrates; 2133 *pos++ = exrates;
2134 skip = 0;
2063 for (i = 8; i < sband->n_bitrates; i++) { 2135 for (i = 8; i < sband->n_bitrates; i++) {
2064 u8 basic = 0; 2136 u8 basic = 0;
2137 if ((rate_flags & sband->bitrates[i].flags)
2138 != rate_flags)
2139 continue;
2140 if (skip++ < 8)
2141 continue;
2065 if (need_basic && basic_rates & BIT(i)) 2142 if (need_basic && basic_rates & BIT(i))
2066 basic = 0x80; 2143 basic = 0x80;
2067 rate = sband->bitrates[i].bitrate; 2144 rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
2068 *pos++ = basic | (u8) (rate / 5); 2145 5 * (1 << shift));
2146 *pos++ = basic | (u8) rate;
2069 } 2147 }
2070 } 2148 }
2071 return 0; 2149 return 0;
@@ -2149,9 +2227,17 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
2149 ri.flags |= RATE_INFO_FLAGS_SHORT_GI; 2227 ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
2150 } else { 2228 } else {
2151 struct ieee80211_supported_band *sband; 2229 struct ieee80211_supported_band *sband;
2230 int shift = 0;
2231 int bitrate;
2232
2233 if (status->flag & RX_FLAG_10MHZ)
2234 shift = 1;
2235 if (status->flag & RX_FLAG_5MHZ)
2236 shift = 2;
2152 2237
2153 sband = local->hw.wiphy->bands[status->band]; 2238 sband = local->hw.wiphy->bands[status->band];
2154 ri.legacy = sband->bitrates[status->rate_idx].bitrate; 2239 bitrate = sband->bitrates[status->rate_idx].bitrate;
2240 ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift));
2155 } 2241 }
2156 2242
2157 rate = cfg80211_calculate_bitrate(&ri); 2243 rate = cfg80211_calculate_bitrate(&ri);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 56d22cae5906..6e839b6dff2b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -408,21 +408,10 @@ config NF_NAT_TFTP
408 depends on NF_CONNTRACK && NF_NAT 408 depends on NF_CONNTRACK && NF_NAT
409 default NF_NAT && NF_CONNTRACK_TFTP 409 default NF_NAT && NF_CONNTRACK_TFTP
410 410
411endif # NF_CONNTRACK 411config NETFILTER_SYNPROXY
412 412 tristate
413# transparent proxy support
414config NETFILTER_TPROXY
415 tristate "Transparent proxying support"
416 depends on IP_NF_MANGLE
417 depends on NETFILTER_ADVANCED
418 help
419 This option enables transparent proxying support, that is,
420 support for handling non-locally bound IPv4 TCP and UDP sockets.
421 For it to work you will have to configure certain iptables rules
422 and use policy routing. For more information on how to set it up
423 see Documentation/networking/tproxy.txt.
424 413
425 To compile it as a module, choose M here. If unsure, say N. 414endif # NF_CONNTRACK
426 415
427config NETFILTER_XTABLES 416config NETFILTER_XTABLES
428 tristate "Netfilter Xtables support (required for ip_tables)" 417 tristate "Netfilter Xtables support (required for ip_tables)"
@@ -720,10 +709,10 @@ config NETFILTER_XT_TARGET_TEE
720 this clone be rerouted to another nexthop. 709 this clone be rerouted to another nexthop.
721 710
722config NETFILTER_XT_TARGET_TPROXY 711config NETFILTER_XT_TARGET_TPROXY
723 tristate '"TPROXY" target support' 712 tristate '"TPROXY" target transparent proxying support'
724 depends on NETFILTER_TPROXY
725 depends on NETFILTER_XTABLES 713 depends on NETFILTER_XTABLES
726 depends on NETFILTER_ADVANCED 714 depends on NETFILTER_ADVANCED
715 depends on IP_NF_MANGLE
727 select NF_DEFRAG_IPV4 716 select NF_DEFRAG_IPV4
728 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 717 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
729 help 718 help
@@ -731,6 +720,9 @@ config NETFILTER_XT_TARGET_TPROXY
731 REDIRECT. It can only be used in the mangle table and is useful 720 REDIRECT. It can only be used in the mangle table and is useful
732 to redirect traffic to a transparent proxy. It does _not_ depend 721 to redirect traffic to a transparent proxy. It does _not_ depend
733 on Netfilter connection tracking and NAT, unlike REDIRECT. 722 on Netfilter connection tracking and NAT, unlike REDIRECT.
723 For it to work you will have to configure certain iptables rules
724 and use policy routing. For more information on how to set it up
725 see Documentation/networking/tproxy.txt.
734 726
735 To compile it as a module, choose M here. If unsure, say N. 727 To compile it as a module, choose M here. If unsure, say N.
736 728
@@ -1180,10 +1172,10 @@ config NETFILTER_XT_MATCH_SCTP
1180 1172
1181config NETFILTER_XT_MATCH_SOCKET 1173config NETFILTER_XT_MATCH_SOCKET
1182 tristate '"socket" match support' 1174 tristate '"socket" match support'
1183 depends on NETFILTER_TPROXY
1184 depends on NETFILTER_XTABLES 1175 depends on NETFILTER_XTABLES
1185 depends on NETFILTER_ADVANCED 1176 depends on NETFILTER_ADVANCED
1186 depends on !NF_CONNTRACK || NF_CONNTRACK 1177 depends on !NF_CONNTRACK || NF_CONNTRACK
1178 depends on (IPV6 || IPV6=n)
1187 select NF_DEFRAG_IPV4 1179 select NF_DEFRAG_IPV4
1188 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 1180 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
1189 help 1181 help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a1abf87d43bf..c3a0a12907f6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,6 @@
1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o 1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
2 2
3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o 3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
4nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o 4nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
5nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o 5nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
6nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o 6nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
@@ -61,8 +61,8 @@ obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
61obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o 61obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
62obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o 62obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
63 63
64# transparent proxy support 64# SYNPROXY
65obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o 65obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
66 66
67# generic X tables 67# generic X tables
68obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o 68obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 2217363ab422..593b16ea45e0 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -234,12 +234,13 @@ EXPORT_SYMBOL(skb_make_writable);
234/* This does not belong here, but locally generated errors need it if connection 234/* This does not belong here, but locally generated errors need it if connection
235 tracking in use: without this, connection may not be in hash table, and hence 235 tracking in use: without this, connection may not be in hash table, and hence
236 manufactured ICMP or RST packets will not be associated with it. */ 236 manufactured ICMP or RST packets will not be associated with it. */
237void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly; 237void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
238 __rcu __read_mostly;
238EXPORT_SYMBOL(ip_ct_attach); 239EXPORT_SYMBOL(ip_ct_attach);
239 240
240void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) 241void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
241{ 242{
242 void (*attach)(struct sk_buff *, struct sk_buff *); 243 void (*attach)(struct sk_buff *, const struct sk_buff *);
243 244
244 if (skb->nfct) { 245 if (skb->nfct) {
245 rcu_read_lock(); 246 rcu_read_lock();
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 3cd85b2fc67c..5199448697f6 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -414,7 +414,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
414 414
415 spin_lock_bh(&svc->sched_lock); 415 spin_lock_bh(&svc->sched_lock);
416 tbl->dead = 1; 416 tbl->dead = 1;
417 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { 417 for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
418 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { 418 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
419 ip_vs_lblcr_free(en); 419 ip_vs_lblcr_free(en);
420 } 420 }
@@ -440,7 +440,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
440 struct ip_vs_lblcr_entry *en; 440 struct ip_vs_lblcr_entry *en;
441 struct hlist_node *next; 441 struct hlist_node *next;
442 442
443 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 443 for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
444 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 444 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
445 445
446 spin_lock(&svc->sched_lock); 446 spin_lock(&svc->sched_lock);
@@ -495,7 +495,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
495 if (goal > tbl->max_size/2) 495 if (goal > tbl->max_size/2)
496 goal = tbl->max_size/2; 496 goal = tbl->max_size/2;
497 497
498 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 498 for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
499 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 499 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
500 500
501 spin_lock(&svc->sched_lock); 501 spin_lock(&svc->sched_lock);
@@ -536,7 +536,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
536 /* 536 /*
537 * Initialize the hash buckets 537 * Initialize the hash buckets
538 */ 538 */
539 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { 539 for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
540 INIT_HLIST_HEAD(&tbl->bucket[i]); 540 INIT_HLIST_HEAD(&tbl->bucket[i]);
541 } 541 }
542 tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; 542 tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3c0da8728036..23e596e438b3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -66,15 +66,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
66static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph, 66static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
67 unsigned int sctphoff) 67 unsigned int sctphoff)
68{ 68{
69 __u32 crc32; 69 sctph->checksum = sctp_compute_cksum(skb, sctphoff);
70 struct sk_buff *iter;
71
72 crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff);
73 skb_walk_frags(skb, iter)
74 crc32 = sctp_update_cksum((u8 *) iter->data,
75 skb_headlen(iter), crc32);
76 sctph->checksum = sctp_end_cksum(crc32);
77
78 skb->ip_summed = CHECKSUM_UNNECESSARY; 70 skb->ip_summed = CHECKSUM_UNNECESSARY;
79} 71}
80 72
@@ -151,10 +143,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
151{ 143{
152 unsigned int sctphoff; 144 unsigned int sctphoff;
153 struct sctphdr *sh, _sctph; 145 struct sctphdr *sh, _sctph;
154 struct sk_buff *iter; 146 __le32 cmp, val;
155 __le32 cmp;
156 __le32 val;
157 __u32 tmp;
158 147
159#ifdef CONFIG_IP_VS_IPV6 148#ifdef CONFIG_IP_VS_IPV6
160 if (af == AF_INET6) 149 if (af == AF_INET6)
@@ -168,13 +157,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
168 return 0; 157 return 0;
169 158
170 cmp = sh->checksum; 159 cmp = sh->checksum;
171 160 val = sctp_compute_cksum(skb, sctphoff);
172 tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb));
173 skb_walk_frags(skb, iter)
174 tmp = sctp_update_cksum((__u8 *) iter->data,
175 skb_headlen(iter), tmp);
176
177 val = sctp_end_cksum(tmp);
178 161
179 if (val != cmp) { 162 if (val != cmp) {
180 /* CRC failure, dump it. */ 163 /* CRC failure, dump it. */
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index f16c027df15b..3588faebe529 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -269,14 +269,20 @@ ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
269 switch (iph->protocol) { 269 switch (iph->protocol) {
270 case IPPROTO_TCP: 270 case IPPROTO_TCP:
271 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); 271 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
272 if (unlikely(th == NULL))
273 return 0;
272 port = th->source; 274 port = th->source;
273 break; 275 break;
274 case IPPROTO_UDP: 276 case IPPROTO_UDP:
275 uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); 277 uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
278 if (unlikely(uh == NULL))
279 return 0;
276 port = uh->source; 280 port = uh->source;
277 break; 281 break;
278 case IPPROTO_SCTP: 282 case IPPROTO_SCTP:
279 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); 283 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
284 if (unlikely(sh == NULL))
285 return 0;
280 port = sh->source; 286 port = sh->source;
281 break; 287 break;
282 default: 288 default:
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0283baedcdfb..5d892febd64c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -39,6 +39,7 @@
39#include <net/netfilter/nf_conntrack_l4proto.h> 39#include <net/netfilter/nf_conntrack_l4proto.h>
40#include <net/netfilter/nf_conntrack_expect.h> 40#include <net/netfilter/nf_conntrack_expect.h>
41#include <net/netfilter/nf_conntrack_helper.h> 41#include <net/netfilter/nf_conntrack_helper.h>
42#include <net/netfilter/nf_conntrack_seqadj.h>
42#include <net/netfilter/nf_conntrack_core.h> 43#include <net/netfilter/nf_conntrack_core.h>
43#include <net/netfilter/nf_conntrack_extend.h> 44#include <net/netfilter/nf_conntrack_extend.h>
44#include <net/netfilter/nf_conntrack_acct.h> 45#include <net/netfilter/nf_conntrack_acct.h>
@@ -47,6 +48,7 @@
47#include <net/netfilter/nf_conntrack_timestamp.h> 48#include <net/netfilter/nf_conntrack_timestamp.h>
48#include <net/netfilter/nf_conntrack_timeout.h> 49#include <net/netfilter/nf_conntrack_timeout.h>
49#include <net/netfilter/nf_conntrack_labels.h> 50#include <net/netfilter/nf_conntrack_labels.h>
51#include <net/netfilter/nf_conntrack_synproxy.h>
50#include <net/netfilter/nf_nat.h> 52#include <net/netfilter/nf_nat.h>
51#include <net/netfilter/nf_nat_core.h> 53#include <net/netfilter/nf_nat_core.h>
52#include <net/netfilter/nf_nat_helper.h> 54#include <net/netfilter/nf_nat_helper.h>
@@ -238,7 +240,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
238 nf_conntrack_free(ct); 240 nf_conntrack_free(ct);
239} 241}
240 242
241void nf_ct_delete_from_lists(struct nf_conn *ct) 243static void nf_ct_delete_from_lists(struct nf_conn *ct)
242{ 244{
243 struct net *net = nf_ct_net(ct); 245 struct net *net = nf_ct_net(ct);
244 246
@@ -253,7 +255,6 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
253 &net->ct.dying); 255 &net->ct.dying);
254 spin_unlock_bh(&nf_conntrack_lock); 256 spin_unlock_bh(&nf_conntrack_lock);
255} 257}
256EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
257 258
258static void death_by_event(unsigned long ul_conntrack) 259static void death_by_event(unsigned long ul_conntrack)
259{ 260{
@@ -275,7 +276,7 @@ static void death_by_event(unsigned long ul_conntrack)
275 nf_ct_put(ct); 276 nf_ct_put(ct);
276} 277}
277 278
278void nf_ct_dying_timeout(struct nf_conn *ct) 279static void nf_ct_dying_timeout(struct nf_conn *ct)
279{ 280{
280 struct net *net = nf_ct_net(ct); 281 struct net *net = nf_ct_net(ct);
281 struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); 282 struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
@@ -288,27 +289,33 @@ void nf_ct_dying_timeout(struct nf_conn *ct)
288 (prandom_u32() % net->ct.sysctl_events_retry_timeout); 289 (prandom_u32() % net->ct.sysctl_events_retry_timeout);
289 add_timer(&ecache->timeout); 290 add_timer(&ecache->timeout);
290} 291}
291EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
292 292
293static void death_by_timeout(unsigned long ul_conntrack) 293bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
294{ 294{
295 struct nf_conn *ct = (void *)ul_conntrack;
296 struct nf_conn_tstamp *tstamp; 295 struct nf_conn_tstamp *tstamp;
297 296
298 tstamp = nf_conn_tstamp_find(ct); 297 tstamp = nf_conn_tstamp_find(ct);
299 if (tstamp && tstamp->stop == 0) 298 if (tstamp && tstamp->stop == 0)
300 tstamp->stop = ktime_to_ns(ktime_get_real()); 299 tstamp->stop = ktime_to_ns(ktime_get_real());
301 300
302 if (!test_bit(IPS_DYING_BIT, &ct->status) && 301 if (!nf_ct_is_dying(ct) &&
303 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { 302 unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
303 portid, report) < 0)) {
304 /* destroy event was not delivered */ 304 /* destroy event was not delivered */
305 nf_ct_delete_from_lists(ct); 305 nf_ct_delete_from_lists(ct);
306 nf_ct_dying_timeout(ct); 306 nf_ct_dying_timeout(ct);
307 return; 307 return false;
308 } 308 }
309 set_bit(IPS_DYING_BIT, &ct->status); 309 set_bit(IPS_DYING_BIT, &ct->status);
310 nf_ct_delete_from_lists(ct); 310 nf_ct_delete_from_lists(ct);
311 nf_ct_put(ct); 311 nf_ct_put(ct);
312 return true;
313}
314EXPORT_SYMBOL_GPL(nf_ct_delete);
315
316static void death_by_timeout(unsigned long ul_conntrack)
317{
318 nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
312} 319}
313 320
314/* 321/*
@@ -643,10 +650,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
643 return dropped; 650 return dropped;
644 651
645 if (del_timer(&ct->timeout)) { 652 if (del_timer(&ct->timeout)) {
646 death_by_timeout((unsigned long)ct); 653 if (nf_ct_delete(ct, 0, 0)) {
647 /* Check if we indeed killed this entry. Reliable event
648 delivery may have inserted it into the dying list. */
649 if (test_bit(IPS_DYING_BIT, &ct->status)) {
650 dropped = 1; 654 dropped = 1;
651 NF_CT_STAT_INC_ATOMIC(net, early_drop); 655 NF_CT_STAT_INC_ATOMIC(net, early_drop);
652 } 656 }
@@ -796,6 +800,11 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
796 if (IS_ERR(ct)) 800 if (IS_ERR(ct))
797 return (struct nf_conntrack_tuple_hash *)ct; 801 return (struct nf_conntrack_tuple_hash *)ct;
798 802
803 if (tmpl && nfct_synproxy(tmpl)) {
804 nfct_seqadj_ext_add(ct);
805 nfct_synproxy_ext_add(ct);
806 }
807
799 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; 808 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
800 if (timeout_ext) 809 if (timeout_ext)
801 timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); 810 timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
@@ -1192,7 +1201,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
1192#endif 1201#endif
1193 1202
1194/* Used by ipt_REJECT and ip6t_REJECT. */ 1203/* Used by ipt_REJECT and ip6t_REJECT. */
1195static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) 1204static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
1196{ 1205{
1197 struct nf_conn *ct; 1206 struct nf_conn *ct;
1198 enum ip_conntrack_info ctinfo; 1207 enum ip_conntrack_info ctinfo;
@@ -1244,7 +1253,7 @@ found:
1244 1253
1245void nf_ct_iterate_cleanup(struct net *net, 1254void nf_ct_iterate_cleanup(struct net *net,
1246 int (*iter)(struct nf_conn *i, void *data), 1255 int (*iter)(struct nf_conn *i, void *data),
1247 void *data) 1256 void *data, u32 portid, int report)
1248{ 1257{
1249 struct nf_conn *ct; 1258 struct nf_conn *ct;
1250 unsigned int bucket = 0; 1259 unsigned int bucket = 0;
@@ -1252,7 +1261,8 @@ void nf_ct_iterate_cleanup(struct net *net,
1252 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { 1261 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
1253 /* Time to push up daises... */ 1262 /* Time to push up daises... */
1254 if (del_timer(&ct->timeout)) 1263 if (del_timer(&ct->timeout))
1255 death_by_timeout((unsigned long)ct); 1264 nf_ct_delete(ct, portid, report);
1265
1256 /* ... else the timer will get him soon. */ 1266 /* ... else the timer will get him soon. */
1257 1267
1258 nf_ct_put(ct); 1268 nf_ct_put(ct);
@@ -1260,30 +1270,6 @@ void nf_ct_iterate_cleanup(struct net *net,
1260} 1270}
1261EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); 1271EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
1262 1272
1263struct __nf_ct_flush_report {
1264 u32 portid;
1265 int report;
1266};
1267
1268static int kill_report(struct nf_conn *i, void *data)
1269{
1270 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
1271 struct nf_conn_tstamp *tstamp;
1272
1273 tstamp = nf_conn_tstamp_find(i);
1274 if (tstamp && tstamp->stop == 0)
1275 tstamp->stop = ktime_to_ns(ktime_get_real());
1276
1277 /* If we fail to deliver the event, death_by_timeout() will retry */
1278 if (nf_conntrack_event_report(IPCT_DESTROY, i,
1279 fr->portid, fr->report) < 0)
1280 return 1;
1281
1282 /* Avoid the delivery of the destroy event in death_by_timeout(). */
1283 set_bit(IPS_DYING_BIT, &i->status);
1284 return 1;
1285}
1286
1287static int kill_all(struct nf_conn *i, void *data) 1273static int kill_all(struct nf_conn *i, void *data)
1288{ 1274{
1289 return 1; 1275 return 1;
@@ -1301,11 +1287,7 @@ EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
1301 1287
1302void nf_conntrack_flush_report(struct net *net, u32 portid, int report) 1288void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
1303{ 1289{
1304 struct __nf_ct_flush_report fr = { 1290 nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report);
1305 .portid = portid,
1306 .report = report,
1307 };
1308 nf_ct_iterate_cleanup(net, kill_report, &fr);
1309} 1291}
1310EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); 1292EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
1311 1293
@@ -1351,6 +1333,7 @@ void nf_conntrack_cleanup_end(void)
1351 nf_ct_extend_unregister(&nf_ct_zone_extend); 1333 nf_ct_extend_unregister(&nf_ct_zone_extend);
1352#endif 1334#endif
1353 nf_conntrack_proto_fini(); 1335 nf_conntrack_proto_fini();
1336 nf_conntrack_seqadj_fini();
1354 nf_conntrack_labels_fini(); 1337 nf_conntrack_labels_fini();
1355 nf_conntrack_helper_fini(); 1338 nf_conntrack_helper_fini();
1356 nf_conntrack_timeout_fini(); 1339 nf_conntrack_timeout_fini();
@@ -1386,7 +1369,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
1386i_see_dead_people: 1369i_see_dead_people:
1387 busy = 0; 1370 busy = 0;
1388 list_for_each_entry(net, net_exit_list, exit_list) { 1371 list_for_each_entry(net, net_exit_list, exit_list) {
1389 nf_ct_iterate_cleanup(net, kill_all, NULL); 1372 nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
1390 nf_ct_release_dying_list(net); 1373 nf_ct_release_dying_list(net);
1391 if (atomic_read(&net->ct.count) != 0) 1374 if (atomic_read(&net->ct.count) != 0)
1392 busy = 1; 1375 busy = 1;
@@ -1556,6 +1539,10 @@ int nf_conntrack_init_start(void)
1556 if (ret < 0) 1539 if (ret < 0)
1557 goto err_labels; 1540 goto err_labels;
1558 1541
1542 ret = nf_conntrack_seqadj_init();
1543 if (ret < 0)
1544 goto err_seqadj;
1545
1559#ifdef CONFIG_NF_CONNTRACK_ZONES 1546#ifdef CONFIG_NF_CONNTRACK_ZONES
1560 ret = nf_ct_extend_register(&nf_ct_zone_extend); 1547 ret = nf_ct_extend_register(&nf_ct_zone_extend);
1561 if (ret < 0) 1548 if (ret < 0)
@@ -1580,6 +1567,8 @@ err_proto:
1580 nf_ct_extend_unregister(&nf_ct_zone_extend); 1567 nf_ct_extend_unregister(&nf_ct_zone_extend);
1581err_extend: 1568err_extend:
1582#endif 1569#endif
1570 nf_conntrack_seqadj_fini();
1571err_seqadj:
1583 nf_conntrack_labels_fini(); 1572 nf_conntrack_labels_fini();
1584err_labels: 1573err_labels:
1585 nf_conntrack_helper_fini(); 1574 nf_conntrack_helper_fini();
@@ -1602,9 +1591,6 @@ void nf_conntrack_init_end(void)
1602 /* For use by REJECT target */ 1591 /* For use by REJECT target */
1603 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); 1592 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
1604 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); 1593 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
1605
1606 /* Howto get NAT offsets */
1607 RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
1608} 1594}
1609 1595
1610/* 1596/*
@@ -1691,8 +1677,3 @@ err_slabname:
1691err_stat: 1677err_stat:
1692 return ret; 1678 return ret;
1693} 1679}
1694
1695s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
1696 enum ip_conntrack_dir dir,
1697 u32 seq);
1698EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 355d2ef08094..bb53f120e79c 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -8,12 +8,8 @@
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10 10
11#include <linux/ctype.h>
12#include <linux/export.h> 11#include <linux/export.h>
13#include <linux/jhash.h>
14#include <linux/spinlock.h>
15#include <linux/types.h> 12#include <linux/types.h>
16#include <linux/slab.h>
17 13
18#include <net/netfilter/nf_conntrack_ecache.h> 14#include <net/netfilter/nf_conntrack_ecache.h>
19#include <net/netfilter/nf_conntrack_labels.h> 15#include <net/netfilter/nf_conntrack_labels.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index edc410e778f7..eea936b70d15 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -37,6 +37,7 @@
37#include <net/netfilter/nf_conntrack_core.h> 37#include <net/netfilter/nf_conntrack_core.h>
38#include <net/netfilter/nf_conntrack_expect.h> 38#include <net/netfilter/nf_conntrack_expect.h>
39#include <net/netfilter/nf_conntrack_helper.h> 39#include <net/netfilter/nf_conntrack_helper.h>
40#include <net/netfilter/nf_conntrack_seqadj.h>
40#include <net/netfilter/nf_conntrack_l3proto.h> 41#include <net/netfilter/nf_conntrack_l3proto.h>
41#include <net/netfilter/nf_conntrack_l4proto.h> 42#include <net/netfilter/nf_conntrack_l4proto.h>
42#include <net/netfilter/nf_conntrack_tuple.h> 43#include <net/netfilter/nf_conntrack_tuple.h>
@@ -381,9 +382,8 @@ nla_put_failure:
381 return -1; 382 return -1;
382} 383}
383 384
384#ifdef CONFIG_NF_NAT_NEEDED
385static int 385static int
386dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type) 386dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type)
387{ 387{
388 struct nlattr *nest_parms; 388 struct nlattr *nest_parms;
389 389
@@ -391,12 +391,12 @@ dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
391 if (!nest_parms) 391 if (!nest_parms)
392 goto nla_put_failure; 392 goto nla_put_failure;
393 393
394 if (nla_put_be32(skb, CTA_NAT_SEQ_CORRECTION_POS, 394 if (nla_put_be32(skb, CTA_SEQADJ_CORRECTION_POS,
395 htonl(natseq->correction_pos)) || 395 htonl(seq->correction_pos)) ||
396 nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_BEFORE, 396 nla_put_be32(skb, CTA_SEQADJ_OFFSET_BEFORE,
397 htonl(natseq->offset_before)) || 397 htonl(seq->offset_before)) ||
398 nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_AFTER, 398 nla_put_be32(skb, CTA_SEQADJ_OFFSET_AFTER,
399 htonl(natseq->offset_after))) 399 htonl(seq->offset_after)))
400 goto nla_put_failure; 400 goto nla_put_failure;
401 401
402 nla_nest_end(skb, nest_parms); 402 nla_nest_end(skb, nest_parms);
@@ -408,27 +408,24 @@ nla_put_failure:
408} 408}
409 409
410static inline int 410static inline int
411ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) 411ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
412{ 412{
413 struct nf_nat_seq *natseq; 413 struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
414 struct nf_conn_nat *nat = nfct_nat(ct); 414 struct nf_ct_seqadj *seq;
415 415
416 if (!(ct->status & IPS_SEQ_ADJUST) || !nat) 416 if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
417 return 0; 417 return 0;
418 418
419 natseq = &nat->seq[IP_CT_DIR_ORIGINAL]; 419 seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
420 if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_ORIG) == -1) 420 if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
421 return -1; 421 return -1;
422 422
423 natseq = &nat->seq[IP_CT_DIR_REPLY]; 423 seq = &seqadj->seq[IP_CT_DIR_REPLY];
424 if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_REPLY) == -1) 424 if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
425 return -1; 425 return -1;
426 426
427 return 0; 427 return 0;
428} 428}
429#else
430#define ctnetlink_dump_nat_seq_adj(a, b) (0)
431#endif
432 429
433static inline int 430static inline int
434ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) 431ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -502,7 +499,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
502 ctnetlink_dump_id(skb, ct) < 0 || 499 ctnetlink_dump_id(skb, ct) < 0 ||
503 ctnetlink_dump_use(skb, ct) < 0 || 500 ctnetlink_dump_use(skb, ct) < 0 ||
504 ctnetlink_dump_master(skb, ct) < 0 || 501 ctnetlink_dump_master(skb, ct) < 0 ||
505 ctnetlink_dump_nat_seq_adj(skb, ct) < 0) 502 ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
506 goto nla_put_failure; 503 goto nla_put_failure;
507 504
508 nlmsg_end(skb, nlh); 505 nlmsg_end(skb, nlh);
@@ -707,8 +704,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
707 ctnetlink_dump_master(skb, ct) < 0) 704 ctnetlink_dump_master(skb, ct) < 0)
708 goto nla_put_failure; 705 goto nla_put_failure;
709 706
710 if (events & (1 << IPCT_NATSEQADJ) && 707 if (events & (1 << IPCT_SEQADJ) &&
711 ctnetlink_dump_nat_seq_adj(skb, ct) < 0) 708 ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
712 goto nla_put_failure; 709 goto nla_put_failure;
713 } 710 }
714 711
@@ -1038,21 +1035,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
1038 } 1035 }
1039 } 1036 }
1040 1037
1041 if (del_timer(&ct->timeout)) { 1038 if (del_timer(&ct->timeout))
1042 if (nf_conntrack_event_report(IPCT_DESTROY, ct, 1039 nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
1043 NETLINK_CB(skb).portid, 1040
1044 nlmsg_report(nlh)) < 0) {
1045 nf_ct_delete_from_lists(ct);
1046 /* we failed to report the event, try later */
1047 nf_ct_dying_timeout(ct);
1048 nf_ct_put(ct);
1049 return 0;
1050 }
1051 /* death_by_timeout would report the event again */
1052 set_bit(IPS_DYING_BIT, &ct->status);
1053 nf_ct_delete_from_lists(ct);
1054 nf_ct_put(ct);
1055 }
1056 nf_ct_put(ct); 1041 nf_ct_put(ct);
1057 1042
1058 return 0; 1043 return 0;
@@ -1451,66 +1436,65 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
1451 return err; 1436 return err;
1452} 1437}
1453 1438
1454#ifdef CONFIG_NF_NAT_NEEDED 1439static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = {
1455static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = { 1440 [CTA_SEQADJ_CORRECTION_POS] = { .type = NLA_U32 },
1456 [CTA_NAT_SEQ_CORRECTION_POS] = { .type = NLA_U32 }, 1441 [CTA_SEQADJ_OFFSET_BEFORE] = { .type = NLA_U32 },
1457 [CTA_NAT_SEQ_OFFSET_BEFORE] = { .type = NLA_U32 }, 1442 [CTA_SEQADJ_OFFSET_AFTER] = { .type = NLA_U32 },
1458 [CTA_NAT_SEQ_OFFSET_AFTER] = { .type = NLA_U32 },
1459}; 1443};
1460 1444
1461static inline int 1445static inline int
1462change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) 1446change_seq_adj(struct nf_ct_seqadj *seq, const struct nlattr * const attr)
1463{ 1447{
1464 int err; 1448 int err;
1465 struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; 1449 struct nlattr *cda[CTA_SEQADJ_MAX+1];
1466 1450
1467 err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); 1451 err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy);
1468 if (err < 0) 1452 if (err < 0)
1469 return err; 1453 return err;
1470 1454
1471 if (!cda[CTA_NAT_SEQ_CORRECTION_POS]) 1455 if (!cda[CTA_SEQADJ_CORRECTION_POS])
1472 return -EINVAL; 1456 return -EINVAL;
1473 1457
1474 natseq->correction_pos = 1458 seq->correction_pos =
1475 ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS])); 1459 ntohl(nla_get_be32(cda[CTA_SEQADJ_CORRECTION_POS]));
1476 1460
1477 if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE]) 1461 if (!cda[CTA_SEQADJ_OFFSET_BEFORE])
1478 return -EINVAL; 1462 return -EINVAL;
1479 1463
1480 natseq->offset_before = 1464 seq->offset_before =
1481 ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE])); 1465 ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_BEFORE]));
1482 1466
1483 if (!cda[CTA_NAT_SEQ_OFFSET_AFTER]) 1467 if (!cda[CTA_SEQADJ_OFFSET_AFTER])
1484 return -EINVAL; 1468 return -EINVAL;
1485 1469
1486 natseq->offset_after = 1470 seq->offset_after =
1487 ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER])); 1471 ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_AFTER]));
1488 1472
1489 return 0; 1473 return 0;
1490} 1474}
1491 1475
1492static int 1476static int
1493ctnetlink_change_nat_seq_adj(struct nf_conn *ct, 1477ctnetlink_change_seq_adj(struct nf_conn *ct,
1494 const struct nlattr * const cda[]) 1478 const struct nlattr * const cda[])
1495{ 1479{
1480 struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
1496 int ret = 0; 1481 int ret = 0;
1497 struct nf_conn_nat *nat = nfct_nat(ct);
1498 1482
1499 if (!nat) 1483 if (!seqadj)
1500 return 0; 1484 return 0;
1501 1485
1502 if (cda[CTA_NAT_SEQ_ADJ_ORIG]) { 1486 if (cda[CTA_SEQ_ADJ_ORIG]) {
1503 ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_ORIGINAL], 1487 ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
1504 cda[CTA_NAT_SEQ_ADJ_ORIG]); 1488 cda[CTA_SEQ_ADJ_ORIG]);
1505 if (ret < 0) 1489 if (ret < 0)
1506 return ret; 1490 return ret;
1507 1491
1508 ct->status |= IPS_SEQ_ADJUST; 1492 ct->status |= IPS_SEQ_ADJUST;
1509 } 1493 }
1510 1494
1511 if (cda[CTA_NAT_SEQ_ADJ_REPLY]) { 1495 if (cda[CTA_SEQ_ADJ_REPLY]) {
1512 ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_REPLY], 1496 ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
1513 cda[CTA_NAT_SEQ_ADJ_REPLY]); 1497 cda[CTA_SEQ_ADJ_REPLY]);
1514 if (ret < 0) 1498 if (ret < 0)
1515 return ret; 1499 return ret;
1516 1500
@@ -1519,7 +1503,6 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct,
1519 1503
1520 return 0; 1504 return 0;
1521} 1505}
1522#endif
1523 1506
1524static int 1507static int
1525ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) 1508ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[])
@@ -1585,13 +1568,12 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
1585 ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); 1568 ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
1586#endif 1569#endif
1587 1570
1588#ifdef CONFIG_NF_NAT_NEEDED 1571 if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
1589 if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) { 1572 err = ctnetlink_change_seq_adj(ct, cda);
1590 err = ctnetlink_change_nat_seq_adj(ct, cda);
1591 if (err < 0) 1573 if (err < 0)
1592 return err; 1574 return err;
1593 } 1575 }
1594#endif 1576
1595 if (cda[CTA_LABELS]) { 1577 if (cda[CTA_LABELS]) {
1596 err = ctnetlink_attach_labels(ct, cda); 1578 err = ctnetlink_attach_labels(ct, cda);
1597 if (err < 0) 1579 if (err < 0)
@@ -1696,13 +1678,11 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
1696 goto err2; 1678 goto err2;
1697 } 1679 }
1698 1680
1699#ifdef CONFIG_NF_NAT_NEEDED 1681 if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
1700 if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) { 1682 err = ctnetlink_change_seq_adj(ct, cda);
1701 err = ctnetlink_change_nat_seq_adj(ct, cda);
1702 if (err < 0) 1683 if (err < 0)
1703 goto err2; 1684 goto err2;
1704 } 1685 }
1705#endif
1706 1686
1707 memset(&ct->proto, 0, sizeof(ct->proto)); 1687 memset(&ct->proto, 0, sizeof(ct->proto));
1708 if (cda[CTA_PROTOINFO]) { 1688 if (cda[CTA_PROTOINFO]) {
@@ -1816,7 +1796,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1816 (1 << IPCT_ASSURED) | 1796 (1 << IPCT_ASSURED) |
1817 (1 << IPCT_HELPER) | 1797 (1 << IPCT_HELPER) |
1818 (1 << IPCT_PROTOINFO) | 1798 (1 << IPCT_PROTOINFO) |
1819 (1 << IPCT_NATSEQADJ) | 1799 (1 << IPCT_SEQADJ) |
1820 (1 << IPCT_MARK) | events, 1800 (1 << IPCT_MARK) | events,
1821 ct, NETLINK_CB(skb).portid, 1801 ct, NETLINK_CB(skb).portid,
1822 nlmsg_report(nlh)); 1802 nlmsg_report(nlh));
@@ -1839,7 +1819,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1839 (1 << IPCT_HELPER) | 1819 (1 << IPCT_HELPER) |
1840 (1 << IPCT_LABEL) | 1820 (1 << IPCT_LABEL) |
1841 (1 << IPCT_PROTOINFO) | 1821 (1 << IPCT_PROTOINFO) |
1842 (1 << IPCT_NATSEQADJ) | 1822 (1 << IPCT_SEQADJ) |
1843 (1 << IPCT_MARK), 1823 (1 << IPCT_MARK),
1844 ct, NETLINK_CB(skb).portid, 1824 ct, NETLINK_CB(skb).portid,
1845 nlmsg_report(nlh)); 1825 nlmsg_report(nlh));
@@ -1999,6 +1979,27 @@ out:
1999 return err == -EAGAIN ? -ENOBUFS : err; 1979 return err == -EAGAIN ? -ENOBUFS : err;
2000} 1980}
2001 1981
1982static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
1983 [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
1984 [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
1985 [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
1986 [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
1987 [CTA_EXPECT_ID] = { .type = NLA_U32 },
1988 [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
1989 .len = NF_CT_HELPER_NAME_LEN - 1 },
1990 [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
1991 [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
1992 [CTA_EXPECT_CLASS] = { .type = NLA_U32 },
1993 [CTA_EXPECT_NAT] = { .type = NLA_NESTED },
1994 [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
1995};
1996
1997static struct nf_conntrack_expect *
1998ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
1999 struct nf_conntrack_helper *helper,
2000 struct nf_conntrack_tuple *tuple,
2001 struct nf_conntrack_tuple *mask);
2002
2002#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT 2003#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
2003static size_t 2004static size_t
2004ctnetlink_nfqueue_build_size(const struct nf_conn *ct) 2005ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
@@ -2073,7 +2074,7 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
2073 goto nla_put_failure; 2074 goto nla_put_failure;
2074 2075
2075 if ((ct->status & IPS_SEQ_ADJUST) && 2076 if ((ct->status & IPS_SEQ_ADJUST) &&
2076 ctnetlink_dump_nat_seq_adj(skb, ct) < 0) 2077 ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
2077 goto nla_put_failure; 2078 goto nla_put_failure;
2078 2079
2079#ifdef CONFIG_NF_CONNTRACK_MARK 2080#ifdef CONFIG_NF_CONNTRACK_MARK
@@ -2139,10 +2140,70 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
2139 return ret; 2140 return ret;
2140} 2141}
2141 2142
2143static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
2144 const struct nf_conn *ct,
2145 struct nf_conntrack_tuple *tuple,
2146 struct nf_conntrack_tuple *mask)
2147{
2148 int err;
2149
2150 err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
2151 nf_ct_l3num(ct));
2152 if (err < 0)
2153 return err;
2154
2155 return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
2156 nf_ct_l3num(ct));
2157}
2158
2159static int
2160ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
2161 u32 portid, u32 report)
2162{
2163 struct nlattr *cda[CTA_EXPECT_MAX+1];
2164 struct nf_conntrack_tuple tuple, mask;
2165 struct nf_conntrack_helper *helper = NULL;
2166 struct nf_conntrack_expect *exp;
2167 int err;
2168
2169 err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy);
2170 if (err < 0)
2171 return err;
2172
2173 err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda,
2174 ct, &tuple, &mask);
2175 if (err < 0)
2176 return err;
2177
2178 if (cda[CTA_EXPECT_HELP_NAME]) {
2179 const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
2180
2181 helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
2182 nf_ct_protonum(ct));
2183 if (helper == NULL)
2184 return -EOPNOTSUPP;
2185 }
2186
2187 exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
2188 helper, &tuple, &mask);
2189 if (IS_ERR(exp))
2190 return PTR_ERR(exp);
2191
2192 err = nf_ct_expect_related_report(exp, portid, report);
2193 if (err < 0) {
2194 nf_ct_expect_put(exp);
2195 return err;
2196 }
2197
2198 return 0;
2199}
2200
2142static struct nfq_ct_hook ctnetlink_nfqueue_hook = { 2201static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
2143 .build_size = ctnetlink_nfqueue_build_size, 2202 .build_size = ctnetlink_nfqueue_build_size,
2144 .build = ctnetlink_nfqueue_build, 2203 .build = ctnetlink_nfqueue_build,
2145 .parse = ctnetlink_nfqueue_parse, 2204 .parse = ctnetlink_nfqueue_parse,
2205 .attach_expect = ctnetlink_nfqueue_attach_expect,
2206 .seq_adjust = nf_ct_tcp_seqadj_set,
2146}; 2207};
2147#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ 2208#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */
2148 2209
@@ -2510,21 +2571,6 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
2510 return err; 2571 return err;
2511} 2572}
2512 2573
2513static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
2514 [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
2515 [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
2516 [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
2517 [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
2518 [CTA_EXPECT_ID] = { .type = NLA_U32 },
2519 [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
2520 .len = NF_CT_HELPER_NAME_LEN - 1 },
2521 [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
2522 [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
2523 [CTA_EXPECT_CLASS] = { .type = NLA_U32 },
2524 [CTA_EXPECT_NAT] = { .type = NLA_NESTED },
2525 [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
2526};
2527
2528static int 2574static int
2529ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, 2575ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
2530 const struct nlmsghdr *nlh, 2576 const struct nlmsghdr *nlh,
@@ -2747,76 +2793,26 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
2747#endif 2793#endif
2748} 2794}
2749 2795
2750static int 2796static struct nf_conntrack_expect *
2751ctnetlink_create_expect(struct net *net, u16 zone, 2797ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
2752 const struct nlattr * const cda[], 2798 struct nf_conntrack_helper *helper,
2753 u_int8_t u3, 2799 struct nf_conntrack_tuple *tuple,
2754 u32 portid, int report) 2800 struct nf_conntrack_tuple *mask)
2755{ 2801{
2756 struct nf_conntrack_tuple tuple, mask, master_tuple; 2802 u_int32_t class = 0;
2757 struct nf_conntrack_tuple_hash *h = NULL;
2758 struct nf_conntrack_expect *exp; 2803 struct nf_conntrack_expect *exp;
2759 struct nf_conn *ct;
2760 struct nf_conn_help *help; 2804 struct nf_conn_help *help;
2761 struct nf_conntrack_helper *helper = NULL; 2805 int err;
2762 u_int32_t class = 0;
2763 int err = 0;
2764
2765 /* caller guarantees that those three CTA_EXPECT_* exist */
2766 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
2767 if (err < 0)
2768 return err;
2769 err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
2770 if (err < 0)
2771 return err;
2772 err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
2773 if (err < 0)
2774 return err;
2775
2776 /* Look for master conntrack of this expectation */
2777 h = nf_conntrack_find_get(net, zone, &master_tuple);
2778 if (!h)
2779 return -ENOENT;
2780 ct = nf_ct_tuplehash_to_ctrack(h);
2781
2782 /* Look for helper of this expectation */
2783 if (cda[CTA_EXPECT_HELP_NAME]) {
2784 const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
2785
2786 helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
2787 nf_ct_protonum(ct));
2788 if (helper == NULL) {
2789#ifdef CONFIG_MODULES
2790 if (request_module("nfct-helper-%s", helpname) < 0) {
2791 err = -EOPNOTSUPP;
2792 goto out;
2793 }
2794
2795 helper = __nf_conntrack_helper_find(helpname,
2796 nf_ct_l3num(ct),
2797 nf_ct_protonum(ct));
2798 if (helper) {
2799 err = -EAGAIN;
2800 goto out;
2801 }
2802#endif
2803 err = -EOPNOTSUPP;
2804 goto out;
2805 }
2806 }
2807 2806
2808 if (cda[CTA_EXPECT_CLASS] && helper) { 2807 if (cda[CTA_EXPECT_CLASS] && helper) {
2809 class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS])); 2808 class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
2810 if (class > helper->expect_class_max) { 2809 if (class > helper->expect_class_max)
2811 err = -EINVAL; 2810 return ERR_PTR(-EINVAL);
2812 goto out;
2813 }
2814 } 2811 }
2815 exp = nf_ct_expect_alloc(ct); 2812 exp = nf_ct_expect_alloc(ct);
2816 if (!exp) { 2813 if (!exp)
2817 err = -ENOMEM; 2814 return ERR_PTR(-ENOMEM);
2818 goto out; 2815
2819 }
2820 help = nfct_help(ct); 2816 help = nfct_help(ct);
2821 if (!help) { 2817 if (!help) {
2822 if (!cda[CTA_EXPECT_TIMEOUT]) { 2818 if (!cda[CTA_EXPECT_TIMEOUT]) {
@@ -2854,21 +2850,89 @@ ctnetlink_create_expect(struct net *net, u16 zone,
2854 exp->class = class; 2850 exp->class = class;
2855 exp->master = ct; 2851 exp->master = ct;
2856 exp->helper = helper; 2852 exp->helper = helper;
2857 memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); 2853 exp->tuple = *tuple;
2858 memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3)); 2854 exp->mask.src.u3 = mask->src.u3;
2859 exp->mask.src.u.all = mask.src.u.all; 2855 exp->mask.src.u.all = mask->src.u.all;
2860 2856
2861 if (cda[CTA_EXPECT_NAT]) { 2857 if (cda[CTA_EXPECT_NAT]) {
2862 err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT], 2858 err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT],
2863 exp, u3); 2859 exp, nf_ct_l3num(ct));
2864 if (err < 0) 2860 if (err < 0)
2865 goto err_out; 2861 goto err_out;
2866 } 2862 }
2867 err = nf_ct_expect_related_report(exp, portid, report); 2863 return exp;
2868err_out: 2864err_out:
2869 nf_ct_expect_put(exp); 2865 nf_ct_expect_put(exp);
2870out: 2866 return ERR_PTR(err);
2871 nf_ct_put(nf_ct_tuplehash_to_ctrack(h)); 2867}
2868
2869static int
2870ctnetlink_create_expect(struct net *net, u16 zone,
2871 const struct nlattr * const cda[],
2872 u_int8_t u3, u32 portid, int report)
2873{
2874 struct nf_conntrack_tuple tuple, mask, master_tuple;
2875 struct nf_conntrack_tuple_hash *h = NULL;
2876 struct nf_conntrack_helper *helper = NULL;
2877 struct nf_conntrack_expect *exp;
2878 struct nf_conn *ct;
2879 int err;
2880
2881 /* caller guarantees that those three CTA_EXPECT_* exist */
2882 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
2883 if (err < 0)
2884 return err;
2885 err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
2886 if (err < 0)
2887 return err;
2888 err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
2889 if (err < 0)
2890 return err;
2891
2892 /* Look for master conntrack of this expectation */
2893 h = nf_conntrack_find_get(net, zone, &master_tuple);
2894 if (!h)
2895 return -ENOENT;
2896 ct = nf_ct_tuplehash_to_ctrack(h);
2897
2898 if (cda[CTA_EXPECT_HELP_NAME]) {
2899 const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
2900
2901 helper = __nf_conntrack_helper_find(helpname, u3,
2902 nf_ct_protonum(ct));
2903 if (helper == NULL) {
2904#ifdef CONFIG_MODULES
2905 if (request_module("nfct-helper-%s", helpname) < 0) {
2906 err = -EOPNOTSUPP;
2907 goto err_ct;
2908 }
2909 helper = __nf_conntrack_helper_find(helpname, u3,
2910 nf_ct_protonum(ct));
2911 if (helper) {
2912 err = -EAGAIN;
2913 goto err_ct;
2914 }
2915#endif
2916 err = -EOPNOTSUPP;
2917 goto err_ct;
2918 }
2919 }
2920
2921 exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
2922 if (IS_ERR(exp)) {
2923 err = PTR_ERR(exp);
2924 goto err_ct;
2925 }
2926
2927 err = nf_ct_expect_related_report(exp, portid, report);
2928 if (err < 0)
2929 goto err_exp;
2930
2931 return 0;
2932err_exp:
2933 nf_ct_expect_put(exp);
2934err_ct:
2935 nf_ct_put(ct);
2872 return err; 2936 return err;
2873} 2937}
2874 2938
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 0ab9636ac57e..ce3004156eeb 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -281,7 +281,7 @@ void nf_ct_l3proto_pernet_unregister(struct net *net,
281 nf_ct_l3proto_unregister_sysctl(net, proto); 281 nf_ct_l3proto_unregister_sysctl(net, proto);
282 282
283 /* Remove all contrack entries for this protocol */ 283 /* Remove all contrack entries for this protocol */
284 nf_ct_iterate_cleanup(net, kill_l3proto, proto); 284 nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
285} 285}
286EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); 286EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
287 287
@@ -476,7 +476,7 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
476 nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); 476 nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
477 477
478 /* Remove all contrack entries for this protocol */ 478 /* Remove all contrack entries for this protocol */
479 nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); 479 nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
480} 480}
481EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); 481EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
482 482
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 2f8010707d01..44d1ea32570a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -27,6 +27,8 @@
27#include <net/netfilter/nf_conntrack.h> 27#include <net/netfilter/nf_conntrack.h>
28#include <net/netfilter/nf_conntrack_l4proto.h> 28#include <net/netfilter/nf_conntrack_l4proto.h>
29#include <net/netfilter/nf_conntrack_ecache.h> 29#include <net/netfilter/nf_conntrack_ecache.h>
30#include <net/netfilter/nf_conntrack_seqadj.h>
31#include <net/netfilter/nf_conntrack_synproxy.h>
30#include <net/netfilter/nf_log.h> 32#include <net/netfilter/nf_log.h>
31#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 33#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
32#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 34#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -495,21 +497,6 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
495 } 497 }
496} 498}
497 499
498#ifdef CONFIG_NF_NAT_NEEDED
499static inline s16 nat_offset(const struct nf_conn *ct,
500 enum ip_conntrack_dir dir,
501 u32 seq)
502{
503 typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
504
505 return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
506}
507#define NAT_OFFSET(ct, dir, seq) \
508 (nat_offset(ct, dir, seq))
509#else
510#define NAT_OFFSET(ct, dir, seq) 0
511#endif
512
513static bool tcp_in_window(const struct nf_conn *ct, 500static bool tcp_in_window(const struct nf_conn *ct,
514 struct ip_ct_tcp *state, 501 struct ip_ct_tcp *state,
515 enum ip_conntrack_dir dir, 502 enum ip_conntrack_dir dir,
@@ -525,7 +512,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
525 struct ip_ct_tcp_state *receiver = &state->seen[!dir]; 512 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
526 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; 513 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
527 __u32 seq, ack, sack, end, win, swin; 514 __u32 seq, ack, sack, end, win, swin;
528 s16 receiver_offset; 515 s32 receiver_offset;
529 bool res, in_recv_win; 516 bool res, in_recv_win;
530 517
531 /* 518 /*
@@ -540,7 +527,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
540 tcp_sack(skb, dataoff, tcph, &sack); 527 tcp_sack(skb, dataoff, tcph, &sack);
541 528
542 /* Take into account NAT sequence number mangling */ 529 /* Take into account NAT sequence number mangling */
543 receiver_offset = NAT_OFFSET(ct, !dir, ack - 1); 530 receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
544 ack -= receiver_offset; 531 ack -= receiver_offset;
545 sack -= receiver_offset; 532 sack -= receiver_offset;
546 533
@@ -960,6 +947,21 @@ static int tcp_packet(struct nf_conn *ct,
960 "state %s ", tcp_conntrack_names[old_state]); 947 "state %s ", tcp_conntrack_names[old_state]);
961 return NF_ACCEPT; 948 return NF_ACCEPT;
962 case TCP_CONNTRACK_MAX: 949 case TCP_CONNTRACK_MAX:
950 /* Special case for SYN proxy: when the SYN to the server or
951 * the SYN/ACK from the server is lost, the client may transmit
952 * a keep-alive packet while in SYN_SENT state. This needs to
953 * be associated with the original conntrack entry in order to
954 * generate a new SYN with the correct sequence number.
955 */
956 if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
957 index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
958 ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
959 ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
960 pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
961 spin_unlock_bh(&ct->lock);
962 return NF_ACCEPT;
963 }
964
963 /* Invalid packet */ 965 /* Invalid packet */
964 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", 966 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
965 dir, get_conntrack_index(th), old_state); 967 dir, get_conntrack_index(th), old_state);
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
new file mode 100644
index 000000000000..5f9bfd060dea
--- /dev/null
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -0,0 +1,238 @@
1#include <linux/types.h>
2#include <linux/netfilter.h>
3#include <net/tcp.h>
4
5#include <net/netfilter/nf_conntrack.h>
6#include <net/netfilter/nf_conntrack_extend.h>
7#include <net/netfilter/nf_conntrack_seqadj.h>
8
9int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
10 s32 off)
11{
12 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
13 struct nf_conn_seqadj *seqadj;
14 struct nf_ct_seqadj *this_way;
15
16 if (off == 0)
17 return 0;
18
19 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
20
21 seqadj = nfct_seqadj(ct);
22 this_way = &seqadj->seq[dir];
23 this_way->offset_before = off;
24 this_way->offset_after = off;
25 return 0;
26}
27EXPORT_SYMBOL_GPL(nf_ct_seqadj_init);
28
29int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
30 __be32 seq, s32 off)
31{
32 struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
33 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
34 struct nf_ct_seqadj *this_way;
35
36 if (off == 0)
37 return 0;
38
39 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
40
41 spin_lock_bh(&ct->lock);
42 this_way = &seqadj->seq[dir];
43 if (this_way->offset_before == this_way->offset_after ||
44 before(this_way->correction_pos, seq)) {
45 this_way->correction_pos = seq;
46 this_way->offset_before = this_way->offset_after;
47 this_way->offset_after += off;
48 }
49 spin_unlock_bh(&ct->lock);
50 return 0;
51}
52EXPORT_SYMBOL_GPL(nf_ct_seqadj_set);
53
54void nf_ct_tcp_seqadj_set(struct sk_buff *skb,
55 struct nf_conn *ct, enum ip_conntrack_info ctinfo,
56 s32 off)
57{
58 const struct tcphdr *th;
59
60 if (nf_ct_protonum(ct) != IPPROTO_TCP)
61 return;
62
63 th = (struct tcphdr *)(skb_network_header(skb) + ip_hdrlen(skb));
64 nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
65}
66EXPORT_SYMBOL_GPL(nf_ct_tcp_seqadj_set);
67
68/* Adjust one found SACK option including checksum correction */
69static void nf_ct_sack_block_adjust(struct sk_buff *skb,
70 struct tcphdr *tcph,
71 unsigned int sackoff,
72 unsigned int sackend,
73 struct nf_ct_seqadj *seq)
74{
75 while (sackoff < sackend) {
76 struct tcp_sack_block_wire *sack;
77 __be32 new_start_seq, new_end_seq;
78
79 sack = (void *)skb->data + sackoff;
80 if (after(ntohl(sack->start_seq) - seq->offset_before,
81 seq->correction_pos))
82 new_start_seq = htonl(ntohl(sack->start_seq) -
83 seq->offset_after);
84 else
85 new_start_seq = htonl(ntohl(sack->start_seq) -
86 seq->offset_before);
87
88 if (after(ntohl(sack->end_seq) - seq->offset_before,
89 seq->correction_pos))
90 new_end_seq = htonl(ntohl(sack->end_seq) -
91 seq->offset_after);
92 else
93 new_end_seq = htonl(ntohl(sack->end_seq) -
94 seq->offset_before);
95
96 pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
97 ntohl(sack->start_seq), new_start_seq,
98 ntohl(sack->end_seq), new_end_seq);
99
100 inet_proto_csum_replace4(&tcph->check, skb,
101 sack->start_seq, new_start_seq, 0);
102 inet_proto_csum_replace4(&tcph->check, skb,
103 sack->end_seq, new_end_seq, 0);
104 sack->start_seq = new_start_seq;
105 sack->end_seq = new_end_seq;
106 sackoff += sizeof(*sack);
107 }
108}
109
110/* TCP SACK sequence number adjustment */
111static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
112 unsigned int protoff,
113 struct tcphdr *tcph,
114 struct nf_conn *ct,
115 enum ip_conntrack_info ctinfo)
116{
117 unsigned int dir, optoff, optend;
118 struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
119
120 optoff = protoff + sizeof(struct tcphdr);
121 optend = protoff + tcph->doff * 4;
122
123 if (!skb_make_writable(skb, optend))
124 return 0;
125
126 dir = CTINFO2DIR(ctinfo);
127
128 while (optoff < optend) {
129 /* Usually: option, length. */
130 unsigned char *op = skb->data + optoff;
131
132 switch (op[0]) {
133 case TCPOPT_EOL:
134 return 1;
135 case TCPOPT_NOP:
136 optoff++;
137 continue;
138 default:
139 /* no partial options */
140 if (optoff + 1 == optend ||
141 optoff + op[1] > optend ||
142 op[1] < 2)
143 return 0;
144 if (op[0] == TCPOPT_SACK &&
145 op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
146 ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
147 nf_ct_sack_block_adjust(skb, tcph, optoff + 2,
148 optoff+op[1],
149 &seqadj->seq[!dir]);
150 optoff += op[1];
151 }
152 }
153 return 1;
154}
155
156/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
157int nf_ct_seq_adjust(struct sk_buff *skb,
158 struct nf_conn *ct, enum ip_conntrack_info ctinfo,
159 unsigned int protoff)
160{
161 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
162 struct tcphdr *tcph;
163 __be32 newseq, newack;
164 s32 seqoff, ackoff;
165 struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
166 struct nf_ct_seqadj *this_way, *other_way;
167 int res;
168
169 this_way = &seqadj->seq[dir];
170 other_way = &seqadj->seq[!dir];
171
172 if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
173 return 0;
174
175 tcph = (void *)skb->data + protoff;
176 spin_lock_bh(&ct->lock);
177 if (after(ntohl(tcph->seq), this_way->correction_pos))
178 seqoff = this_way->offset_after;
179 else
180 seqoff = this_way->offset_before;
181
182 if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
183 other_way->correction_pos))
184 ackoff = other_way->offset_after;
185 else
186 ackoff = other_way->offset_before;
187
188 newseq = htonl(ntohl(tcph->seq) + seqoff);
189 newack = htonl(ntohl(tcph->ack_seq) - ackoff);
190
191 inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
192 inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
193
194 pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
195 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
196 ntohl(newack));
197
198 tcph->seq = newseq;
199 tcph->ack_seq = newack;
200
201 res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
202 spin_unlock_bh(&ct->lock);
203
204 return res;
205}
206EXPORT_SYMBOL_GPL(nf_ct_seq_adjust);
207
208s32 nf_ct_seq_offset(const struct nf_conn *ct,
209 enum ip_conntrack_dir dir,
210 u32 seq)
211{
212 struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
213 struct nf_ct_seqadj *this_way;
214
215 if (!seqadj)
216 return 0;
217
218 this_way = &seqadj->seq[dir];
219 return after(seq, this_way->correction_pos) ?
220 this_way->offset_after : this_way->offset_before;
221}
222EXPORT_SYMBOL_GPL(nf_ct_seq_offset);
223
224static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = {
225 .len = sizeof(struct nf_conn_seqadj),
226 .align = __alignof__(struct nf_conn_seqadj),
227 .id = NF_CT_EXT_SEQADJ,
228};
229
230int nf_conntrack_seqadj_init(void)
231{
232 return nf_ct_extend_register(&nf_ct_seqadj_extend);
233}
234
235void nf_conntrack_seqadj_fini(void)
236{
237 nf_ct_extend_unregister(&nf_ct_seqadj_extend);
238}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 038eee5c8f85..6f0f4f7f68a5 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -25,6 +25,7 @@
25#include <net/netfilter/nf_nat_core.h> 25#include <net/netfilter/nf_nat_core.h>
26#include <net/netfilter/nf_nat_helper.h> 26#include <net/netfilter/nf_nat_helper.h>
27#include <net/netfilter/nf_conntrack_helper.h> 27#include <net/netfilter/nf_conntrack_helper.h>
28#include <net/netfilter/nf_conntrack_seqadj.h>
28#include <net/netfilter/nf_conntrack_l3proto.h> 29#include <net/netfilter/nf_conntrack_l3proto.h>
29#include <net/netfilter/nf_conntrack_zones.h> 30#include <net/netfilter/nf_conntrack_zones.h>
30#include <linux/netfilter/nf_nat.h> 31#include <linux/netfilter/nf_nat.h>
@@ -402,6 +403,9 @@ nf_nat_setup_info(struct nf_conn *ct,
402 ct->status |= IPS_SRC_NAT; 403 ct->status |= IPS_SRC_NAT;
403 else 404 else
404 ct->status |= IPS_DST_NAT; 405 ct->status |= IPS_DST_NAT;
406
407 if (nfct_help(ct))
408 nfct_seqadj_ext_add(ct);
405 } 409 }
406 410
407 if (maniptype == NF_NAT_MANIP_SRC) { 411 if (maniptype == NF_NAT_MANIP_SRC) {
@@ -497,7 +501,7 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
497 501
498 rtnl_lock(); 502 rtnl_lock();
499 for_each_net(net) 503 for_each_net(net)
500 nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); 504 nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
501 rtnl_unlock(); 505 rtnl_unlock();
502} 506}
503 507
@@ -511,7 +515,7 @@ static void nf_nat_l3proto_clean(u8 l3proto)
511 rtnl_lock(); 515 rtnl_lock();
512 516
513 for_each_net(net) 517 for_each_net(net)
514 nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); 518 nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
515 rtnl_unlock(); 519 rtnl_unlock();
516} 520}
517 521
@@ -749,7 +753,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
749{ 753{
750 struct nf_nat_proto_clean clean = {}; 754 struct nf_nat_proto_clean clean = {};
751 755
752 nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean); 756 nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
753 synchronize_rcu(); 757 synchronize_rcu();
754 nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); 758 nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
755} 759}
@@ -764,10 +768,6 @@ static struct nf_ct_helper_expectfn follow_master_nat = {
764 .expectfn = nf_nat_follow_master, 768 .expectfn = nf_nat_follow_master,
765}; 769};
766 770
767static struct nfq_ct_nat_hook nfq_ct_nat = {
768 .seq_adjust = nf_nat_tcp_seq_adjust,
769};
770
771static int __init nf_nat_init(void) 771static int __init nf_nat_init(void)
772{ 772{
773 int ret; 773 int ret;
@@ -787,14 +787,9 @@ static int __init nf_nat_init(void)
787 /* Initialize fake conntrack so that NAT will skip it */ 787 /* Initialize fake conntrack so that NAT will skip it */
788 nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); 788 nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
789 789
790 BUG_ON(nf_nat_seq_adjust_hook != NULL);
791 RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
792 BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); 790 BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
793 RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, 791 RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
794 nfnetlink_parse_nat_setup); 792 nfnetlink_parse_nat_setup);
795 BUG_ON(nf_ct_nat_offset != NULL);
796 RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
797 RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
798#ifdef CONFIG_XFRM 793#ifdef CONFIG_XFRM
799 BUG_ON(nf_nat_decode_session_hook != NULL); 794 BUG_ON(nf_nat_decode_session_hook != NULL);
800 RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session); 795 RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
@@ -813,10 +808,7 @@ static void __exit nf_nat_cleanup(void)
813 unregister_pernet_subsys(&nf_nat_net_ops); 808 unregister_pernet_subsys(&nf_nat_net_ops);
814 nf_ct_extend_unregister(&nat_extend); 809 nf_ct_extend_unregister(&nat_extend);
815 nf_ct_helper_expectfn_unregister(&follow_master_nat); 810 nf_ct_helper_expectfn_unregister(&follow_master_nat);
816 RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
817 RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); 811 RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
818 RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
819 RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
820#ifdef CONFIG_XFRM 812#ifdef CONFIG_XFRM
821 RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); 813 RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
822#endif 814#endif
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 85e20a919081..2840abb5bb99 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -20,74 +20,13 @@
20#include <net/netfilter/nf_conntrack_helper.h> 20#include <net/netfilter/nf_conntrack_helper.h>
21#include <net/netfilter/nf_conntrack_ecache.h> 21#include <net/netfilter/nf_conntrack_ecache.h>
22#include <net/netfilter/nf_conntrack_expect.h> 22#include <net/netfilter/nf_conntrack_expect.h>
23#include <net/netfilter/nf_conntrack_seqadj.h>
23#include <net/netfilter/nf_nat.h> 24#include <net/netfilter/nf_nat.h>
24#include <net/netfilter/nf_nat_l3proto.h> 25#include <net/netfilter/nf_nat_l3proto.h>
25#include <net/netfilter/nf_nat_l4proto.h> 26#include <net/netfilter/nf_nat_l4proto.h>
26#include <net/netfilter/nf_nat_core.h> 27#include <net/netfilter/nf_nat_core.h>
27#include <net/netfilter/nf_nat_helper.h> 28#include <net/netfilter/nf_nat_helper.h>
28 29
29#define DUMP_OFFSET(x) \
30 pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
31 x->offset_before, x->offset_after, x->correction_pos);
32
33static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
34
35/* Setup TCP sequence correction given this change at this sequence */
36static inline void
37adjust_tcp_sequence(u32 seq,
38 int sizediff,
39 struct nf_conn *ct,
40 enum ip_conntrack_info ctinfo)
41{
42 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
43 struct nf_conn_nat *nat = nfct_nat(ct);
44 struct nf_nat_seq *this_way = &nat->seq[dir];
45
46 pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
47 seq, sizediff);
48
49 pr_debug("adjust_tcp_sequence: Seq_offset before: ");
50 DUMP_OFFSET(this_way);
51
52 spin_lock_bh(&nf_nat_seqofs_lock);
53
54 /* SYN adjust. If it's uninitialized, or this is after last
55 * correction, record it: we don't handle more than one
56 * adjustment in the window, but do deal with common case of a
57 * retransmit */
58 if (this_way->offset_before == this_way->offset_after ||
59 before(this_way->correction_pos, seq)) {
60 this_way->correction_pos = seq;
61 this_way->offset_before = this_way->offset_after;
62 this_way->offset_after += sizediff;
63 }
64 spin_unlock_bh(&nf_nat_seqofs_lock);
65
66 pr_debug("adjust_tcp_sequence: Seq_offset after: ");
67 DUMP_OFFSET(this_way);
68}
69
70/* Get the offset value, for conntrack */
71s16 nf_nat_get_offset(const struct nf_conn *ct,
72 enum ip_conntrack_dir dir,
73 u32 seq)
74{
75 struct nf_conn_nat *nat = nfct_nat(ct);
76 struct nf_nat_seq *this_way;
77 s16 offset;
78
79 if (!nat)
80 return 0;
81
82 this_way = &nat->seq[dir];
83 spin_lock_bh(&nf_nat_seqofs_lock);
84 offset = after(seq, this_way->correction_pos)
85 ? this_way->offset_after : this_way->offset_before;
86 spin_unlock_bh(&nf_nat_seqofs_lock);
87
88 return offset;
89}
90
91/* Frobs data inside this packet, which is linear. */ 30/* Frobs data inside this packet, which is linear. */
92static void mangle_contents(struct sk_buff *skb, 31static void mangle_contents(struct sk_buff *skb,
93 unsigned int dataoff, 32 unsigned int dataoff,
@@ -142,30 +81,6 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
142 return 1; 81 return 1;
143} 82}
144 83
145void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
146 __be32 seq, s16 off)
147{
148 if (!off)
149 return;
150 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
151 adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
152 nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
153}
154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
155
156void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
157 u32 ctinfo, int off)
158{
159 const struct tcphdr *th;
160
161 if (nf_ct_protonum(ct) != IPPROTO_TCP)
162 return;
163
164 th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb));
165 nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
166}
167EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
168
169/* Generic function for mangling variable-length address changes inside 84/* Generic function for mangling variable-length address changes inside
170 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX 85 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
171 * command in FTP). 86 * command in FTP).
@@ -210,8 +125,8 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
210 datalen, oldlen); 125 datalen, oldlen);
211 126
212 if (adjust && rep_len != match_len) 127 if (adjust && rep_len != match_len)
213 nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, 128 nf_ct_seqadj_set(ct, ctinfo, tcph->seq,
214 (int)rep_len - (int)match_len); 129 (int)rep_len - (int)match_len);
215 130
216 return 1; 131 return 1;
217} 132}
@@ -271,145 +186,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
271} 186}
272EXPORT_SYMBOL(nf_nat_mangle_udp_packet); 187EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
273 188
274/* Adjust one found SACK option including checksum correction */
275static void
276sack_adjust(struct sk_buff *skb,
277 struct tcphdr *tcph,
278 unsigned int sackoff,
279 unsigned int sackend,
280 struct nf_nat_seq *natseq)
281{
282 while (sackoff < sackend) {
283 struct tcp_sack_block_wire *sack;
284 __be32 new_start_seq, new_end_seq;
285
286 sack = (void *)skb->data + sackoff;
287 if (after(ntohl(sack->start_seq) - natseq->offset_before,
288 natseq->correction_pos))
289 new_start_seq = htonl(ntohl(sack->start_seq)
290 - natseq->offset_after);
291 else
292 new_start_seq = htonl(ntohl(sack->start_seq)
293 - natseq->offset_before);
294
295 if (after(ntohl(sack->end_seq) - natseq->offset_before,
296 natseq->correction_pos))
297 new_end_seq = htonl(ntohl(sack->end_seq)
298 - natseq->offset_after);
299 else
300 new_end_seq = htonl(ntohl(sack->end_seq)
301 - natseq->offset_before);
302
303 pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
304 ntohl(sack->start_seq), new_start_seq,
305 ntohl(sack->end_seq), new_end_seq);
306
307 inet_proto_csum_replace4(&tcph->check, skb,
308 sack->start_seq, new_start_seq, 0);
309 inet_proto_csum_replace4(&tcph->check, skb,
310 sack->end_seq, new_end_seq, 0);
311 sack->start_seq = new_start_seq;
312 sack->end_seq = new_end_seq;
313 sackoff += sizeof(*sack);
314 }
315}
316
317/* TCP SACK sequence number adjustment */
318static inline unsigned int
319nf_nat_sack_adjust(struct sk_buff *skb,
320 unsigned int protoff,
321 struct tcphdr *tcph,
322 struct nf_conn *ct,
323 enum ip_conntrack_info ctinfo)
324{
325 unsigned int dir, optoff, optend;
326 struct nf_conn_nat *nat = nfct_nat(ct);
327
328 optoff = protoff + sizeof(struct tcphdr);
329 optend = protoff + tcph->doff * 4;
330
331 if (!skb_make_writable(skb, optend))
332 return 0;
333
334 dir = CTINFO2DIR(ctinfo);
335
336 while (optoff < optend) {
337 /* Usually: option, length. */
338 unsigned char *op = skb->data + optoff;
339
340 switch (op[0]) {
341 case TCPOPT_EOL:
342 return 1;
343 case TCPOPT_NOP:
344 optoff++;
345 continue;
346 default:
347 /* no partial options */
348 if (optoff + 1 == optend ||
349 optoff + op[1] > optend ||
350 op[1] < 2)
351 return 0;
352 if (op[0] == TCPOPT_SACK &&
353 op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
354 ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
355 sack_adjust(skb, tcph, optoff+2,
356 optoff+op[1], &nat->seq[!dir]);
357 optoff += op[1];
358 }
359 }
360 return 1;
361}
362
363/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
364int
365nf_nat_seq_adjust(struct sk_buff *skb,
366 struct nf_conn *ct,
367 enum ip_conntrack_info ctinfo,
368 unsigned int protoff)
369{
370 struct tcphdr *tcph;
371 int dir;
372 __be32 newseq, newack;
373 s16 seqoff, ackoff;
374 struct nf_conn_nat *nat = nfct_nat(ct);
375 struct nf_nat_seq *this_way, *other_way;
376
377 dir = CTINFO2DIR(ctinfo);
378
379 this_way = &nat->seq[dir];
380 other_way = &nat->seq[!dir];
381
382 if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
383 return 0;
384
385 tcph = (void *)skb->data + protoff;
386 if (after(ntohl(tcph->seq), this_way->correction_pos))
387 seqoff = this_way->offset_after;
388 else
389 seqoff = this_way->offset_before;
390
391 if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
392 other_way->correction_pos))
393 ackoff = other_way->offset_after;
394 else
395 ackoff = other_way->offset_before;
396
397 newseq = htonl(ntohl(tcph->seq) + seqoff);
398 newack = htonl(ntohl(tcph->ack_seq) - ackoff);
399
400 inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
401 inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
402
403 pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
404 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
405 ntohl(newack));
406
407 tcph->seq = newseq;
408 tcph->ack_seq = newack;
409
410 return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
411}
412
413/* Setup NAT on this expected conntrack so it follows master. */ 189/* Setup NAT on this expected conntrack so it follows master. */
414/* If we fail to get a free NAT slot, we'll get dropped on confirm */ 190/* If we fail to get a free NAT slot, we'll get dropped on confirm */
415void nf_nat_follow_master(struct nf_conn *ct, 191void nf_nat_follow_master(struct nf_conn *ct,
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 396e55d46f90..754536f2c674 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -34,9 +34,7 @@ sctp_manip_pkt(struct sk_buff *skb,
34 const struct nf_conntrack_tuple *tuple, 34 const struct nf_conntrack_tuple *tuple,
35 enum nf_nat_manip_type maniptype) 35 enum nf_nat_manip_type maniptype)
36{ 36{
37 struct sk_buff *frag;
38 sctp_sctphdr_t *hdr; 37 sctp_sctphdr_t *hdr;
39 __u32 crc32;
40 38
41 if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) 39 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
42 return false; 40 return false;
@@ -51,11 +49,7 @@ sctp_manip_pkt(struct sk_buff *skb,
51 hdr->dest = tuple->dst.u.sctp.port; 49 hdr->dest = tuple->dst.u.sctp.port;
52 } 50 }
53 51
54 crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff); 52 hdr->checksum = sctp_compute_cksum(skb, hdroff);
55 skb_walk_frags(skb, frag)
56 crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
57 crc32);
58 hdr->checksum = sctp_end_cksum(crc32);
59 53
60 return true; 54 return true;
61} 55}
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index dac11f73868e..f9790405b7ff 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -20,6 +20,7 @@
20#include <net/netfilter/nf_nat_helper.h> 20#include <net/netfilter/nf_nat_helper.h>
21#include <net/netfilter/nf_conntrack_helper.h> 21#include <net/netfilter/nf_conntrack_helper.h>
22#include <net/netfilter/nf_conntrack_expect.h> 22#include <net/netfilter/nf_conntrack_expect.h>
23#include <net/netfilter/nf_conntrack_seqadj.h>
23#include <linux/netfilter/nf_conntrack_sip.h> 24#include <linux/netfilter/nf_conntrack_sip.h>
24 25
25MODULE_LICENSE("GPL"); 26MODULE_LICENSE("GPL");
@@ -308,7 +309,7 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
308 return; 309 return;
309 310
310 th = (struct tcphdr *)(skb->data + protoff); 311 th = (struct tcphdr *)(skb->data + protoff);
311 nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); 312 nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
312} 313}
313 314
314/* Handles expected signalling connections and media streams */ 315/* Handles expected signalling connections and media streams */
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
new file mode 100644
index 000000000000..6fd967c6278c
--- /dev/null
+++ b/net/netfilter/nf_synproxy_core.c
@@ -0,0 +1,432 @@
1/*
2 * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <asm/unaligned.h>
12#include <net/tcp.h>
13#include <net/netns/generic.h>
14
15#include <linux/netfilter_ipv4/ip_tables.h>
16#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter/xt_tcpudp.h>
18#include <linux/netfilter/xt_SYNPROXY.h>
19#include <net/netfilter/nf_conntrack.h>
20#include <net/netfilter/nf_conntrack_extend.h>
21#include <net/netfilter/nf_conntrack_seqadj.h>
22#include <net/netfilter/nf_conntrack_synproxy.h>
23
24int synproxy_net_id;
25EXPORT_SYMBOL_GPL(synproxy_net_id);
26
27void
28synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
29 const struct tcphdr *th, struct synproxy_options *opts)
30{
31 int length = (th->doff * 4) - sizeof(*th);
32 u8 buf[40], *ptr;
33
34 ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
35 BUG_ON(ptr == NULL);
36
37 opts->options = 0;
38 while (length > 0) {
39 int opcode = *ptr++;
40 int opsize;
41
42 switch (opcode) {
43 case TCPOPT_EOL:
44 return;
45 case TCPOPT_NOP:
46 length--;
47 continue;
48 default:
49 opsize = *ptr++;
50 if (opsize < 2)
51 return;
52 if (opsize > length)
53 return;
54
55 switch (opcode) {
56 case TCPOPT_MSS:
57 if (opsize == TCPOLEN_MSS) {
58 opts->mss = get_unaligned_be16(ptr);
59 opts->options |= XT_SYNPROXY_OPT_MSS;
60 }
61 break;
62 case TCPOPT_WINDOW:
63 if (opsize == TCPOLEN_WINDOW) {
64 opts->wscale = *ptr;
65 if (opts->wscale > 14)
66 opts->wscale = 14;
67 opts->options |= XT_SYNPROXY_OPT_WSCALE;
68 }
69 break;
70 case TCPOPT_TIMESTAMP:
71 if (opsize == TCPOLEN_TIMESTAMP) {
72 opts->tsval = get_unaligned_be32(ptr);
73 opts->tsecr = get_unaligned_be32(ptr + 4);
74 opts->options |= XT_SYNPROXY_OPT_TIMESTAMP;
75 }
76 break;
77 case TCPOPT_SACK_PERM:
78 if (opsize == TCPOLEN_SACK_PERM)
79 opts->options |= XT_SYNPROXY_OPT_SACK_PERM;
80 break;
81 }
82
83 ptr += opsize - 2;
84 length -= opsize;
85 }
86 }
87}
88EXPORT_SYMBOL_GPL(synproxy_parse_options);
89
90unsigned int synproxy_options_size(const struct synproxy_options *opts)
91{
92 unsigned int size = 0;
93
94 if (opts->options & XT_SYNPROXY_OPT_MSS)
95 size += TCPOLEN_MSS_ALIGNED;
96 if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
97 size += TCPOLEN_TSTAMP_ALIGNED;
98 else if (opts->options & XT_SYNPROXY_OPT_SACK_PERM)
99 size += TCPOLEN_SACKPERM_ALIGNED;
100 if (opts->options & XT_SYNPROXY_OPT_WSCALE)
101 size += TCPOLEN_WSCALE_ALIGNED;
102
103 return size;
104}
105EXPORT_SYMBOL_GPL(synproxy_options_size);
106
107void
108synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
109{
110 __be32 *ptr = (__be32 *)(th + 1);
111 u8 options = opts->options;
112
113 if (options & XT_SYNPROXY_OPT_MSS)
114 *ptr++ = htonl((TCPOPT_MSS << 24) |
115 (TCPOLEN_MSS << 16) |
116 opts->mss);
117
118 if (options & XT_SYNPROXY_OPT_TIMESTAMP) {
119 if (options & XT_SYNPROXY_OPT_SACK_PERM)
120 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
121 (TCPOLEN_SACK_PERM << 16) |
122 (TCPOPT_TIMESTAMP << 8) |
123 TCPOLEN_TIMESTAMP);
124 else
125 *ptr++ = htonl((TCPOPT_NOP << 24) |
126 (TCPOPT_NOP << 16) |
127 (TCPOPT_TIMESTAMP << 8) |
128 TCPOLEN_TIMESTAMP);
129
130 *ptr++ = htonl(opts->tsval);
131 *ptr++ = htonl(opts->tsecr);
132 } else if (options & XT_SYNPROXY_OPT_SACK_PERM)
133 *ptr++ = htonl((TCPOPT_NOP << 24) |
134 (TCPOPT_NOP << 16) |
135 (TCPOPT_SACK_PERM << 8) |
136 TCPOLEN_SACK_PERM);
137
138 if (options & XT_SYNPROXY_OPT_WSCALE)
139 *ptr++ = htonl((TCPOPT_NOP << 24) |
140 (TCPOPT_WINDOW << 16) |
141 (TCPOLEN_WINDOW << 8) |
142 opts->wscale);
143}
144EXPORT_SYMBOL_GPL(synproxy_build_options);
145
146void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
147 struct synproxy_options *opts)
148{
149 opts->tsecr = opts->tsval;
150 opts->tsval = tcp_time_stamp & ~0x3f;
151
152 if (opts->options & XT_SYNPROXY_OPT_WSCALE)
153 opts->tsval |= info->wscale;
154 else
155 opts->tsval |= 0xf;
156
157 if (opts->options & XT_SYNPROXY_OPT_SACK_PERM)
158 opts->tsval |= 1 << 4;
159
160 if (opts->options & XT_SYNPROXY_OPT_ECN)
161 opts->tsval |= 1 << 5;
162}
163EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie);
164
165void synproxy_check_timestamp_cookie(struct synproxy_options *opts)
166{
167 opts->wscale = opts->tsecr & 0xf;
168 if (opts->wscale != 0xf)
169 opts->options |= XT_SYNPROXY_OPT_WSCALE;
170
171 opts->options |= opts->tsecr & (1 << 4) ? XT_SYNPROXY_OPT_SACK_PERM : 0;
172
173 opts->options |= opts->tsecr & (1 << 5) ? XT_SYNPROXY_OPT_ECN : 0;
174}
175EXPORT_SYMBOL_GPL(synproxy_check_timestamp_cookie);
176
177unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
178 unsigned int protoff,
179 struct tcphdr *th,
180 struct nf_conn *ct,
181 enum ip_conntrack_info ctinfo,
182 const struct nf_conn_synproxy *synproxy)
183{
184 unsigned int optoff, optend;
185 u32 *ptr, old;
186
187 if (synproxy->tsoff == 0)
188 return 1;
189
190 optoff = protoff + sizeof(struct tcphdr);
191 optend = protoff + th->doff * 4;
192
193 if (!skb_make_writable(skb, optend))
194 return 0;
195
196 while (optoff < optend) {
197 unsigned char *op = skb->data + optoff;
198
199 switch (op[0]) {
200 case TCPOPT_EOL:
201 return 1;
202 case TCPOPT_NOP:
203 optoff++;
204 continue;
205 default:
206 if (optoff + 1 == optend ||
207 optoff + op[1] > optend ||
208 op[1] < 2)
209 return 0;
210 if (op[0] == TCPOPT_TIMESTAMP &&
211 op[1] == TCPOLEN_TIMESTAMP) {
212 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
213 ptr = (u32 *)&op[2];
214 old = *ptr;
215 *ptr = htonl(ntohl(*ptr) -
216 synproxy->tsoff);
217 } else {
218 ptr = (u32 *)&op[6];
219 old = *ptr;
220 *ptr = htonl(ntohl(*ptr) +
221 synproxy->tsoff);
222 }
223 inet_proto_csum_replace4(&th->check, skb,
224 old, *ptr, 0);
225 return 1;
226 }
227 optoff += op[1];
228 }
229 }
230 return 1;
231}
232EXPORT_SYMBOL_GPL(synproxy_tstamp_adjust);
233
234static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = {
235 .len = sizeof(struct nf_conn_synproxy),
236 .align = __alignof__(struct nf_conn_synproxy),
237 .id = NF_CT_EXT_SYNPROXY,
238};
239
240#ifdef CONFIG_PROC_FS
241static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos)
242{
243 struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
244 int cpu;
245
246 if (*pos == 0)
247 return SEQ_START_TOKEN;
248
249 for (cpu = *pos - 1; cpu < nr_cpu_ids; cpu++) {
250 if (!cpu_possible(cpu))
251 continue;
252 *pos = cpu + 1;
253 return per_cpu_ptr(snet->stats, cpu);
254 }
255
256 return NULL;
257}
258
259static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
260{
261 struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
262 int cpu;
263
264 for (cpu = *pos; cpu < nr_cpu_ids; cpu++) {
265 if (!cpu_possible(cpu))
266 continue;
267 *pos = cpu + 1;
268 return per_cpu_ptr(snet->stats, cpu);
269 }
270
271 return NULL;
272}
273
274static void synproxy_cpu_seq_stop(struct seq_file *seq, void *v)
275{
276 return;
277}
278
279static int synproxy_cpu_seq_show(struct seq_file *seq, void *v)
280{
281 struct synproxy_stats *stats = v;
282
283 if (v == SEQ_START_TOKEN) {
284 seq_printf(seq, "entries\t\tsyn_received\t"
285 "cookie_invalid\tcookie_valid\t"
286 "cookie_retrans\tconn_reopened\n");
287 return 0;
288 }
289
290 seq_printf(seq, "%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n", 0,
291 stats->syn_received,
292 stats->cookie_invalid,
293 stats->cookie_valid,
294 stats->cookie_retrans,
295 stats->conn_reopened);
296
297 return 0;
298}
299
300static const struct seq_operations synproxy_cpu_seq_ops = {
301 .start = synproxy_cpu_seq_start,
302 .next = synproxy_cpu_seq_next,
303 .stop = synproxy_cpu_seq_stop,
304 .show = synproxy_cpu_seq_show,
305};
306
307static int synproxy_cpu_seq_open(struct inode *inode, struct file *file)
308{
309 return seq_open_net(inode, file, &synproxy_cpu_seq_ops,
310 sizeof(struct seq_net_private));
311}
312
313static const struct file_operations synproxy_cpu_seq_fops = {
314 .owner = THIS_MODULE,
315 .open = synproxy_cpu_seq_open,
316 .read = seq_read,
317 .llseek = seq_lseek,
318 .release = seq_release_net,
319};
320
321static int __net_init synproxy_proc_init(struct net *net)
322{
323 if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat,
324 &synproxy_cpu_seq_fops))
325 return -ENOMEM;
326 return 0;
327}
328
329static void __net_exit synproxy_proc_exit(struct net *net)
330{
331 remove_proc_entry("synproxy", net->proc_net_stat);
332}
333#else
334static int __net_init synproxy_proc_init(struct net *net)
335{
336 return 0;
337}
338
339static void __net_exit synproxy_proc_exit(struct net *net)
340{
341 return;
342}
343#endif /* CONFIG_PROC_FS */
344
345static int __net_init synproxy_net_init(struct net *net)
346{
347 struct synproxy_net *snet = synproxy_pernet(net);
348 struct nf_conntrack_tuple t;
349 struct nf_conn *ct;
350 int err = -ENOMEM;
351
352 memset(&t, 0, sizeof(t));
353 ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
354 if (IS_ERR(ct)) {
355 err = PTR_ERR(ct);
356 goto err1;
357 }
358
359 if (!nfct_seqadj_ext_add(ct))
360 goto err2;
361 if (!nfct_synproxy_ext_add(ct))
362 goto err2;
363 __set_bit(IPS_TEMPLATE_BIT, &ct->status);
364 __set_bit(IPS_CONFIRMED_BIT, &ct->status);
365
366 snet->tmpl = ct;
367
368 snet->stats = alloc_percpu(struct synproxy_stats);
369 if (snet->stats == NULL)
370 goto err2;
371
372 err = synproxy_proc_init(net);
373 if (err < 0)
374 goto err3;
375
376 return 0;
377
378err3:
379 free_percpu(snet->stats);
380err2:
381 nf_conntrack_free(ct);
382err1:
383 return err;
384}
385
386static void __net_exit synproxy_net_exit(struct net *net)
387{
388 struct synproxy_net *snet = synproxy_pernet(net);
389
390 nf_conntrack_free(snet->tmpl);
391 synproxy_proc_exit(net);
392 free_percpu(snet->stats);
393}
394
395static struct pernet_operations synproxy_net_ops = {
396 .init = synproxy_net_init,
397 .exit = synproxy_net_exit,
398 .id = &synproxy_net_id,
399 .size = sizeof(struct synproxy_net),
400};
401
402static int __init synproxy_core_init(void)
403{
404 int err;
405
406 err = nf_ct_extend_register(&nf_ct_synproxy_extend);
407 if (err < 0)
408 goto err1;
409
410 err = register_pernet_subsys(&synproxy_net_ops);
411 if (err < 0)
412 goto err2;
413
414 return 0;
415
416err2:
417 nf_ct_extend_unregister(&nf_ct_synproxy_extend);
418err1:
419 return err;
420}
421
422static void __exit synproxy_core_exit(void)
423{
424 unregister_pernet_subsys(&synproxy_net_ops);
425 nf_ct_extend_unregister(&nf_ct_synproxy_extend);
426}
427
428module_init(synproxy_core_init);
429module_exit(synproxy_core_exit);
430
431MODULE_LICENSE("GPL");
432MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
deleted file mode 100644
index 474d621cbc2e..000000000000
--- a/net/netfilter/nf_tproxy_core.c
+++ /dev/null
@@ -1,62 +0,0 @@
1/*
2 * Transparent proxy support for Linux/iptables
3 *
4 * Copyright (c) 2006-2007 BalaBit IT Ltd.
5 * Author: Balazs Scheidler, Krisztian Kovacs
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 */
12
13#include <linux/module.h>
14
15#include <linux/net.h>
16#include <linux/if.h>
17#include <linux/netdevice.h>
18#include <net/udp.h>
19#include <net/netfilter/nf_tproxy_core.h>
20
21
22static void
23nf_tproxy_destructor(struct sk_buff *skb)
24{
25 struct sock *sk = skb->sk;
26
27 skb->sk = NULL;
28 skb->destructor = NULL;
29
30 if (sk)
31 sock_put(sk);
32}
33
34/* consumes sk */
35void
36nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
37{
38 /* assigning tw sockets complicates things; most
39 * skb->sk->X checks would have to test sk->sk_state first */
40 if (sk->sk_state == TCP_TIME_WAIT) {
41 inet_twsk_put(inet_twsk(sk));
42 return;
43 }
44
45 skb_orphan(skb);
46 skb->sk = sk;
47 skb->destructor = nf_tproxy_destructor;
48}
49EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
50
51static int __init nf_tproxy_init(void)
52{
53 pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
54 pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
55 return 0;
56}
57
58module_init(nf_tproxy_init);
59
60MODULE_LICENSE("GPL");
61MODULE_AUTHOR("Krisztian Kovacs");
62MODULE_DESCRIPTION("Transparent proxy support core routines");
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 8a703c3dd318..95a98c8c1da6 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -862,6 +862,7 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
862 [NFQA_MARK] = { .type = NLA_U32 }, 862 [NFQA_MARK] = { .type = NLA_U32 },
863 [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, 863 [NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
864 [NFQA_CT] = { .type = NLA_UNSPEC }, 864 [NFQA_CT] = { .type = NLA_UNSPEC },
865 [NFQA_EXP] = { .type = NLA_UNSPEC },
865}; 866};
866 867
867static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { 868static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
@@ -990,9 +991,14 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
990 if (entry == NULL) 991 if (entry == NULL)
991 return -ENOENT; 992 return -ENOENT;
992 993
993 rcu_read_lock(); 994 if (nfqa[NFQA_CT]) {
994 if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK))
995 ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo); 995 ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
996 if (ct && nfqa[NFQA_EXP]) {
997 nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
998 NETLINK_CB(skb).portid,
999 nlmsg_report(nlh));
1000 }
1001 }
996 1002
997 if (nfqa[NFQA_PAYLOAD]) { 1003 if (nfqa[NFQA_PAYLOAD]) {
998 u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); 1004 u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
@@ -1005,7 +1011,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
1005 if (ct) 1011 if (ct)
1006 nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff); 1012 nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff);
1007 } 1013 }
1008 rcu_read_unlock();
1009 1014
1010 if (nfqa[NFQA_MARK]) 1015 if (nfqa[NFQA_MARK])
1011 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); 1016 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c
index ab61d66bc0b9..96cac50e0d12 100644
--- a/net/netfilter/nfnetlink_queue_ct.c
+++ b/net/netfilter/nfnetlink_queue_ct.c
@@ -87,12 +87,27 @@ nla_put_failure:
87void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, 87void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
88 enum ip_conntrack_info ctinfo, int diff) 88 enum ip_conntrack_info ctinfo, int diff)
89{ 89{
90 struct nfq_ct_nat_hook *nfq_nat_ct; 90 struct nfq_ct_hook *nfq_ct;
91 91
92 nfq_nat_ct = rcu_dereference(nfq_ct_nat_hook); 92 nfq_ct = rcu_dereference(nfq_ct_hook);
93 if (nfq_nat_ct == NULL) 93 if (nfq_ct == NULL)
94 return; 94 return;
95 95
96 if ((ct->status & IPS_NAT_MASK) && diff) 96 if ((ct->status & IPS_NAT_MASK) && diff)
97 nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff); 97 nfq_ct->seq_adjust(skb, ct, ctinfo, diff);
98}
99
100int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
101 u32 portid, u32 report)
102{
103 struct nfq_ct_hook *nfq_ct;
104
105 if (nf_ct_is_untracked(ct))
106 return 0;
107
108 nfq_ct = rcu_dereference(nfq_ct_hook);
109 if (nfq_ct == NULL)
110 return -EOPNOTSUPP;
111
112 return nfq_ct->attach_expect(attr, ct, portid, report);
98} 113}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 6113cc7efffc..cd24290f3b2f 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -60,7 +60,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
60 60
61 /* This is a fragment, no TCP header is available */ 61 /* This is a fragment, no TCP header is available */
62 if (par->fragoff != 0) 62 if (par->fragoff != 0)
63 return XT_CONTINUE; 63 return 0;
64 64
65 if (!skb_make_writable(skb, skb->len)) 65 if (!skb_make_writable(skb, skb->len))
66 return -1; 66 return -1;
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d7f195388f66..5d8a3a3cd5a7 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -15,7 +15,9 @@
15#include <linux/ip.h> 15#include <linux/ip.h>
16#include <net/checksum.h> 16#include <net/checksum.h>
17#include <net/udp.h> 17#include <net/udp.h>
18#include <net/tcp.h>
18#include <net/inet_sock.h> 19#include <net/inet_sock.h>
20#include <net/inet_hashtables.h>
19#include <linux/inetdevice.h> 21#include <linux/inetdevice.h>
20#include <linux/netfilter/x_tables.h> 22#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter_ipv4/ip_tables.h> 23#include <linux/netfilter_ipv4/ip_tables.h>
@@ -26,13 +28,18 @@
26#define XT_TPROXY_HAVE_IPV6 1 28#define XT_TPROXY_HAVE_IPV6 1
27#include <net/if_inet6.h> 29#include <net/if_inet6.h>
28#include <net/addrconf.h> 30#include <net/addrconf.h>
31#include <net/inet6_hashtables.h>
29#include <linux/netfilter_ipv6/ip6_tables.h> 32#include <linux/netfilter_ipv6/ip6_tables.h>
30#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 33#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
31#endif 34#endif
32 35
33#include <net/netfilter/nf_tproxy_core.h>
34#include <linux/netfilter/xt_TPROXY.h> 36#include <linux/netfilter/xt_TPROXY.h>
35 37
38enum nf_tproxy_lookup_t {
39 NFT_LOOKUP_LISTENER,
40 NFT_LOOKUP_ESTABLISHED,
41};
42
36static bool tproxy_sk_is_transparent(struct sock *sk) 43static bool tproxy_sk_is_transparent(struct sock *sk)
37{ 44{
38 if (sk->sk_state != TCP_TIME_WAIT) { 45 if (sk->sk_state != TCP_TIME_WAIT) {
@@ -68,6 +75,157 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
68 return laddr ? laddr : daddr; 75 return laddr ? laddr : daddr;
69} 76}
70 77
78/*
79 * This is used when the user wants to intercept a connection matching
80 * an explicit iptables rule. In this case the sockets are assumed
81 * matching in preference order:
82 *
83 * - match: if there's a fully established connection matching the
84 * _packet_ tuple, it is returned, assuming the redirection
85 * already took place and we process a packet belonging to an
86 * established connection
87 *
88 * - match: if there's a listening socket matching the redirection
89 * (e.g. on-port & on-ip of the connection), it is returned,
90 * regardless if it was bound to 0.0.0.0 or an explicit
91 * address. The reasoning is that if there's an explicit rule, it
92 * does not really matter if the listener is bound to an interface
93 * or to 0. The user already stated that he wants redirection
94 * (since he added the rule).
95 *
96 * Please note that there's an overlap between what a TPROXY target
97 * and a socket match will match. Normally if you have both rules the
98 * "socket" match will be the first one, effectively all packets
99 * belonging to established connections going through that one.
100 */
101static inline struct sock *
102nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
103 const __be32 saddr, const __be32 daddr,
104 const __be16 sport, const __be16 dport,
105 const struct net_device *in,
106 const enum nf_tproxy_lookup_t lookup_type)
107{
108 struct sock *sk;
109
110 switch (protocol) {
111 case IPPROTO_TCP:
112 switch (lookup_type) {
113 case NFT_LOOKUP_LISTENER:
114 sk = inet_lookup_listener(net, &tcp_hashinfo,
115 saddr, sport,
116 daddr, dport,
117 in->ifindex);
118
119 /* NOTE: we return listeners even if bound to
120 * 0.0.0.0, those are filtered out in
121 * xt_socket, since xt_TPROXY needs 0 bound
122 * listeners too
123 */
124 break;
125 case NFT_LOOKUP_ESTABLISHED:
126 sk = inet_lookup_established(net, &tcp_hashinfo,
127 saddr, sport, daddr, dport,
128 in->ifindex);
129 break;
130 default:
131 BUG();
132 }
133 break;
134 case IPPROTO_UDP:
135 sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
136 in->ifindex);
137 if (sk) {
138 int connected = (sk->sk_state == TCP_ESTABLISHED);
139 int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
140
141 /* NOTE: we return listeners even if bound to
142 * 0.0.0.0, those are filtered out in
143 * xt_socket, since xt_TPROXY needs 0 bound
144 * listeners too
145 */
146 if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
147 (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
148 sock_put(sk);
149 sk = NULL;
150 }
151 }
152 break;
153 default:
154 WARN_ON(1);
155 sk = NULL;
156 }
157
158 pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
159 protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
160
161 return sk;
162}
163
164#ifdef XT_TPROXY_HAVE_IPV6
165static inline struct sock *
166nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
167 const struct in6_addr *saddr, const struct in6_addr *daddr,
168 const __be16 sport, const __be16 dport,
169 const struct net_device *in,
170 const enum nf_tproxy_lookup_t lookup_type)
171{
172 struct sock *sk;
173
174 switch (protocol) {
175 case IPPROTO_TCP:
176 switch (lookup_type) {
177 case NFT_LOOKUP_LISTENER:
178 sk = inet6_lookup_listener(net, &tcp_hashinfo,
179 saddr, sport,
180 daddr, ntohs(dport),
181 in->ifindex);
182
183 /* NOTE: we return listeners even if bound to
184 * 0.0.0.0, those are filtered out in
185 * xt_socket, since xt_TPROXY needs 0 bound
186 * listeners too
187 */
188 break;
189 case NFT_LOOKUP_ESTABLISHED:
190 sk = __inet6_lookup_established(net, &tcp_hashinfo,
191 saddr, sport, daddr, ntohs(dport),
192 in->ifindex);
193 break;
194 default:
195 BUG();
196 }
197 break;
198 case IPPROTO_UDP:
199 sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
200 in->ifindex);
201 if (sk) {
202 int connected = (sk->sk_state == TCP_ESTABLISHED);
203 int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
204
205 /* NOTE: we return listeners even if bound to
206 * 0.0.0.0, those are filtered out in
207 * xt_socket, since xt_TPROXY needs 0 bound
208 * listeners too
209 */
210 if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
211 (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
212 sock_put(sk);
213 sk = NULL;
214 }
215 }
216 break;
217 default:
218 WARN_ON(1);
219 sk = NULL;
220 }
221
222 pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
223 protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
224
225 return sk;
226}
227#endif
228
71/** 229/**
72 * tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections 230 * tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
73 * @skb: The skb being processed. 231 * @skb: The skb being processed.
@@ -117,6 +275,15 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
117 return sk; 275 return sk;
118} 276}
119 277
278/* assign a socket to the skb -- consumes sk */
279static void
280nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
281{
282 skb_orphan(skb);
283 skb->sk = sk;
284 skb->destructor = sock_edemux;
285}
286
120static unsigned int 287static unsigned int
121tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, 288tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
122 u_int32_t mark_mask, u_int32_t mark_value) 289 u_int32_t mark_mask, u_int32_t mark_value)
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 68ff29f60867..fab6eea1bf38 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -202,7 +202,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
202 return -EINVAL; 202 return -EINVAL;
203 } 203 }
204 if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) { 204 if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
205 pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n"); 205 pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
206 return -EINVAL; 206 return -EINVAL;
207 } 207 }
208 if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) { 208 if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 20b15916f403..06df2b9110f5 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -19,12 +19,12 @@
19#include <net/icmp.h> 19#include <net/icmp.h>
20#include <net/sock.h> 20#include <net/sock.h>
21#include <net/inet_sock.h> 21#include <net/inet_sock.h>
22#include <net/netfilter/nf_tproxy_core.h>
23#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 22#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
24 23
25#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 24#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
26#define XT_SOCKET_HAVE_IPV6 1 25#define XT_SOCKET_HAVE_IPV6 1
27#include <linux/netfilter_ipv6/ip6_tables.h> 26#include <linux/netfilter_ipv6/ip6_tables.h>
27#include <net/inet6_hashtables.h>
28#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 28#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
29#endif 29#endif
30 30
@@ -101,6 +101,43 @@ extract_icmp4_fields(const struct sk_buff *skb,
101 return 0; 101 return 0;
102} 102}
103 103
104/* "socket" match based redirection (no specific rule)
105 * ===================================================
106 *
107 * There are connections with dynamic endpoints (e.g. FTP data
108 * connection) that the user is unable to add explicit rules
109 * for. These are taken care of by a generic "socket" rule. It is
110 * assumed that the proxy application is trusted to open such
111 * connections without explicit iptables rule (except of course the
112 * generic 'socket' rule). In this case the following sockets are
113 * matched in preference order:
114 *
115 * - match: if there's a fully established connection matching the
116 * _packet_ tuple
117 *
118 * - match: if there's a non-zero bound listener (possibly with a
119 * non-local address) We don't accept zero-bound listeners, since
120 * then local services could intercept traffic going through the
121 * box.
122 */
123static struct sock *
124xt_socket_get_sock_v4(struct net *net, const u8 protocol,
125 const __be32 saddr, const __be32 daddr,
126 const __be16 sport, const __be16 dport,
127 const struct net_device *in)
128{
129 switch (protocol) {
130 case IPPROTO_TCP:
131 return __inet_lookup(net, &tcp_hashinfo,
132 saddr, sport, daddr, dport,
133 in->ifindex);
134 case IPPROTO_UDP:
135 return udp4_lib_lookup(net, saddr, sport, daddr, dport,
136 in->ifindex);
137 }
138 return NULL;
139}
140
104static bool 141static bool
105socket_match(const struct sk_buff *skb, struct xt_action_param *par, 142socket_match(const struct sk_buff *skb, struct xt_action_param *par,
106 const struct xt_socket_mtinfo1 *info) 143 const struct xt_socket_mtinfo1 *info)
@@ -156,9 +193,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
156#endif 193#endif
157 194
158 if (!sk) 195 if (!sk)
159 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, 196 sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
160 saddr, daddr, sport, dport, 197 saddr, daddr, sport, dport,
161 par->in, NFT_LOOKUP_ANY); 198 par->in);
162 if (sk) { 199 if (sk) {
163 bool wildcard; 200 bool wildcard;
164 bool transparent = true; 201 bool transparent = true;
@@ -265,6 +302,25 @@ extract_icmp6_fields(const struct sk_buff *skb,
265 return 0; 302 return 0;
266} 303}
267 304
305static struct sock *
306xt_socket_get_sock_v6(struct net *net, const u8 protocol,
307 const struct in6_addr *saddr, const struct in6_addr *daddr,
308 const __be16 sport, const __be16 dport,
309 const struct net_device *in)
310{
311 switch (protocol) {
312 case IPPROTO_TCP:
313 return inet6_lookup(net, &tcp_hashinfo,
314 saddr, sport, daddr, dport,
315 in->ifindex);
316 case IPPROTO_UDP:
317 return udp6_lib_lookup(net, saddr, sport, daddr, dport,
318 in->ifindex);
319 }
320
321 return NULL;
322}
323
268static bool 324static bool
269socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) 325socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
270{ 326{
@@ -302,9 +358,9 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
302 } 358 }
303 359
304 if (!sk) 360 if (!sk)
305 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, 361 sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
306 saddr, daddr, sport, dport, 362 saddr, daddr, sport, dport,
307 par->in, NFT_LOOKUP_ANY); 363 par->in);
308 if (sk) { 364 if (sk) {
309 bool wildcard; 365 bool wildcard;
310 bool transparent = true; 366 bool transparent = true;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0c61b59175dc..8df7f64c6db3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -168,16 +168,43 @@ int netlink_remove_tap(struct netlink_tap *nt)
168} 168}
169EXPORT_SYMBOL_GPL(netlink_remove_tap); 169EXPORT_SYMBOL_GPL(netlink_remove_tap);
170 170
171static bool netlink_filter_tap(const struct sk_buff *skb)
172{
173 struct sock *sk = skb->sk;
174 bool pass = false;
175
176 /* We take the more conservative approach and
177 * whitelist socket protocols that may pass.
178 */
179 switch (sk->sk_protocol) {
180 case NETLINK_ROUTE:
181 case NETLINK_USERSOCK:
182 case NETLINK_SOCK_DIAG:
183 case NETLINK_NFLOG:
184 case NETLINK_XFRM:
185 case NETLINK_FIB_LOOKUP:
186 case NETLINK_NETFILTER:
187 case NETLINK_GENERIC:
188 pass = true;
189 break;
190 }
191
192 return pass;
193}
194
171static int __netlink_deliver_tap_skb(struct sk_buff *skb, 195static int __netlink_deliver_tap_skb(struct sk_buff *skb,
172 struct net_device *dev) 196 struct net_device *dev)
173{ 197{
174 struct sk_buff *nskb; 198 struct sk_buff *nskb;
199 struct sock *sk = skb->sk;
175 int ret = -ENOMEM; 200 int ret = -ENOMEM;
176 201
177 dev_hold(dev); 202 dev_hold(dev);
178 nskb = skb_clone(skb, GFP_ATOMIC); 203 nskb = skb_clone(skb, GFP_ATOMIC);
179 if (nskb) { 204 if (nskb) {
180 nskb->dev = dev; 205 nskb->dev = dev;
206 nskb->protocol = htons((u16) sk->sk_protocol);
207
181 ret = dev_queue_xmit(nskb); 208 ret = dev_queue_xmit(nskb);
182 if (unlikely(ret > 0)) 209 if (unlikely(ret > 0))
183 ret = net_xmit_errno(ret); 210 ret = net_xmit_errno(ret);
@@ -192,6 +219,9 @@ static void __netlink_deliver_tap(struct sk_buff *skb)
192 int ret; 219 int ret;
193 struct netlink_tap *tmp; 220 struct netlink_tap *tmp;
194 221
222 if (!netlink_filter_tap(skb))
223 return;
224
195 list_for_each_entry_rcu(tmp, &netlink_tap_all, list) { 225 list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
196 ret = __netlink_deliver_tap_skb(skb, tmp->dev); 226 ret = __netlink_deliver_tap_skb(skb, tmp->dev);
197 if (unlikely(ret)) 227 if (unlikely(ret))
@@ -294,14 +324,14 @@ static void **alloc_pg_vec(struct netlink_sock *nlk,
294{ 324{
295 unsigned int block_nr = req->nm_block_nr; 325 unsigned int block_nr = req->nm_block_nr;
296 unsigned int i; 326 unsigned int i;
297 void **pg_vec, *ptr; 327 void **pg_vec;
298 328
299 pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); 329 pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
300 if (pg_vec == NULL) 330 if (pg_vec == NULL)
301 return NULL; 331 return NULL;
302 332
303 for (i = 0; i < block_nr; i++) { 333 for (i = 0; i < block_nr; i++) {
304 pg_vec[i] = ptr = alloc_one_pg_vec_page(order); 334 pg_vec[i] = alloc_one_pg_vec_page(order);
305 if (pg_vec[i] == NULL) 335 if (pg_vec[i] == NULL)
306 goto err1; 336 goto err1;
307 } 337 }
@@ -595,7 +625,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
595 * for dumps is performed here. A dump is allowed to continue 625 * for dumps is performed here. A dump is allowed to continue
596 * if at least half the ring is unused. 626 * if at least half the ring is unused.
597 */ 627 */
598 while (nlk->cb != NULL && netlink_dump_space(nlk)) { 628 while (nlk->cb_running && netlink_dump_space(nlk)) {
599 err = netlink_dump(sk); 629 err = netlink_dump(sk);
600 if (err < 0) { 630 if (err < 0) {
601 sk->sk_err = err; 631 sk->sk_err = err;
@@ -802,18 +832,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
802#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 832#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0
803#endif /* CONFIG_NETLINK_MMAP */ 833#endif /* CONFIG_NETLINK_MMAP */
804 834
805static void netlink_destroy_callback(struct netlink_callback *cb)
806{
807 kfree_skb(cb->skb);
808 kfree(cb);
809}
810
811static void netlink_consume_callback(struct netlink_callback *cb)
812{
813 consume_skb(cb->skb);
814 kfree(cb);
815}
816
817static void netlink_skb_destructor(struct sk_buff *skb) 835static void netlink_skb_destructor(struct sk_buff *skb)
818{ 836{
819#ifdef CONFIG_NETLINK_MMAP 837#ifdef CONFIG_NETLINK_MMAP
@@ -872,12 +890,12 @@ static void netlink_sock_destruct(struct sock *sk)
872{ 890{
873 struct netlink_sock *nlk = nlk_sk(sk); 891 struct netlink_sock *nlk = nlk_sk(sk);
874 892
875 if (nlk->cb) { 893 if (nlk->cb_running) {
876 if (nlk->cb->done) 894 if (nlk->cb.done)
877 nlk->cb->done(nlk->cb); 895 nlk->cb.done(&nlk->cb);
878 896
879 module_put(nlk->cb->module); 897 module_put(nlk->cb.module);
880 netlink_destroy_callback(nlk->cb); 898 kfree_skb(nlk->cb.skb);
881 } 899 }
882 900
883 skb_queue_purge(&sk->sk_receive_queue); 901 skb_queue_purge(&sk->sk_receive_queue);
@@ -2350,7 +2368,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
2350 2368
2351 skb_free_datagram(sk, skb); 2369 skb_free_datagram(sk, skb);
2352 2370
2353 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { 2371 if (nlk->cb_running &&
2372 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
2354 ret = netlink_dump(sk); 2373 ret = netlink_dump(sk);
2355 if (ret) { 2374 if (ret) {
2356 sk->sk_err = ret; 2375 sk->sk_err = ret;
@@ -2566,13 +2585,12 @@ static int netlink_dump(struct sock *sk)
2566 int alloc_size; 2585 int alloc_size;
2567 2586
2568 mutex_lock(nlk->cb_mutex); 2587 mutex_lock(nlk->cb_mutex);
2569 2588 if (!nlk->cb_running) {
2570 cb = nlk->cb;
2571 if (cb == NULL) {
2572 err = -EINVAL; 2589 err = -EINVAL;
2573 goto errout_skb; 2590 goto errout_skb;
2574 } 2591 }
2575 2592
2593 cb = &nlk->cb;
2576 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); 2594 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2577 2595
2578 if (!netlink_rx_is_mmaped(sk) && 2596 if (!netlink_rx_is_mmaped(sk) &&
@@ -2610,11 +2628,11 @@ static int netlink_dump(struct sock *sk)
2610 2628
2611 if (cb->done) 2629 if (cb->done)
2612 cb->done(cb); 2630 cb->done(cb);
2613 nlk->cb = NULL;
2614 mutex_unlock(nlk->cb_mutex);
2615 2631
2632 nlk->cb_running = false;
2633 mutex_unlock(nlk->cb_mutex);
2616 module_put(cb->module); 2634 module_put(cb->module);
2617 netlink_consume_callback(cb); 2635 consume_skb(cb->skb);
2618 return 0; 2636 return 0;
2619 2637
2620errout_skb: 2638errout_skb:
@@ -2632,59 +2650,51 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2632 struct netlink_sock *nlk; 2650 struct netlink_sock *nlk;
2633 int ret; 2651 int ret;
2634 2652
2635 cb = kzalloc(sizeof(*cb), GFP_KERNEL);
2636 if (cb == NULL)
2637 return -ENOBUFS;
2638
2639 /* Memory mapped dump requests need to be copied to avoid looping 2653 /* Memory mapped dump requests need to be copied to avoid looping
2640 * on the pending state in netlink_mmap_sendmsg() while the CB hold 2654 * on the pending state in netlink_mmap_sendmsg() while the CB hold
2641 * a reference to the skb. 2655 * a reference to the skb.
2642 */ 2656 */
2643 if (netlink_skb_is_mmaped(skb)) { 2657 if (netlink_skb_is_mmaped(skb)) {
2644 skb = skb_copy(skb, GFP_KERNEL); 2658 skb = skb_copy(skb, GFP_KERNEL);
2645 if (skb == NULL) { 2659 if (skb == NULL)
2646 kfree(cb);
2647 return -ENOBUFS; 2660 return -ENOBUFS;
2648 }
2649 } else 2661 } else
2650 atomic_inc(&skb->users); 2662 atomic_inc(&skb->users);
2651 2663
2652 cb->dump = control->dump;
2653 cb->done = control->done;
2654 cb->nlh = nlh;
2655 cb->data = control->data;
2656 cb->module = control->module;
2657 cb->min_dump_alloc = control->min_dump_alloc;
2658 cb->skb = skb;
2659
2660 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); 2664 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
2661 if (sk == NULL) { 2665 if (sk == NULL) {
2662 netlink_destroy_callback(cb); 2666 ret = -ECONNREFUSED;
2663 return -ECONNREFUSED; 2667 goto error_free;
2664 } 2668 }
2665 nlk = nlk_sk(sk);
2666 2669
2670 nlk = nlk_sk(sk);
2667 mutex_lock(nlk->cb_mutex); 2671 mutex_lock(nlk->cb_mutex);
2668 /* A dump is in progress... */ 2672 /* A dump is in progress... */
2669 if (nlk->cb) { 2673 if (nlk->cb_running) {
2670 mutex_unlock(nlk->cb_mutex);
2671 netlink_destroy_callback(cb);
2672 ret = -EBUSY; 2674 ret = -EBUSY;
2673 goto out; 2675 goto error_unlock;
2674 } 2676 }
2675 /* add reference of module which cb->dump belongs to */ 2677 /* add reference of module which cb->dump belongs to */
2676 if (!try_module_get(cb->module)) { 2678 if (!try_module_get(control->module)) {
2677 mutex_unlock(nlk->cb_mutex);
2678 netlink_destroy_callback(cb);
2679 ret = -EPROTONOSUPPORT; 2679 ret = -EPROTONOSUPPORT;
2680 goto out; 2680 goto error_unlock;
2681 } 2681 }
2682 2682
2683 nlk->cb = cb; 2683 cb = &nlk->cb;
2684 memset(cb, 0, sizeof(*cb));
2685 cb->dump = control->dump;
2686 cb->done = control->done;
2687 cb->nlh = nlh;
2688 cb->data = control->data;
2689 cb->module = control->module;
2690 cb->min_dump_alloc = control->min_dump_alloc;
2691 cb->skb = skb;
2692
2693 nlk->cb_running = true;
2694
2684 mutex_unlock(nlk->cb_mutex); 2695 mutex_unlock(nlk->cb_mutex);
2685 2696
2686 ret = netlink_dump(sk); 2697 ret = netlink_dump(sk);
2687out:
2688 sock_put(sk); 2698 sock_put(sk);
2689 2699
2690 if (ret) 2700 if (ret)
@@ -2694,6 +2704,13 @@ out:
2694 * signal not to send ACK even if it was requested. 2704 * signal not to send ACK even if it was requested.
2695 */ 2705 */
2696 return -EINTR; 2706 return -EINTR;
2707
2708error_unlock:
2709 sock_put(sk);
2710 mutex_unlock(nlk->cb_mutex);
2711error_free:
2712 kfree_skb(skb);
2713 return ret;
2697} 2714}
2698EXPORT_SYMBOL(__netlink_dump_start); 2715EXPORT_SYMBOL(__netlink_dump_start);
2699 2716
@@ -2916,14 +2933,14 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
2916 struct sock *s = v; 2933 struct sock *s = v;
2917 struct netlink_sock *nlk = nlk_sk(s); 2934 struct netlink_sock *nlk = nlk_sk(s);
2918 2935
2919 seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", 2936 seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
2920 s, 2937 s,
2921 s->sk_protocol, 2938 s->sk_protocol,
2922 nlk->portid, 2939 nlk->portid,
2923 nlk->groups ? (u32)nlk->groups[0] : 0, 2940 nlk->groups ? (u32)nlk->groups[0] : 0,
2924 sk_rmem_alloc_get(s), 2941 sk_rmem_alloc_get(s),
2925 sk_wmem_alloc_get(s), 2942 sk_wmem_alloc_get(s),
2926 nlk->cb, 2943 nlk->cb_running,
2927 atomic_read(&s->sk_refcnt), 2944 atomic_read(&s->sk_refcnt),
2928 atomic_read(&s->sk_drops), 2945 atomic_read(&s->sk_drops),
2929 sock_i_ino(s) 2946 sock_i_ino(s)
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index eaa88d187cdc..acbd774eeb7c 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -32,7 +32,8 @@ struct netlink_sock {
32 unsigned long *groups; 32 unsigned long *groups;
33 unsigned long state; 33 unsigned long state;
34 wait_queue_head_t wait; 34 wait_queue_head_t wait;
35 struct netlink_callback *cb; 35 bool cb_running;
36 struct netlink_callback cb;
36 struct mutex *cb_mutex; 37 struct mutex *cb_mutex;
37 struct mutex cb_def_mutex; 38 struct mutex cb_def_mutex;
38 void (*netlink_rcv)(struct sk_buff *skb); 39 void (*netlink_rcv)(struct sk_buff *skb);
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 1d074dd1650f..e92923cf3e03 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -77,11 +77,19 @@ error:
77 return rc; 77 return rc;
78} 78}
79 79
80int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name) 80/**
81 * nfc_fw_download_done - inform that a firmware download was completed
82 *
83 * @dev: The nfc device to which firmware was downloaded
84 * @firmware_name: The firmware filename
85 * @result: The positive value of a standard errno value
86 */
87int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
88 u32 result)
81{ 89{
82 dev->fw_download_in_progress = false; 90 dev->fw_download_in_progress = false;
83 91
84 return nfc_genl_fw_download_done(dev, firmware_name); 92 return nfc_genl_fw_download_done(dev, firmware_name, result);
85} 93}
86EXPORT_SYMBOL(nfc_fw_download_done); 94EXPORT_SYMBOL(nfc_fw_download_done);
87 95
@@ -129,7 +137,7 @@ int nfc_dev_up(struct nfc_dev *dev)
129 /* We have to enable the device before discovering SEs */ 137 /* We have to enable the device before discovering SEs */
130 if (dev->ops->discover_se) { 138 if (dev->ops->discover_se) {
131 rc = dev->ops->discover_se(dev); 139 rc = dev->ops->discover_se(dev);
132 if (!rc) 140 if (rc)
133 pr_warn("SE discovery failed\n"); 141 pr_warn("SE discovery failed\n");
134 } 142 }
135 143
@@ -575,12 +583,14 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx)
575 goto error; 583 goto error;
576 } 584 }
577 585
578 if (se->type == NFC_SE_ENABLED) { 586 if (se->state == NFC_SE_ENABLED) {
579 rc = -EALREADY; 587 rc = -EALREADY;
580 goto error; 588 goto error;
581 } 589 }
582 590
583 rc = dev->ops->enable_se(dev, se_idx); 591 rc = dev->ops->enable_se(dev, se_idx);
592 if (rc >= 0)
593 se->state = NFC_SE_ENABLED;
584 594
585error: 595error:
586 device_unlock(&dev->dev); 596 device_unlock(&dev->dev);
@@ -618,12 +628,14 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx)
618 goto error; 628 goto error;
619 } 629 }
620 630
621 if (se->type == NFC_SE_DISABLED) { 631 if (se->state == NFC_SE_DISABLED) {
622 rc = -EALREADY; 632 rc = -EALREADY;
623 goto error; 633 goto error;
624 } 634 }
625 635
626 rc = dev->ops->disable_se(dev, se_idx); 636 rc = dev->ops->disable_se(dev, se_idx);
637 if (rc >= 0)
638 se->state = NFC_SE_DISABLED;
627 639
628error: 640error:
629 device_unlock(&dev->dev); 641 device_unlock(&dev->dev);
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index fe66908401f5..d07ca4c5cf8c 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -717,7 +717,7 @@ static int hci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx)
717 struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); 717 struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
718 718
719 if (hdev->ops->disable_se) 719 if (hdev->ops->disable_se)
720 return hdev->ops->enable_se(hdev, se_idx); 720 return hdev->ops->disable_se(hdev, se_idx);
721 721
722 return 0; 722 return 0;
723} 723}
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f16fd59d4160..68063b2025da 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1114,7 +1114,8 @@ static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info)
1114 return rc; 1114 return rc;
1115} 1115}
1116 1116
1117int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name) 1117int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
1118 u32 result)
1118{ 1119{
1119 struct sk_buff *msg; 1120 struct sk_buff *msg;
1120 void *hdr; 1121 void *hdr;
@@ -1129,6 +1130,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name)
1129 goto free_msg; 1130 goto free_msg;
1130 1131
1131 if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) || 1132 if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) ||
1133 nla_put_u32(msg, NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS, result) ||
1132 nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) 1134 nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
1133 goto nla_put_failure; 1135 goto nla_put_failure;
1134 1136
@@ -1191,6 +1193,91 @@ static int nfc_genl_disable_se(struct sk_buff *skb, struct genl_info *info)
1191 return rc; 1193 return rc;
1192} 1194}
1193 1195
1196static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev,
1197 u32 portid, u32 seq,
1198 struct netlink_callback *cb,
1199 int flags)
1200{
1201 void *hdr;
1202 struct nfc_se *se, *n;
1203
1204 list_for_each_entry_safe(se, n, &dev->secure_elements, list) {
1205 hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags,
1206 NFC_CMD_GET_SE);
1207 if (!hdr)
1208 goto nla_put_failure;
1209
1210 if (cb)
1211 genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
1212
1213 if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
1214 nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) ||
1215 nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type))
1216 goto nla_put_failure;
1217
1218 if (genlmsg_end(msg, hdr) < 0)
1219 goto nla_put_failure;
1220 }
1221
1222 return 0;
1223
1224nla_put_failure:
1225 genlmsg_cancel(msg, hdr);
1226 return -EMSGSIZE;
1227}
1228
1229static int nfc_genl_dump_ses(struct sk_buff *skb,
1230 struct netlink_callback *cb)
1231{
1232 struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
1233 struct nfc_dev *dev = (struct nfc_dev *) cb->args[1];
1234 bool first_call = false;
1235
1236 if (!iter) {
1237 first_call = true;
1238 iter = kmalloc(sizeof(struct class_dev_iter), GFP_KERNEL);
1239 if (!iter)
1240 return -ENOMEM;
1241 cb->args[0] = (long) iter;
1242 }
1243
1244 mutex_lock(&nfc_devlist_mutex);
1245
1246 cb->seq = nfc_devlist_generation;
1247
1248 if (first_call) {
1249 nfc_device_iter_init(iter);
1250 dev = nfc_device_iter_next(iter);
1251 }
1252
1253 while (dev) {
1254 int rc;
1255
1256 rc = nfc_genl_send_se(skb, dev, NETLINK_CB(cb->skb).portid,
1257 cb->nlh->nlmsg_seq, cb, NLM_F_MULTI);
1258 if (rc < 0)
1259 break;
1260
1261 dev = nfc_device_iter_next(iter);
1262 }
1263
1264 mutex_unlock(&nfc_devlist_mutex);
1265
1266 cb->args[1] = (long) dev;
1267
1268 return skb->len;
1269}
1270
1271static int nfc_genl_dump_ses_done(struct netlink_callback *cb)
1272{
1273 struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
1274
1275 nfc_device_iter_exit(iter);
1276 kfree(iter);
1277
1278 return 0;
1279}
1280
1194static struct genl_ops nfc_genl_ops[] = { 1281static struct genl_ops nfc_genl_ops[] = {
1195 { 1282 {
1196 .cmd = NFC_CMD_GET_DEVICE, 1283 .cmd = NFC_CMD_GET_DEVICE,
@@ -1265,6 +1352,12 @@ static struct genl_ops nfc_genl_ops[] = {
1265 .doit = nfc_genl_disable_se, 1352 .doit = nfc_genl_disable_se,
1266 .policy = nfc_genl_policy, 1353 .policy = nfc_genl_policy,
1267 }, 1354 },
1355 {
1356 .cmd = NFC_CMD_GET_SE,
1357 .dumpit = nfc_genl_dump_ses,
1358 .done = nfc_genl_dump_ses_done,
1359 .policy = nfc_genl_policy,
1360 },
1268}; 1361};
1269 1362
1270 1363
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 820a7850c36a..aaf606fc1faa 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -124,9 +124,8 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter)
124} 124}
125 125
126int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name); 126int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name);
127int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name); 127int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
128 128 u32 result);
129int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name);
130 129
131int nfc_dev_up(struct nfc_dev *dev); 130int nfc_dev_up(struct nfc_dev *dev);
132 131
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 27ee56b688a3..6ecf491ad509 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -4,6 +4,7 @@
4 4
5config OPENVSWITCH 5config OPENVSWITCH
6 tristate "Open vSwitch" 6 tristate "Open vSwitch"
7 select LIBCRC32C
7 ---help--- 8 ---help---
8 Open vSwitch is a multilayer Ethernet switch targeted at virtualized 9 Open vSwitch is a multilayer Ethernet switch targeted at virtualized
9 environments. In addition to supporting a variety of features 10 environments. In addition to supporting a variety of features
@@ -40,3 +41,16 @@ config OPENVSWITCH_GRE
40 Say N to exclude this support and reduce the binary size. 41 Say N to exclude this support and reduce the binary size.
41 42
42 If unsure, say Y. 43 If unsure, say Y.
44
45config OPENVSWITCH_VXLAN
46 bool "Open vSwitch VXLAN tunneling support"
47 depends on INET
48 depends on OPENVSWITCH
49 depends on VXLAN && !(OPENVSWITCH=y && VXLAN=m)
50 default y
51 ---help---
52 If you say Y here, then the Open vSwitch will be able to create vxlan vport.
53
54 Say N to exclude this support and reduce the binary size.
55
56 If unsure, say Y.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 01bddb2991e3..ea36e99089af 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -10,6 +10,13 @@ openvswitch-y := \
10 dp_notify.o \ 10 dp_notify.o \
11 flow.o \ 11 flow.o \
12 vport.o \ 12 vport.o \
13 vport-gre.o \
14 vport-internal_dev.o \ 13 vport-internal_dev.o \
15 vport-netdev.o 14 vport-netdev.o
15
16ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
17openvswitch-y += vport-vxlan.o
18endif
19
20ifneq ($(CONFIG_OPENVSWITCH_GRE),)
21openvswitch-y += vport-gre.o
22endif
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ab101f715447..65cfaa816075 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2012 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -22,6 +22,7 @@
22#include <linux/in.h> 22#include <linux/in.h>
23#include <linux/ip.h> 23#include <linux/ip.h>
24#include <linux/openvswitch.h> 24#include <linux/openvswitch.h>
25#include <linux/sctp.h>
25#include <linux/tcp.h> 26#include <linux/tcp.h>
26#include <linux/udp.h> 27#include <linux/udp.h>
27#include <linux/in6.h> 28#include <linux/in6.h>
@@ -31,6 +32,7 @@
31#include <net/ipv6.h> 32#include <net/ipv6.h>
32#include <net/checksum.h> 33#include <net/checksum.h>
33#include <net/dsfield.h> 34#include <net/dsfield.h>
35#include <net/sctp/checksum.h>
34 36
35#include "datapath.h" 37#include "datapath.h"
36#include "vport.h" 38#include "vport.h"
@@ -352,6 +354,39 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
352 return 0; 354 return 0;
353} 355}
354 356
357static int set_sctp(struct sk_buff *skb,
358 const struct ovs_key_sctp *sctp_port_key)
359{
360 struct sctphdr *sh;
361 int err;
362 unsigned int sctphoff = skb_transport_offset(skb);
363
364 err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
365 if (unlikely(err))
366 return err;
367
368 sh = sctp_hdr(skb);
369 if (sctp_port_key->sctp_src != sh->source ||
370 sctp_port_key->sctp_dst != sh->dest) {
371 __le32 old_correct_csum, new_csum, old_csum;
372
373 old_csum = sh->checksum;
374 old_correct_csum = sctp_compute_cksum(skb, sctphoff);
375
376 sh->source = sctp_port_key->sctp_src;
377 sh->dest = sctp_port_key->sctp_dst;
378
379 new_csum = sctp_compute_cksum(skb, sctphoff);
380
381 /* Carry any checksum errors through. */
382 sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
383
384 skb->rxhash = 0;
385 }
386
387 return 0;
388}
389
355static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) 390static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
356{ 391{
357 struct vport *vport; 392 struct vport *vport;
@@ -376,8 +411,10 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
376 const struct nlattr *a; 411 const struct nlattr *a;
377 int rem; 412 int rem;
378 413
414 BUG_ON(!OVS_CB(skb)->pkt_key);
415
379 upcall.cmd = OVS_PACKET_CMD_ACTION; 416 upcall.cmd = OVS_PACKET_CMD_ACTION;
380 upcall.key = &OVS_CB(skb)->flow->key; 417 upcall.key = OVS_CB(skb)->pkt_key;
381 upcall.userdata = NULL; 418 upcall.userdata = NULL;
382 upcall.portid = 0; 419 upcall.portid = 0;
383 420
@@ -459,6 +496,10 @@ static int execute_set_action(struct sk_buff *skb,
459 case OVS_KEY_ATTR_UDP: 496 case OVS_KEY_ATTR_UDP:
460 err = set_udp(skb, nla_data(nested_attr)); 497 err = set_udp(skb, nla_data(nested_attr));
461 break; 498 break;
499
500 case OVS_KEY_ATTR_SCTP:
501 err = set_sctp(skb, nla_data(nested_attr));
502 break;
462 } 503 }
463 504
464 return err; 505 return err;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f2ed7600084e..2aa13bd7f2b2 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2012 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -165,7 +165,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
165{ 165{
166 struct datapath *dp = container_of(rcu, struct datapath, rcu); 166 struct datapath *dp = container_of(rcu, struct datapath, rcu);
167 167
168 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); 168 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false);
169 free_percpu(dp->stats_percpu); 169 free_percpu(dp->stats_percpu);
170 release_net(ovs_dp_get_net(dp)); 170 release_net(ovs_dp_get_net(dp));
171 kfree(dp->ports); 171 kfree(dp->ports);
@@ -226,19 +226,18 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
226 struct sw_flow_key key; 226 struct sw_flow_key key;
227 u64 *stats_counter; 227 u64 *stats_counter;
228 int error; 228 int error;
229 int key_len;
230 229
231 stats = this_cpu_ptr(dp->stats_percpu); 230 stats = this_cpu_ptr(dp->stats_percpu);
232 231
233 /* Extract flow from 'skb' into 'key'. */ 232 /* Extract flow from 'skb' into 'key'. */
234 error = ovs_flow_extract(skb, p->port_no, &key, &key_len); 233 error = ovs_flow_extract(skb, p->port_no, &key);
235 if (unlikely(error)) { 234 if (unlikely(error)) {
236 kfree_skb(skb); 235 kfree_skb(skb);
237 return; 236 return;
238 } 237 }
239 238
240 /* Look up flow. */ 239 /* Look up flow. */
241 flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len); 240 flow = ovs_flow_lookup(rcu_dereference(dp->table), &key);
242 if (unlikely(!flow)) { 241 if (unlikely(!flow)) {
243 struct dp_upcall_info upcall; 242 struct dp_upcall_info upcall;
244 243
@@ -253,6 +252,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
253 } 252 }
254 253
255 OVS_CB(skb)->flow = flow; 254 OVS_CB(skb)->flow = flow;
255 OVS_CB(skb)->pkt_key = &key;
256 256
257 stats_counter = &stats->n_hit; 257 stats_counter = &stats->n_hit;
258 ovs_flow_used(OVS_CB(skb)->flow, skb); 258 ovs_flow_used(OVS_CB(skb)->flow, skb);
@@ -435,7 +435,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
435 upcall->dp_ifindex = dp_ifindex; 435 upcall->dp_ifindex = dp_ifindex;
436 436
437 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); 437 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
438 ovs_flow_to_nlattrs(upcall_info->key, user_skb); 438 ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
439 nla_nest_end(user_skb, nla); 439 nla_nest_end(user_skb, nla);
440 440
441 if (upcall_info->userdata) 441 if (upcall_info->userdata)
@@ -468,7 +468,7 @@ static int flush_flows(struct datapath *dp)
468 468
469 rcu_assign_pointer(dp->table, new_table); 469 rcu_assign_pointer(dp->table, new_table);
470 470
471 ovs_flow_tbl_deferred_destroy(old_table); 471 ovs_flow_tbl_destroy(old_table, true);
472 return 0; 472 return 0;
473} 473}
474 474
@@ -611,10 +611,12 @@ static int validate_tp_port(const struct sw_flow_key *flow_key)
611static int validate_and_copy_set_tun(const struct nlattr *attr, 611static int validate_and_copy_set_tun(const struct nlattr *attr,
612 struct sw_flow_actions **sfa) 612 struct sw_flow_actions **sfa)
613{ 613{
614 struct ovs_key_ipv4_tunnel tun_key; 614 struct sw_flow_match match;
615 struct sw_flow_key key;
615 int err, start; 616 int err, start;
616 617
617 err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key); 618 ovs_match_init(&match, &key, NULL);
619 err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false);
618 if (err) 620 if (err)
619 return err; 621 return err;
620 622
@@ -622,7 +624,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
622 if (start < 0) 624 if (start < 0)
623 return start; 625 return start;
624 626
625 err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key)); 627 err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
628 sizeof(match.key->tun_key));
626 add_nested_action_end(*sfa, start); 629 add_nested_action_end(*sfa, start);
627 630
628 return err; 631 return err;
@@ -709,6 +712,12 @@ static int validate_set(const struct nlattr *a,
709 712
710 return validate_tp_port(flow_key); 713 return validate_tp_port(flow_key);
711 714
715 case OVS_KEY_ATTR_SCTP:
716 if (flow_key->ip.proto != IPPROTO_SCTP)
717 return -EINVAL;
718
719 return validate_tp_port(flow_key);
720
712 default: 721 default:
713 return -EINVAL; 722 return -EINVAL;
714 } 723 }
@@ -857,7 +866,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
857 struct ethhdr *eth; 866 struct ethhdr *eth;
858 int len; 867 int len;
859 int err; 868 int err;
860 int key_len;
861 869
862 err = -EINVAL; 870 err = -EINVAL;
863 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 871 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -890,11 +898,11 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
890 if (IS_ERR(flow)) 898 if (IS_ERR(flow))
891 goto err_kfree_skb; 899 goto err_kfree_skb;
892 900
893 err = ovs_flow_extract(packet, -1, &flow->key, &key_len); 901 err = ovs_flow_extract(packet, -1, &flow->key);
894 if (err) 902 if (err)
895 goto err_flow_free; 903 goto err_flow_free;
896 904
897 err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]); 905 err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]);
898 if (err) 906 if (err)
899 goto err_flow_free; 907 goto err_flow_free;
900 acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); 908 acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
@@ -908,6 +916,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
908 goto err_flow_free; 916 goto err_flow_free;
909 917
910 OVS_CB(packet)->flow = flow; 918 OVS_CB(packet)->flow = flow;
919 OVS_CB(packet)->pkt_key = &flow->key;
911 packet->priority = flow->key.phy.priority; 920 packet->priority = flow->key.phy.priority;
912 packet->mark = flow->key.phy.skb_mark; 921 packet->mark = flow->key.phy.skb_mark;
913 922
@@ -922,13 +931,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
922 local_bh_enable(); 931 local_bh_enable();
923 rcu_read_unlock(); 932 rcu_read_unlock();
924 933
925 ovs_flow_free(flow); 934 ovs_flow_free(flow, false);
926 return err; 935 return err;
927 936
928err_unlock: 937err_unlock:
929 rcu_read_unlock(); 938 rcu_read_unlock();
930err_flow_free: 939err_flow_free:
931 ovs_flow_free(flow); 940 ovs_flow_free(flow, false);
932err_kfree_skb: 941err_kfree_skb:
933 kfree_skb(packet); 942 kfree_skb(packet);
934err: 943err:
@@ -951,9 +960,10 @@ static struct genl_ops dp_packet_genl_ops[] = {
951 960
952static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) 961static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
953{ 962{
963 struct flow_table *table;
954 int i; 964 int i;
955 struct flow_table *table = ovsl_dereference(dp->table);
956 965
966 table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held());
957 stats->n_flows = ovs_flow_tbl_count(table); 967 stats->n_flows = ovs_flow_tbl_count(table);
958 968
959 stats->n_hit = stats->n_missed = stats->n_lost = 0; 969 stats->n_hit = stats->n_missed = stats->n_lost = 0;
@@ -1044,7 +1054,8 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1044 if (!start) 1054 if (!start)
1045 return -EMSGSIZE; 1055 return -EMSGSIZE;
1046 1056
1047 err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key)); 1057 err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
1058 nla_data(ovs_key));
1048 if (err) 1059 if (err)
1049 return err; 1060 return err;
1050 nla_nest_end(skb, start); 1061 nla_nest_end(skb, start);
@@ -1092,6 +1103,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
1092{ 1103{
1093 return NLMSG_ALIGN(sizeof(struct ovs_header)) 1104 return NLMSG_ALIGN(sizeof(struct ovs_header))
1094 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ 1105 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
1106 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
1095 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ 1107 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
1096 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ 1108 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
1097 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ 1109 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -1104,7 +1116,6 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
1104 u32 seq, u32 flags, u8 cmd) 1116 u32 seq, u32 flags, u8 cmd)
1105{ 1117{
1106 const int skb_orig_len = skb->len; 1118 const int skb_orig_len = skb->len;
1107 const struct sw_flow_actions *sf_acts;
1108 struct nlattr *start; 1119 struct nlattr *start;
1109 struct ovs_flow_stats stats; 1120 struct ovs_flow_stats stats;
1110 struct ovs_header *ovs_header; 1121 struct ovs_header *ovs_header;
@@ -1113,20 +1124,31 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
1113 u8 tcp_flags; 1124 u8 tcp_flags;
1114 int err; 1125 int err;
1115 1126
1116 sf_acts = ovsl_dereference(flow->sf_acts);
1117
1118 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); 1127 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
1119 if (!ovs_header) 1128 if (!ovs_header)
1120 return -EMSGSIZE; 1129 return -EMSGSIZE;
1121 1130
1122 ovs_header->dp_ifindex = get_dpifindex(dp); 1131 ovs_header->dp_ifindex = get_dpifindex(dp);
1123 1132
1133 /* Fill flow key. */
1124 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); 1134 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
1125 if (!nla) 1135 if (!nla)
1126 goto nla_put_failure; 1136 goto nla_put_failure;
1127 err = ovs_flow_to_nlattrs(&flow->key, skb); 1137
1138 err = ovs_flow_to_nlattrs(&flow->unmasked_key,
1139 &flow->unmasked_key, skb);
1140 if (err)
1141 goto error;
1142 nla_nest_end(skb, nla);
1143
1144 nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
1145 if (!nla)
1146 goto nla_put_failure;
1147
1148 err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb);
1128 if (err) 1149 if (err)
1129 goto error; 1150 goto error;
1151
1130 nla_nest_end(skb, nla); 1152 nla_nest_end(skb, nla);
1131 1153
1132 spin_lock_bh(&flow->lock); 1154 spin_lock_bh(&flow->lock);
@@ -1161,6 +1183,11 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
1161 */ 1183 */
1162 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); 1184 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
1163 if (start) { 1185 if (start) {
1186 const struct sw_flow_actions *sf_acts;
1187
1188 sf_acts = rcu_dereference_check(flow->sf_acts,
1189 lockdep_ovsl_is_held());
1190
1164 err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); 1191 err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
1165 if (!err) 1192 if (!err)
1166 nla_nest_end(skb, start); 1193 nla_nest_end(skb, start);
@@ -1211,20 +1238,24 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1211{ 1238{
1212 struct nlattr **a = info->attrs; 1239 struct nlattr **a = info->attrs;
1213 struct ovs_header *ovs_header = info->userhdr; 1240 struct ovs_header *ovs_header = info->userhdr;
1214 struct sw_flow_key key; 1241 struct sw_flow_key key, masked_key;
1215 struct sw_flow *flow; 1242 struct sw_flow *flow = NULL;
1243 struct sw_flow_mask mask;
1216 struct sk_buff *reply; 1244 struct sk_buff *reply;
1217 struct datapath *dp; 1245 struct datapath *dp;
1218 struct flow_table *table; 1246 struct flow_table *table;
1219 struct sw_flow_actions *acts = NULL; 1247 struct sw_flow_actions *acts = NULL;
1248 struct sw_flow_match match;
1220 int error; 1249 int error;
1221 int key_len;
1222 1250
1223 /* Extract key. */ 1251 /* Extract key. */
1224 error = -EINVAL; 1252 error = -EINVAL;
1225 if (!a[OVS_FLOW_ATTR_KEY]) 1253 if (!a[OVS_FLOW_ATTR_KEY])
1226 goto error; 1254 goto error;
1227 error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1255
1256 ovs_match_init(&match, &key, &mask);
1257 error = ovs_match_from_nlattrs(&match,
1258 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
1228 if (error) 1259 if (error)
1229 goto error; 1260 goto error;
1230 1261
@@ -1235,9 +1266,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1235 if (IS_ERR(acts)) 1266 if (IS_ERR(acts))
1236 goto error; 1267 goto error;
1237 1268
1238 error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts); 1269 ovs_flow_key_mask(&masked_key, &key, &mask);
1239 if (error) 1270 error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
1271 &masked_key, 0, &acts);
1272 if (error) {
1273 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
1240 goto err_kfree; 1274 goto err_kfree;
1275 }
1241 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { 1276 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
1242 error = -EINVAL; 1277 error = -EINVAL;
1243 goto error; 1278 goto error;
@@ -1250,8 +1285,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1250 goto err_unlock_ovs; 1285 goto err_unlock_ovs;
1251 1286
1252 table = ovsl_dereference(dp->table); 1287 table = ovsl_dereference(dp->table);
1253 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1288
1289 /* Check if this is a duplicate flow */
1290 flow = ovs_flow_lookup(table, &key);
1254 if (!flow) { 1291 if (!flow) {
1292 struct sw_flow_mask *mask_p;
1255 /* Bail out if we're not allowed to create a new flow. */ 1293 /* Bail out if we're not allowed to create a new flow. */
1256 error = -ENOENT; 1294 error = -ENOENT;
1257 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 1295 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
@@ -1264,7 +1302,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1264 new_table = ovs_flow_tbl_expand(table); 1302 new_table = ovs_flow_tbl_expand(table);
1265 if (!IS_ERR(new_table)) { 1303 if (!IS_ERR(new_table)) {
1266 rcu_assign_pointer(dp->table, new_table); 1304 rcu_assign_pointer(dp->table, new_table);
1267 ovs_flow_tbl_deferred_destroy(table); 1305 ovs_flow_tbl_destroy(table, true);
1268 table = ovsl_dereference(dp->table); 1306 table = ovsl_dereference(dp->table);
1269 } 1307 }
1270 } 1308 }
@@ -1277,14 +1315,30 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1277 } 1315 }
1278 clear_stats(flow); 1316 clear_stats(flow);
1279 1317
1318 flow->key = masked_key;
1319 flow->unmasked_key = key;
1320
1321 /* Make sure mask is unique in the system */
1322 mask_p = ovs_sw_flow_mask_find(table, &mask);
1323 if (!mask_p) {
1324 /* Allocate a new mask if none exsits. */
1325 mask_p = ovs_sw_flow_mask_alloc();
1326 if (!mask_p)
1327 goto err_flow_free;
1328 mask_p->key = mask.key;
1329 mask_p->range = mask.range;
1330 ovs_sw_flow_mask_insert(table, mask_p);
1331 }
1332
1333 ovs_sw_flow_mask_add_ref(mask_p);
1334 flow->mask = mask_p;
1280 rcu_assign_pointer(flow->sf_acts, acts); 1335 rcu_assign_pointer(flow->sf_acts, acts);
1281 1336
1282 /* Put flow in bucket. */ 1337 /* Put flow in bucket. */
1283 ovs_flow_tbl_insert(table, flow, &key, key_len); 1338 ovs_flow_insert(table, flow);
1284 1339
1285 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1340 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1286 info->snd_seq, 1341 info->snd_seq, OVS_FLOW_CMD_NEW);
1287 OVS_FLOW_CMD_NEW);
1288 } else { 1342 } else {
1289 /* We found a matching flow. */ 1343 /* We found a matching flow. */
1290 struct sw_flow_actions *old_acts; 1344 struct sw_flow_actions *old_acts;
@@ -1300,6 +1354,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1300 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) 1354 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1301 goto err_unlock_ovs; 1355 goto err_unlock_ovs;
1302 1356
1357 /* The unmasked key has to be the same for flow updates. */
1358 error = -EINVAL;
1359 if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) {
1360 OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
1361 goto err_unlock_ovs;
1362 }
1363
1303 /* Update actions. */ 1364 /* Update actions. */
1304 old_acts = ovsl_dereference(flow->sf_acts); 1365 old_acts = ovsl_dereference(flow->sf_acts);
1305 rcu_assign_pointer(flow->sf_acts, acts); 1366 rcu_assign_pointer(flow->sf_acts, acts);
@@ -1324,6 +1385,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1324 ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); 1385 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
1325 return 0; 1386 return 0;
1326 1387
1388err_flow_free:
1389 ovs_flow_free(flow, false);
1327err_unlock_ovs: 1390err_unlock_ovs:
1328 ovs_unlock(); 1391 ovs_unlock();
1329err_kfree: 1392err_kfree:
@@ -1341,12 +1404,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1341 struct sw_flow *flow; 1404 struct sw_flow *flow;
1342 struct datapath *dp; 1405 struct datapath *dp;
1343 struct flow_table *table; 1406 struct flow_table *table;
1407 struct sw_flow_match match;
1344 int err; 1408 int err;
1345 int key_len;
1346 1409
1347 if (!a[OVS_FLOW_ATTR_KEY]) 1410 if (!a[OVS_FLOW_ATTR_KEY]) {
1411 OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
1348 return -EINVAL; 1412 return -EINVAL;
1349 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1413 }
1414
1415 ovs_match_init(&match, &key, NULL);
1416 err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
1350 if (err) 1417 if (err)
1351 return err; 1418 return err;
1352 1419
@@ -1358,7 +1425,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1358 } 1425 }
1359 1426
1360 table = ovsl_dereference(dp->table); 1427 table = ovsl_dereference(dp->table);
1361 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1428 flow = ovs_flow_lookup_unmasked_key(table, &match);
1362 if (!flow) { 1429 if (!flow) {
1363 err = -ENOENT; 1430 err = -ENOENT;
1364 goto unlock; 1431 goto unlock;
@@ -1387,8 +1454,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1387 struct sw_flow *flow; 1454 struct sw_flow *flow;
1388 struct datapath *dp; 1455 struct datapath *dp;
1389 struct flow_table *table; 1456 struct flow_table *table;
1457 struct sw_flow_match match;
1390 int err; 1458 int err;
1391 int key_len;
1392 1459
1393 ovs_lock(); 1460 ovs_lock();
1394 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1461 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1401,12 +1468,14 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1401 err = flush_flows(dp); 1468 err = flush_flows(dp);
1402 goto unlock; 1469 goto unlock;
1403 } 1470 }
1404 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1471
1472 ovs_match_init(&match, &key, NULL);
1473 err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
1405 if (err) 1474 if (err)
1406 goto unlock; 1475 goto unlock;
1407 1476
1408 table = ovsl_dereference(dp->table); 1477 table = ovsl_dereference(dp->table);
1409 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1478 flow = ovs_flow_lookup_unmasked_key(table, &match);
1410 if (!flow) { 1479 if (!flow) {
1411 err = -ENOENT; 1480 err = -ENOENT;
1412 goto unlock; 1481 goto unlock;
@@ -1418,13 +1487,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1418 goto unlock; 1487 goto unlock;
1419 } 1488 }
1420 1489
1421 ovs_flow_tbl_remove(table, flow); 1490 ovs_flow_remove(table, flow);
1422 1491
1423 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, 1492 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
1424 info->snd_seq, 0, OVS_FLOW_CMD_DEL); 1493 info->snd_seq, 0, OVS_FLOW_CMD_DEL);
1425 BUG_ON(err < 0); 1494 BUG_ON(err < 0);
1426 1495
1427 ovs_flow_deferred_free(flow); 1496 ovs_flow_free(flow, true);
1428 ovs_unlock(); 1497 ovs_unlock();
1429 1498
1430 ovs_notify(reply, info, &ovs_dp_flow_multicast_group); 1499 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
@@ -1440,22 +1509,21 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1440 struct datapath *dp; 1509 struct datapath *dp;
1441 struct flow_table *table; 1510 struct flow_table *table;
1442 1511
1443 ovs_lock(); 1512 rcu_read_lock();
1444 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1513 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1445 if (!dp) { 1514 if (!dp) {
1446 ovs_unlock(); 1515 rcu_read_unlock();
1447 return -ENODEV; 1516 return -ENODEV;
1448 } 1517 }
1449 1518
1450 table = ovsl_dereference(dp->table); 1519 table = rcu_dereference(dp->table);
1451
1452 for (;;) { 1520 for (;;) {
1453 struct sw_flow *flow; 1521 struct sw_flow *flow;
1454 u32 bucket, obj; 1522 u32 bucket, obj;
1455 1523
1456 bucket = cb->args[0]; 1524 bucket = cb->args[0];
1457 obj = cb->args[1]; 1525 obj = cb->args[1];
1458 flow = ovs_flow_tbl_next(table, &bucket, &obj); 1526 flow = ovs_flow_dump_next(table, &bucket, &obj);
1459 if (!flow) 1527 if (!flow)
1460 break; 1528 break;
1461 1529
@@ -1468,7 +1536,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1468 cb->args[0] = bucket; 1536 cb->args[0] = bucket;
1469 cb->args[1] = obj; 1537 cb->args[1] = obj;
1470 } 1538 }
1471 ovs_unlock(); 1539 rcu_read_unlock();
1472 return skb->len; 1540 return skb->len;
1473} 1541}
1474 1542
@@ -1664,7 +1732,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1664 goto err_destroy_local_port; 1732 goto err_destroy_local_port;
1665 1733
1666 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1734 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1667 list_add_tail(&dp->list_node, &ovs_net->dps); 1735 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1668 1736
1669 ovs_unlock(); 1737 ovs_unlock();
1670 1738
@@ -1678,7 +1746,7 @@ err_destroy_ports_array:
1678err_destroy_percpu: 1746err_destroy_percpu:
1679 free_percpu(dp->stats_percpu); 1747 free_percpu(dp->stats_percpu);
1680err_destroy_table: 1748err_destroy_table:
1681 ovs_flow_tbl_destroy(ovsl_dereference(dp->table)); 1749 ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
1682err_free_dp: 1750err_free_dp:
1683 release_net(ovs_dp_get_net(dp)); 1751 release_net(ovs_dp_get_net(dp));
1684 kfree(dp); 1752 kfree(dp);
@@ -1702,7 +1770,7 @@ static void __dp_destroy(struct datapath *dp)
1702 ovs_dp_detach_port(vport); 1770 ovs_dp_detach_port(vport);
1703 } 1771 }
1704 1772
1705 list_del(&dp->list_node); 1773 list_del_rcu(&dp->list_node);
1706 1774
1707 /* OVSP_LOCAL is datapath internal port. We need to make sure that 1775 /* OVSP_LOCAL is datapath internal port. We need to make sure that
1708 * all port in datapath are destroyed first before freeing datapath. 1776 * all port in datapath are destroyed first before freeing datapath.
@@ -1807,8 +1875,8 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1807 int skip = cb->args[0]; 1875 int skip = cb->args[0];
1808 int i = 0; 1876 int i = 0;
1809 1877
1810 ovs_lock(); 1878 rcu_read_lock();
1811 list_for_each_entry(dp, &ovs_net->dps, list_node) { 1879 list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
1812 if (i >= skip && 1880 if (i >= skip &&
1813 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1881 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1814 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1882 cb->nlh->nlmsg_seq, NLM_F_MULTI,
@@ -1816,7 +1884,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1816 break; 1884 break;
1817 i++; 1885 i++;
1818 } 1886 }
1819 ovs_unlock(); 1887 rcu_read_unlock();
1820 1888
1821 cb->args[0] = i; 1889 cb->args[0] = i;
1822 1890
@@ -2285,7 +2353,7 @@ static void rehash_flow_table(struct work_struct *work)
2285 new_table = ovs_flow_tbl_rehash(old_table); 2353 new_table = ovs_flow_tbl_rehash(old_table);
2286 if (!IS_ERR(new_table)) { 2354 if (!IS_ERR(new_table)) {
2287 rcu_assign_pointer(dp->table, new_table); 2355 rcu_assign_pointer(dp->table, new_table);
2288 ovs_flow_tbl_deferred_destroy(old_table); 2356 ovs_flow_tbl_destroy(old_table, true);
2289 } 2357 }
2290 } 2358 }
2291 } 2359 }
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index a91486484916..4d109c176ef3 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -88,11 +88,13 @@ struct datapath {
88/** 88/**
89 * struct ovs_skb_cb - OVS data in skb CB 89 * struct ovs_skb_cb - OVS data in skb CB
90 * @flow: The flow associated with this packet. May be %NULL if no flow. 90 * @flow: The flow associated with this packet. May be %NULL if no flow.
91 * @pkt_key: The flow information extracted from the packet. Must be nonnull.
91 * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the 92 * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
92 * packet is not being tunneled. 93 * packet is not being tunneled.
93 */ 94 */
94struct ovs_skb_cb { 95struct ovs_skb_cb {
95 struct sw_flow *flow; 96 struct sw_flow *flow;
97 struct sw_flow_key *pkt_key;
96 struct ovs_key_ipv4_tunnel *tun_key; 98 struct ovs_key_ipv4_tunnel *tun_key;
97}; 99};
98#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) 100#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -183,4 +185,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
183 185
184int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); 186int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
185void ovs_dp_notify_wq(struct work_struct *work); 187void ovs_dp_notify_wq(struct work_struct *work);
188
189#define OVS_NLERR(fmt, ...) \
190 pr_info_once("netlink: " fmt, ##__VA_ARGS__)
191
186#endif /* datapath.h */ 192#endif /* datapath.h */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 1aa84dc58777..410db90db73d 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2011 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -34,6 +34,7 @@
34#include <linux/if_arp.h> 34#include <linux/if_arp.h>
35#include <linux/ip.h> 35#include <linux/ip.h>
36#include <linux/ipv6.h> 36#include <linux/ipv6.h>
37#include <linux/sctp.h>
37#include <linux/tcp.h> 38#include <linux/tcp.h>
38#include <linux/udp.h> 39#include <linux/udp.h>
39#include <linux/icmp.h> 40#include <linux/icmp.h>
@@ -46,6 +47,202 @@
46 47
47static struct kmem_cache *flow_cache; 48static struct kmem_cache *flow_cache;
48 49
50static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
51 struct sw_flow_key_range *range, u8 val);
52
53static void update_range__(struct sw_flow_match *match,
54 size_t offset, size_t size, bool is_mask)
55{
56 struct sw_flow_key_range *range = NULL;
57 size_t start = rounddown(offset, sizeof(long));
58 size_t end = roundup(offset + size, sizeof(long));
59
60 if (!is_mask)
61 range = &match->range;
62 else if (match->mask)
63 range = &match->mask->range;
64
65 if (!range)
66 return;
67
68 if (range->start == range->end) {
69 range->start = start;
70 range->end = end;
71 return;
72 }
73
74 if (range->start > start)
75 range->start = start;
76
77 if (range->end < end)
78 range->end = end;
79}
80
81#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
82 do { \
83 update_range__(match, offsetof(struct sw_flow_key, field), \
84 sizeof((match)->key->field), is_mask); \
85 if (is_mask) { \
86 if ((match)->mask) \
87 (match)->mask->key.field = value; \
88 } else { \
89 (match)->key->field = value; \
90 } \
91 } while (0)
92
93#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
94 do { \
95 update_range__(match, offsetof(struct sw_flow_key, field), \
96 len, is_mask); \
97 if (is_mask) { \
98 if ((match)->mask) \
99 memcpy(&(match)->mask->key.field, value_p, len);\
100 } else { \
101 memcpy(&(match)->key->field, value_p, len); \
102 } \
103 } while (0)
104
105static u16 range_n_bytes(const struct sw_flow_key_range *range)
106{
107 return range->end - range->start;
108}
109
110void ovs_match_init(struct sw_flow_match *match,
111 struct sw_flow_key *key,
112 struct sw_flow_mask *mask)
113{
114 memset(match, 0, sizeof(*match));
115 match->key = key;
116 match->mask = mask;
117
118 memset(key, 0, sizeof(*key));
119
120 if (mask) {
121 memset(&mask->key, 0, sizeof(mask->key));
122 mask->range.start = mask->range.end = 0;
123 }
124}
125
126static bool ovs_match_validate(const struct sw_flow_match *match,
127 u64 key_attrs, u64 mask_attrs)
128{
129 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
130 u64 mask_allowed = key_attrs; /* At most allow all key attributes */
131
132 /* The following mask attributes allowed only if they
133 * pass the validation tests. */
134 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
135 | (1 << OVS_KEY_ATTR_IPV6)
136 | (1 << OVS_KEY_ATTR_TCP)
137 | (1 << OVS_KEY_ATTR_UDP)
138 | (1 << OVS_KEY_ATTR_SCTP)
139 | (1 << OVS_KEY_ATTR_ICMP)
140 | (1 << OVS_KEY_ATTR_ICMPV6)
141 | (1 << OVS_KEY_ATTR_ARP)
142 | (1 << OVS_KEY_ATTR_ND));
143
144 /* Always allowed mask fields. */
145 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
146 | (1 << OVS_KEY_ATTR_IN_PORT)
147 | (1 << OVS_KEY_ATTR_ETHERTYPE));
148
149 /* Check key attributes. */
150 if (match->key->eth.type == htons(ETH_P_ARP)
151 || match->key->eth.type == htons(ETH_P_RARP)) {
152 key_expected |= 1 << OVS_KEY_ATTR_ARP;
153 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
154 mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
155 }
156
157 if (match->key->eth.type == htons(ETH_P_IP)) {
158 key_expected |= 1 << OVS_KEY_ATTR_IPV4;
159 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
160 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
161
162 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
163 if (match->key->ip.proto == IPPROTO_UDP) {
164 key_expected |= 1 << OVS_KEY_ATTR_UDP;
165 if (match->mask && (match->mask->key.ip.proto == 0xff))
166 mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
167 }
168
169 if (match->key->ip.proto == IPPROTO_SCTP) {
170 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
171 if (match->mask && (match->mask->key.ip.proto == 0xff))
172 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
173 }
174
175 if (match->key->ip.proto == IPPROTO_TCP) {
176 key_expected |= 1 << OVS_KEY_ATTR_TCP;
177 if (match->mask && (match->mask->key.ip.proto == 0xff))
178 mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
179 }
180
181 if (match->key->ip.proto == IPPROTO_ICMP) {
182 key_expected |= 1 << OVS_KEY_ATTR_ICMP;
183 if (match->mask && (match->mask->key.ip.proto == 0xff))
184 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
185 }
186 }
187 }
188
189 if (match->key->eth.type == htons(ETH_P_IPV6)) {
190 key_expected |= 1 << OVS_KEY_ATTR_IPV6;
191 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
192 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
193
194 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
195 if (match->key->ip.proto == IPPROTO_UDP) {
196 key_expected |= 1 << OVS_KEY_ATTR_UDP;
197 if (match->mask && (match->mask->key.ip.proto == 0xff))
198 mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
199 }
200
201 if (match->key->ip.proto == IPPROTO_SCTP) {
202 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
203 if (match->mask && (match->mask->key.ip.proto == 0xff))
204 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
205 }
206
207 if (match->key->ip.proto == IPPROTO_TCP) {
208 key_expected |= 1 << OVS_KEY_ATTR_TCP;
209 if (match->mask && (match->mask->key.ip.proto == 0xff))
210 mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
211 }
212
213 if (match->key->ip.proto == IPPROTO_ICMPV6) {
214 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
215 if (match->mask && (match->mask->key.ip.proto == 0xff))
216 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
217
218 if (match->key->ipv6.tp.src ==
219 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
220 match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
221 key_expected |= 1 << OVS_KEY_ATTR_ND;
222 if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
223 mask_allowed |= 1 << OVS_KEY_ATTR_ND;
224 }
225 }
226 }
227 }
228
229 if ((key_attrs & key_expected) != key_expected) {
230 /* Key attributes check failed. */
231 OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
232 key_attrs, key_expected);
233 return false;
234 }
235
236 if ((mask_attrs & mask_allowed) != mask_attrs) {
237 /* Mask attributes check failed. */
238 OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
239 mask_attrs, mask_allowed);
240 return false;
241 }
242
243 return true;
244}
245
49static int check_header(struct sk_buff *skb, int len) 246static int check_header(struct sk_buff *skb, int len)
50{ 247{
51 if (unlikely(skb->len < len)) 248 if (unlikely(skb->len < len))
@@ -102,6 +299,12 @@ static bool udphdr_ok(struct sk_buff *skb)
102 sizeof(struct udphdr)); 299 sizeof(struct udphdr));
103} 300}
104 301
302static bool sctphdr_ok(struct sk_buff *skb)
303{
304 return pskb_may_pull(skb, skb_transport_offset(skb) +
305 sizeof(struct sctphdr));
306}
307
105static bool icmphdr_ok(struct sk_buff *skb) 308static bool icmphdr_ok(struct sk_buff *skb)
106{ 309{
107 return pskb_may_pull(skb, skb_transport_offset(skb) + 310 return pskb_may_pull(skb, skb_transport_offset(skb) +
@@ -121,12 +324,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
121 return cur_ms - idle_ms; 324 return cur_ms - idle_ms;
122} 325}
123 326
124#define SW_FLOW_KEY_OFFSET(field) \ 327static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
125 (offsetof(struct sw_flow_key, field) + \
126 FIELD_SIZEOF(struct sw_flow_key, field))
127
128static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
129 int *key_lenp)
130{ 328{
131 unsigned int nh_ofs = skb_network_offset(skb); 329 unsigned int nh_ofs = skb_network_offset(skb);
132 unsigned int nh_len; 330 unsigned int nh_len;
@@ -136,8 +334,6 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
136 __be16 frag_off; 334 __be16 frag_off;
137 int err; 335 int err;
138 336
139 *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
140
141 err = check_header(skb, nh_ofs + sizeof(*nh)); 337 err = check_header(skb, nh_ofs + sizeof(*nh));
142 if (unlikely(err)) 338 if (unlikely(err))
143 return err; 339 return err;
@@ -176,6 +372,22 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
176 sizeof(struct icmp6hdr)); 372 sizeof(struct icmp6hdr));
177} 373}
178 374
375void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
376 const struct sw_flow_mask *mask)
377{
378 const long *m = (long *)((u8 *)&mask->key + mask->range.start);
379 const long *s = (long *)((u8 *)src + mask->range.start);
380 long *d = (long *)((u8 *)dst + mask->range.start);
381 int i;
382
383 /* The memory outside of the 'mask->range' are not set since
384 * further operations on 'dst' only uses contents within
385 * 'mask->range'.
386 */
387 for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
388 *d++ = *s++ & *m++;
389}
390
179#define TCP_FLAGS_OFFSET 13 391#define TCP_FLAGS_OFFSET 13
180#define TCP_FLAG_MASK 0x3f 392#define TCP_FLAG_MASK 0x3f
181 393
@@ -224,6 +436,7 @@ struct sw_flow *ovs_flow_alloc(void)
224 436
225 spin_lock_init(&flow->lock); 437 spin_lock_init(&flow->lock);
226 flow->sf_acts = NULL; 438 flow->sf_acts = NULL;
439 flow->mask = NULL;
227 440
228 return flow; 441 return flow;
229} 442}
@@ -263,7 +476,7 @@ static void free_buckets(struct flex_array *buckets)
263 flex_array_free(buckets); 476 flex_array_free(buckets);
264} 477}
265 478
266struct flow_table *ovs_flow_tbl_alloc(int new_size) 479static struct flow_table *__flow_tbl_alloc(int new_size)
267{ 480{
268 struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); 481 struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
269 482
@@ -281,17 +494,15 @@ struct flow_table *ovs_flow_tbl_alloc(int new_size)
281 table->node_ver = 0; 494 table->node_ver = 0;
282 table->keep_flows = false; 495 table->keep_flows = false;
283 get_random_bytes(&table->hash_seed, sizeof(u32)); 496 get_random_bytes(&table->hash_seed, sizeof(u32));
497 table->mask_list = NULL;
284 498
285 return table; 499 return table;
286} 500}
287 501
288void ovs_flow_tbl_destroy(struct flow_table *table) 502static void __flow_tbl_destroy(struct flow_table *table)
289{ 503{
290 int i; 504 int i;
291 505
292 if (!table)
293 return;
294
295 if (table->keep_flows) 506 if (table->keep_flows)
296 goto skip_flows; 507 goto skip_flows;
297 508
@@ -302,32 +513,56 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
302 int ver = table->node_ver; 513 int ver = table->node_ver;
303 514
304 hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { 515 hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
305 hlist_del_rcu(&flow->hash_node[ver]); 516 hlist_del(&flow->hash_node[ver]);
306 ovs_flow_free(flow); 517 ovs_flow_free(flow, false);
307 } 518 }
308 } 519 }
309 520
521 BUG_ON(!list_empty(table->mask_list));
522 kfree(table->mask_list);
523
310skip_flows: 524skip_flows:
311 free_buckets(table->buckets); 525 free_buckets(table->buckets);
312 kfree(table); 526 kfree(table);
313} 527}
314 528
529struct flow_table *ovs_flow_tbl_alloc(int new_size)
530{
531 struct flow_table *table = __flow_tbl_alloc(new_size);
532
533 if (!table)
534 return NULL;
535
536 table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
537 if (!table->mask_list) {
538 table->keep_flows = true;
539 __flow_tbl_destroy(table);
540 return NULL;
541 }
542 INIT_LIST_HEAD(table->mask_list);
543
544 return table;
545}
546
315static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) 547static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
316{ 548{
317 struct flow_table *table = container_of(rcu, struct flow_table, rcu); 549 struct flow_table *table = container_of(rcu, struct flow_table, rcu);
318 550
319 ovs_flow_tbl_destroy(table); 551 __flow_tbl_destroy(table);
320} 552}
321 553
322void ovs_flow_tbl_deferred_destroy(struct flow_table *table) 554void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
323{ 555{
324 if (!table) 556 if (!table)
325 return; 557 return;
326 558
327 call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); 559 if (deferred)
560 call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
561 else
562 __flow_tbl_destroy(table);
328} 563}
329 564
330struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last) 565struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
331{ 566{
332 struct sw_flow *flow; 567 struct sw_flow *flow;
333 struct hlist_head *head; 568 struct hlist_head *head;
@@ -353,11 +588,13 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
353 return NULL; 588 return NULL;
354} 589}
355 590
356static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) 591static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
357{ 592{
358 struct hlist_head *head; 593 struct hlist_head *head;
594
359 head = find_bucket(table, flow->hash); 595 head = find_bucket(table, flow->hash);
360 hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); 596 hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
597
361 table->count++; 598 table->count++;
362} 599}
363 600
@@ -377,8 +614,10 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new
377 head = flex_array_get(old->buckets, i); 614 head = flex_array_get(old->buckets, i);
378 615
379 hlist_for_each_entry(flow, head, hash_node[old_ver]) 616 hlist_for_each_entry(flow, head, hash_node[old_ver])
380 __flow_tbl_insert(new, flow); 617 __tbl_insert(new, flow);
381 } 618 }
619
620 new->mask_list = old->mask_list;
382 old->keep_flows = true; 621 old->keep_flows = true;
383} 622}
384 623
@@ -386,7 +625,7 @@ static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buck
386{ 625{
387 struct flow_table *new_table; 626 struct flow_table *new_table;
388 627
389 new_table = ovs_flow_tbl_alloc(n_buckets); 628 new_table = __flow_tbl_alloc(n_buckets);
390 if (!new_table) 629 if (!new_table)
391 return ERR_PTR(-ENOMEM); 630 return ERR_PTR(-ENOMEM);
392 631
@@ -405,28 +644,30 @@ struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
405 return __flow_tbl_rehash(table, table->n_buckets * 2); 644 return __flow_tbl_rehash(table, table->n_buckets * 2);
406} 645}
407 646
408void ovs_flow_free(struct sw_flow *flow) 647static void __flow_free(struct sw_flow *flow)
409{ 648{
410 if (unlikely(!flow))
411 return;
412
413 kfree((struct sf_flow_acts __force *)flow->sf_acts); 649 kfree((struct sf_flow_acts __force *)flow->sf_acts);
414 kmem_cache_free(flow_cache, flow); 650 kmem_cache_free(flow_cache, flow);
415} 651}
416 652
417/* RCU callback used by ovs_flow_deferred_free. */
418static void rcu_free_flow_callback(struct rcu_head *rcu) 653static void rcu_free_flow_callback(struct rcu_head *rcu)
419{ 654{
420 struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); 655 struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
421 656
422 ovs_flow_free(flow); 657 __flow_free(flow);
423} 658}
424 659
425/* Schedules 'flow' to be freed after the next RCU grace period. 660void ovs_flow_free(struct sw_flow *flow, bool deferred)
426 * The caller must hold rcu_read_lock for this to be sensible. */
427void ovs_flow_deferred_free(struct sw_flow *flow)
428{ 661{
429 call_rcu(&flow->rcu, rcu_free_flow_callback); 662 if (!flow)
663 return;
664
665 ovs_sw_flow_mask_del_ref(flow->mask, deferred);
666
667 if (deferred)
668 call_rcu(&flow->rcu, rcu_free_flow_callback);
669 else
670 __flow_free(flow);
430} 671}
431 672
432/* Schedules 'sf_acts' to be freed after the next RCU grace period. 673/* Schedules 'sf_acts' to be freed after the next RCU grace period.
@@ -497,18 +738,15 @@ static __be16 parse_ethertype(struct sk_buff *skb)
497} 738}
498 739
499static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, 740static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
500 int *key_lenp, int nh_len) 741 int nh_len)
501{ 742{
502 struct icmp6hdr *icmp = icmp6_hdr(skb); 743 struct icmp6hdr *icmp = icmp6_hdr(skb);
503 int error = 0;
504 int key_len;
505 744
506 /* The ICMPv6 type and code fields use the 16-bit transport port 745 /* The ICMPv6 type and code fields use the 16-bit transport port
507 * fields, so we need to store them in 16-bit network byte order. 746 * fields, so we need to store them in 16-bit network byte order.
508 */ 747 */
509 key->ipv6.tp.src = htons(icmp->icmp6_type); 748 key->ipv6.tp.src = htons(icmp->icmp6_type);
510 key->ipv6.tp.dst = htons(icmp->icmp6_code); 749 key->ipv6.tp.dst = htons(icmp->icmp6_code);
511 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
512 750
513 if (icmp->icmp6_code == 0 && 751 if (icmp->icmp6_code == 0 &&
514 (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || 752 (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -517,21 +755,17 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
517 struct nd_msg *nd; 755 struct nd_msg *nd;
518 int offset; 756 int offset;
519 757
520 key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
521
522 /* In order to process neighbor discovery options, we need the 758 /* In order to process neighbor discovery options, we need the
523 * entire packet. 759 * entire packet.
524 */ 760 */
525 if (unlikely(icmp_len < sizeof(*nd))) 761 if (unlikely(icmp_len < sizeof(*nd)))
526 goto out; 762 return 0;
527 if (unlikely(skb_linearize(skb))) { 763
528 error = -ENOMEM; 764 if (unlikely(skb_linearize(skb)))
529 goto out; 765 return -ENOMEM;
530 }
531 766
532 nd = (struct nd_msg *)skb_transport_header(skb); 767 nd = (struct nd_msg *)skb_transport_header(skb);
533 key->ipv6.nd.target = nd->target; 768 key->ipv6.nd.target = nd->target;
534 key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
535 769
536 icmp_len -= sizeof(*nd); 770 icmp_len -= sizeof(*nd);
537 offset = 0; 771 offset = 0;
@@ -541,7 +775,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
541 int opt_len = nd_opt->nd_opt_len * 8; 775 int opt_len = nd_opt->nd_opt_len * 8;
542 776
543 if (unlikely(!opt_len || opt_len > icmp_len)) 777 if (unlikely(!opt_len || opt_len > icmp_len))
544 goto invalid; 778 return 0;
545 779
546 /* Store the link layer address if the appropriate 780 /* Store the link layer address if the appropriate
547 * option is provided. It is considered an error if 781 * option is provided. It is considered an error if
@@ -566,16 +800,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
566 } 800 }
567 } 801 }
568 802
569 goto out; 803 return 0;
570 804
571invalid: 805invalid:
572 memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target)); 806 memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
573 memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll)); 807 memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
574 memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll)); 808 memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
575 809
576out: 810 return 0;
577 *key_lenp = key_len;
578 return error;
579} 811}
580 812
581/** 813/**
@@ -584,7 +816,6 @@ out:
584 * Ethernet header 816 * Ethernet header
585 * @in_port: port number on which @skb was received. 817 * @in_port: port number on which @skb was received.
586 * @key: output flow key 818 * @key: output flow key
587 * @key_lenp: length of output flow key
588 * 819 *
589 * The caller must ensure that skb->len >= ETH_HLEN. 820 * The caller must ensure that skb->len >= ETH_HLEN.
590 * 821 *
@@ -602,11 +833,9 @@ out:
602 * of a correct length, otherwise the same as skb->network_header. 833 * of a correct length, otherwise the same as skb->network_header.
603 * For other key->eth.type values it is left untouched. 834 * For other key->eth.type values it is left untouched.
604 */ 835 */
605int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, 836int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
606 int *key_lenp)
607{ 837{
608 int error = 0; 838 int error;
609 int key_len = SW_FLOW_KEY_OFFSET(eth);
610 struct ethhdr *eth; 839 struct ethhdr *eth;
611 840
612 memset(key, 0, sizeof(*key)); 841 memset(key, 0, sizeof(*key));
@@ -649,15 +878,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
649 struct iphdr *nh; 878 struct iphdr *nh;
650 __be16 offset; 879 __be16 offset;
651 880
652 key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
653
654 error = check_iphdr(skb); 881 error = check_iphdr(skb);
655 if (unlikely(error)) { 882 if (unlikely(error)) {
656 if (error == -EINVAL) { 883 if (error == -EINVAL) {
657 skb->transport_header = skb->network_header; 884 skb->transport_header = skb->network_header;
658 error = 0; 885 error = 0;
659 } 886 }
660 goto out; 887 return error;
661 } 888 }
662 889
663 nh = ip_hdr(skb); 890 nh = ip_hdr(skb);
@@ -671,7 +898,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
671 offset = nh->frag_off & htons(IP_OFFSET); 898 offset = nh->frag_off & htons(IP_OFFSET);
672 if (offset) { 899 if (offset) {
673 key->ip.frag = OVS_FRAG_TYPE_LATER; 900 key->ip.frag = OVS_FRAG_TYPE_LATER;
674 goto out; 901 return 0;
675 } 902 }
676 if (nh->frag_off & htons(IP_MF) || 903 if (nh->frag_off & htons(IP_MF) ||
677 skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 904 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
@@ -679,21 +906,24 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
679 906
680 /* Transport layer. */ 907 /* Transport layer. */
681 if (key->ip.proto == IPPROTO_TCP) { 908 if (key->ip.proto == IPPROTO_TCP) {
682 key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
683 if (tcphdr_ok(skb)) { 909 if (tcphdr_ok(skb)) {
684 struct tcphdr *tcp = tcp_hdr(skb); 910 struct tcphdr *tcp = tcp_hdr(skb);
685 key->ipv4.tp.src = tcp->source; 911 key->ipv4.tp.src = tcp->source;
686 key->ipv4.tp.dst = tcp->dest; 912 key->ipv4.tp.dst = tcp->dest;
687 } 913 }
688 } else if (key->ip.proto == IPPROTO_UDP) { 914 } else if (key->ip.proto == IPPROTO_UDP) {
689 key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
690 if (udphdr_ok(skb)) { 915 if (udphdr_ok(skb)) {
691 struct udphdr *udp = udp_hdr(skb); 916 struct udphdr *udp = udp_hdr(skb);
692 key->ipv4.tp.src = udp->source; 917 key->ipv4.tp.src = udp->source;
693 key->ipv4.tp.dst = udp->dest; 918 key->ipv4.tp.dst = udp->dest;
694 } 919 }
920 } else if (key->ip.proto == IPPROTO_SCTP) {
921 if (sctphdr_ok(skb)) {
922 struct sctphdr *sctp = sctp_hdr(skb);
923 key->ipv4.tp.src = sctp->source;
924 key->ipv4.tp.dst = sctp->dest;
925 }
695 } else if (key->ip.proto == IPPROTO_ICMP) { 926 } else if (key->ip.proto == IPPROTO_ICMP) {
696 key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
697 if (icmphdr_ok(skb)) { 927 if (icmphdr_ok(skb)) {
698 struct icmphdr *icmp = icmp_hdr(skb); 928 struct icmphdr *icmp = icmp_hdr(skb);
699 /* The ICMP type and code fields use the 16-bit 929 /* The ICMP type and code fields use the 16-bit
@@ -722,102 +952,175 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
722 memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); 952 memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
723 memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); 953 memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
724 memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); 954 memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
725 key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
726 } 955 }
727 } else if (key->eth.type == htons(ETH_P_IPV6)) { 956 } else if (key->eth.type == htons(ETH_P_IPV6)) {
728 int nh_len; /* IPv6 Header + Extensions */ 957 int nh_len; /* IPv6 Header + Extensions */
729 958
730 nh_len = parse_ipv6hdr(skb, key, &key_len); 959 nh_len = parse_ipv6hdr(skb, key);
731 if (unlikely(nh_len < 0)) { 960 if (unlikely(nh_len < 0)) {
732 if (nh_len == -EINVAL) 961 if (nh_len == -EINVAL) {
733 skb->transport_header = skb->network_header; 962 skb->transport_header = skb->network_header;
734 else 963 error = 0;
964 } else {
735 error = nh_len; 965 error = nh_len;
736 goto out; 966 }
967 return error;
737 } 968 }
738 969
739 if (key->ip.frag == OVS_FRAG_TYPE_LATER) 970 if (key->ip.frag == OVS_FRAG_TYPE_LATER)
740 goto out; 971 return 0;
741 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 972 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
742 key->ip.frag = OVS_FRAG_TYPE_FIRST; 973 key->ip.frag = OVS_FRAG_TYPE_FIRST;
743 974
744 /* Transport layer. */ 975 /* Transport layer. */
745 if (key->ip.proto == NEXTHDR_TCP) { 976 if (key->ip.proto == NEXTHDR_TCP) {
746 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
747 if (tcphdr_ok(skb)) { 977 if (tcphdr_ok(skb)) {
748 struct tcphdr *tcp = tcp_hdr(skb); 978 struct tcphdr *tcp = tcp_hdr(skb);
749 key->ipv6.tp.src = tcp->source; 979 key->ipv6.tp.src = tcp->source;
750 key->ipv6.tp.dst = tcp->dest; 980 key->ipv6.tp.dst = tcp->dest;
751 } 981 }
752 } else if (key->ip.proto == NEXTHDR_UDP) { 982 } else if (key->ip.proto == NEXTHDR_UDP) {
753 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
754 if (udphdr_ok(skb)) { 983 if (udphdr_ok(skb)) {
755 struct udphdr *udp = udp_hdr(skb); 984 struct udphdr *udp = udp_hdr(skb);
756 key->ipv6.tp.src = udp->source; 985 key->ipv6.tp.src = udp->source;
757 key->ipv6.tp.dst = udp->dest; 986 key->ipv6.tp.dst = udp->dest;
758 } 987 }
988 } else if (key->ip.proto == NEXTHDR_SCTP) {
989 if (sctphdr_ok(skb)) {
990 struct sctphdr *sctp = sctp_hdr(skb);
991 key->ipv6.tp.src = sctp->source;
992 key->ipv6.tp.dst = sctp->dest;
993 }
759 } else if (key->ip.proto == NEXTHDR_ICMP) { 994 } else if (key->ip.proto == NEXTHDR_ICMP) {
760 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
761 if (icmp6hdr_ok(skb)) { 995 if (icmp6hdr_ok(skb)) {
762 error = parse_icmpv6(skb, key, &key_len, nh_len); 996 error = parse_icmpv6(skb, key, nh_len);
763 if (error < 0) 997 if (error)
764 goto out; 998 return error;
765 } 999 }
766 } 1000 }
767 } 1001 }
768 1002
769out: 1003 return 0;
770 *key_lenp = key_len;
771 return error;
772} 1004}
773 1005
774static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len) 1006static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start,
1007 int key_end)
775{ 1008{
776 return jhash2((u32 *)((u8 *)key + key_start), 1009 u32 *hash_key = (u32 *)((u8 *)key + key_start);
777 DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0); 1010 int hash_u32s = (key_end - key_start) >> 2;
1011
1012 /* Make sure number of hash bytes are multiple of u32. */
1013 BUILD_BUG_ON(sizeof(long) % sizeof(u32));
1014
1015 return jhash2(hash_key, hash_u32s, 0);
778} 1016}
779 1017
780static int flow_key_start(struct sw_flow_key *key) 1018static int flow_key_start(const struct sw_flow_key *key)
781{ 1019{
782 if (key->tun_key.ipv4_dst) 1020 if (key->tun_key.ipv4_dst)
783 return 0; 1021 return 0;
784 else 1022 else
785 return offsetof(struct sw_flow_key, phy); 1023 return rounddown(offsetof(struct sw_flow_key, phy),
1024 sizeof(long));
1025}
1026
1027static bool __cmp_key(const struct sw_flow_key *key1,
1028 const struct sw_flow_key *key2, int key_start, int key_end)
1029{
1030 const long *cp1 = (long *)((u8 *)key1 + key_start);
1031 const long *cp2 = (long *)((u8 *)key2 + key_start);
1032 long diffs = 0;
1033 int i;
1034
1035 for (i = key_start; i < key_end; i += sizeof(long))
1036 diffs |= *cp1++ ^ *cp2++;
1037
1038 return diffs == 0;
786} 1039}
787 1040
788struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, 1041static bool __flow_cmp_masked_key(const struct sw_flow *flow,
789 struct sw_flow_key *key, int key_len) 1042 const struct sw_flow_key *key, int key_start, int key_end)
1043{
1044 return __cmp_key(&flow->key, key, key_start, key_end);
1045}
1046
1047static bool __flow_cmp_unmasked_key(const struct sw_flow *flow,
1048 const struct sw_flow_key *key, int key_start, int key_end)
1049{
1050 return __cmp_key(&flow->unmasked_key, key, key_start, key_end);
1051}
1052
1053bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
1054 const struct sw_flow_key *key, int key_end)
1055{
1056 int key_start;
1057 key_start = flow_key_start(key);
1058
1059 return __flow_cmp_unmasked_key(flow, key, key_start, key_end);
1060
1061}
1062
1063struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
1064 struct sw_flow_match *match)
1065{
1066 struct sw_flow_key *unmasked = match->key;
1067 int key_end = match->range.end;
1068 struct sw_flow *flow;
1069
1070 flow = ovs_flow_lookup(table, unmasked);
1071 if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end)))
1072 flow = NULL;
1073
1074 return flow;
1075}
1076
1077static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table,
1078 const struct sw_flow_key *unmasked,
1079 struct sw_flow_mask *mask)
790{ 1080{
791 struct sw_flow *flow; 1081 struct sw_flow *flow;
792 struct hlist_head *head; 1082 struct hlist_head *head;
793 u8 *_key; 1083 int key_start = mask->range.start;
794 int key_start; 1084 int key_end = mask->range.end;
795 u32 hash; 1085 u32 hash;
1086 struct sw_flow_key masked_key;
796 1087
797 key_start = flow_key_start(key); 1088 ovs_flow_key_mask(&masked_key, unmasked, mask);
798 hash = ovs_flow_hash(key, key_start, key_len); 1089 hash = ovs_flow_hash(&masked_key, key_start, key_end);
799
800 _key = (u8 *) key + key_start;
801 head = find_bucket(table, hash); 1090 head = find_bucket(table, hash);
802 hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { 1091 hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
803 1092 if (flow->mask == mask &&
804 if (flow->hash == hash && 1093 __flow_cmp_masked_key(flow, &masked_key,
805 !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) { 1094 key_start, key_end))
806 return flow; 1095 return flow;
807 }
808 } 1096 }
809 return NULL; 1097 return NULL;
810} 1098}
811 1099
812void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, 1100struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
813 struct sw_flow_key *key, int key_len) 1101 const struct sw_flow_key *key)
1102{
1103 struct sw_flow *flow = NULL;
1104 struct sw_flow_mask *mask;
1105
1106 list_for_each_entry_rcu(mask, tbl->mask_list, list) {
1107 flow = ovs_masked_flow_lookup(tbl, key, mask);
1108 if (flow) /* Found */
1109 break;
1110 }
1111
1112 return flow;
1113}
1114
1115
1116void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow)
814{ 1117{
815 flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len); 1118 flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start,
816 memcpy(&flow->key, key, sizeof(flow->key)); 1119 flow->mask->range.end);
817 __flow_tbl_insert(table, flow); 1120 __tbl_insert(table, flow);
818} 1121}
819 1122
820void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) 1123void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow)
821{ 1124{
822 BUG_ON(table->count == 0); 1125 BUG_ON(table->count == 0);
823 hlist_del_rcu(&flow->hash_node[table->node_ver]); 1126 hlist_del_rcu(&flow->hash_node[table->node_ver]);
@@ -837,6 +1140,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
837 [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), 1140 [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
838 [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), 1141 [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
839 [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), 1142 [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
1143 [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
840 [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), 1144 [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
841 [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), 1145 [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
842 [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), 1146 [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
@@ -844,149 +1148,85 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
844 [OVS_KEY_ATTR_TUNNEL] = -1, 1148 [OVS_KEY_ATTR_TUNNEL] = -1,
845}; 1149};
846 1150
847static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, 1151static bool is_all_zero(const u8 *fp, size_t size)
848 const struct nlattr *a[], u32 *attrs)
849{ 1152{
850 const struct ovs_key_icmp *icmp_key; 1153 int i;
851 const struct ovs_key_tcp *tcp_key;
852 const struct ovs_key_udp *udp_key;
853
854 switch (swkey->ip.proto) {
855 case IPPROTO_TCP:
856 if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
857 return -EINVAL;
858 *attrs &= ~(1 << OVS_KEY_ATTR_TCP);
859
860 *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
861 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
862 swkey->ipv4.tp.src = tcp_key->tcp_src;
863 swkey->ipv4.tp.dst = tcp_key->tcp_dst;
864 break;
865
866 case IPPROTO_UDP:
867 if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
868 return -EINVAL;
869 *attrs &= ~(1 << OVS_KEY_ATTR_UDP);
870
871 *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
872 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
873 swkey->ipv4.tp.src = udp_key->udp_src;
874 swkey->ipv4.tp.dst = udp_key->udp_dst;
875 break;
876
877 case IPPROTO_ICMP:
878 if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
879 return -EINVAL;
880 *attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
881
882 *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
883 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
884 swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
885 swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
886 break;
887 }
888
889 return 0;
890}
891
892static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
893 const struct nlattr *a[], u32 *attrs)
894{
895 const struct ovs_key_icmpv6 *icmpv6_key;
896 const struct ovs_key_tcp *tcp_key;
897 const struct ovs_key_udp *udp_key;
898
899 switch (swkey->ip.proto) {
900 case IPPROTO_TCP:
901 if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
902 return -EINVAL;
903 *attrs &= ~(1 << OVS_KEY_ATTR_TCP);
904
905 *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
906 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
907 swkey->ipv6.tp.src = tcp_key->tcp_src;
908 swkey->ipv6.tp.dst = tcp_key->tcp_dst;
909 break;
910
911 case IPPROTO_UDP:
912 if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
913 return -EINVAL;
914 *attrs &= ~(1 << OVS_KEY_ATTR_UDP);
915
916 *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
917 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
918 swkey->ipv6.tp.src = udp_key->udp_src;
919 swkey->ipv6.tp.dst = udp_key->udp_dst;
920 break;
921
922 case IPPROTO_ICMPV6:
923 if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
924 return -EINVAL;
925 *attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
926
927 *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
928 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
929 swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
930 swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
931 1154
932 if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || 1155 if (!fp)
933 swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 1156 return false;
934 const struct ovs_key_nd *nd_key;
935 1157
936 if (!(*attrs & (1 << OVS_KEY_ATTR_ND))) 1158 for (i = 0; i < size; i++)
937 return -EINVAL; 1159 if (fp[i])
938 *attrs &= ~(1 << OVS_KEY_ATTR_ND); 1160 return false;
939
940 *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
941 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
942 memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
943 sizeof(swkey->ipv6.nd.target));
944 memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
945 memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
946 }
947 break;
948 }
949 1161
950 return 0; 1162 return true;
951} 1163}
952 1164
953static int parse_flow_nlattrs(const struct nlattr *attr, 1165static int __parse_flow_nlattrs(const struct nlattr *attr,
954 const struct nlattr *a[], u32 *attrsp) 1166 const struct nlattr *a[],
1167 u64 *attrsp, bool nz)
955{ 1168{
956 const struct nlattr *nla; 1169 const struct nlattr *nla;
957 u32 attrs; 1170 u32 attrs;
958 int rem; 1171 int rem;
959 1172
960 attrs = 0; 1173 attrs = *attrsp;
961 nla_for_each_nested(nla, attr, rem) { 1174 nla_for_each_nested(nla, attr, rem) {
962 u16 type = nla_type(nla); 1175 u16 type = nla_type(nla);
963 int expected_len; 1176 int expected_len;
964 1177
965 if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type)) 1178 if (type > OVS_KEY_ATTR_MAX) {
1179 OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
1180 type, OVS_KEY_ATTR_MAX);
966 return -EINVAL; 1181 return -EINVAL;
1182 }
1183
1184 if (attrs & (1 << type)) {
1185 OVS_NLERR("Duplicate key attribute (type %d).\n", type);
1186 return -EINVAL;
1187 }
967 1188
968 expected_len = ovs_key_lens[type]; 1189 expected_len = ovs_key_lens[type];
969 if (nla_len(nla) != expected_len && expected_len != -1) 1190 if (nla_len(nla) != expected_len && expected_len != -1) {
1191 OVS_NLERR("Key attribute has unexpected length (type=%d"
1192 ", length=%d, expected=%d).\n", type,
1193 nla_len(nla), expected_len);
970 return -EINVAL; 1194 return -EINVAL;
1195 }
971 1196
972 attrs |= 1 << type; 1197 if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
973 a[type] = nla; 1198 attrs |= 1 << type;
1199 a[type] = nla;
1200 }
974 } 1201 }
975 if (rem) 1202 if (rem) {
1203 OVS_NLERR("Message has %d unknown bytes.\n", rem);
976 return -EINVAL; 1204 return -EINVAL;
1205 }
977 1206
978 *attrsp = attrs; 1207 *attrsp = attrs;
979 return 0; 1208 return 0;
980} 1209}
981 1210
1211static int parse_flow_mask_nlattrs(const struct nlattr *attr,
1212 const struct nlattr *a[], u64 *attrsp)
1213{
1214 return __parse_flow_nlattrs(attr, a, attrsp, true);
1215}
1216
1217static int parse_flow_nlattrs(const struct nlattr *attr,
1218 const struct nlattr *a[], u64 *attrsp)
1219{
1220 return __parse_flow_nlattrs(attr, a, attrsp, false);
1221}
1222
982int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, 1223int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
983 struct ovs_key_ipv4_tunnel *tun_key) 1224 struct sw_flow_match *match, bool is_mask)
984{ 1225{
985 struct nlattr *a; 1226 struct nlattr *a;
986 int rem; 1227 int rem;
987 bool ttl = false; 1228 bool ttl = false;
988 1229 __be16 tun_flags = 0;
989 memset(tun_key, 0, sizeof(*tun_key));
990 1230
991 nla_for_each_nested(a, attr, rem) { 1231 nla_for_each_nested(a, attr, rem) {
992 int type = nla_type(a); 1232 int type = nla_type(a);
@@ -1000,53 +1240,78 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
1000 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 1240 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
1001 }; 1241 };
1002 1242
1003 if (type > OVS_TUNNEL_KEY_ATTR_MAX || 1243 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
1004 ovs_tunnel_key_lens[type] != nla_len(a)) 1244 OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
1245 type, OVS_TUNNEL_KEY_ATTR_MAX);
1005 return -EINVAL; 1246 return -EINVAL;
1247 }
1248
1249 if (ovs_tunnel_key_lens[type] != nla_len(a)) {
1250 OVS_NLERR("IPv4 tunnel attribute type has unexpected "
1251 " length (type=%d, length=%d, expected=%d).\n",
1252 type, nla_len(a), ovs_tunnel_key_lens[type]);
1253 return -EINVAL;
1254 }
1006 1255
1007 switch (type) { 1256 switch (type) {
1008 case OVS_TUNNEL_KEY_ATTR_ID: 1257 case OVS_TUNNEL_KEY_ATTR_ID:
1009 tun_key->tun_id = nla_get_be64(a); 1258 SW_FLOW_KEY_PUT(match, tun_key.tun_id,
1010 tun_key->tun_flags |= TUNNEL_KEY; 1259 nla_get_be64(a), is_mask);
1260 tun_flags |= TUNNEL_KEY;
1011 break; 1261 break;
1012 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 1262 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
1013 tun_key->ipv4_src = nla_get_be32(a); 1263 SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
1264 nla_get_be32(a), is_mask);
1014 break; 1265 break;
1015 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 1266 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
1016 tun_key->ipv4_dst = nla_get_be32(a); 1267 SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
1268 nla_get_be32(a), is_mask);
1017 break; 1269 break;
1018 case OVS_TUNNEL_KEY_ATTR_TOS: 1270 case OVS_TUNNEL_KEY_ATTR_TOS:
1019 tun_key->ipv4_tos = nla_get_u8(a); 1271 SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
1272 nla_get_u8(a), is_mask);
1020 break; 1273 break;
1021 case OVS_TUNNEL_KEY_ATTR_TTL: 1274 case OVS_TUNNEL_KEY_ATTR_TTL:
1022 tun_key->ipv4_ttl = nla_get_u8(a); 1275 SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
1276 nla_get_u8(a), is_mask);
1023 ttl = true; 1277 ttl = true;
1024 break; 1278 break;
1025 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 1279 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
1026 tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT; 1280 tun_flags |= TUNNEL_DONT_FRAGMENT;
1027 break; 1281 break;
1028 case OVS_TUNNEL_KEY_ATTR_CSUM: 1282 case OVS_TUNNEL_KEY_ATTR_CSUM:
1029 tun_key->tun_flags |= TUNNEL_CSUM; 1283 tun_flags |= TUNNEL_CSUM;
1030 break; 1284 break;
1031 default: 1285 default:
1032 return -EINVAL; 1286 return -EINVAL;
1033
1034 } 1287 }
1035 } 1288 }
1036 if (rem > 0)
1037 return -EINVAL;
1038 1289
1039 if (!tun_key->ipv4_dst) 1290 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
1040 return -EINVAL;
1041 1291
1042 if (!ttl) 1292 if (rem > 0) {
1293 OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
1043 return -EINVAL; 1294 return -EINVAL;
1295 }
1296
1297 if (!is_mask) {
1298 if (!match->key->tun_key.ipv4_dst) {
1299 OVS_NLERR("IPv4 tunnel destination address is zero.\n");
1300 return -EINVAL;
1301 }
1302
1303 if (!ttl) {
1304 OVS_NLERR("IPv4 tunnel TTL not specified.\n");
1305 return -EINVAL;
1306 }
1307 }
1044 1308
1045 return 0; 1309 return 0;
1046} 1310}
1047 1311
1048int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, 1312int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
1049 const struct ovs_key_ipv4_tunnel *tun_key) 1313 const struct ovs_key_ipv4_tunnel *tun_key,
1314 const struct ovs_key_ipv4_tunnel *output)
1050{ 1315{
1051 struct nlattr *nla; 1316 struct nlattr *nla;
1052 1317
@@ -1054,23 +1319,24 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
1054 if (!nla) 1319 if (!nla)
1055 return -EMSGSIZE; 1320 return -EMSGSIZE;
1056 1321
1057 if (tun_key->tun_flags & TUNNEL_KEY && 1322 if (output->tun_flags & TUNNEL_KEY &&
1058 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id)) 1323 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
1059 return -EMSGSIZE; 1324 return -EMSGSIZE;
1060 if (tun_key->ipv4_src && 1325 if (output->ipv4_src &&
1061 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src)) 1326 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
1062 return -EMSGSIZE; 1327 return -EMSGSIZE;
1063 if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst)) 1328 if (output->ipv4_dst &&
1329 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
1064 return -EMSGSIZE; 1330 return -EMSGSIZE;
1065 if (tun_key->ipv4_tos && 1331 if (output->ipv4_tos &&
1066 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos)) 1332 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
1067 return -EMSGSIZE; 1333 return -EMSGSIZE;
1068 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl)) 1334 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
1069 return -EMSGSIZE; 1335 return -EMSGSIZE;
1070 if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && 1336 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
1071 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 1337 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
1072 return -EMSGSIZE; 1338 return -EMSGSIZE;
1073 if ((tun_key->tun_flags & TUNNEL_CSUM) && 1339 if ((output->tun_flags & TUNNEL_CSUM) &&
1074 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 1340 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
1075 return -EMSGSIZE; 1341 return -EMSGSIZE;
1076 1342
@@ -1078,176 +1344,390 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
1078 return 0; 1344 return 0;
1079} 1345}
1080 1346
1081/** 1347static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
1082 * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. 1348 const struct nlattr **a, bool is_mask)
1083 * @swkey: receives the extracted flow key.
1084 * @key_lenp: number of bytes used in @swkey.
1085 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1086 * sequence.
1087 */
1088int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1089 const struct nlattr *attr)
1090{ 1349{
1091 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1350 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
1092 const struct ovs_key_ethernet *eth_key; 1351 SW_FLOW_KEY_PUT(match, phy.priority,
1093 int key_len; 1352 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
1094 u32 attrs; 1353 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
1095 int err; 1354 }
1096 1355
1097 memset(swkey, 0, sizeof(struct sw_flow_key)); 1356 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
1098 key_len = SW_FLOW_KEY_OFFSET(eth); 1357 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
1099 1358
1100 err = parse_flow_nlattrs(attr, a, &attrs); 1359 if (is_mask)
1101 if (err) 1360 in_port = 0xffffffff; /* Always exact match in_port. */
1102 return err; 1361 else if (in_port >= DP_MAX_PORTS)
1362 return -EINVAL;
1103 1363
1104 /* Metadata attributes. */ 1364 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
1105 if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 1365 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
1106 swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]); 1366 } else if (!is_mask) {
1107 attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 1367 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
1108 } 1368 }
1109 if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 1369
1110 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 1370 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
1111 if (in_port >= DP_MAX_PORTS) 1371 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
1112 return -EINVAL; 1372
1113 swkey->phy.in_port = in_port; 1373 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
1114 attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 1374 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1115 } else {
1116 swkey->phy.in_port = DP_MAX_PORTS;
1117 } 1375 }
1118 if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 1376 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1119 swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 1377 if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
1120 attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 1378 is_mask))
1379 return -EINVAL;
1380 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1121 } 1381 }
1382 return 0;
1383}
1122 1384
1123 if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 1385static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
1124 err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key); 1386 const struct nlattr **a, bool is_mask)
1125 if (err) 1387{
1126 return err; 1388 int err;
1389 u64 orig_attrs = attrs;
1127 1390
1128 attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 1391 err = metadata_from_nlattrs(match, &attrs, a, is_mask);
1129 } 1392 if (err)
1393 return err;
1130 1394
1131 /* Data attributes. */ 1395 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
1132 if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) 1396 const struct ovs_key_ethernet *eth_key;
1133 return -EINVAL;
1134 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1135 1397
1136 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 1398 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
1137 memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN); 1399 SW_FLOW_KEY_MEMCPY(match, eth.src,
1138 memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN); 1400 eth_key->eth_src, ETH_ALEN, is_mask);
1401 SW_FLOW_KEY_MEMCPY(match, eth.dst,
1402 eth_key->eth_dst, ETH_ALEN, is_mask);
1403 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1404 }
1139 1405
1140 if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) && 1406 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1141 nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
1142 const struct nlattr *encap;
1143 __be16 tci; 1407 __be16 tci;
1144 1408
1145 if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
1146 (1 << OVS_KEY_ATTR_ETHERTYPE) |
1147 (1 << OVS_KEY_ATTR_ENCAP)))
1148 return -EINVAL;
1149
1150 encap = a[OVS_KEY_ATTR_ENCAP];
1151 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1409 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1152 if (tci & htons(VLAN_TAG_PRESENT)) { 1410 if (!(tci & htons(VLAN_TAG_PRESENT))) {
1153 swkey->eth.tci = tci; 1411 if (is_mask)
1154 1412 OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
1155 err = parse_flow_nlattrs(encap, a, &attrs); 1413 else
1156 if (err) 1414 OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
1157 return err;
1158 } else if (!tci) {
1159 /* Corner case for truncated 802.1Q header. */
1160 if (nla_len(encap))
1161 return -EINVAL;
1162 1415
1163 swkey->eth.type = htons(ETH_P_8021Q);
1164 *key_lenp = key_len;
1165 return 0;
1166 } else {
1167 return -EINVAL; 1416 return -EINVAL;
1168 } 1417 }
1169 } 1418
1419 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
1420 attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1421 } else if (!is_mask)
1422 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
1170 1423
1171 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 1424 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1172 swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1425 __be16 eth_type;
1173 if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN) 1426
1427 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1428 if (is_mask) {
1429 /* Always exact match EtherType. */
1430 eth_type = htons(0xffff);
1431 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
1432 OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
1433 ntohs(eth_type), ETH_P_802_3_MIN);
1174 return -EINVAL; 1434 return -EINVAL;
1435 }
1436
1437 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1175 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1438 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1176 } else { 1439 } else if (!is_mask) {
1177 swkey->eth.type = htons(ETH_P_802_2); 1440 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1178 } 1441 }
1179 1442
1180 if (swkey->eth.type == htons(ETH_P_IP)) { 1443 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1181 const struct ovs_key_ipv4 *ipv4_key; 1444 const struct ovs_key_ipv4 *ipv4_key;
1182 1445
1183 if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
1184 return -EINVAL;
1185 attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1186
1187 key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
1188 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 1446 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
1189 if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) 1447 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
1448 OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
1449 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
1190 return -EINVAL; 1450 return -EINVAL;
1191 swkey->ip.proto = ipv4_key->ipv4_proto;
1192 swkey->ip.tos = ipv4_key->ipv4_tos;
1193 swkey->ip.ttl = ipv4_key->ipv4_ttl;
1194 swkey->ip.frag = ipv4_key->ipv4_frag;
1195 swkey->ipv4.addr.src = ipv4_key->ipv4_src;
1196 swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
1197
1198 if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1199 err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
1200 if (err)
1201 return err;
1202 } 1451 }
1203 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1452 SW_FLOW_KEY_PUT(match, ip.proto,
1204 const struct ovs_key_ipv6 *ipv6_key; 1453 ipv4_key->ipv4_proto, is_mask);
1454 SW_FLOW_KEY_PUT(match, ip.tos,
1455 ipv4_key->ipv4_tos, is_mask);
1456 SW_FLOW_KEY_PUT(match, ip.ttl,
1457 ipv4_key->ipv4_ttl, is_mask);
1458 SW_FLOW_KEY_PUT(match, ip.frag,
1459 ipv4_key->ipv4_frag, is_mask);
1460 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1461 ipv4_key->ipv4_src, is_mask);
1462 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1463 ipv4_key->ipv4_dst, is_mask);
1464 attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1465 }
1205 1466
1206 if (!(attrs & (1 << OVS_KEY_ATTR_IPV6))) 1467 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
1207 return -EINVAL; 1468 const struct ovs_key_ipv6 *ipv6_key;
1208 attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1209 1469
1210 key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
1211 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 1470 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
1212 if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) 1471 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
1472 OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
1473 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
1213 return -EINVAL; 1474 return -EINVAL;
1214 swkey->ipv6.label = ipv6_key->ipv6_label;
1215 swkey->ip.proto = ipv6_key->ipv6_proto;
1216 swkey->ip.tos = ipv6_key->ipv6_tclass;
1217 swkey->ip.ttl = ipv6_key->ipv6_hlimit;
1218 swkey->ip.frag = ipv6_key->ipv6_frag;
1219 memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
1220 sizeof(swkey->ipv6.addr.src));
1221 memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
1222 sizeof(swkey->ipv6.addr.dst));
1223
1224 if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1225 err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
1226 if (err)
1227 return err;
1228 } 1475 }
1229 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1476 SW_FLOW_KEY_PUT(match, ipv6.label,
1230 swkey->eth.type == htons(ETH_P_RARP)) { 1477 ipv6_key->ipv6_label, is_mask);
1478 SW_FLOW_KEY_PUT(match, ip.proto,
1479 ipv6_key->ipv6_proto, is_mask);
1480 SW_FLOW_KEY_PUT(match, ip.tos,
1481 ipv6_key->ipv6_tclass, is_mask);
1482 SW_FLOW_KEY_PUT(match, ip.ttl,
1483 ipv6_key->ipv6_hlimit, is_mask);
1484 SW_FLOW_KEY_PUT(match, ip.frag,
1485 ipv6_key->ipv6_frag, is_mask);
1486 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
1487 ipv6_key->ipv6_src,
1488 sizeof(match->key->ipv6.addr.src),
1489 is_mask);
1490 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
1491 ipv6_key->ipv6_dst,
1492 sizeof(match->key->ipv6.addr.dst),
1493 is_mask);
1494
1495 attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1496 }
1497
1498 if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
1231 const struct ovs_key_arp *arp_key; 1499 const struct ovs_key_arp *arp_key;
1232 1500
1233 if (!(attrs & (1 << OVS_KEY_ATTR_ARP))) 1501 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
1502 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
1503 OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
1504 arp_key->arp_op);
1234 return -EINVAL; 1505 return -EINVAL;
1506 }
1507
1508 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1509 arp_key->arp_sip, is_mask);
1510 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1511 arp_key->arp_tip, is_mask);
1512 SW_FLOW_KEY_PUT(match, ip.proto,
1513 ntohs(arp_key->arp_op), is_mask);
1514 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
1515 arp_key->arp_sha, ETH_ALEN, is_mask);
1516 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
1517 arp_key->arp_tha, ETH_ALEN, is_mask);
1518
1235 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1519 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1520 }
1236 1521
1237 key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); 1522 if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
1238 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 1523 const struct ovs_key_tcp *tcp_key;
1239 swkey->ipv4.addr.src = arp_key->arp_sip; 1524
1240 swkey->ipv4.addr.dst = arp_key->arp_tip; 1525 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
1241 if (arp_key->arp_op & htons(0xff00)) 1526 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1527 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1528 tcp_key->tcp_src, is_mask);
1529 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1530 tcp_key->tcp_dst, is_mask);
1531 } else {
1532 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1533 tcp_key->tcp_src, is_mask);
1534 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1535 tcp_key->tcp_dst, is_mask);
1536 }
1537 attrs &= ~(1 << OVS_KEY_ATTR_TCP);
1538 }
1539
1540 if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
1541 const struct ovs_key_udp *udp_key;
1542
1543 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
1544 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1545 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1546 udp_key->udp_src, is_mask);
1547 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1548 udp_key->udp_dst, is_mask);
1549 } else {
1550 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1551 udp_key->udp_src, is_mask);
1552 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1553 udp_key->udp_dst, is_mask);
1554 }
1555 attrs &= ~(1 << OVS_KEY_ATTR_UDP);
1556 }
1557
1558 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
1559 const struct ovs_key_sctp *sctp_key;
1560
1561 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
1562 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1563 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1564 sctp_key->sctp_src, is_mask);
1565 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1566 sctp_key->sctp_dst, is_mask);
1567 } else {
1568 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1569 sctp_key->sctp_src, is_mask);
1570 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1571 sctp_key->sctp_dst, is_mask);
1572 }
1573 attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
1574 }
1575
1576 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
1577 const struct ovs_key_icmp *icmp_key;
1578
1579 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
1580 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1581 htons(icmp_key->icmp_type), is_mask);
1582 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1583 htons(icmp_key->icmp_code), is_mask);
1584 attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
1585 }
1586
1587 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
1588 const struct ovs_key_icmpv6 *icmpv6_key;
1589
1590 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
1591 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1592 htons(icmpv6_key->icmpv6_type), is_mask);
1593 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1594 htons(icmpv6_key->icmpv6_code), is_mask);
1595 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
1596 }
1597
1598 if (attrs & (1 << OVS_KEY_ATTR_ND)) {
1599 const struct ovs_key_nd *nd_key;
1600
1601 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
1602 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
1603 nd_key->nd_target,
1604 sizeof(match->key->ipv6.nd.target),
1605 is_mask);
1606 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
1607 nd_key->nd_sll, ETH_ALEN, is_mask);
1608 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
1609 nd_key->nd_tll, ETH_ALEN, is_mask);
1610 attrs &= ~(1 << OVS_KEY_ATTR_ND);
1611 }
1612
1613 if (attrs != 0)
1614 return -EINVAL;
1615
1616 return 0;
1617}
1618
1619/**
1620 * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
1621 * mask. In case the 'mask' is NULL, the flow is treated as exact match
1622 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
1623 * does not include any don't care bit.
1624 * @match: receives the extracted flow match information.
1625 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1626 * sequence. The fields should of the packet that triggered the creation
1627 * of this flow.
1628 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
1629 * attribute specifies the mask field of the wildcarded flow.
1630 */
1631int ovs_match_from_nlattrs(struct sw_flow_match *match,
1632 const struct nlattr *key,
1633 const struct nlattr *mask)
1634{
1635 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1636 const struct nlattr *encap;
1637 u64 key_attrs = 0;
1638 u64 mask_attrs = 0;
1639 bool encap_valid = false;
1640 int err;
1641
1642 err = parse_flow_nlattrs(key, a, &key_attrs);
1643 if (err)
1644 return err;
1645
1646 if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
1647 (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
1648 (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
1649 __be16 tci;
1650
1651 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
1652 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
1653 OVS_NLERR("Invalid Vlan frame.\n");
1242 return -EINVAL; 1654 return -EINVAL;
1243 swkey->ip.proto = ntohs(arp_key->arp_op); 1655 }
1244 memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN); 1656
1245 memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN); 1657 key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1658 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1659 encap = a[OVS_KEY_ATTR_ENCAP];
1660 key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1661 encap_valid = true;
1662
1663 if (tci & htons(VLAN_TAG_PRESENT)) {
1664 err = parse_flow_nlattrs(encap, a, &key_attrs);
1665 if (err)
1666 return err;
1667 } else if (!tci) {
1668 /* Corner case for truncated 802.1Q header. */
1669 if (nla_len(encap)) {
1670 OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
1671 return -EINVAL;
1672 }
1673 } else {
1674 OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
1675 return -EINVAL;
1676 }
1246 } 1677 }
1247 1678
1248 if (attrs) 1679 err = ovs_key_from_nlattrs(match, key_attrs, a, false);
1680 if (err)
1681 return err;
1682
1683 if (mask) {
1684 err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
1685 if (err)
1686 return err;
1687
1688 if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
1689 __be16 eth_type = 0;
1690 __be16 tci = 0;
1691
1692 if (!encap_valid) {
1693 OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
1694 return -EINVAL;
1695 }
1696
1697 mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1698 if (a[OVS_KEY_ATTR_ETHERTYPE])
1699 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1700
1701 if (eth_type == htons(0xffff)) {
1702 mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1703 encap = a[OVS_KEY_ATTR_ENCAP];
1704 err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
1705 } else {
1706 OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
1707 ntohs(eth_type));
1708 return -EINVAL;
1709 }
1710
1711 if (a[OVS_KEY_ATTR_VLAN])
1712 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1713
1714 if (!(tci & htons(VLAN_TAG_PRESENT))) {
1715 OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
1716 return -EINVAL;
1717 }
1718 }
1719
1720 err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
1721 if (err)
1722 return err;
1723 } else {
1724 /* Populate exact match flow's key mask. */
1725 if (match->mask)
1726 ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
1727 }
1728
1729 if (!ovs_match_validate(match, key_attrs, mask_attrs))
1249 return -EINVAL; 1730 return -EINVAL;
1250 *key_lenp = key_len;
1251 1731
1252 return 0; 1732 return 0;
1253} 1733}
@@ -1255,7 +1735,6 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1255/** 1735/**
1256 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. 1736 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
1257 * @flow: Receives extracted in_port, priority, tun_key and skb_mark. 1737 * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
1258 * @key_len: Length of key in @flow. Used for calculating flow hash.
1259 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1738 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1260 * sequence. 1739 * sequence.
1261 * 1740 *
@@ -1264,102 +1743,100 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1264 * get the metadata, that is, the parts of the flow key that cannot be 1743 * get the metadata, that is, the parts of the flow key that cannot be
1265 * extracted from the packet itself. 1744 * extracted from the packet itself.
1266 */ 1745 */
1267int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, 1746
1268 const struct nlattr *attr) 1747int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
1748 const struct nlattr *attr)
1269{ 1749{
1270 struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; 1750 struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
1271 const struct nlattr *nla; 1751 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1272 int rem; 1752 u64 attrs = 0;
1753 int err;
1754 struct sw_flow_match match;
1273 1755
1274 flow->key.phy.in_port = DP_MAX_PORTS; 1756 flow->key.phy.in_port = DP_MAX_PORTS;
1275 flow->key.phy.priority = 0; 1757 flow->key.phy.priority = 0;
1276 flow->key.phy.skb_mark = 0; 1758 flow->key.phy.skb_mark = 0;
1277 memset(tun_key, 0, sizeof(flow->key.tun_key)); 1759 memset(tun_key, 0, sizeof(flow->key.tun_key));
1278 1760
1279 nla_for_each_nested(nla, attr, rem) { 1761 err = parse_flow_nlattrs(attr, a, &attrs);
1280 int type = nla_type(nla); 1762 if (err)
1281
1282 if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) {
1283 int err;
1284
1285 if (nla_len(nla) != ovs_key_lens[type])
1286 return -EINVAL;
1287
1288 switch (type) {
1289 case OVS_KEY_ATTR_PRIORITY:
1290 flow->key.phy.priority = nla_get_u32(nla);
1291 break;
1292
1293 case OVS_KEY_ATTR_TUNNEL:
1294 err = ovs_ipv4_tun_from_nlattr(nla, tun_key);
1295 if (err)
1296 return err;
1297 break;
1298
1299 case OVS_KEY_ATTR_IN_PORT:
1300 if (nla_get_u32(nla) >= DP_MAX_PORTS)
1301 return -EINVAL;
1302 flow->key.phy.in_port = nla_get_u32(nla);
1303 break;
1304
1305 case OVS_KEY_ATTR_SKB_MARK:
1306 flow->key.phy.skb_mark = nla_get_u32(nla);
1307 break;
1308 }
1309 }
1310 }
1311 if (rem)
1312 return -EINVAL; 1763 return -EINVAL;
1313 1764
1314 flow->hash = ovs_flow_hash(&flow->key, 1765 memset(&match, 0, sizeof(match));
1315 flow_key_start(&flow->key), key_len); 1766 match.key = &flow->key;
1767
1768 err = metadata_from_nlattrs(&match, &attrs, a, false);
1769 if (err)
1770 return err;
1316 1771
1317 return 0; 1772 return 0;
1318} 1773}
1319 1774
1320int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) 1775int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
1776 const struct sw_flow_key *output, struct sk_buff *skb)
1321{ 1777{
1322 struct ovs_key_ethernet *eth_key; 1778 struct ovs_key_ethernet *eth_key;
1323 struct nlattr *nla, *encap; 1779 struct nlattr *nla, *encap;
1780 bool is_mask = (swkey != output);
1324 1781
1325 if (swkey->phy.priority && 1782 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1326 nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
1327 goto nla_put_failure; 1783 goto nla_put_failure;
1328 1784
1329 if (swkey->tun_key.ipv4_dst && 1785 if ((swkey->tun_key.ipv4_dst || is_mask) &&
1330 ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key)) 1786 ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
1331 goto nla_put_failure; 1787 goto nla_put_failure;
1332 1788
1333 if (swkey->phy.in_port != DP_MAX_PORTS && 1789 if (swkey->phy.in_port == DP_MAX_PORTS) {
1334 nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) 1790 if (is_mask && (output->phy.in_port == 0xffff))
1335 goto nla_put_failure; 1791 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1792 goto nla_put_failure;
1793 } else {
1794 u16 upper_u16;
1795 upper_u16 = !is_mask ? 0 : 0xffff;
1336 1796
1337 if (swkey->phy.skb_mark && 1797 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1338 nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark)) 1798 (upper_u16 << 16) | output->phy.in_port))
1799 goto nla_put_failure;
1800 }
1801
1802 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
1339 goto nla_put_failure; 1803 goto nla_put_failure;
1340 1804
1341 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1805 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1342 if (!nla) 1806 if (!nla)
1343 goto nla_put_failure; 1807 goto nla_put_failure;
1808
1344 eth_key = nla_data(nla); 1809 eth_key = nla_data(nla);
1345 memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN); 1810 memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
1346 memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN); 1811 memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
1347 1812
1348 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 1813 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
1349 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)) || 1814 __be16 eth_type;
1350 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci)) 1815 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
1816 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1817 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
1351 goto nla_put_failure; 1818 goto nla_put_failure;
1352 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1819 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1353 if (!swkey->eth.tci) 1820 if (!swkey->eth.tci)
1354 goto unencap; 1821 goto unencap;
1355 } else { 1822 } else
1356 encap = NULL; 1823 encap = NULL;
1357 }
1358 1824
1359 if (swkey->eth.type == htons(ETH_P_802_2)) 1825 if (swkey->eth.type == htons(ETH_P_802_2)) {
1826 /*
1827 * Ethertype 802.2 is represented in the netlink with omitted
1828 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
1829 * 0xffff in the mask attribute. Ethertype can also
1830 * be wildcarded.
1831 */
1832 if (is_mask && output->eth.type)
1833 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
1834 output->eth.type))
1835 goto nla_put_failure;
1360 goto unencap; 1836 goto unencap;
1837 }
1361 1838
1362 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type)) 1839 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
1363 goto nla_put_failure; 1840 goto nla_put_failure;
1364 1841
1365 if (swkey->eth.type == htons(ETH_P_IP)) { 1842 if (swkey->eth.type == htons(ETH_P_IP)) {
@@ -1369,12 +1846,12 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1369 if (!nla) 1846 if (!nla)
1370 goto nla_put_failure; 1847 goto nla_put_failure;
1371 ipv4_key = nla_data(nla); 1848 ipv4_key = nla_data(nla);
1372 ipv4_key->ipv4_src = swkey->ipv4.addr.src; 1849 ipv4_key->ipv4_src = output->ipv4.addr.src;
1373 ipv4_key->ipv4_dst = swkey->ipv4.addr.dst; 1850 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
1374 ipv4_key->ipv4_proto = swkey->ip.proto; 1851 ipv4_key->ipv4_proto = output->ip.proto;
1375 ipv4_key->ipv4_tos = swkey->ip.tos; 1852 ipv4_key->ipv4_tos = output->ip.tos;
1376 ipv4_key->ipv4_ttl = swkey->ip.ttl; 1853 ipv4_key->ipv4_ttl = output->ip.ttl;
1377 ipv4_key->ipv4_frag = swkey->ip.frag; 1854 ipv4_key->ipv4_frag = output->ip.frag;
1378 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1855 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1379 struct ovs_key_ipv6 *ipv6_key; 1856 struct ovs_key_ipv6 *ipv6_key;
1380 1857
@@ -1382,15 +1859,15 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1382 if (!nla) 1859 if (!nla)
1383 goto nla_put_failure; 1860 goto nla_put_failure;
1384 ipv6_key = nla_data(nla); 1861 ipv6_key = nla_data(nla);
1385 memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src, 1862 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
1386 sizeof(ipv6_key->ipv6_src)); 1863 sizeof(ipv6_key->ipv6_src));
1387 memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst, 1864 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
1388 sizeof(ipv6_key->ipv6_dst)); 1865 sizeof(ipv6_key->ipv6_dst));
1389 ipv6_key->ipv6_label = swkey->ipv6.label; 1866 ipv6_key->ipv6_label = output->ipv6.label;
1390 ipv6_key->ipv6_proto = swkey->ip.proto; 1867 ipv6_key->ipv6_proto = output->ip.proto;
1391 ipv6_key->ipv6_tclass = swkey->ip.tos; 1868 ipv6_key->ipv6_tclass = output->ip.tos;
1392 ipv6_key->ipv6_hlimit = swkey->ip.ttl; 1869 ipv6_key->ipv6_hlimit = output->ip.ttl;
1393 ipv6_key->ipv6_frag = swkey->ip.frag; 1870 ipv6_key->ipv6_frag = output->ip.frag;
1394 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1871 } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1395 swkey->eth.type == htons(ETH_P_RARP)) { 1872 swkey->eth.type == htons(ETH_P_RARP)) {
1396 struct ovs_key_arp *arp_key; 1873 struct ovs_key_arp *arp_key;
@@ -1400,11 +1877,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1400 goto nla_put_failure; 1877 goto nla_put_failure;
1401 arp_key = nla_data(nla); 1878 arp_key = nla_data(nla);
1402 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1879 memset(arp_key, 0, sizeof(struct ovs_key_arp));
1403 arp_key->arp_sip = swkey->ipv4.addr.src; 1880 arp_key->arp_sip = output->ipv4.addr.src;
1404 arp_key->arp_tip = swkey->ipv4.addr.dst; 1881 arp_key->arp_tip = output->ipv4.addr.dst;
1405 arp_key->arp_op = htons(swkey->ip.proto); 1882 arp_key->arp_op = htons(output->ip.proto);
1406 memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN); 1883 memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
1407 memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN); 1884 memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
1408 } 1885 }
1409 1886
1410 if ((swkey->eth.type == htons(ETH_P_IP) || 1887 if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1419,11 +1896,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1419 goto nla_put_failure; 1896 goto nla_put_failure;
1420 tcp_key = nla_data(nla); 1897 tcp_key = nla_data(nla);
1421 if (swkey->eth.type == htons(ETH_P_IP)) { 1898 if (swkey->eth.type == htons(ETH_P_IP)) {
1422 tcp_key->tcp_src = swkey->ipv4.tp.src; 1899 tcp_key->tcp_src = output->ipv4.tp.src;
1423 tcp_key->tcp_dst = swkey->ipv4.tp.dst; 1900 tcp_key->tcp_dst = output->ipv4.tp.dst;
1424 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1901 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1425 tcp_key->tcp_src = swkey->ipv6.tp.src; 1902 tcp_key->tcp_src = output->ipv6.tp.src;
1426 tcp_key->tcp_dst = swkey->ipv6.tp.dst; 1903 tcp_key->tcp_dst = output->ipv6.tp.dst;
1427 } 1904 }
1428 } else if (swkey->ip.proto == IPPROTO_UDP) { 1905 } else if (swkey->ip.proto == IPPROTO_UDP) {
1429 struct ovs_key_udp *udp_key; 1906 struct ovs_key_udp *udp_key;
@@ -1433,11 +1910,25 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1433 goto nla_put_failure; 1910 goto nla_put_failure;
1434 udp_key = nla_data(nla); 1911 udp_key = nla_data(nla);
1435 if (swkey->eth.type == htons(ETH_P_IP)) { 1912 if (swkey->eth.type == htons(ETH_P_IP)) {
1436 udp_key->udp_src = swkey->ipv4.tp.src; 1913 udp_key->udp_src = output->ipv4.tp.src;
1437 udp_key->udp_dst = swkey->ipv4.tp.dst; 1914 udp_key->udp_dst = output->ipv4.tp.dst;
1915 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1916 udp_key->udp_src = output->ipv6.tp.src;
1917 udp_key->udp_dst = output->ipv6.tp.dst;
1918 }
1919 } else if (swkey->ip.proto == IPPROTO_SCTP) {
1920 struct ovs_key_sctp *sctp_key;
1921
1922 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
1923 if (!nla)
1924 goto nla_put_failure;
1925 sctp_key = nla_data(nla);
1926 if (swkey->eth.type == htons(ETH_P_IP)) {
1927 sctp_key->sctp_src = swkey->ipv4.tp.src;
1928 sctp_key->sctp_dst = swkey->ipv4.tp.dst;
1438 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1929 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1439 udp_key->udp_src = swkey->ipv6.tp.src; 1930 sctp_key->sctp_src = swkey->ipv6.tp.src;
1440 udp_key->udp_dst = swkey->ipv6.tp.dst; 1931 sctp_key->sctp_dst = swkey->ipv6.tp.dst;
1441 } 1932 }
1442 } else if (swkey->eth.type == htons(ETH_P_IP) && 1933 } else if (swkey->eth.type == htons(ETH_P_IP) &&
1443 swkey->ip.proto == IPPROTO_ICMP) { 1934 swkey->ip.proto == IPPROTO_ICMP) {
@@ -1447,8 +1938,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1447 if (!nla) 1938 if (!nla)
1448 goto nla_put_failure; 1939 goto nla_put_failure;
1449 icmp_key = nla_data(nla); 1940 icmp_key = nla_data(nla);
1450 icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src); 1941 icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
1451 icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst); 1942 icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
1452 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1943 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
1453 swkey->ip.proto == IPPROTO_ICMPV6) { 1944 swkey->ip.proto == IPPROTO_ICMPV6) {
1454 struct ovs_key_icmpv6 *icmpv6_key; 1945 struct ovs_key_icmpv6 *icmpv6_key;
@@ -1458,8 +1949,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1458 if (!nla) 1949 if (!nla)
1459 goto nla_put_failure; 1950 goto nla_put_failure;
1460 icmpv6_key = nla_data(nla); 1951 icmpv6_key = nla_data(nla);
1461 icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src); 1952 icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
1462 icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst); 1953 icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
1463 1954
1464 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1955 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
1465 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1956 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@@ -1469,10 +1960,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1469 if (!nla) 1960 if (!nla)
1470 goto nla_put_failure; 1961 goto nla_put_failure;
1471 nd_key = nla_data(nla); 1962 nd_key = nla_data(nla);
1472 memcpy(nd_key->nd_target, &swkey->ipv6.nd.target, 1963 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
1473 sizeof(nd_key->nd_target)); 1964 sizeof(nd_key->nd_target));
1474 memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN); 1965 memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
1475 memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN); 1966 memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
1476 } 1967 }
1477 } 1968 }
1478 } 1969 }
@@ -1491,6 +1982,9 @@ nla_put_failure:
1491 * Returns zero if successful or a negative error code. */ 1982 * Returns zero if successful or a negative error code. */
1492int ovs_flow_init(void) 1983int ovs_flow_init(void)
1493{ 1984{
1985 BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
1986 BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
1987
1494 flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, 1988 flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
1495 0, NULL); 1989 0, NULL);
1496 if (flow_cache == NULL) 1990 if (flow_cache == NULL)
@@ -1504,3 +1998,84 @@ void ovs_flow_exit(void)
1504{ 1998{
1505 kmem_cache_destroy(flow_cache); 1999 kmem_cache_destroy(flow_cache);
1506} 2000}
2001
2002struct sw_flow_mask *ovs_sw_flow_mask_alloc(void)
2003{
2004 struct sw_flow_mask *mask;
2005
2006 mask = kmalloc(sizeof(*mask), GFP_KERNEL);
2007 if (mask)
2008 mask->ref_count = 0;
2009
2010 return mask;
2011}
2012
2013void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask)
2014{
2015 mask->ref_count++;
2016}
2017
2018void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
2019{
2020 if (!mask)
2021 return;
2022
2023 BUG_ON(!mask->ref_count);
2024 mask->ref_count--;
2025
2026 if (!mask->ref_count) {
2027 list_del_rcu(&mask->list);
2028 if (deferred)
2029 kfree_rcu(mask, rcu);
2030 else
2031 kfree(mask);
2032 }
2033}
2034
2035static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a,
2036 const struct sw_flow_mask *b)
2037{
2038 u8 *a_ = (u8 *)&a->key + a->range.start;
2039 u8 *b_ = (u8 *)&b->key + b->range.start;
2040
2041 return (a->range.end == b->range.end)
2042 && (a->range.start == b->range.start)
2043 && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
2044}
2045
2046struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
2047 const struct sw_flow_mask *mask)
2048{
2049 struct list_head *ml;
2050
2051 list_for_each(ml, tbl->mask_list) {
2052 struct sw_flow_mask *m;
2053 m = container_of(ml, struct sw_flow_mask, list);
2054 if (ovs_sw_flow_mask_equal(mask, m))
2055 return m;
2056 }
2057
2058 return NULL;
2059}
2060
2061/**
2062 * add a new mask into the mask list.
2063 * The caller needs to make sure that 'mask' is not the same
2064 * as any masks that are already on the list.
2065 */
2066void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask)
2067{
2068 list_add_rcu(&mask->list, tbl->mask_list);
2069}
2070
2071/**
2072 * Set 'range' fields in the mask to the value of 'val'.
2073 */
2074static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
2075 struct sw_flow_key_range *range, u8 val)
2076{
2077 u8 *m = (u8 *)&mask->key + range->start;
2078
2079 mask->range = *range;
2080 memset(m, val, range_n_bytes(range));
2081}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 66ef7220293e..212fbf7510c4 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2011 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -33,6 +33,8 @@
33#include <net/inet_ecn.h> 33#include <net/inet_ecn.h>
34 34
35struct sk_buff; 35struct sk_buff;
36struct sw_flow_mask;
37struct flow_table;
36 38
37struct sw_flow_actions { 39struct sw_flow_actions {
38 struct rcu_head rcu; 40 struct rcu_head rcu;
@@ -97,8 +99,8 @@ struct sw_flow_key {
97 } addr; 99 } addr;
98 union { 100 union {
99 struct { 101 struct {
100 __be16 src; /* TCP/UDP source port. */ 102 __be16 src; /* TCP/UDP/SCTP source port. */
101 __be16 dst; /* TCP/UDP destination port. */ 103 __be16 dst; /* TCP/UDP/SCTP destination port. */
102 } tp; 104 } tp;
103 struct { 105 struct {
104 u8 sha[ETH_ALEN]; /* ARP source hardware address. */ 106 u8 sha[ETH_ALEN]; /* ARP source hardware address. */
@@ -113,8 +115,8 @@ struct sw_flow_key {
113 } addr; 115 } addr;
114 __be32 label; /* IPv6 flow label. */ 116 __be32 label; /* IPv6 flow label. */
115 struct { 117 struct {
116 __be16 src; /* TCP/UDP source port. */ 118 __be16 src; /* TCP/UDP/SCTP source port. */
117 __be16 dst; /* TCP/UDP destination port. */ 119 __be16 dst; /* TCP/UDP/SCTP destination port. */
118 } tp; 120 } tp;
119 struct { 121 struct {
120 struct in6_addr target; /* ND target address. */ 122 struct in6_addr target; /* ND target address. */
@@ -123,7 +125,7 @@ struct sw_flow_key {
123 } nd; 125 } nd;
124 } ipv6; 126 } ipv6;
125 }; 127 };
126}; 128} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
127 129
128struct sw_flow { 130struct sw_flow {
129 struct rcu_head rcu; 131 struct rcu_head rcu;
@@ -131,6 +133,8 @@ struct sw_flow {
131 u32 hash; 133 u32 hash;
132 134
133 struct sw_flow_key key; 135 struct sw_flow_key key;
136 struct sw_flow_key unmasked_key;
137 struct sw_flow_mask *mask;
134 struct sw_flow_actions __rcu *sf_acts; 138 struct sw_flow_actions __rcu *sf_acts;
135 139
136 spinlock_t lock; /* Lock for values below. */ 140 spinlock_t lock; /* Lock for values below. */
@@ -140,6 +144,20 @@ struct sw_flow {
140 u8 tcp_flags; /* Union of seen TCP flags. */ 144 u8 tcp_flags; /* Union of seen TCP flags. */
141}; 145};
142 146
147struct sw_flow_key_range { /* A byte range inside a struct sw_flow_key. */
148 size_t start; /* Byte offset from the start of the key; added to the key's address by the mask helpers in flow.c. */
149 size_t end; /* NOTE(review): presumably the offset just past the last byte; range_n_bytes() is not visible here - confirm. */
150};
151
152struct sw_flow_match { /* Groups a flow key, a key range and a mask; set up through ovs_match_init(). */
153 struct sw_flow_key *key; /* NOTE(review): presumably the 'key' argument of ovs_match_init() - confirm in flow.c. */
154 struct sw_flow_key_range range; /* Byte range within the key (see struct sw_flow_key_range). */
155 struct sw_flow_mask *mask; /* NOTE(review): presumably the 'mask' argument of ovs_match_init() - confirm in flow.c. */
156};
157
158void ovs_match_init(struct sw_flow_match *match,
159 struct sw_flow_key *key, struct sw_flow_mask *mask);
160
143struct arp_eth_header { 161struct arp_eth_header {
144 __be16 ar_hrd; /* format of hardware address */ 162 __be16 ar_hrd; /* format of hardware address */
145 __be16 ar_pro; /* format of protocol address */ 163 __be16 ar_pro; /* format of protocol address */
@@ -159,21 +177,21 @@ void ovs_flow_exit(void);
159 177
160struct sw_flow *ovs_flow_alloc(void); 178struct sw_flow *ovs_flow_alloc(void);
161void ovs_flow_deferred_free(struct sw_flow *); 179void ovs_flow_deferred_free(struct sw_flow *);
162void ovs_flow_free(struct sw_flow *flow); 180void ovs_flow_free(struct sw_flow *, bool deferred);
163 181
164struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); 182struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len);
165void ovs_flow_deferred_free_acts(struct sw_flow_actions *); 183void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
166 184
167int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, 185int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
168 int *key_lenp);
169void ovs_flow_used(struct sw_flow *, struct sk_buff *); 186void ovs_flow_used(struct sw_flow *, struct sk_buff *);
170u64 ovs_flow_used_time(unsigned long flow_jiffies); 187u64 ovs_flow_used_time(unsigned long flow_jiffies);
171 188int ovs_flow_to_nlattrs(const struct sw_flow_key *,
172int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); 189 const struct sw_flow_key *, struct sk_buff *);
173int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, 190int ovs_match_from_nlattrs(struct sw_flow_match *match,
191 const struct nlattr *,
174 const struct nlattr *); 192 const struct nlattr *);
175int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, 193int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
176 const struct nlattr *attr); 194 const struct nlattr *attr);
177 195
178#define MAX_ACTIONS_BUFSIZE (32 * 1024) 196#define MAX_ACTIONS_BUFSIZE (32 * 1024)
179#define TBL_MIN_BUCKETS 1024 197#define TBL_MIN_BUCKETS 1024
@@ -182,6 +200,7 @@ struct flow_table {
182 struct flex_array *buckets; 200 struct flex_array *buckets;
183 unsigned int count, n_buckets; 201 unsigned int count, n_buckets;
184 struct rcu_head rcu; 202 struct rcu_head rcu;
203 struct list_head *mask_list;
185 int node_ver; 204 int node_ver;
186 u32 hash_seed; 205 u32 hash_seed;
187 bool keep_flows; 206 bool keep_flows;
@@ -197,22 +216,44 @@ static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
197 return (table->count > table->n_buckets); 216 return (table->count > table->n_buckets);
198} 217}
199 218
200struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, 219struct sw_flow *ovs_flow_lookup(struct flow_table *,
201 struct sw_flow_key *key, int len); 220 const struct sw_flow_key *);
202void ovs_flow_tbl_destroy(struct flow_table *table); 221struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
203void ovs_flow_tbl_deferred_destroy(struct flow_table *table); 222 struct sw_flow_match *match);
223
224void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
204struct flow_table *ovs_flow_tbl_alloc(int new_size); 225struct flow_table *ovs_flow_tbl_alloc(int new_size);
205struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); 226struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
206struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); 227struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
207void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
208 struct sw_flow_key *key, int key_len);
209void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
210 228
211struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); 229void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow);
230void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow);
231
232struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx);
212extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; 233extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
213int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, 234int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
214 struct ovs_key_ipv4_tunnel *tun_key); 235 struct sw_flow_match *match, bool is_mask);
215int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, 236int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
216 const struct ovs_key_ipv4_tunnel *tun_key); 237 const struct ovs_key_ipv4_tunnel *tun_key,
238 const struct ovs_key_ipv4_tunnel *output);
239
240bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
241 const struct sw_flow_key *key, int key_end);
242
243struct sw_flow_mask { /* A flow mask, kept on flow_table->mask_list. */
244 int ref_count; /* Plain counter: ovs_sw_flow_mask_add_ref() increments, ovs_sw_flow_mask_del_ref() decrements and frees at zero. */
245 struct rcu_head rcu; /* Used by kfree_rcu() when the mask is freed in deferred mode. */
246 struct list_head list; /* Link in the table's mask list (list_add_rcu()/list_del_rcu()). */
247 struct sw_flow_key_range range; /* Byte range of 'key' that mask comparison and ovs_sw_flow_mask_set() operate on. */
248 struct sw_flow_key key; /* Mask bytes, laid out like a flow key. */
249};
217 250
251struct sw_flow_mask *ovs_sw_flow_mask_alloc(void);
252void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *);
253void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred);
254void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *);
255struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *,
256 const struct sw_flow_mask *);
257void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
258 const struct sw_flow_mask *mask);
218#endif /* flow.h */ 259#endif /* flow.h */
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 493e9775dcda..c99dea543d64 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -16,7 +16,6 @@
16 * 02110-1301, USA 16 * 02110-1301, USA
17 */ 17 */
18 18
19#ifdef CONFIG_OPENVSWITCH_GRE
20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 20
22#include <linux/if.h> 21#include <linux/if.h>
@@ -177,10 +176,10 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
177 176
178 skb->local_df = 1; 177 skb->local_df = 1;
179 178
180 return iptunnel_xmit(net, rt, skb, fl.saddr, 179 return iptunnel_xmit(rt, skb, fl.saddr,
181 OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, 180 OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
182 OVS_CB(skb)->tun_key->ipv4_tos, 181 OVS_CB(skb)->tun_key->ipv4_tos,
183 OVS_CB(skb)->tun_key->ipv4_ttl, df); 182 OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
184err_free_rt: 183err_free_rt:
185 ip_rt_put(rt); 184 ip_rt_put(rt);
186error: 185error:
@@ -271,5 +270,3 @@ const struct vport_ops ovs_gre_vport_ops = {
271 .get_name = gre_get_name, 270 .get_name = gre_get_name,
272 .send = gre_tnl_send, 271 .send = gre_tnl_send,
273}; 272};
274
275#endif /* OPENVSWITCH_GRE */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 5982f3f62835..09d93c13cfd6 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -25,6 +25,7 @@
25#include <linux/llc.h> 25#include <linux/llc.h>
26#include <linux/rtnetlink.h> 26#include <linux/rtnetlink.h>
27#include <linux/skbuff.h> 27#include <linux/skbuff.h>
28#include <linux/openvswitch.h>
28 29
29#include <net/llc.h> 30#include <net/llc.h>
30 31
@@ -74,6 +75,15 @@ static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
74 return RX_HANDLER_CONSUMED; 75 return RX_HANDLER_CONSUMED;
75} 76}
76 77
78static struct net_device *get_dpdev(struct datapath *dp)
79{
80 struct vport *local;
81
82 local = ovs_vport_ovsl(dp, OVSP_LOCAL);
83 BUG_ON(!local);
84 return netdev_vport_priv(local)->dev;
85}
86
77static struct vport *netdev_create(const struct vport_parms *parms) 87static struct vport *netdev_create(const struct vport_parms *parms)
78{ 88{
79 struct vport *vport; 89 struct vport *vport;
@@ -103,10 +113,15 @@ static struct vport *netdev_create(const struct vport_parms *parms)
103 } 113 }
104 114
105 rtnl_lock(); 115 rtnl_lock();
116 err = netdev_master_upper_dev_link(netdev_vport->dev,
117 get_dpdev(vport->dp));
118 if (err)
119 goto error_unlock;
120
106 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, 121 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
107 vport); 122 vport);
108 if (err) 123 if (err)
109 goto error_unlock; 124 goto error_master_upper_dev_unlink;
110 125
111 dev_set_promiscuity(netdev_vport->dev, 1); 126 dev_set_promiscuity(netdev_vport->dev, 1);
112 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; 127 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
@@ -114,6 +129,8 @@ static struct vport *netdev_create(const struct vport_parms *parms)
114 129
115 return vport; 130 return vport;
116 131
132error_master_upper_dev_unlink:
133 netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
117error_unlock: 134error_unlock:
118 rtnl_unlock(); 135 rtnl_unlock();
119error_put: 136error_put:
@@ -140,6 +157,7 @@ static void netdev_destroy(struct vport *vport)
140 rtnl_lock(); 157 rtnl_lock();
141 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; 158 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
142 netdev_rx_handler_unregister(netdev_vport->dev); 159 netdev_rx_handler_unregister(netdev_vport->dev);
160 netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
143 dev_set_promiscuity(netdev_vport->dev, -1); 161 dev_set_promiscuity(netdev_vport->dev, -1);
144 rtnl_unlock(); 162 rtnl_unlock();
145 163
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
new file mode 100644
index 000000000000..a481c03e2861
--- /dev/null
+++ b/net/openvswitch/vport-vxlan.c
@@ -0,0 +1,204 @@
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 * Copyright (c) 2013 Cisco Systems, Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22#include <linux/in.h>
23#include <linux/ip.h>
24#include <linux/net.h>
25#include <linux/rculist.h>
26#include <linux/udp.h>
27
28#include <net/icmp.h>
29#include <net/ip.h>
30#include <net/udp.h>
31#include <net/ip_tunnels.h>
32#include <net/udp.h>
33#include <net/rtnetlink.h>
34#include <net/route.h>
35#include <net/dsfield.h>
36#include <net/inet_ecn.h>
37#include <net/net_namespace.h>
38#include <net/netns/generic.h>
39#include <net/vxlan.h>
40
41#include "datapath.h"
42#include "vport.h"
43
44/**
45 * struct vxlan_port - Keeps track of open UDP ports
46 * @vs: vxlan_sock created for the port.
47 * @name: vport name.
48 */
49struct vxlan_port {
50 struct vxlan_sock *vs; /* Set from vxlan_sock_add() in vxlan_tnl_create(); released in vxlan_tnl_destroy(). */
51 char name[IFNAMSIZ]; /* Copied from parms->name in vxlan_tnl_create(); returned by vxlan_get_name(). */
52};
53
/* Return the VXLAN private area of 'vport', as given by vport_priv(). */
static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
{
	struct vxlan_port *priv = vport_priv(vport);

	return priv;
}
58
59/* Called with rcu_read_lock and BH disabled. */
60static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
61{
62 struct ovs_key_ipv4_tunnel tun_key;
63 struct vport *vport = vs->data;
64 struct iphdr *iph;
65 __be64 key;
66
67 /* Save outer tunnel values */
68 iph = ip_hdr(skb);
69 key = cpu_to_be64(ntohl(vx_vni) >> 8);
70 ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
71
72 ovs_vport_receive(vport, skb, &tun_key);
73}
74
75static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
76{
77 struct vxlan_port *vxlan_port = vxlan_vport(vport);
78 __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
79
80 if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
81 return -EMSGSIZE;
82 return 0;
83}
84
85static void vxlan_tnl_destroy(struct vport *vport)
86{
87 struct vxlan_port *vxlan_port = vxlan_vport(vport);
88
89 vxlan_sock_release(vxlan_port->vs);
90
91 ovs_vport_deferred_free(vport);
92}
93
94static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
95{
96 struct net *net = ovs_dp_get_net(parms->dp);
97 struct nlattr *options = parms->options;
98 struct vxlan_port *vxlan_port;
99 struct vxlan_sock *vs;
100 struct vport *vport;
101 struct nlattr *a;
102 u16 dst_port;
103 int err;
104
105 if (!options) {
106 err = -EINVAL;
107 goto error;
108 }
109 a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
110 if (a && nla_len(a) == sizeof(u16)) {
111 dst_port = nla_get_u16(a);
112 } else {
113 /* Require destination port from userspace. */
114 err = -EINVAL;
115 goto error;
116 }
117
118 vport = ovs_vport_alloc(sizeof(struct vxlan_port),
119 &ovs_vxlan_vport_ops, parms);
120 if (IS_ERR(vport))
121 return vport;
122
123 vxlan_port = vxlan_vport(vport);
124 strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
125
126 vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false);
127 if (IS_ERR(vs)) {
128 ovs_vport_free(vport);
129 return (void *)vs;
130 }
131 vxlan_port->vs = vs;
132
133 return vport;
134
135error:
136 return ERR_PTR(err);
137}
138
139static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
140{
141 struct net *net = ovs_dp_get_net(vport->dp);
142 struct vxlan_port *vxlan_port = vxlan_vport(vport);
143 __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
144 struct rtable *rt;
145 struct flowi4 fl;
146 __be16 src_port;
147 int port_min;
148 int port_max;
149 __be16 df;
150 int err;
151
152 if (unlikely(!OVS_CB(skb)->tun_key)) {
153 err = -EINVAL;
154 goto error;
155 }
156
157 /* Route lookup */
158 memset(&fl, 0, sizeof(fl));
159 fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst;
160 fl.saddr = OVS_CB(skb)->tun_key->ipv4_src;
161 fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
162 fl.flowi4_mark = skb->mark;
163 fl.flowi4_proto = IPPROTO_UDP;
164
165 rt = ip_route_output_key(net, &fl);
166 if (IS_ERR(rt)) {
167 err = PTR_ERR(rt);
168 goto error;
169 }
170
171 df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
172 htons(IP_DF) : 0;
173
174 skb->local_df = 1;
175
176 inet_get_local_port_range(&port_min, &port_max);
177 src_port = vxlan_src_port(port_min, port_max, skb);
178
179 err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
180 fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst,
181 OVS_CB(skb)->tun_key->ipv4_tos,
182 OVS_CB(skb)->tun_key->ipv4_ttl, df,
183 src_port, dst_port,
184 htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
185 if (err < 0)
186 ip_rt_put(rt);
187error:
188 return err;
189}
190
191static const char *vxlan_get_name(const struct vport *vport)
192{
193 struct vxlan_port *vxlan_port = vxlan_vport(vport);
194 return vxlan_port->name;
195}
196
197const struct vport_ops ovs_vxlan_vport_ops = { /* vport operations for VXLAN tunnel ports; listed in vport_ops_list[] under CONFIG_OPENVSWITCH_VXLAN. */
198 .type = OVS_VPORT_TYPE_VXLAN,
199 .create = vxlan_tnl_create, /* Needs OVS_TUNNEL_ATTR_DST_PORT in the options. */
200 .destroy = vxlan_tnl_destroy, /* Releases the vxlan_sock, then frees the vport (deferred). */
201 .get_name = vxlan_get_name,
202 .get_options = vxlan_get_options, /* Emits OVS_TUNNEL_ATTR_DST_PORT. */
203 .send = vxlan_tnl_send,
204};
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index d4c7fa04ce08..6f65dbe13812 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -42,6 +42,9 @@ static const struct vport_ops *vport_ops_list[] = {
42#ifdef CONFIG_OPENVSWITCH_GRE 42#ifdef CONFIG_OPENVSWITCH_GRE
43 &ovs_gre_vport_ops, 43 &ovs_gre_vport_ops,
44#endif 44#endif
45#ifdef CONFIG_OPENVSWITCH_VXLAN
46 &ovs_vxlan_vport_ops,
47#endif
45}; 48};
46 49
47/* Protected by RCU read lock for reading, ovs_mutex for writing. */ 50/* Protected by RCU read lock for reading, ovs_mutex for writing. */
@@ -200,7 +203,7 @@ out:
200 * ovs_vport_set_options - modify existing vport device (for kernel callers) 203 * ovs_vport_set_options - modify existing vport device (for kernel callers)
201 * 204 *
202 * @vport: vport to modify. 205 * @vport: vport to modify.
203 * @port: New configuration. 206 * @options: New configuration.
204 * 207 *
205 * Modifies an existing device with the specified configuration (which is 208 * Modifies an existing device with the specified configuration (which is
206 * dependent on device type). ovs_mutex must be held. 209 * dependent on device type). ovs_mutex must be held.
@@ -325,6 +328,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
325 * 328 *
326 * @vport: vport that received the packet 329 * @vport: vport that received the packet
327 * @skb: skb that was received 330 * @skb: skb that was received
331 * @tun_key: tunnel (if any) that carried packet
328 * 332 *
329 * Must be called with rcu_read_lock. The packet cannot be shared and 333 * Must be called with rcu_read_lock. The packet cannot be shared and
330 * skb->data should point to the Ethernet header. 334 * skb->data should point to the Ethernet header.
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 376045c42f8b..1a9fbcec6e1b 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -199,6 +199,7 @@ void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);
199extern const struct vport_ops ovs_netdev_vport_ops; 199extern const struct vport_ops ovs_netdev_vport_ops;
200extern const struct vport_ops ovs_internal_vport_ops; 200extern const struct vport_ops ovs_internal_vport_ops;
201extern const struct vport_ops ovs_gre_vport_ops; 201extern const struct vport_ops ovs_gre_vport_ops;
202extern const struct vport_ops ovs_vxlan_vport_ops;
202 203
203static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, 204static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
204 const void *start, unsigned int len) 205 const void *start, unsigned int len)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 75c8bbf598c8..2e8286b47c28 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -88,7 +88,7 @@
88#include <linux/virtio_net.h> 88#include <linux/virtio_net.h>
89#include <linux/errqueue.h> 89#include <linux/errqueue.h>
90#include <linux/net_tstamp.h> 90#include <linux/net_tstamp.h>
91 91#include <linux/reciprocal_div.h>
92#ifdef CONFIG_INET 92#ifdef CONFIG_INET
93#include <net/inet_common.h> 93#include <net/inet_common.h>
94#endif 94#endif
@@ -1135,7 +1135,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
1135 struct sk_buff *skb, 1135 struct sk_buff *skb,
1136 unsigned int num) 1136 unsigned int num)
1137{ 1137{
1138 return (((u64)skb->rxhash) * num) >> 32; 1138 return reciprocal_divide(skb->rxhash, num);
1139} 1139}
1140 1140
1141static unsigned int fanout_demux_lb(struct packet_fanout *f, 1141static unsigned int fanout_demux_lb(struct packet_fanout *f,
@@ -1158,6 +1158,13 @@ static unsigned int fanout_demux_cpu(struct packet_fanout *f,
1158 return smp_processor_id() % num; 1158 return smp_processor_id() % num;
1159} 1159}
1160 1160
1161static unsigned int fanout_demux_rnd(struct packet_fanout *f,
1162 struct sk_buff *skb,
1163 unsigned int num)
1164{
1165 return reciprocal_divide(prandom_u32(), num);
1166}
1167
1161static unsigned int fanout_demux_rollover(struct packet_fanout *f, 1168static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1162 struct sk_buff *skb, 1169 struct sk_buff *skb,
1163 unsigned int idx, unsigned int skip, 1170 unsigned int idx, unsigned int skip,
@@ -1215,6 +1222,9 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1215 case PACKET_FANOUT_CPU: 1222 case PACKET_FANOUT_CPU:
1216 idx = fanout_demux_cpu(f, skb, num); 1223 idx = fanout_demux_cpu(f, skb, num);
1217 break; 1224 break;
1225 case PACKET_FANOUT_RND:
1226 idx = fanout_demux_rnd(f, skb, num);
1227 break;
1218 case PACKET_FANOUT_ROLLOVER: 1228 case PACKET_FANOUT_ROLLOVER:
1219 idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); 1229 idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
1220 break; 1230 break;
@@ -1284,6 +1294,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1284 case PACKET_FANOUT_HASH: 1294 case PACKET_FANOUT_HASH:
1285 case PACKET_FANOUT_LB: 1295 case PACKET_FANOUT_LB:
1286 case PACKET_FANOUT_CPU: 1296 case PACKET_FANOUT_CPU:
1297 case PACKET_FANOUT_RND:
1287 break; 1298 break;
1288 default: 1299 default:
1289 return -EINVAL; 1300 return -EINVAL;
@@ -2181,7 +2192,7 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2181 linear = len; 2192 linear = len;
2182 2193
2183 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, 2194 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2184 err); 2195 err, 0);
2185 if (!skb) 2196 if (!skb)
2186 return NULL; 2197 return NULL;
2187 2198
@@ -2638,51 +2649,6 @@ out:
2638 return err; 2649 return err;
2639} 2650}
2640 2651
2641static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
2642{
2643 struct sock_exterr_skb *serr;
2644 struct sk_buff *skb, *skb2;
2645 int copied, err;
2646
2647 err = -EAGAIN;
2648 skb = skb_dequeue(&sk->sk_error_queue);
2649 if (skb == NULL)
2650 goto out;
2651
2652 copied = skb->len;
2653 if (copied > len) {
2654 msg->msg_flags |= MSG_TRUNC;
2655 copied = len;
2656 }
2657 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2658 if (err)
2659 goto out_free_skb;
2660
2661 sock_recv_timestamp(msg, sk, skb);
2662
2663 serr = SKB_EXT_ERR(skb);
2664 put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
2665 sizeof(serr->ee), &serr->ee);
2666
2667 msg->msg_flags |= MSG_ERRQUEUE;
2668 err = copied;
2669
2670 /* Reset and regenerate socket error */
2671 spin_lock_bh(&sk->sk_error_queue.lock);
2672 sk->sk_err = 0;
2673 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
2674 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
2675 spin_unlock_bh(&sk->sk_error_queue.lock);
2676 sk->sk_error_report(sk);
2677 } else
2678 spin_unlock_bh(&sk->sk_error_queue.lock);
2679
2680out_free_skb:
2681 kfree_skb(skb);
2682out:
2683 return err;
2684}
2685
2686/* 2652/*
2687 * Pull a packet from our receive queue and hand it to the user. 2653 * Pull a packet from our receive queue and hand it to the user.
2688 * If necessary we block. 2654 * If necessary we block.
@@ -2708,7 +2674,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
2708#endif 2674#endif
2709 2675
2710 if (flags & MSG_ERRQUEUE) { 2676 if (flags & MSG_ERRQUEUE) {
2711 err = packet_recv_error(sk, msg, len); 2677 err = sock_recv_errqueue(sk, msg, len,
2678 SOL_PACKET, PACKET_TX_TIMESTAMP);
2712 goto out; 2679 goto out;
2713 } 2680 }
2714 2681
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1afd1381cdc7..77e38f733496 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -793,7 +793,7 @@ static int pn_res_seq_show(struct seq_file *seq, void *v)
793 struct sock **psk = v; 793 struct sock **psk = v;
794 struct sock *sk = *psk; 794 struct sock *sk = *psk;
795 795
796 seq_printf(seq, "%02X %5d %lu%n", 796 seq_printf(seq, "%02X %5u %lu%n",
797 (int) (psk - pnres.sk), 797 (int) (psk - pnres.sk),
798 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), 798 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
799 sock_i_ino(sk), &len); 799 sock_i_ino(sk), &len);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 1cec5e4f3a5e..1bacc1079942 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -576,14 +576,14 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
576} 576}
577EXPORT_SYMBOL(rfkill_set_states); 577EXPORT_SYMBOL(rfkill_set_states);
578 578
579static ssize_t rfkill_name_show(struct device *dev, 579static ssize_t name_show(struct device *dev, struct device_attribute *attr,
580 struct device_attribute *attr, 580 char *buf)
581 char *buf)
582{ 581{
583 struct rfkill *rfkill = to_rfkill(dev); 582 struct rfkill *rfkill = to_rfkill(dev);
584 583
585 return sprintf(buf, "%s\n", rfkill->name); 584 return sprintf(buf, "%s\n", rfkill->name);
586} 585}
586static DEVICE_ATTR_RO(name);
587 587
588static const char *rfkill_get_type_str(enum rfkill_type type) 588static const char *rfkill_get_type_str(enum rfkill_type type)
589{ 589{
@@ -611,54 +611,52 @@ static const char *rfkill_get_type_str(enum rfkill_type type)
611 } 611 }
612} 612}
613 613
614static ssize_t rfkill_type_show(struct device *dev, 614static ssize_t type_show(struct device *dev, struct device_attribute *attr,
615 struct device_attribute *attr, 615 char *buf)
616 char *buf)
617{ 616{
618 struct rfkill *rfkill = to_rfkill(dev); 617 struct rfkill *rfkill = to_rfkill(dev);
619 618
620 return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type)); 619 return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type));
621} 620}
621static DEVICE_ATTR_RO(type);
622 622
623static ssize_t rfkill_idx_show(struct device *dev, 623static ssize_t index_show(struct device *dev, struct device_attribute *attr,
624 struct device_attribute *attr, 624 char *buf)
625 char *buf)
626{ 625{
627 struct rfkill *rfkill = to_rfkill(dev); 626 struct rfkill *rfkill = to_rfkill(dev);
628 627
629 return sprintf(buf, "%d\n", rfkill->idx); 628 return sprintf(buf, "%d\n", rfkill->idx);
630} 629}
630static DEVICE_ATTR_RO(index);
631 631
632static ssize_t rfkill_persistent_show(struct device *dev, 632static ssize_t persistent_show(struct device *dev,
633 struct device_attribute *attr, 633 struct device_attribute *attr, char *buf)
634 char *buf)
635{ 634{
636 struct rfkill *rfkill = to_rfkill(dev); 635 struct rfkill *rfkill = to_rfkill(dev);
637 636
638 return sprintf(buf, "%d\n", rfkill->persistent); 637 return sprintf(buf, "%d\n", rfkill->persistent);
639} 638}
639static DEVICE_ATTR_RO(persistent);
640 640
641static ssize_t rfkill_hard_show(struct device *dev, 641static ssize_t hard_show(struct device *dev, struct device_attribute *attr,
642 struct device_attribute *attr, 642 char *buf)
643 char *buf)
644{ 643{
645 struct rfkill *rfkill = to_rfkill(dev); 644 struct rfkill *rfkill = to_rfkill(dev);
646 645
647 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0 ); 646 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0 );
648} 647}
648static DEVICE_ATTR_RO(hard);
649 649
650static ssize_t rfkill_soft_show(struct device *dev, 650static ssize_t soft_show(struct device *dev, struct device_attribute *attr,
651 struct device_attribute *attr, 651 char *buf)
652 char *buf)
653{ 652{
654 struct rfkill *rfkill = to_rfkill(dev); 653 struct rfkill *rfkill = to_rfkill(dev);
655 654
656 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0 ); 655 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0 );
657} 656}
658 657
659static ssize_t rfkill_soft_store(struct device *dev, 658static ssize_t soft_store(struct device *dev, struct device_attribute *attr,
660 struct device_attribute *attr, 659 const char *buf, size_t count)
661 const char *buf, size_t count)
662{ 660{
663 struct rfkill *rfkill = to_rfkill(dev); 661 struct rfkill *rfkill = to_rfkill(dev);
664 unsigned long state; 662 unsigned long state;
@@ -680,6 +678,7 @@ static ssize_t rfkill_soft_store(struct device *dev,
680 678
681 return count; 679 return count;
682} 680}
681static DEVICE_ATTR_RW(soft);
683 682
684static u8 user_state_from_blocked(unsigned long state) 683static u8 user_state_from_blocked(unsigned long state)
685{ 684{
@@ -691,18 +690,16 @@ static u8 user_state_from_blocked(unsigned long state)
691 return RFKILL_USER_STATE_UNBLOCKED; 690 return RFKILL_USER_STATE_UNBLOCKED;
692} 691}
693 692
694static ssize_t rfkill_state_show(struct device *dev, 693static ssize_t state_show(struct device *dev, struct device_attribute *attr,
695 struct device_attribute *attr, 694 char *buf)
696 char *buf)
697{ 695{
698 struct rfkill *rfkill = to_rfkill(dev); 696 struct rfkill *rfkill = to_rfkill(dev);
699 697
700 return sprintf(buf, "%d\n", user_state_from_blocked(rfkill->state)); 698 return sprintf(buf, "%d\n", user_state_from_blocked(rfkill->state));
701} 699}
702 700
703static ssize_t rfkill_state_store(struct device *dev, 701static ssize_t state_store(struct device *dev, struct device_attribute *attr,
704 struct device_attribute *attr, 702 const char *buf, size_t count)
705 const char *buf, size_t count)
706{ 703{
707 struct rfkill *rfkill = to_rfkill(dev); 704 struct rfkill *rfkill = to_rfkill(dev);
708 unsigned long state; 705 unsigned long state;
@@ -725,32 +722,27 @@ static ssize_t rfkill_state_store(struct device *dev,
725 722
726 return count; 723 return count;
727} 724}
725static DEVICE_ATTR_RW(state);
728 726
729static ssize_t rfkill_claim_show(struct device *dev, 727static ssize_t claim_show(struct device *dev, struct device_attribute *attr,
730 struct device_attribute *attr, 728 char *buf)
731 char *buf)
732{ 729{
733 return sprintf(buf, "%d\n", 0); 730 return sprintf(buf, "%d\n", 0);
734} 731}
735 732static DEVICE_ATTR_RO(claim);
736static ssize_t rfkill_claim_store(struct device *dev, 733
737 struct device_attribute *attr, 734static struct attribute *rfkill_dev_attrs[] = {
738 const char *buf, size_t count) 735 &dev_attr_name.attr,
739{ 736 &dev_attr_type.attr,
740 return -EOPNOTSUPP; 737 &dev_attr_index.attr,
741} 738 &dev_attr_persistent.attr,
742 739 &dev_attr_state.attr,
743static struct device_attribute rfkill_dev_attrs[] = { 740 &dev_attr_claim.attr,
744 __ATTR(name, S_IRUGO, rfkill_name_show, NULL), 741 &dev_attr_soft.attr,
745 __ATTR(type, S_IRUGO, rfkill_type_show, NULL), 742 &dev_attr_hard.attr,
746 __ATTR(index, S_IRUGO, rfkill_idx_show, NULL), 743 NULL,
747 __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL),
748 __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
749 __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
750 __ATTR(soft, S_IRUGO|S_IWUSR, rfkill_soft_show, rfkill_soft_store),
751 __ATTR(hard, S_IRUGO, rfkill_hard_show, NULL),
752 __ATTR_NULL
753}; 744};
745ATTRIBUTE_GROUPS(rfkill_dev);
754 746
755static void rfkill_release(struct device *dev) 747static void rfkill_release(struct device *dev)
756{ 748{
@@ -830,7 +822,7 @@ static int rfkill_resume(struct device *dev)
830static struct class rfkill_class = { 822static struct class rfkill_class = {
831 .name = "rfkill", 823 .name = "rfkill",
832 .dev_release = rfkill_release, 824 .dev_release = rfkill_release,
833 .dev_attrs = rfkill_dev_attrs, 825 .dev_groups = rfkill_dev_groups,
834 .dev_uevent = rfkill_dev_uevent, 826 .dev_uevent = rfkill_dev_uevent,
835 .suspend = rfkill_suspend, 827 .suspend = rfkill_suspend,
836 .resume = rfkill_resume, 828 .resume = rfkill_resume,
diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c
index d11ac79246e4..cf5b145902e5 100644
--- a/net/rfkill/rfkill-regulator.c
+++ b/net/rfkill/rfkill-regulator.c
@@ -30,6 +30,7 @@ struct rfkill_regulator_data {
30static int rfkill_regulator_set_block(void *data, bool blocked) 30static int rfkill_regulator_set_block(void *data, bool blocked)
31{ 31{
32 struct rfkill_regulator_data *rfkill_data = data; 32 struct rfkill_regulator_data *rfkill_data = data;
33 int ret = 0;
33 34
34 pr_debug("%s: blocked: %d\n", __func__, blocked); 35 pr_debug("%s: blocked: %d\n", __func__, blocked);
35 36
@@ -40,15 +41,16 @@ static int rfkill_regulator_set_block(void *data, bool blocked)
40 } 41 }
41 } else { 42 } else {
42 if (!rfkill_data->reg_enabled) { 43 if (!rfkill_data->reg_enabled) {
43 regulator_enable(rfkill_data->vcc); 44 ret = regulator_enable(rfkill_data->vcc);
44 rfkill_data->reg_enabled = true; 45 if (!ret)
46 rfkill_data->reg_enabled = true;
45 } 47 }
46 } 48 }
47 49
48 pr_debug("%s: regulator_is_enabled after set_block: %d\n", __func__, 50 pr_debug("%s: regulator_is_enabled after set_block: %d\n", __func__,
49 regulator_is_enabled(rfkill_data->vcc)); 51 regulator_is_enabled(rfkill_data->vcc));
50 52
51 return 0; 53 return ret;
52} 54}
53 55
54static struct rfkill_ops rfkill_regulator_ops = { 56static struct rfkill_ops rfkill_regulator_ops = {
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 235e01acac51..c03a32a0418e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -272,6 +272,20 @@ config NET_SCH_FQ_CODEL
272 272
273 If unsure, say N. 273 If unsure, say N.
274 274
275config NET_SCH_FQ
276 tristate "Fair Queue"
277 help
278 Say Y here if you want to use the FQ packet scheduling algorithm.
279
280 FQ does flow separation, and is able to respect pacing requirements
281 set by TCP stack into sk->sk_pacing_rate (for locally generated
282 traffic)
283
284 To compile this driver as a module, choose M here: the module
285 will be called sch_fq.
286
287 If unsure, say N.
288
275config NET_SCH_INGRESS 289config NET_SCH_INGRESS
276 tristate "Ingress Qdisc" 290 tristate "Ingress Qdisc"
277 depends on NET_CLS_ACT 291 depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 978cbf004e80..e5f9abe9a5db 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
39obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o 39obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
40obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o 40obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
41obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o 41obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
42obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
42 43
43obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 44obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
44obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 45obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3a294eb98d61..867b4a3e3980 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,19 +23,18 @@
23#include <net/sock.h> 23#include <net/sock.h>
24#include <net/cls_cgroup.h> 24#include <net/cls_cgroup.h>
25 25
26static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp) 26static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
27{ 27{
28 return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id), 28 return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
29 struct cgroup_cls_state, css);
30} 29}
31 30
32static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) 31static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
33{ 32{
34 return container_of(task_subsys_state(p, net_cls_subsys_id), 33 return css_cls_state(task_css(p, net_cls_subsys_id));
35 struct cgroup_cls_state, css);
36} 34}
37 35
38static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) 36static struct cgroup_subsys_state *
37cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
39{ 38{
40 struct cgroup_cls_state *cs; 39 struct cgroup_cls_state *cs;
41 40
@@ -45,17 +44,19 @@ static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
45 return &cs->css; 44 return &cs->css;
46} 45}
47 46
48static int cgrp_css_online(struct cgroup *cgrp) 47static int cgrp_css_online(struct cgroup_subsys_state *css)
49{ 48{
50 if (cgrp->parent) 49 struct cgroup_cls_state *cs = css_cls_state(css);
51 cgrp_cls_state(cgrp)->classid = 50 struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
52 cgrp_cls_state(cgrp->parent)->classid; 51
52 if (parent)
53 cs->classid = parent->classid;
53 return 0; 54 return 0;
54} 55}
55 56
56static void cgrp_css_free(struct cgroup *cgrp) 57static void cgrp_css_free(struct cgroup_subsys_state *css)
57{ 58{
58 kfree(cgrp_cls_state(cgrp)); 59 kfree(css_cls_state(css));
59} 60}
60 61
61static int update_classid(const void *v, struct file *file, unsigned n) 62static int update_classid(const void *v, struct file *file, unsigned n)
@@ -67,12 +68,13 @@ static int update_classid(const void *v, struct file *file, unsigned n)
67 return 0; 68 return 0;
68} 69}
69 70
70static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) 71static void cgrp_attach(struct cgroup_subsys_state *css,
72 struct cgroup_taskset *tset)
71{ 73{
72 struct task_struct *p; 74 struct task_struct *p;
73 void *v; 75 void *v;
74 76
75 cgroup_taskset_for_each(p, cgrp, tset) { 77 cgroup_taskset_for_each(p, css, tset) {
76 task_lock(p); 78 task_lock(p);
77 v = (void *)(unsigned long)task_cls_classid(p); 79 v = (void *)(unsigned long)task_cls_classid(p);
78 iterate_fd(p->files, 0, update_classid, v); 80 iterate_fd(p->files, 0, update_classid, v);
@@ -80,14 +82,15 @@ static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
80 } 82 }
81} 83}
82 84
83static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) 85static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
84{ 86{
85 return cgrp_cls_state(cgrp)->classid; 87 return css_cls_state(css)->classid;
86} 88}
87 89
88static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value) 90static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
91 u64 value)
89{ 92{
90 cgrp_cls_state(cgrp)->classid = (u32) value; 93 css_cls_state(css)->classid = (u32) value;
91 return 0; 94 return 0;
92} 95}
93 96
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 51b968d3febb..2adda7fa2d39 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -200,6 +200,58 @@ int unregister_qdisc(struct Qdisc_ops *qops)
200} 200}
201EXPORT_SYMBOL(unregister_qdisc); 201EXPORT_SYMBOL(unregister_qdisc);
202 202
203/* Get default qdisc if not otherwise specified */
204void qdisc_get_default(char *name, size_t len)
205{
206 read_lock(&qdisc_mod_lock);
207 strlcpy(name, default_qdisc_ops->id, len);
208 read_unlock(&qdisc_mod_lock);
209}
210
211static struct Qdisc_ops *qdisc_lookup_default(const char *name)
212{
213 struct Qdisc_ops *q = NULL;
214
215 for (q = qdisc_base; q; q = q->next) {
216 if (!strcmp(name, q->id)) {
217 if (!try_module_get(q->owner))
218 q = NULL;
219 break;
220 }
221 }
222
223 return q;
224}
225
226/* Set new default qdisc to use */
227int qdisc_set_default(const char *name)
228{
229 const struct Qdisc_ops *ops;
230
231 if (!capable(CAP_NET_ADMIN))
232 return -EPERM;
233
234 write_lock(&qdisc_mod_lock);
235 ops = qdisc_lookup_default(name);
236 if (!ops) {
237 /* Not found, drop lock and try to load module */
238 write_unlock(&qdisc_mod_lock);
239 request_module("sch_%s", name);
240 write_lock(&qdisc_mod_lock);
241
242 ops = qdisc_lookup_default(name);
243 }
244
245 if (ops) {
246 /* Set new default */
247 module_put(default_qdisc_ops->owner);
248 default_qdisc_ops = ops;
249 }
250 write_unlock(&qdisc_mod_lock);
251
252 return ops ? 0 : -ENOENT;
253}
254
203/* We know handle. Find qdisc among all qdisc's attached to device 255/* We know handle. Find qdisc among all qdisc's attached to device
204 (root qdisc, all its children, children of children etc.) 256 (root qdisc, all its children, children of children etc.)
205 */ 257 */
@@ -1854,6 +1906,7 @@ static int __init pktsched_init(void)
1854 return err; 1906 return err;
1855 } 1907 }
1856 1908
1909 register_qdisc(&pfifo_fast_ops);
1857 register_qdisc(&pfifo_qdisc_ops); 1910 register_qdisc(&pfifo_qdisc_ops);
1858 register_qdisc(&bfifo_qdisc_ops); 1911 register_qdisc(&bfifo_qdisc_ops);
1859 register_qdisc(&pfifo_head_drop_qdisc_ops); 1912 register_qdisc(&pfifo_head_drop_qdisc_ops);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ef53ab8d0aae..ddd73cb2d7ba 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -438,7 +438,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
438 if (mask != q->tab_mask) { 438 if (mask != q->tab_mask) {
439 struct sk_buff **ntab; 439 struct sk_buff **ntab;
440 440
441 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL); 441 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
442 GFP_KERNEL | __GFP_NOWARN);
442 if (!ntab) 443 if (!ntab)
443 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *)); 444 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
444 if (!ntab) 445 if (!ntab)
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
new file mode 100644
index 000000000000..32ad015ee8ce
--- /dev/null
+++ b/net/sched/sch_fq.c
@@ -0,0 +1,793 @@
1/*
2 * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
3 *
4 * Copyright (C) 2013 Eric Dumazet <edumazet@google.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Meant to be mostly used for locally generated traffic :
12 * Fast classification depends on skb->sk being set before reaching us.
13 * If not, (router workload), we use rxhash as fallback, with 32 bits wide hash.
14 * All packets belonging to a socket are considered as a 'flow'.
15 *
16 * Flows are dynamically allocated and stored in a hash table of RB trees
17 * They are also part of one Round Robin 'queues' (new or old flows)
18 *
19 * Burst avoidance (aka pacing) capability :
20 *
21 * Transport (eg TCP) can set in sk->sk_pacing_rate a rate, enqueue a
22 * bunch of packets, and this packet scheduler adds delay between
23 * packets to respect rate limitation.
24 *
25 * enqueue() :
26 * - lookup one RB tree (out of 1024 or more) to find the flow.
27 * If non existent flow, create it, add it to the tree.
28 * Add skb to the per flow list of skb (fifo).
29 * - Use a special fifo for high prio packets
30 *
31 * dequeue() : serves flows in Round Robin
32 * Note : When a flow becomes empty, we do not immediately remove it from
33 * rb trees, for performance reasons (it's expected to send additional packets,
34 * or SLAB cache will reuse socket for another flow)
35 */
36
37#include <linux/module.h>
38#include <linux/types.h>
39#include <linux/kernel.h>
40#include <linux/jiffies.h>
41#include <linux/string.h>
42#include <linux/in.h>
43#include <linux/errno.h>
44#include <linux/init.h>
45#include <linux/skbuff.h>
46#include <linux/slab.h>
47#include <linux/rbtree.h>
48#include <linux/hash.h>
49#include <linux/prefetch.h>
50#include <net/netlink.h>
51#include <net/pkt_sched.h>
52#include <net/sock.h>
53#include <net/tcp_states.h>
54
55/*
56 * Per flow structure, dynamically allocated
57 */
58struct fq_flow {
59 struct sk_buff *head; /* list of skbs for this flow : first skb */
60 union {
61 struct sk_buff *tail; /* last skb in the list */
62 unsigned long age; /* jiffies when flow was emptied, for gc */
63 };
64 struct rb_node fq_node; /* anchor in fq_root[] trees */
65 struct sock *sk; /* rb-tree key: skb->sk, or (rxhash | 1) when the skb has no socket */
66 int qlen; /* number of packets in flow queue */
67 int credit; /* byte budget: reduced by each dequeued packet length, refilled by q->quantum */
68 u32 socket_hash; /* sk_hash */
69 struct fq_flow *next; /* next pointer in RR lists, or &detached (also &throttled while in q->delayed) */
70
71 struct rb_node rate_node; /* anchor in q->delayed tree */
72 u64 time_next_packet; /* ktime in ns before which fq_dequeue() will not send from this flow */
73};
74
/* Singly linked round-robin list of flows, chained through fq_flow::next.
 * 'last' is only meaningful while 'first' is non-NULL.
 */
75struct fq_flow_head {
76 struct fq_flow *first;
77 struct fq_flow *last;
78};
79
/* Per-qdisc private state of the fq scheduler. */
80struct fq_sched_data {
81 struct fq_flow_head new_flows; /* flows that just left the detached state (served first) */
82
83 struct fq_flow_head old_flows; /* flows already served, out of credit, or released from throttling */
84
85 struct rb_root delayed; /* for rate limited flows */
86 u64 time_next_delayed_flow; /* smallest time_next_packet in 'delayed', ~0ULL once found empty */
87
88 struct fq_flow internal; /* for non classified or high prio packets */
89 u32 quantum; /* credit added to a flow each time it is refilled */
90 u32 initial_quantum; /* credit given to a newly created (or re-used socket) flow */
91 u32 flow_default_rate;/* rate per flow : bytes per second */
92 u32 flow_max_rate; /* optional max rate per flow */
93 u32 flow_plimit; /* max packets per flow */
94 struct rb_root *fq_root; /* array of (1 << fq_trees_log) rb-tree roots, indexed by socket hash */
95 u8 rate_enable; /* pacing is applied in fq_dequeue() only when non-zero */
96 u8 fq_trees_log; /* log2 of the number of entries in fq_root[] */
97
98 u32 flows; /* flows currently linked in fq_root[] */
99 u32 inactive_flows; /* of which detached (no packet queued) */
100 u32 throttled_flows; /* flows currently in the 'delayed' tree */
101
102 u64 stat_gc_flows;
103 u64 stat_internal_packets;
104 u64 stat_tcp_retrans;
105 u64 stat_throttled;
106 u64 stat_flows_plimit;
107 u64 stat_pkts_too_long;
108 u64 stat_allocation_errors;
109 struct qdisc_watchdog watchdog; /* armed by fq_dequeue() for time_next_delayed_flow */
110};
111
112/* special value to mark a detached flow (not on old/new list) */
/* 'throttled' plays the same sentinel role for flows parked in q->delayed.
 * Only the addresses of these two objects are used, never their contents.
 */
113static struct fq_flow detached, throttled;
114
115static void fq_flow_set_detached(struct fq_flow *f)
116{
117 f->next = &detached;
118}
119
120static bool fq_flow_is_detached(const struct fq_flow *f)
121{
122 return f->next == &detached;
123}
124
125static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
126{
127 struct rb_node **p = &q->delayed.rb_node, *parent = NULL;
128
129 while (*p) {
130 struct fq_flow *aux;
131
132 parent = *p;
133 aux = container_of(parent, struct fq_flow, rate_node);
134 if (f->time_next_packet >= aux->time_next_packet)
135 p = &parent->rb_right;
136 else
137 p = &parent->rb_left;
138 }
139 rb_link_node(&f->rate_node, parent, p);
140 rb_insert_color(&f->rate_node, &q->delayed);
141 q->throttled_flows++;
142 q->stat_throttled++;
143
144 f->next = &throttled;
145 if (q->time_next_delayed_flow > f->time_next_packet)
146 q->time_next_delayed_flow = f->time_next_packet;
147}
148
149
/* Slab cache for struct fq_flow; created in fq_module_init() and destroyed
 * in fq_module_exit().
 */
150static struct kmem_cache *fq_flow_cachep __read_mostly;
151
152static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
153{
154 if (head->first)
155 head->last->next = flow;
156 else
157 head->first = flow;
158 head->last = flow;
159 flow->next = NULL;
160}
161
162/* limit number of collected flows per round */
163#define FQ_GC_MAX 8
/* a detached flow becomes a gc candidate once it has been empty for longer
 * than this many jiffies (see fq_gc_candidate())
 */
164#define FQ_GC_AGE (3*HZ)
165
166static bool fq_gc_candidate(const struct fq_flow *f)
167{
168 return fq_flow_is_detached(f) &&
169 time_after(jiffies, f->age + FQ_GC_AGE);
170}
171
172static void fq_gc(struct fq_sched_data *q,
173 struct rb_root *root,
174 struct sock *sk)
175{
176 struct fq_flow *f, *tofree[FQ_GC_MAX];
177 struct rb_node **p, *parent;
178 int fcnt = 0;
179
180 p = &root->rb_node;
181 parent = NULL;
182 while (*p) {
183 parent = *p;
184
185 f = container_of(parent, struct fq_flow, fq_node);
186 if (f->sk == sk)
187 break;
188
189 if (fq_gc_candidate(f)) {
190 tofree[fcnt++] = f;
191 if (fcnt == FQ_GC_MAX)
192 break;
193 }
194
195 if (f->sk > sk)
196 p = &parent->rb_right;
197 else
198 p = &parent->rb_left;
199 }
200
201 q->flows -= fcnt;
202 q->inactive_flows -= fcnt;
203 q->stat_gc_flows += fcnt;
204 while (fcnt) {
205 struct fq_flow *f = tofree[--fcnt];
206
207 rb_erase(&f->fq_node, root);
208 kmem_cache_free(fq_flow_cachep, f);
209 }
210}
211
/* Indexed by (skb->priority & TC_PRIO_MAX). fq_classify() only tests for
 * band 0, which sends the packet to the q->internal flow; every other value
 * takes the normal per-flow path.
 */
212static const u8 prio2band[TC_PRIO_MAX + 1] = {
213 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
214};
215
216static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
217{
218 struct rb_node **p, *parent;
219 struct sock *sk = skb->sk;
220 struct rb_root *root;
221 struct fq_flow *f;
222 int band;
223
224 /* warning: no starvation prevention... */
225 band = prio2band[skb->priority & TC_PRIO_MAX];
226 if (unlikely(band == 0))
227 return &q->internal;
228
229 if (unlikely(!sk)) {
230 /* By forcing low order bit to 1, we make sure to not
231 * collide with a local flow (socket pointers are word aligned)
232 */
233 sk = (struct sock *)(skb_get_rxhash(skb) | 1L);
234 }
235
236 root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
237
238 if (q->flows >= (2U << q->fq_trees_log) &&
239 q->inactive_flows > q->flows/2)
240 fq_gc(q, root, sk);
241
242 p = &root->rb_node;
243 parent = NULL;
244 while (*p) {
245 parent = *p;
246
247 f = container_of(parent, struct fq_flow, fq_node);
248 if (f->sk == sk) {
249 /* socket might have been reallocated, so check
250 * if its sk_hash is the same.
251 * It not, we need to refill credit with
252 * initial quantum
253 */
254 if (unlikely(skb->sk &&
255 f->socket_hash != sk->sk_hash)) {
256 f->credit = q->initial_quantum;
257 f->socket_hash = sk->sk_hash;
258 }
259 return f;
260 }
261 if (f->sk > sk)
262 p = &parent->rb_right;
263 else
264 p = &parent->rb_left;
265 }
266
267 f = kmem_cache_zalloc(fq_flow_cachep, GFP_ATOMIC | __GFP_NOWARN);
268 if (unlikely(!f)) {
269 q->stat_allocation_errors++;
270 return &q->internal;
271 }
272 fq_flow_set_detached(f);
273 f->sk = sk;
274 if (skb->sk)
275 f->socket_hash = sk->sk_hash;
276 f->credit = q->initial_quantum;
277
278 rb_link_node(&f->fq_node, parent, p);
279 rb_insert_color(&f->fq_node, root);
280
281 q->flows++;
282 q->inactive_flows++;
283 return f;
284}
285
286
287/* remove one skb from head of flow queue */
288static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
289{
290 struct sk_buff *skb = flow->head;
291
292 if (skb) {
293 flow->head = skb->next;
294 skb->next = NULL;
295 flow->qlen--;
296 }
297 return skb;
298}
299
300/* We might add in the future detection of retransmits
301 * For the time being, just return false
302 */
303static bool skb_is_retransmit(struct sk_buff *skb)
304{
305 return false;
306}
307
308/* add skb to flow queue
309 * flow queue is a linked list, kind of FIFO, except for TCP retransmits
310 * We special case tcp retransmits to be transmitted before other packets.
311 * We rely on fact that TCP retransmits are unlikely, so we do not waste
312 * a separate queue or a pointer.
313 * head-> [retrans pkt 1]
314 * [retrans pkt 2]
315 * [ normal pkt 1]
316 * [ normal pkt 2]
317 * [ normal pkt 3]
318 * tail-> [ normal pkt 4]
319 */
320static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
321{
322 struct sk_buff *prev, *head = flow->head;
323
324 skb->next = NULL;
325 if (!head) {
326 flow->head = skb;
327 flow->tail = skb;
328 return;
329 }
330 if (likely(!skb_is_retransmit(skb))) {
331 flow->tail->next = skb;
332 flow->tail = skb;
333 return;
334 }
335
336 /* This skb is a tcp retransmit,
337 * find the last retrans packet in the queue
338 */
339 prev = NULL;
340 while (skb_is_retransmit(head)) {
341 prev = head;
342 head = head->next;
343 if (!head)
344 break;
345 }
346 if (!prev) { /* no rtx packet in queue, become the new head */
347 skb->next = flow->head;
348 flow->head = skb;
349 } else {
350 if (prev == flow->tail)
351 flow->tail = skb;
352 else
353 skb->next = prev->next;
354 prev->next = skb;
355 }
356}
357
358static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
359{
360 struct fq_sched_data *q = qdisc_priv(sch);
361 struct fq_flow *f;
362
363 if (unlikely(sch->q.qlen >= sch->limit))
364 return qdisc_drop(skb, sch);
365
366 f = fq_classify(skb, q);
367 if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
368 q->stat_flows_plimit++;
369 return qdisc_drop(skb, sch);
370 }
371
372 f->qlen++;
373 flow_queue_add(f, skb);
374 if (skb_is_retransmit(skb))
375 q->stat_tcp_retrans++;
376 sch->qstats.backlog += qdisc_pkt_len(skb);
377 if (fq_flow_is_detached(f)) {
378 fq_flow_add_tail(&q->new_flows, f);
379 if (q->quantum > f->credit)
380 f->credit = q->quantum;
381 q->inactive_flows--;
382 qdisc_unthrottled(sch);
383 }
384 if (unlikely(f == &q->internal)) {
385 q->stat_internal_packets++;
386 qdisc_unthrottled(sch);
387 }
388 sch->q.qlen++;
389
390 return NET_XMIT_SUCCESS;
391}
392
393static void fq_check_throttled(struct fq_sched_data *q, u64 now)
394{
395 struct rb_node *p;
396
397 if (q->time_next_delayed_flow > now)
398 return;
399
400 q->time_next_delayed_flow = ~0ULL;
401 while ((p = rb_first(&q->delayed)) != NULL) {
402 struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
403
404 if (f->time_next_packet > now) {
405 q->time_next_delayed_flow = f->time_next_packet;
406 break;
407 }
408 rb_erase(p, &q->delayed);
409 q->throttled_flows--;
410 fq_flow_add_tail(&q->old_flows, f);
411 }
412}
413
414static struct sk_buff *fq_dequeue(struct Qdisc *sch)
415{
416 struct fq_sched_data *q = qdisc_priv(sch);
417 u64 now = ktime_to_ns(ktime_get());
418 struct fq_flow_head *head;
419 struct sk_buff *skb;
420 struct fq_flow *f;
421
422 skb = fq_dequeue_head(&q->internal);
423 if (skb)
424 goto out;
425 fq_check_throttled(q, now);
426begin:
427 head = &q->new_flows;
428 if (!head->first) {
429 head = &q->old_flows;
430 if (!head->first) {
431 if (q->time_next_delayed_flow != ~0ULL)
432 qdisc_watchdog_schedule_ns(&q->watchdog,
433 q->time_next_delayed_flow);
434 return NULL;
435 }
436 }
437 f = head->first;
438
439 if (f->credit <= 0) {
440 f->credit += q->quantum;
441 head->first = f->next;
442 fq_flow_add_tail(&q->old_flows, f);
443 goto begin;
444 }
445
446 if (unlikely(f->head && now < f->time_next_packet)) {
447 head->first = f->next;
448 fq_flow_set_throttled(q, f);
449 goto begin;
450 }
451
452 skb = fq_dequeue_head(f);
453 if (!skb) {
454 head->first = f->next;
455 /* force a pass through old_flows to prevent starvation */
456 if ((head == &q->new_flows) && q->old_flows.first) {
457 fq_flow_add_tail(&q->old_flows, f);
458 } else {
459 fq_flow_set_detached(f);
460 f->age = jiffies;
461 q->inactive_flows++;
462 }
463 goto begin;
464 }
465 prefetch(&skb->end);
466 f->time_next_packet = now;
467 f->credit -= qdisc_pkt_len(skb);
468
469 if (f->credit <= 0 &&
470 q->rate_enable &&
471 skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
472 u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
473
474 rate = min(rate, q->flow_max_rate);
475 if (rate) {
476 u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC;
477
478 do_div(len, rate);
479 /* Since socket rate can change later,
480 * clamp the delay to 125 ms.
481 * TODO: maybe segment the too big skb, as in commit
482 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
483 */
484 if (unlikely(len > 125 * NSEC_PER_MSEC)) {
485 len = 125 * NSEC_PER_MSEC;
486 q->stat_pkts_too_long++;
487 }
488
489 f->time_next_packet = now + len;
490 }
491 }
492out:
493 sch->qstats.backlog -= qdisc_pkt_len(skb);
494 qdisc_bstats_update(sch, skb);
495 sch->q.qlen--;
496 qdisc_unthrottled(sch);
497 return skb;
498}
499
500static void fq_reset(struct Qdisc *sch)
501{
502 struct sk_buff *skb;
503
504 while ((skb = fq_dequeue(sch)) != NULL)
505 kfree_skb(skb);
506}
507
508static void fq_rehash(struct fq_sched_data *q,
509 struct rb_root *old_array, u32 old_log,
510 struct rb_root *new_array, u32 new_log)
511{
512 struct rb_node *op, **np, *parent;
513 struct rb_root *oroot, *nroot;
514 struct fq_flow *of, *nf;
515 int fcnt = 0;
516 u32 idx;
517
518 for (idx = 0; idx < (1U << old_log); idx++) {
519 oroot = &old_array[idx];
520 while ((op = rb_first(oroot)) != NULL) {
521 rb_erase(op, oroot);
522 of = container_of(op, struct fq_flow, fq_node);
523 if (fq_gc_candidate(of)) {
524 fcnt++;
525 kmem_cache_free(fq_flow_cachep, of);
526 continue;
527 }
528 nroot = &new_array[hash_32((u32)(long)of->sk, new_log)];
529
530 np = &nroot->rb_node;
531 parent = NULL;
532 while (*np) {
533 parent = *np;
534
535 nf = container_of(parent, struct fq_flow, fq_node);
536 BUG_ON(nf->sk == of->sk);
537
538 if (nf->sk > of->sk)
539 np = &parent->rb_right;
540 else
541 np = &parent->rb_left;
542 }
543
544 rb_link_node(&of->fq_node, parent, np);
545 rb_insert_color(&of->fq_node, nroot);
546 }
547 }
548 q->flows -= fcnt;
549 q->inactive_flows -= fcnt;
550 q->stat_gc_flows += fcnt;
551}
552
553static int fq_resize(struct fq_sched_data *q, u32 log)
554{
555 struct rb_root *array;
556 u32 idx;
557
558 if (q->fq_root && log == q->fq_trees_log)
559 return 0;
560
561 array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL);
562 if (!array)
563 return -ENOMEM;
564
565 for (idx = 0; idx < (1U << log); idx++)
566 array[idx] = RB_ROOT;
567
568 if (q->fq_root) {
569 fq_rehash(q, q->fq_root, q->fq_trees_log, array, log);
570 kfree(q->fq_root);
571 }
572 q->fq_root = array;
573 q->fq_trees_log = log;
574
575 return 0;
576}
577
/* Netlink attribute policy used by fq_change(): every TCA_FQ_* option is
 * carried as a u32.
 */
578static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
579 [TCA_FQ_PLIMIT] = { .type = NLA_U32 },
580 [TCA_FQ_FLOW_PLIMIT] = { .type = NLA_U32 },
581 [TCA_FQ_QUANTUM] = { .type = NLA_U32 },
582 [TCA_FQ_INITIAL_QUANTUM] = { .type = NLA_U32 },
583 [TCA_FQ_RATE_ENABLE] = { .type = NLA_U32 },
584 [TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 },
585 [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
586 [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
587};
588
589static int fq_change(struct Qdisc *sch, struct nlattr *opt)
590{
591 struct fq_sched_data *q = qdisc_priv(sch);
592 struct nlattr *tb[TCA_FQ_MAX + 1];
593 int err, drop_count = 0;
594 u32 fq_log;
595
596 if (!opt)
597 return -EINVAL;
598
599 err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy);
600 if (err < 0)
601 return err;
602
603 sch_tree_lock(sch);
604
605 fq_log = q->fq_trees_log;
606
607 if (tb[TCA_FQ_BUCKETS_LOG]) {
608 u32 nval = nla_get_u32(tb[TCA_FQ_BUCKETS_LOG]);
609
610 if (nval >= 1 && nval <= ilog2(256*1024))
611 fq_log = nval;
612 else
613 err = -EINVAL;
614 }
615 if (tb[TCA_FQ_PLIMIT])
616 sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]);
617
618 if (tb[TCA_FQ_FLOW_PLIMIT])
619 q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
620
621 if (tb[TCA_FQ_QUANTUM])
622 q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
623
624 if (tb[TCA_FQ_INITIAL_QUANTUM])
625 q->quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
626
627 if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
628 q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
629
630 if (tb[TCA_FQ_FLOW_MAX_RATE])
631 q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
632
633 if (tb[TCA_FQ_RATE_ENABLE]) {
634 u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
635
636 if (enable <= 1)
637 q->rate_enable = enable;
638 else
639 err = -EINVAL;
640 }
641
642 if (!err)
643 err = fq_resize(q, fq_log);
644
645 while (sch->q.qlen > sch->limit) {
646 struct sk_buff *skb = fq_dequeue(sch);
647
648 kfree_skb(skb);
649 drop_count++;
650 }
651 qdisc_tree_decrease_qlen(sch, drop_count);
652
653 sch_tree_unlock(sch);
654 return err;
655}
656
657static void fq_destroy(struct Qdisc *sch)
658{
659 struct fq_sched_data *q = qdisc_priv(sch);
660 struct rb_root *root;
661 struct rb_node *p;
662 unsigned int idx;
663
664 if (q->fq_root) {
665 for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
666 root = &q->fq_root[idx];
667 while ((p = rb_first(root)) != NULL) {
668 rb_erase(p, root);
669 kmem_cache_free(fq_flow_cachep,
670 container_of(p, struct fq_flow, fq_node));
671 }
672 }
673 kfree(q->fq_root);
674 }
675 qdisc_watchdog_cancel(&q->watchdog);
676}
677
678static int fq_init(struct Qdisc *sch, struct nlattr *opt)
679{
680 struct fq_sched_data *q = qdisc_priv(sch);
681 int err;
682
683 sch->limit = 10000;
684 q->flow_plimit = 100;
685 q->quantum = 2 * psched_mtu(qdisc_dev(sch));
686 q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
687 q->flow_default_rate = 0;
688 q->flow_max_rate = ~0U;
689 q->rate_enable = 1;
690 q->new_flows.first = NULL;
691 q->old_flows.first = NULL;
692 q->delayed = RB_ROOT;
693 q->fq_root = NULL;
694 q->fq_trees_log = ilog2(1024);
695 qdisc_watchdog_init(&q->watchdog, sch);
696
697 if (opt)
698 err = fq_change(sch, opt);
699 else
700 err = fq_resize(q, q->fq_trees_log);
701
702 return err;
703}
704
705static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
706{
707 struct fq_sched_data *q = qdisc_priv(sch);
708 struct nlattr *opts;
709
710 opts = nla_nest_start(skb, TCA_OPTIONS);
711 if (opts == NULL)
712 goto nla_put_failure;
713
714 if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
715 nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
716 nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
717 nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
718 nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
719 nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) ||
720 nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
721 nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
722 goto nla_put_failure;
723
724 nla_nest_end(skb, opts);
725 return skb->len;
726
727nla_put_failure:
728 return -1;
729}
730
731static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
732{
733 struct fq_sched_data *q = qdisc_priv(sch);
734 u64 now = ktime_to_ns(ktime_get());
735 struct tc_fq_qd_stats st = {
736 .gc_flows = q->stat_gc_flows,
737 .highprio_packets = q->stat_internal_packets,
738 .tcp_retrans = q->stat_tcp_retrans,
739 .throttled = q->stat_throttled,
740 .flows_plimit = q->stat_flows_plimit,
741 .pkts_too_long = q->stat_pkts_too_long,
742 .allocation_errors = q->stat_allocation_errors,
743 .flows = q->flows,
744 .inactive_flows = q->inactive_flows,
745 .throttled_flows = q->throttled_flows,
746 .time_next_delayed_flow = q->time_next_delayed_flow - now,
747 };
748
749 return gnet_stats_copy_app(d, &st, sizeof(st));
750}
751
/* Operations table registered under the id "fq" by fq_module_init(). */
752static struct Qdisc_ops fq_qdisc_ops __read_mostly = {
753 .id = "fq",
754 .priv_size = sizeof(struct fq_sched_data),
755
756 .enqueue = fq_enqueue,
757 .dequeue = fq_dequeue,
758 .peek = qdisc_peek_dequeued,
759 .init = fq_init,
760 .reset = fq_reset,
761 .destroy = fq_destroy,
762 .change = fq_change,
763 .dump = fq_dump,
764 .dump_stats = fq_dump_stats,
765 .owner = THIS_MODULE,
766};
767
768static int __init fq_module_init(void)
769{
770 int ret;
771
772 fq_flow_cachep = kmem_cache_create("fq_flow_cache",
773 sizeof(struct fq_flow),
774 0, 0, NULL);
775 if (!fq_flow_cachep)
776 return -ENOMEM;
777
778 ret = register_qdisc(&fq_qdisc_ops);
779 if (ret)
780 kmem_cache_destroy(fq_flow_cachep);
781 return ret;
782}
783
784static void __exit fq_module_exit(void)
785{
786 unregister_qdisc(&fq_qdisc_ops);
787 kmem_cache_destroy(fq_flow_cachep);
788}
789
790module_init(fq_module_init)
791module_exit(fq_module_exit)
792MODULE_AUTHOR("Eric Dumazet");
793MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 48be3d5c0d92..a74e278654aa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -30,6 +30,10 @@
30#include <net/pkt_sched.h> 30#include <net/pkt_sched.h>
31#include <net/dst.h> 31#include <net/dst.h>
32 32
33/* Qdisc to use by default */
34const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
35EXPORT_SYMBOL(default_qdisc_ops);
36
33/* Main transmission queue. */ 37/* Main transmission queue. */
34 38
35/* Modifications to data participating in scheduling must be protected with 39/* Modifications to data participating in scheduling must be protected with
@@ -530,12 +534,11 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
530 .dump = pfifo_fast_dump, 534 .dump = pfifo_fast_dump,
531 .owner = THIS_MODULE, 535 .owner = THIS_MODULE,
532}; 536};
533EXPORT_SYMBOL(pfifo_fast_ops);
534 537
535static struct lock_class_key qdisc_tx_busylock; 538static struct lock_class_key qdisc_tx_busylock;
536 539
537struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, 540struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
538 struct Qdisc_ops *ops) 541 const struct Qdisc_ops *ops)
539{ 542{
540 void *p; 543 void *p;
541 struct Qdisc *sch; 544 struct Qdisc *sch;
@@ -579,10 +582,14 @@ errout:
579} 582}
580 583
581struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, 584struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
582 struct Qdisc_ops *ops, unsigned int parentid) 585 const struct Qdisc_ops *ops,
586 unsigned int parentid)
583{ 587{
584 struct Qdisc *sch; 588 struct Qdisc *sch;
585 589
590 if (!try_module_get(ops->owner))
591 goto errout;
592
586 sch = qdisc_alloc(dev_queue, ops); 593 sch = qdisc_alloc(dev_queue, ops);
587 if (IS_ERR(sch)) 594 if (IS_ERR(sch))
588 goto errout; 595 goto errout;
@@ -686,7 +693,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
686 693
687 if (dev->tx_queue_len) { 694 if (dev->tx_queue_len) {
688 qdisc = qdisc_create_dflt(dev_queue, 695 qdisc = qdisc_create_dflt(dev_queue,
689 &pfifo_fast_ops, TC_H_ROOT); 696 default_qdisc_ops, TC_H_ROOT);
690 if (!qdisc) { 697 if (!qdisc) {
691 netdev_info(dev, "activation failed\n"); 698 netdev_info(dev, "activation failed\n");
692 return; 699 return;
@@ -739,9 +746,8 @@ void dev_activate(struct net_device *dev)
739 int need_watchdog; 746 int need_watchdog;
740 747
741 /* No queueing discipline is attached to device; 748 /* No queueing discipline is attached to device;
742 create default one i.e. pfifo_fast for devices, 749 * create default one for devices, which need queueing
743 which need queueing and noqueue_qdisc for 750 * and noqueue_qdisc for virtual interfaces
744 virtual interfaces
745 */ 751 */
746 752
747 if (dev->qdisc == &noop_qdisc) 753 if (dev->qdisc == &noop_qdisc)
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c2178b15ca6e..863846cc5513 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1495,7 +1495,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1495 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil); 1495 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
1496 1496
1497 cl->buffer = PSCHED_TICKS2NS(hopt->buffer); 1497 cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1498 cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); 1498 cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1499 1499
1500 sch_tree_unlock(sch); 1500 sch_tree_unlock(sch);
1501 1501
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 5da78a19ac9a..2e56185736d6 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -57,7 +57,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
57 57
58 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 58 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
59 dev_queue = netdev_get_tx_queue(dev, ntx); 59 dev_queue = netdev_get_tx_queue(dev, ntx);
60 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, 60 qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
61 TC_H_MAKE(TC_H_MAJ(sch->handle), 61 TC_H_MAKE(TC_H_MAJ(sch->handle),
62 TC_H_MIN(ntx + 1))); 62 TC_H_MIN(ntx + 1)));
63 if (qdisc == NULL) 63 if (qdisc == NULL)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index accec33c454c..d44c868cb537 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -124,7 +124,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
124 124
125 for (i = 0; i < dev->num_tx_queues; i++) { 125 for (i = 0; i < dev->num_tx_queues; i++) {
126 dev_queue = netdev_get_tx_queue(dev, i); 126 dev_queue = netdev_get_tx_queue(dev, i);
127 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, 127 qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
128 TC_H_MAKE(TC_H_MAJ(sch->handle), 128 TC_H_MAKE(TC_H_MAJ(sch->handle),
129 TC_H_MIN(i + 1))); 129 TC_H_MIN(i + 1)));
130 if (qdisc == NULL) { 130 if (qdisc == NULL) {
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 82f6016d89ab..a6d788d45216 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -412,12 +412,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
412 412
413 /* If a delay is expected, orphan the skb. (orphaning usually takes 413 /* If a delay is expected, orphan the skb. (orphaning usually takes
414 * place at TX completion time, so _before_ the link transit delay) 414 * place at TX completion time, so _before_ the link transit delay)
415 * Ideally, this orphaning should be done after the rate limiting
416 * module, because this breaks TCP Small Queue, and other mechanisms
417 * based on socket sk_wmem_alloc.
418 */ 415 */
419 if (q->latency || q->jitter) 416 if (q->latency || q->jitter)
420 skb_orphan(skb); 417 skb_orphan_partial(skb);
421 418
422 /* 419 /*
423 * If we need to duplicate packet, then re-insert at top of the 420 * If we need to duplicate packet, then re-insert at top of the
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index ab67efc64b24..cef509985192 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -28,10 +28,7 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -43,9 +40,6 @@
43 * Daisy Chang <daisyc@us.ibm.com> 40 * Daisy Chang <daisyc@us.ibm.com>
44 * Ryan Layer <rmlayer@us.ibm.com> 41 * Ryan Layer <rmlayer@us.ibm.com>
45 * Kevin Gao <kevin.gao@intel.com> 42 * Kevin Gao <kevin.gao@intel.com>
46 *
47 * Any bugs reported given to us we will try to fix... any fixes shared will
48 * be incorporated into the next SCTP release.
49 */ 43 */
50 44
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 45#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index ba1dfc3f8def..8c4fa5dec824 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -22,16 +22,10 @@
22 * 22 *
23 * Please send any bug reports or fixes you make to the 23 * Please send any bug reports or fixes you make to the
24 * email address(es): 24 * email address(es):
25 * lksctp developers <lksctp-developers@lists.sourceforge.net> 25 * lksctp developers <linux-sctp@vger.kernel.org>
26 *
27 * Or submit a bug report through the following website:
28 * http://www.sf.net/projects/lksctp
29 * 26 *
30 * Written or modified by: 27 * Written or modified by:
31 * Vlad Yasevich <vladislav.yasevich@hp.com> 28 * Vlad Yasevich <vladislav.yasevich@hp.com>
32 *
33 * Any bugs reported given to us we will try to fix... any fixes shared will
34 * be incorporated into the next SCTP release.
35 */ 29 */
36 30
37#include <linux/slab.h> 31#include <linux/slab.h>
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 64977ea0f9c5..077bb070052b 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -27,19 +27,13 @@
27 * 27 *
28 * Please send any bug reports or fixes you make to the 28 * Please send any bug reports or fixes you make to the
29 * email address(es): 29 * email address(es):
30 * lksctp developers <lksctp-developers@lists.sourceforge.net> 30 * lksctp developers <linux-sctp@vger.kernel.org>
31 *
32 * Or submit a bug report through the following website:
33 * http://www.sf.net/projects/lksctp
34 * 31 *
35 * Written or modified by: 32 * Written or modified by:
36 * La Monte H.P. Yarroll <piggy@acm.org> 33 * La Monte H.P. Yarroll <piggy@acm.org>
37 * Karl Knutson <karl@athena.chicago.il.us> 34 * Karl Knutson <karl@athena.chicago.il.us>
38 * Jon Grimm <jgrimm@us.ibm.com> 35 * Jon Grimm <jgrimm@us.ibm.com>
39 * Daisy Chang <daisyc@us.ibm.com> 36 * Daisy Chang <daisyc@us.ibm.com>
40 *
41 * Any bugs reported given to us we will try to fix... any fixes shared will
42 * be incorporated into the next SCTP release.
43 */ 37 */
44 38
45#include <linux/types.h> 39#include <linux/types.h>
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 5780565f5b7d..7bd5ed4a8657 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -24,17 +24,11 @@
24 * 24 *
25 * Please send any bug reports or fixes you make to the 25 * Please send any bug reports or fixes you make to the
26 * email address(es): 26 * email address(es):
27 * lksctp developers <lksctp-developers@lists.sourceforge.net> 27 * lksctp developers <linux-sctp@vger.kernel.org>
28 *
29 * Or submit a bug report through the following website:
30 * http://www.sf.net/projects/lksctp
31 * 28 *
32 * Written or modified by: 29 * Written or modified by:
33 * Jon Grimm <jgrimm@us.ibm.com> 30 * Jon Grimm <jgrimm@us.ibm.com>
34 * Sridhar Samudrala <sri@us.ibm.com> 31 * Sridhar Samudrala <sri@us.ibm.com>
35 *
36 * Any bugs reported given to us we will try to fix... any fixes shared will
37 * be incorporated into the next SCTP release.
38 */ 32 */
39 33
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 34#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -201,9 +195,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
201 /* This is the biggest possible DATA chunk that can fit into 195 /* This is the biggest possible DATA chunk that can fit into
202 * the packet 196 * the packet
203 */ 197 */
204 max_data = asoc->pathmtu - 198 max_data = (asoc->pathmtu -
205 sctp_sk(asoc->base.sk)->pf->af->net_header_len - 199 sctp_sk(asoc->base.sk)->pf->af->net_header_len -
206 sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk); 200 sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3;
207 201
208 max = asoc->frag_point; 202 max = asoc->frag_point;
209 /* If the the peer requested that we authenticate DATA chunks 203 /* If the the peer requested that we authenticate DATA chunks
diff --git a/net/sctp/command.c b/net/sctp/command.c
index c0044019db9e..3d9a9ff69c03 100644
--- a/net/sctp/command.c
+++ b/net/sctp/command.c
@@ -25,17 +25,11 @@
25 * 25 *
26 * Please send any bug reports or fixes you make to the 26 * Please send any bug reports or fixes you make to the
27 * email address(es): 27 * email address(es):
28 * lksctp developers <lksctp-developers@lists.sourceforge.net> 28 * lksctp developers <linux-sctp@vger.kernel.org>
29 *
30 * Or submit a bug report through the following website:
31 * http://www.sf.net/projects/lksctp
32 * 29 *
33 * Written or modified by: 30 * Written or modified by:
34 * La Monte H.P. Yarroll <piggy@acm.org> 31 * La Monte H.P. Yarroll <piggy@acm.org>
35 * Karl Knutson <karl@athena.chicago.il.us> 32 * Karl Knutson <karl@athena.chicago.il.us>
36 *
37 * Any bugs reported given to us we will try to fix... any fixes shared will
38 * be incorporated into the next SCTP release.
39 */ 33 */
40 34
41#include <linux/types.h> 35#include <linux/types.h>
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index f4998780d6df..e89015d8935a 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -28,10 +28,7 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -40,9 +37,6 @@
40 * Jon Grimm <jgrimm@us.ibm.com> 37 * Jon Grimm <jgrimm@us.ibm.com>
41 * Daisy Chang <daisyc@us.ibm.com> 38 * Daisy Chang <daisyc@us.ibm.com>
42 * Sridhar Samudrala <sri@us.ibm.com> 39 * Sridhar Samudrala <sri@us.ibm.com>
43 *
44 * Any bugs reported given to us we will try to fix... any fixes shared will
45 * be incorporated into the next SCTP release.
46 */ 40 */
47 41
48#include <net/sctp/sctp.h> 42#include <net/sctp/sctp.h>
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 9e3d257de0e0..09b8daac87c8 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -29,10 +29,7 @@
29 * 29 *
30 * Please send any bug reports or fixes you make to the 30 * Please send any bug reports or fixes you make to the
31 * email address(es): 31 * email address(es):
32 * lksctp developers <lksctp-developers@lists.sourceforge.net> 32 * lksctp developers <linux-sctp@vger.kernel.org>
33 *
34 * Or submit a bug report through the following website:
35 * http://www.sf.net/projects/lksctp
36 * 33 *
37 * Written or modified by: 34 * Written or modified by:
38 * La Monte H.P. Yarroll <piggy@acm.org> 35 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -40,9 +37,6 @@
40 * Jon Grimm <jgrimm@austin.ibm.com> 37 * Jon Grimm <jgrimm@austin.ibm.com>
41 * Daisy Chang <daisyc@us.ibm.com> 38 * Daisy Chang <daisyc@us.ibm.com>
42 * Dajiang Zhang <dajiang.zhang@nokia.com> 39 * Dajiang Zhang <dajiang.zhang@nokia.com>
43 *
44 * Any bugs reported given to us we will try to fix... any fixes shared will
45 * be incorporated into the next SCTP release.
46 */ 40 */
47 41
48#include <linux/types.h> 42#include <linux/types.h>
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 3fa4d858c35a..5f2068679f83 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -29,10 +29,7 @@
29 * 29 *
30 * Please send any bug reports or fixes you make to the 30 * Please send any bug reports or fixes you make to the
31 * email address(es): 31 * email address(es):
32 * lksctp developers <lksctp-developers@lists.sourceforge.net> 32 * lksctp developers <linux-sctp@vger.kernel.org>
33 *
34 * Or submit a bug report through the following website:
35 * http://www.sf.net/projects/lksctp
36 * 33 *
37 * Written or modified by: 34 * Written or modified by:
38 * La Monte H.P. Yarroll <piggy@acm.org> 35 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -43,9 +40,6 @@
43 * Daisy Chang <daisyc@us.ibm.com> 40 * Daisy Chang <daisyc@us.ibm.com>
44 * Sridhar Samudrala <sri@us.ibm.com> 41 * Sridhar Samudrala <sri@us.ibm.com>
45 * Ardelle Fan <ardelle.fan@intel.com> 42 * Ardelle Fan <ardelle.fan@intel.com>
46 *
47 * Any bugs reported given to us we will try to fix... any fixes shared will
48 * be incorporated into the next SCTP release.
49 */ 43 */
50 44
51#include <linux/types.h> 45#include <linux/types.h>
@@ -87,15 +81,7 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
87{ 81{
88 struct sctphdr *sh = sctp_hdr(skb); 82 struct sctphdr *sh = sctp_hdr(skb);
89 __le32 cmp = sh->checksum; 83 __le32 cmp = sh->checksum;
90 struct sk_buff *list; 84 __le32 val = sctp_compute_cksum(skb, 0);
91 __le32 val;
92 __u32 tmp = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
93
94 skb_walk_frags(skb, list)
95 tmp = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
96 tmp);
97
98 val = sctp_end_cksum(tmp);
99 85
100 if (val != cmp) { 86 if (val != cmp) {
101 /* CRC failure, dump it. */ 87 /* CRC failure, dump it. */
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index cb25f040fed0..5856932fdc38 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -30,17 +30,11 @@
30 * 30 *
31 * Please send any bug reports or fixes you make to the 31 * Please send any bug reports or fixes you make to the
32 * email address(es): 32 * email address(es):
33 * lksctp developers <lksctp-developers@lists.sourceforge.net> 33 * lksctp developers <linux-sctp@vger.kernel.org>
34 *
35 * Or submit a bug report through the following website:
36 * http://www.sf.net/projects/lksctp
37 * 34 *
38 * Written or modified by: 35 * Written or modified by:
39 * La Monte H.P. Yarroll <piggy@acm.org> 36 * La Monte H.P. Yarroll <piggy@acm.org>
40 * Karl Knutson <karl@athena.chicago.il.us> 37 * Karl Knutson <karl@athena.chicago.il.us>
41 *
42 * Any bugs reported given to us we will try to fix... any fixes shared will
43 * be incorporated into the next SCTP release.
44 */ 38 */
45 39
46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 09ffcc912d23..da613ceae28c 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -27,10 +27,7 @@
27 * 27 *
28 * Please send any bug reports or fixes you make to the 28 * Please send any bug reports or fixes you make to the
29 * email address(es): 29 * email address(es):
30 * lksctp developers <lksctp-developers@lists.sourceforge.net> 30 * lksctp developers <linux-sctp@vger.kernel.org>
31 *
32 * Or submit a bug report through the following website:
33 * http://www.sf.net/projects/lksctp
34 * 31 *
35 * Written or modified by: 32 * Written or modified by:
36 * Le Yanqun <yanqun.le@nokia.com> 33 * Le Yanqun <yanqun.le@nokia.com>
@@ -42,9 +39,6 @@
42 * 39 *
43 * Based on: 40 * Based on:
44 * linux/net/ipv6/tcp_ipv6.c 41 * linux/net/ipv6/tcp_ipv6.c
45 *
46 * Any bugs reported given to us we will try to fix... any fixes shared will
47 * be incorporated into the next SCTP release.
48 */ 42 */
49 43
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -351,7 +345,7 @@ out:
351 345
352 rt = (struct rt6_info *)dst; 346 rt = (struct rt6_info *)dst;
353 t->dst = dst; 347 t->dst = dst;
354 348 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
355 pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr, 349 pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr,
356 &fl6->saddr); 350 &fl6->saddr);
357 } else { 351 } else {
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index fe012c44f8df..5ea573b37648 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -26,16 +26,10 @@
26 * 26 *
27 * Please send any bug reports or fixes you make to the 27 * Please send any bug reports or fixes you make to the
28 * email address(es): 28 * email address(es):
29 * lksctp developers <lksctp-developers@lists.sourceforge.net> 29 * lksctp developers <linux-sctp@vger.kernel.org>
30 *
31 * Or submit a bug report through the following website:
32 * http://www.sf.net/projects/lksctp
33 * 30 *
34 * Written or modified by: 31 * Written or modified by:
35 * Jon Grimm <jgrimm@us.ibm.com> 32 * Jon Grimm <jgrimm@us.ibm.com>
36 *
37 * Any bugs reported given to us we will try to fix... any fixes shared will
38 * be incorporated into the next SCTP release.
39 */ 33 */
40 34
41#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 35#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/output.c b/net/sctp/output.c
index a46d1eb41762..0ac3a65daccb 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -26,19 +26,13 @@
26 * 26 *
27 * Please send any bug reports or fixes you make to the 27 * Please send any bug reports or fixes you make to the
28 * email address(es): 28 * email address(es):
29 * lksctp developers <lksctp-developers@lists.sourceforge.net> 29 * lksctp developers <linux-sctp@vger.kernel.org>
30 *
31 * Or submit a bug report through the following website:
32 * http://www.sf.net/projects/lksctp
33 * 30 *
34 * Written or modified by: 31 * Written or modified by:
35 * La Monte H.P. Yarroll <piggy@acm.org> 32 * La Monte H.P. Yarroll <piggy@acm.org>
36 * Karl Knutson <karl@athena.chicago.il.us> 33 * Karl Knutson <karl@athena.chicago.il.us>
37 * Jon Grimm <jgrimm@austin.ibm.com> 34 * Jon Grimm <jgrimm@austin.ibm.com>
38 * Sridhar Samudrala <sri@us.ibm.com> 35 * Sridhar Samudrala <sri@us.ibm.com>
39 *
40 * Any bugs reported given to us we will try to fix... any fixes shared will
41 * be incorporated into the next SCTP release.
42 */ 36 */
43 37
44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 38#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index ef9e2bbc0f2f..94df75877869 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -28,10 +28,7 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -41,9 +38,6 @@
41 * Hui Huang <hui.huang@nokia.com> 38 * Hui Huang <hui.huang@nokia.com>
42 * Sridhar Samudrala <sri@us.ibm.com> 39 * Sridhar Samudrala <sri@us.ibm.com>
43 * Jon Grimm <jgrimm@us.ibm.com> 40 * Jon Grimm <jgrimm@us.ibm.com>
44 *
45 * Any bugs reported given to us we will try to fix... any fixes shared will
46 * be incorporated into the next SCTP release.
47 */ 41 */
48 42
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index 794bb14decde..ce1ffd811775 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -29,10 +29,7 @@
29 * 29 *
30 * Please send any bug reports or fixes you make to the 30 * Please send any bug reports or fixes you make to the
31 * email address(es): 31 * email address(es):
32 * lksctp developers <lksctp-developers@lists.sourceforge.net> 32 * lksctp developers <linux-sctp@vger.kernel.org>
33 *
34 * Or submit a bug report through the following website:
35 * http://www.sf.net/projects/lksctp
36 * 33 *
37 * Written or modified by: 34 * Written or modified by:
38 * La Monte H.P. Yarroll <piggy@acm.org> 35 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -40,9 +37,6 @@
40 * Karl Knutson <karl@athena.chicago.il.us> 37 * Karl Knutson <karl@athena.chicago.il.us>
41 * Ardelle Fan <ardelle.fan@intel.com> 38 * Ardelle Fan <ardelle.fan@intel.com>
42 * Kevin Gao <kevin.gao@intel.com> 39 * Kevin Gao <kevin.gao@intel.com>
43 *
44 * Any bugs reported given to us we will try to fix... any fixes shared will
45 * be incorporated into the next SCTP release.
46 */ 40 */
47 41
48#include <linux/types.h> 42#include <linux/types.h>
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index e62c22535be4..53c452efb40b 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -46,6 +46,10 @@ static int port __read_mostly = 0;
46MODULE_PARM_DESC(port, "Port to match (0=all)"); 46MODULE_PARM_DESC(port, "Port to match (0=all)");
47module_param(port, int, 0); 47module_param(port, int, 0);
48 48
49static unsigned int fwmark __read_mostly = 0;
50MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
51module_param(fwmark, uint, 0);
52
49static int bufsize __read_mostly = 64 * 1024; 53static int bufsize __read_mostly = 64 * 1024;
50MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); 54MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
51module_param(bufsize, int, 0); 55module_param(bufsize, int, 0);
@@ -129,15 +133,19 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
129 void *arg, 133 void *arg,
130 sctp_cmd_seq_t *commands) 134 sctp_cmd_seq_t *commands)
131{ 135{
136 struct sctp_chunk *chunk = arg;
137 struct sk_buff *skb = chunk->skb;
132 struct sctp_transport *sp; 138 struct sctp_transport *sp;
133 static __u32 lcwnd = 0; 139 static __u32 lcwnd = 0;
134 struct timespec now; 140 struct timespec now;
135 141
136 sp = asoc->peer.primary_path; 142 sp = asoc->peer.primary_path;
137 143
138 if ((full || sp->cwnd != lcwnd) && 144 if (((port == 0 && fwmark == 0) ||
139 (!port || asoc->peer.port == port || 145 asoc->peer.port == port ||
140 ep->base.bind_addr.port == port)) { 146 ep->base.bind_addr.port == port ||
147 (fwmark > 0 && skb->mark == fwmark)) &&
148 (full || sp->cwnd != lcwnd)) {
141 lcwnd = sp->cwnd; 149 lcwnd = sp->cwnd;
142 150
143 getnstimeofday(&now); 151 getnstimeofday(&now);
@@ -155,13 +163,8 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
155 if (sp == asoc->peer.primary_path) 163 if (sp == asoc->peer.primary_path)
156 printl("*"); 164 printl("*");
157 165
158 if (sp->ipaddr.sa.sa_family == AF_INET) 166 printl("%pISc %2u %8u %8u %8u %8u %8u ",
159 printl("%pI4 ", &sp->ipaddr.v4.sin_addr); 167 &sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh,
160 else
161 printl("%pI6 ", &sp->ipaddr.v6.sin6_addr);
162
163 printl("%2u %8u %8u %8u %8u %8u ",
164 sp->state, sp->cwnd, sp->ssthresh,
165 sp->flight_size, sp->partial_bytes_acked, 168 sp->flight_size, sp->partial_bytes_acked,
166 sp->pathmtu); 169 sp->pathmtu);
167 } 170 }
@@ -203,8 +206,8 @@ static __init int sctpprobe_init(void)
203 if (ret) 206 if (ret)
204 goto remove_proc; 207 goto remove_proc;
205 208
206 pr_info("probe registered (port=%d)\n", port); 209 pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
207 210 port, fwmark, bufsize);
208 return 0; 211 return 0;
209 212
210remove_proc: 213remove_proc:
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 62526c477050..0c0642156842 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -22,16 +22,10 @@
22 * 22 *
23 * Please send any bug reports or fixes you make to the 23 * Please send any bug reports or fixes you make to the
24 * email address(es): 24 * email address(es):
25 * lksctp developers <lksctp-developers@lists.sourceforge.net> 25 * lksctp developers <linux-sctp@vger.kernel.org>
26 *
27 * Or submit a bug report through the following website:
28 * http://www.sf.net/projects/lksctp
29 * 26 *
30 * Written or modified by: 27 * Written or modified by:
31 * Sridhar Samudrala <sri@us.ibm.com> 28 * Sridhar Samudrala <sri@us.ibm.com>
32 *
33 * Any bugs reported given to us we will try to fix... any fixes shared will
34 * be incorporated into the next SCTP release.
35 */ 29 */
36 30
37#include <linux/types.h> 31#include <linux/types.h>
@@ -232,7 +226,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
232 sk = epb->sk; 226 sk = epb->sk;
233 if (!net_eq(sock_net(sk), seq_file_net(seq))) 227 if (!net_eq(sock_net(sk), seq_file_net(seq)))
234 continue; 228 continue;
235 seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, 229 seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5lu ", ep, sk,
236 sctp_sk(sk)->type, sk->sk_state, hash, 230 sctp_sk(sk)->type, sk->sk_state, hash,
237 epb->bind_addr.port, 231 epb->bind_addr.port,
238 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), 232 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
@@ -342,7 +336,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
342 continue; 336 continue;
343 seq_printf(seq, 337 seq_printf(seq,
344 "%8pK %8pK %-3d %-3d %-2d %-4d " 338 "%8pK %8pK %-3d %-3d %-2d %-4d "
345 "%4d %8d %8d %7d %5lu %-5d %5d ", 339 "%4d %8d %8d %7u %5lu %-5d %5d ",
346 assoc, sk, sctp_sk(sk)->type, sk->sk_state, 340 assoc, sk, sctp_sk(sk)->type, sk->sk_state,
347 assoc->state, hash, 341 assoc->state, hash,
348 assoc->assoc_id, 342 assoc->assoc_id,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4a17494d736c..5e17092f4ada 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -29,10 +29,7 @@
29 * 29 *
30 * Please send any bug reports or fixes you make to the 30 * Please send any bug reports or fixes you make to the
31 * email address(es): 31 * email address(es):
32 * lksctp developers <lksctp-developers@lists.sourceforge.net> 32 * lksctp developers <linux-sctp@vger.kernel.org>
33 *
34 * Or submit a bug report through the following website:
35 * http://www.sf.net/projects/lksctp
36 * 33 *
37 * Written or modified by: 34 * Written or modified by:
38 * La Monte H.P. Yarroll <piggy@acm.org> 35 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -41,9 +38,6 @@
41 * Sridhar Samudrala <sri@us.ibm.com> 38 * Sridhar Samudrala <sri@us.ibm.com>
42 * Daisy Chang <daisyc@us.ibm.com> 39 * Daisy Chang <daisyc@us.ibm.com>
43 * Ardelle Fan <ardelle.fan@intel.com> 40 * Ardelle Fan <ardelle.fan@intel.com>
44 *
45 * Any bugs reported given to us we will try to fix... any fixes shared will
46 * be incorporated into the next SCTP release.
47 */ 41 */
48 42
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1547,7 +1541,7 @@ module_exit(sctp_exit);
1547 */ 1541 */
1548MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132"); 1542MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132");
1549MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132"); 1543MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132");
1550MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>"); 1544MODULE_AUTHOR("Linux Kernel SCTP developers <linux-sctp@vger.kernel.org>");
1551MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); 1545MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)");
1552module_param_named(no_checksums, sctp_checksum_disable, bool, 0644); 1546module_param_named(no_checksums, sctp_checksum_disable, bool, 0644);
1553MODULE_PARM_DESC(no_checksums, "Disable checksums computing and verification"); 1547MODULE_PARM_DESC(no_checksums, "Disable checksums computing and verification");
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 362ae6e2fd93..d244a23ab8d3 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -29,10 +29,7 @@
29 * 29 *
30 * Please send any bug reports or fixes you make to the 30 * Please send any bug reports or fixes you make to the
31 * email address(es): 31 * email address(es):
32 * lksctp developers <lksctp-developers@lists.sourceforge.net> 32 * lksctp developers <linux-sctp@vger.kernel.org>
33 *
34 * Or submit a bug report through the following website:
35 * http://www.sf.net/projects/lksctp
36 * 33 *
37 * Written or modified by: 34 * Written or modified by:
38 * La Monte H.P. Yarroll <piggy@acm.org> 35 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -45,9 +42,6 @@
45 * Daisy Chang <daisyc@us.ibm.com> 42 * Daisy Chang <daisyc@us.ibm.com>
46 * Ardelle Fan <ardelle.fan@intel.com> 43 * Ardelle Fan <ardelle.fan@intel.com>
47 * Kevin Gao <kevin.gao@intel.com> 44 * Kevin Gao <kevin.gao@intel.com>
48 *
49 * Any bugs reported given to us we will try to fix... any fixes shared will
50 * be incorporated into the next SCTP release.
51 */ 45 */
52 46
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 47#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -68,8 +62,12 @@
68#include <net/sctp/sctp.h> 62#include <net/sctp/sctp.h>
69#include <net/sctp/sm.h> 63#include <net/sctp/sm.h>
70 64
71static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, 65static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
72 __u8 type, __u8 flags, int paylen); 66 __u8 type, __u8 flags, int paylen);
67static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
68 __u8 flags, int paylen);
69static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
70 __u8 type, __u8 flags, int paylen);
73static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, 71static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
74 const struct sctp_association *asoc, 72 const struct sctp_association *asoc,
75 const struct sctp_chunk *init_chunk, 73 const struct sctp_chunk *init_chunk,
@@ -82,6 +80,28 @@ static int sctp_process_param(struct sctp_association *asoc,
82static void *sctp_addto_param(struct sctp_chunk *chunk, int len, 80static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
83 const void *data); 81 const void *data);
84 82
83/* Control chunk destructor */
84static void sctp_control_release_owner(struct sk_buff *skb)
85{
86 /*TODO: do memory release */
87}
88
89static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
90{
91 struct sctp_association *asoc = chunk->asoc;
92 struct sk_buff *skb = chunk->skb;
93
94 /* TODO: properly account for control chunks.
95 * To do it right we'll need:
96 * 1) endpoint if association isn't known.
97 * 2) proper memory accounting.
98 *
99 * For now don't do anything for now.
100 */
101 skb->sk = asoc ? asoc->base.sk : NULL;
102 skb->destructor = sctp_control_release_owner;
103}
104
85/* What was the inbound interface for this chunk? */ 105/* What was the inbound interface for this chunk? */
86int sctp_chunk_iif(const struct sctp_chunk *chunk) 106int sctp_chunk_iif(const struct sctp_chunk *chunk)
87{ 107{
@@ -296,7 +316,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
296 * PLEASE DO NOT FIXME [This version does not support Host Name.] 316 * PLEASE DO NOT FIXME [This version does not support Host Name.]
297 */ 317 */
298 318
299 retval = sctp_make_chunk(asoc, SCTP_CID_INIT, 0, chunksize); 319 retval = sctp_make_control(asoc, SCTP_CID_INIT, 0, chunksize);
300 if (!retval) 320 if (!retval)
301 goto nodata; 321 goto nodata;
302 322
@@ -443,7 +463,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
443 num_ext); 463 num_ext);
444 464
445 /* Now allocate and fill out the chunk. */ 465 /* Now allocate and fill out the chunk. */
446 retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize); 466 retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize);
447 if (!retval) 467 if (!retval)
448 goto nomem_chunk; 468 goto nomem_chunk;
449 469
@@ -548,7 +568,7 @@ struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
548 cookie_len = asoc->peer.cookie_len; 568 cookie_len = asoc->peer.cookie_len;
549 569
550 /* Build a cookie echo chunk. */ 570 /* Build a cookie echo chunk. */
551 retval = sctp_make_chunk(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len); 571 retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len);
552 if (!retval) 572 if (!retval)
553 goto nodata; 573 goto nodata;
554 retval->subh.cookie_hdr = 574 retval->subh.cookie_hdr =
@@ -593,7 +613,7 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
593{ 613{
594 struct sctp_chunk *retval; 614 struct sctp_chunk *retval;
595 615
596 retval = sctp_make_chunk(asoc, SCTP_CID_COOKIE_ACK, 0, 0); 616 retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ACK, 0, 0);
597 617
598 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 618 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
599 * 619 *
@@ -641,8 +661,8 @@ struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
641 sctp_cwrhdr_t cwr; 661 sctp_cwrhdr_t cwr;
642 662
643 cwr.lowest_tsn = htonl(lowest_tsn); 663 cwr.lowest_tsn = htonl(lowest_tsn);
644 retval = sctp_make_chunk(asoc, SCTP_CID_ECN_CWR, 0, 664 retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0,
645 sizeof(sctp_cwrhdr_t)); 665 sizeof(sctp_cwrhdr_t));
646 666
647 if (!retval) 667 if (!retval)
648 goto nodata; 668 goto nodata;
@@ -675,8 +695,8 @@ struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
675 sctp_ecnehdr_t ecne; 695 sctp_ecnehdr_t ecne;
676 696
677 ecne.lowest_tsn = htonl(lowest_tsn); 697 ecne.lowest_tsn = htonl(lowest_tsn);
678 retval = sctp_make_chunk(asoc, SCTP_CID_ECN_ECNE, 0, 698 retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0,
679 sizeof(sctp_ecnehdr_t)); 699 sizeof(sctp_ecnehdr_t));
680 if (!retval) 700 if (!retval)
681 goto nodata; 701 goto nodata;
682 retval->subh.ecne_hdr = 702 retval->subh.ecne_hdr =
@@ -712,7 +732,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
712 dp.ssn = htons(ssn); 732 dp.ssn = htons(ssn);
713 733
714 chunk_len = sizeof(dp) + data_len; 734 chunk_len = sizeof(dp) + data_len;
715 retval = sctp_make_chunk(asoc, SCTP_CID_DATA, flags, chunk_len); 735 retval = sctp_make_data(asoc, flags, chunk_len);
716 if (!retval) 736 if (!retval)
717 goto nodata; 737 goto nodata;
718 738
@@ -759,7 +779,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
759 + sizeof(__u32) * num_dup_tsns; 779 + sizeof(__u32) * num_dup_tsns;
760 780
761 /* Create the chunk. */ 781 /* Create the chunk. */
762 retval = sctp_make_chunk(asoc, SCTP_CID_SACK, 0, len); 782 retval = sctp_make_control(asoc, SCTP_CID_SACK, 0, len);
763 if (!retval) 783 if (!retval)
764 goto nodata; 784 goto nodata;
765 785
@@ -838,8 +858,8 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
838 ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); 858 ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
839 shut.cum_tsn_ack = htonl(ctsn); 859 shut.cum_tsn_ack = htonl(ctsn);
840 860
841 retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN, 0, 861 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
842 sizeof(sctp_shutdownhdr_t)); 862 sizeof(sctp_shutdownhdr_t));
843 if (!retval) 863 if (!retval)
844 goto nodata; 864 goto nodata;
845 865
@@ -857,7 +877,7 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
857{ 877{
858 struct sctp_chunk *retval; 878 struct sctp_chunk *retval;
859 879
860 retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0); 880 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0);
861 881
862 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 882 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
863 * 883 *
@@ -886,7 +906,7 @@ struct sctp_chunk *sctp_make_shutdown_complete(
886 */ 906 */
887 flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T; 907 flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T;
888 908
889 retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0); 909 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0);
890 910
891 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 911 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
892 * 912 *
@@ -925,7 +945,7 @@ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
925 flags = SCTP_CHUNK_FLAG_T; 945 flags = SCTP_CHUNK_FLAG_T;
926 } 946 }
927 947
928 retval = sctp_make_chunk(asoc, SCTP_CID_ABORT, flags, hint); 948 retval = sctp_make_control(asoc, SCTP_CID_ABORT, flags, hint);
929 949
930 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 950 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
931 * 951 *
@@ -1117,7 +1137,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
1117 struct sctp_chunk *retval; 1137 struct sctp_chunk *retval;
1118 sctp_sender_hb_info_t hbinfo; 1138 sctp_sender_hb_info_t hbinfo;
1119 1139
1120 retval = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo)); 1140 retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo));
1121 1141
1122 if (!retval) 1142 if (!retval)
1123 goto nodata; 1143 goto nodata;
@@ -1145,7 +1165,7 @@ struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
1145{ 1165{
1146 struct sctp_chunk *retval; 1166 struct sctp_chunk *retval;
1147 1167
1148 retval = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen); 1168 retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen);
1149 if (!retval) 1169 if (!retval)
1150 goto nodata; 1170 goto nodata;
1151 1171
@@ -1177,8 +1197,8 @@ static struct sctp_chunk *sctp_make_op_error_space(
1177{ 1197{
1178 struct sctp_chunk *retval; 1198 struct sctp_chunk *retval;
1179 1199
1180 retval = sctp_make_chunk(asoc, SCTP_CID_ERROR, 0, 1200 retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
1181 sizeof(sctp_errhdr_t) + size); 1201 sizeof(sctp_errhdr_t) + size);
1182 if (!retval) 1202 if (!retval)
1183 goto nodata; 1203 goto nodata;
1184 1204
@@ -1248,7 +1268,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
1248 if (unlikely(!hmac_desc)) 1268 if (unlikely(!hmac_desc))
1249 return NULL; 1269 return NULL;
1250 1270
1251 retval = sctp_make_chunk(asoc, SCTP_CID_AUTH, 0, 1271 retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0,
1252 hmac_desc->hmac_len + sizeof(sctp_authhdr_t)); 1272 hmac_desc->hmac_len + sizeof(sctp_authhdr_t));
1253 if (!retval) 1273 if (!retval)
1254 return NULL; 1274 return NULL;
@@ -1351,8 +1371,8 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk)
1351/* Create a new chunk, setting the type and flags headers from the 1371/* Create a new chunk, setting the type and flags headers from the
1352 * arguments, reserving enough space for a 'paylen' byte payload. 1372 * arguments, reserving enough space for a 'paylen' byte payload.
1353 */ 1373 */
1354static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, 1374static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
1355 __u8 type, __u8 flags, int paylen) 1375 __u8 type, __u8 flags, int paylen)
1356{ 1376{
1357 struct sctp_chunk *retval; 1377 struct sctp_chunk *retval;
1358 sctp_chunkhdr_t *chunk_hdr; 1378 sctp_chunkhdr_t *chunk_hdr;
@@ -1385,14 +1405,27 @@ static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
1385 if (sctp_auth_send_cid(type, asoc)) 1405 if (sctp_auth_send_cid(type, asoc))
1386 retval->auth = 1; 1406 retval->auth = 1;
1387 1407
1388 /* Set the skb to the belonging sock for accounting. */
1389 skb->sk = sk;
1390
1391 return retval; 1408 return retval;
1392nodata: 1409nodata:
1393 return NULL; 1410 return NULL;
1394} 1411}
1395 1412
1413static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
1414 __u8 flags, int paylen)
1415{
1416 return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen);
1417}
1418
1419static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
1420 __u8 type, __u8 flags, int paylen)
1421{
1422 struct sctp_chunk *chunk = _sctp_make_chunk(asoc, type, flags, paylen);
1423
1424 if (chunk)
1425 sctp_control_set_owner_w(chunk);
1426
1427 return chunk;
1428}
1396 1429
1397/* Release the memory occupied by a chunk. */ 1430/* Release the memory occupied by a chunk. */
1398static void sctp_chunk_destroy(struct sctp_chunk *chunk) 1431static void sctp_chunk_destroy(struct sctp_chunk *chunk)
@@ -2207,25 +2240,23 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
2207 struct sctp_chunk **errp) 2240 struct sctp_chunk **errp)
2208{ 2241{
2209 union sctp_params param; 2242 union sctp_params param;
2210 int has_cookie = 0; 2243 bool has_cookie = false;
2211 int result; 2244 int result;
2212 2245
2213 /* Verify stream values are non-zero. */ 2246 /* Check for missing mandatory parameters. Note: Initial TSN is
2214 if ((0 == peer_init->init_hdr.num_outbound_streams) || 2247 * also mandatory, but is not checked here since the valid range
2215 (0 == peer_init->init_hdr.num_inbound_streams) || 2248 * is 0..2**32-1. RFC4960, section 3.3.3.
2216 (0 == peer_init->init_hdr.init_tag) || 2249 */
2217 (SCTP_DEFAULT_MINWINDOW > ntohl(peer_init->init_hdr.a_rwnd))) { 2250 if (peer_init->init_hdr.num_outbound_streams == 0 ||
2218 2251 peer_init->init_hdr.num_inbound_streams == 0 ||
2252 peer_init->init_hdr.init_tag == 0 ||
2253 ntohl(peer_init->init_hdr.a_rwnd) < SCTP_DEFAULT_MINWINDOW)
2219 return sctp_process_inv_mandatory(asoc, chunk, errp); 2254 return sctp_process_inv_mandatory(asoc, chunk, errp);
2220 }
2221 2255
2222 /* Check for missing mandatory parameters. */
2223 sctp_walk_params(param, peer_init, init_hdr.params) { 2256 sctp_walk_params(param, peer_init, init_hdr.params) {
2224 2257 if (param.p->type == SCTP_PARAM_STATE_COOKIE)
2225 if (SCTP_PARAM_STATE_COOKIE == param.p->type) 2258 has_cookie = true;
2226 has_cookie = 1; 2259 }
2227
2228 } /* for (loop through all parameters) */
2229 2260
2230 /* There is a possibility that a parameter length was bad and 2261 /* There is a possibility that a parameter length was bad and
2231 * in that case we would have stoped walking the parameters. 2262 * in that case we would have stoped walking the parameters.
@@ -2733,7 +2764,7 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
2733 length += addrlen; 2764 length += addrlen;
2734 2765
2735 /* Create the chunk. */ 2766 /* Create the chunk. */
2736 retval = sctp_make_chunk(asoc, SCTP_CID_ASCONF, 0, length); 2767 retval = sctp_make_control(asoc, SCTP_CID_ASCONF, 0, length);
2737 if (!retval) 2768 if (!retval)
2738 return NULL; 2769 return NULL;
2739 2770
@@ -2917,7 +2948,7 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as
2917 int length = sizeof(asconf) + vparam_len; 2948 int length = sizeof(asconf) + vparam_len;
2918 2949
2919 /* Create the chunk. */ 2950 /* Create the chunk. */
2920 retval = sctp_make_chunk(asoc, SCTP_CID_ASCONF_ACK, 0, length); 2951 retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length);
2921 if (!retval) 2952 if (!retval)
2922 return NULL; 2953 return NULL;
2923 2954
@@ -3448,7 +3479,7 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
3448 3479
3449 hint = (nstreams + 1) * sizeof(__u32); 3480 hint = (nstreams + 1) * sizeof(__u32);
3450 3481
3451 retval = sctp_make_chunk(asoc, SCTP_CID_FWD_TSN, 0, hint); 3482 retval = sctp_make_control(asoc, SCTP_CID_FWD_TSN, 0, hint);
3452 3483
3453 if (!retval) 3484 if (!retval)
3454 return NULL; 3485 return NULL;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 9da68852ee94..666c66842799 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -28,10 +28,7 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -42,9 +39,6 @@
42 * Daisy Chang <daisyc@us.ibm.com> 39 * Daisy Chang <daisyc@us.ibm.com>
43 * Sridhar Samudrala <sri@us.ibm.com> 40 * Sridhar Samudrala <sri@us.ibm.com>
44 * Ardelle Fan <ardelle.fan@intel.com> 41 * Ardelle Fan <ardelle.fan@intel.com>
45 *
46 * Any bugs reported given to us we will try to fix... any fixes shared will
47 * be incorporated into the next SCTP release.
48 */ 42 */
49 43
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f6b7109195a6..dfe3f36ff2aa 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -28,10 +28,7 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -45,9 +42,6 @@
45 * Ardelle Fan <ardelle.fan@intel.com> 42 * Ardelle Fan <ardelle.fan@intel.com>
46 * Ryan Layer <rmlayer@us.ibm.com> 43 * Ryan Layer <rmlayer@us.ibm.com>
47 * Kevin Gao <kevin.gao@intel.com> 44 * Kevin Gao <kevin.gao@intel.com>
48 *
49 * Any bugs reported given to us we will try to fix... any fixes shared will
50 * be incorporated into the next SCTP release.
51 */ 45 */
52 46
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 47#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 84d98d8a5a74..c5999b2dde7d 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -28,10 +28,7 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -41,9 +38,6 @@
41 * Daisy Chang <daisyc@us.ibm.com> 38 * Daisy Chang <daisyc@us.ibm.com>
42 * Ardelle Fan <ardelle.fan@intel.com> 39 * Ardelle Fan <ardelle.fan@intel.com>
43 * Sridhar Samudrala <sri@us.ibm.com> 40 * Sridhar Samudrala <sri@us.ibm.com>
44 *
45 * Any bugs reported given to us we will try to fix... any fixes shared will
46 * be incorporated into the next SCTP release.
47 */ 41 */
48 42
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index c6670d2e3f8d..911b71b26b0e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -34,10 +34,7 @@
34 * 34 *
35 * Please send any bug reports or fixes you make to the 35 * Please send any bug reports or fixes you make to the
36 * email address(es): 36 * email address(es):
37 * lksctp developers <lksctp-developers@lists.sourceforge.net> 37 * lksctp developers <linux-sctp@vger.kernel.org>
38 *
39 * Or submit a bug report through the following website:
40 * http://www.sf.net/projects/lksctp
41 * 38 *
42 * Written or modified by: 39 * Written or modified by:
43 * La Monte H.P. Yarroll <piggy@acm.org> 40 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -52,9 +49,6 @@
52 * Ryan Layer <rmlayer@us.ibm.com> 49 * Ryan Layer <rmlayer@us.ibm.com>
53 * Anup Pemmaiah <pemmaiah@cc.usu.edu> 50 * Anup Pemmaiah <pemmaiah@cc.usu.edu>
54 * Kevin Gao <kevin.gao@intel.com> 51 * Kevin Gao <kevin.gao@intel.com>
55 *
56 * Any bugs reported given to us we will try to fix... any fixes shared will
57 * be incorporated into the next SCTP release.
58 */ 52 */
59 53
60#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 54#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -812,6 +806,9 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
812 goto skip_mkasconf; 806 goto skip_mkasconf;
813 } 807 }
814 808
809 if (laddr == NULL)
810 return -EINVAL;
811
815 /* We do not need RCU protection throughout this loop 812 /* We do not need RCU protection throughout this loop
816 * because this is done under a socket lock from the 813 * because this is done under a socket lock from the
817 * setsockopt call. 814 * setsockopt call.
@@ -6182,7 +6179,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
6182 /* Is there any exceptional events? */ 6179 /* Is there any exceptional events? */
6183 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 6180 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
6184 mask |= POLLERR | 6181 mask |= POLLERR |
6185 sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; 6182 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
6186 if (sk->sk_shutdown & RCV_SHUTDOWN) 6183 if (sk->sk_shutdown & RCV_SHUTDOWN)
6187 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 6184 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
6188 if (sk->sk_shutdown == SHUTDOWN_MASK) 6185 if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c
index da8603523808..6007124aefa0 100644
--- a/net/sctp/ssnmap.c
+++ b/net/sctp/ssnmap.c
@@ -24,16 +24,10 @@
24 * 24 *
25 * Please send any bug reports or fixes you make to the 25 * Please send any bug reports or fixes you make to the
26 * email address(es): 26 * email address(es):
27 * lksctp developers <lksctp-developers@lists.sourceforge.net> 27 * lksctp developers <linux-sctp@vger.kernel.org>
28 *
29 * Or submit a bug report through the following website:
30 * http://www.sf.net/projects/lksctp
31 * 28 *
32 * Written or modified by: 29 * Written or modified by:
33 * Jon Grimm <jgrimm@us.ibm.com> 30 * Jon Grimm <jgrimm@us.ibm.com>
34 *
35 * Any bugs reported given to us we will try to fix... any fixes shared will
36 * be incorporated into the next SCTP release.
37 */ 31 */
38 32
39#include <linux/types.h> 33#include <linux/types.h>
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 9a5c4c9eddaf..6b36561a1b3b 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -25,10 +25,7 @@
25 * 25 *
26 * Please send any bug reports or fixes you make to the 26 * Please send any bug reports or fixes you make to the
27 * email address(es): 27 * email address(es):
28 * lksctp developers <lksctp-developers@lists.sourceforge.net> 28 * lksctp developers <linux-sctp@vger.kernel.org>
29 *
30 * Or submit a bug report through the following website:
31 * http://www.sf.net/projects/lksctp
32 * 29 *
33 * Written or modified by: 30 * Written or modified by:
34 * Mingqin Liu <liuming@us.ibm.com> 31 * Mingqin Liu <liuming@us.ibm.com>
@@ -36,9 +33,6 @@
36 * Ardelle Fan <ardelle.fan@intel.com> 33 * Ardelle Fan <ardelle.fan@intel.com>
37 * Ryan Layer <rmlayer@us.ibm.com> 34 * Ryan Layer <rmlayer@us.ibm.com>
38 * Sridhar Samudrala <sri@us.ibm.com> 35 * Sridhar Samudrala <sri@us.ibm.com>
39 *
40 * Any bugs reported given to us we will try to fix... any fixes shared will
41 * be incorporated into the next SCTP release.
42 */ 36 */
43 37
44#include <net/sctp/structs.h> 38#include <net/sctp/structs.h>
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 8fdd16046d66..e332efb124cc 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -30,10 +30,7 @@
30 * 30 *
31 * Please send any bug reports or fixes you make to the 31 * Please send any bug reports or fixes you make to the
32 * email address(es): 32 * email address(es):
33 * lksctp developers <lksctp-developers@lists.sourceforge.net> 33 * lksctp developers <linux-sctp@vger.kernel.org>
34 *
35 * Or submit a bug report through the following website:
36 * http://www.sf.net/projects/lksctp
37 * 34 *
38 * Written or modified by: 35 * Written or modified by:
39 * La Monte H.P. Yarroll <piggy@acm.org> 36 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -43,9 +40,6 @@
43 * Hui Huang <hui.huang@nokia.com> 40 * Hui Huang <hui.huang@nokia.com>
44 * Sridhar Samudrala <sri@us.ibm.com> 41 * Sridhar Samudrala <sri@us.ibm.com>
45 * Ardelle Fan <ardelle.fan@intel.com> 42 * Ardelle Fan <ardelle.fan@intel.com>
46 *
47 * Any bugs reported given to us we will try to fix... any fixes shared will
48 * be incorporated into the next SCTP release.
49 */ 43 */
50 44
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 45#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index b46019568a86..fbda20028285 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -27,19 +27,13 @@
27 * 27 *
28 * Please send any bug reports or fixes you make to the 28 * Please send any bug reports or fixes you make to the
29 * email address(es): 29 * email address(es):
30 * lksctp developers <lksctp-developers@lists.sourceforge.net> 30 * lksctp developers <linux-sctp@vger.kernel.org>
31 *
32 * Or submit a bug report through the following website:
33 * http://www.sf.net/projects/lksctp
34 * 31 *
35 * Written or modified by: 32 * Written or modified by:
36 * La Monte H.P. Yarroll <piggy@acm.org> 33 * La Monte H.P. Yarroll <piggy@acm.org>
37 * Jon Grimm <jgrimm@us.ibm.com> 34 * Jon Grimm <jgrimm@us.ibm.com>
38 * Karl Knutson <karl@athena.chicago.il.us> 35 * Karl Knutson <karl@athena.chicago.il.us>
39 * Sridhar Samudrala <sri@us.ibm.com> 36 * Sridhar Samudrala <sri@us.ibm.com>
40 *
41 * Any bugs reported given to us we will try to fix... any fixes shared will
42 * be incorporated into the next SCTP release.
43 */ 37 */
44 38
45#include <linux/slab.h> 39#include <linux/slab.h>
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 44a45dbee4df..81089ed65456 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -28,19 +28,13 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * Jon Grimm <jgrimm@us.ibm.com> 34 * Jon Grimm <jgrimm@us.ibm.com>
38 * La Monte H.P. Yarroll <piggy@acm.org> 35 * La Monte H.P. Yarroll <piggy@acm.org>
39 * Ardelle Fan <ardelle.fan@intel.com> 36 * Ardelle Fan <ardelle.fan@intel.com>
40 * Sridhar Samudrala <sri@us.ibm.com> 37 * Sridhar Samudrala <sri@us.ibm.com>
41 *
42 * Any bugs reported given to us we will try to fix... any fixes shared will
43 * be incorporated into the next SCTP release.
44 */ 38 */
45 39
46#include <linux/slab.h> 40#include <linux/slab.h>
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 04e3d470f877..1c1484ed605d 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -27,18 +27,12 @@
27 * 27 *
28 * Please send any bug reports or fixes you make to the 28 * Please send any bug reports or fixes you make to the
29 * email address(es): 29 * email address(es):
30 * lksctp developers <lksctp-developers@lists.sourceforge.net> 30 * lksctp developers <linux-sctp@vger.kernel.org>
31 *
32 * Or submit a bug report through the following website:
33 * http://www.sf.net/projects/lksctp
34 * 31 *
35 * Written or modified by: 32 * Written or modified by:
36 * Jon Grimm <jgrimm@us.ibm.com> 33 * Jon Grimm <jgrimm@us.ibm.com>
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
38 * Sridhar Samudrala <sri@us.ibm.com> 35 * Sridhar Samudrala <sri@us.ibm.com>
39 *
40 * Any bugs reported given to us we will try to fix... any fixes shared will
41 * be incorporated into the next SCTP release.
42 */ 36 */
43 37
44#include <linux/slab.h> 38#include <linux/slab.h>
diff --git a/net/socket.c b/net/socket.c
index b2d7c629eeb9..ebed4b68f768 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -854,11 +854,6 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
854} 854}
855EXPORT_SYMBOL(kernel_recvmsg); 855EXPORT_SYMBOL(kernel_recvmsg);
856 856
857static void sock_aio_dtor(struct kiocb *iocb)
858{
859 kfree(iocb->private);
860}
861
862static ssize_t sock_sendpage(struct file *file, struct page *page, 857static ssize_t sock_sendpage(struct file *file, struct page *page,
863 int offset, size_t size, loff_t *ppos, int more) 858 int offset, size_t size, loff_t *ppos, int more)
864{ 859{
@@ -889,12 +884,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
889static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 884static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
890 struct sock_iocb *siocb) 885 struct sock_iocb *siocb)
891{ 886{
892 if (!is_sync_kiocb(iocb)) { 887 if (!is_sync_kiocb(iocb))
893 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); 888 BUG();
894 if (!siocb)
895 return NULL;
896 iocb->ki_dtor = sock_aio_dtor;
897 }
898 889
899 siocb->kiocb = iocb; 890 siocb->kiocb = iocb;
900 iocb->private = siocb; 891 iocb->private = siocb;
@@ -931,7 +922,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
931 if (pos != 0) 922 if (pos != 0)
932 return -ESPIPE; 923 return -ESPIPE;
933 924
934 if (iocb->ki_left == 0) /* Match SYS5 behaviour */ 925 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
935 return 0; 926 return 0;
936 927
937 928
@@ -3072,12 +3063,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3072 3063
3073 uifmap32 = &uifr32->ifr_ifru.ifru_map; 3064 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3074 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); 3065 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3075 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 3066 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3076 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 3067 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3077 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 3068 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3078 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); 3069 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3079 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); 3070 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3080 err |= __get_user(ifr.ifr_map.port, &uifmap32->port); 3071 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
3081 if (err) 3072 if (err)
3082 return -EFAULT; 3073 return -EFAULT;
3083 3074
@@ -3088,12 +3079,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3088 3079
3089 if (cmd == SIOCGIFMAP && !err) { 3080 if (cmd == SIOCGIFMAP && !err) {
3090 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); 3081 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3091 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 3082 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3092 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 3083 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3093 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 3084 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3094 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); 3085 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3095 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); 3086 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3096 err |= __put_user(ifr.ifr_map.port, &uifmap32->port); 3087 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
3097 if (err) 3088 if (err)
3098 err = -EFAULT; 3089 err = -EFAULT;
3099 } 3090 }
@@ -3167,25 +3158,25 @@ static int routing_ioctl(struct net *net, struct socket *sock,
3167 struct in6_rtmsg32 __user *ur6 = argp; 3158 struct in6_rtmsg32 __user *ur6 = argp;
3168 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3159 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
3169 3 * sizeof(struct in6_addr)); 3160 3 * sizeof(struct in6_addr));
3170 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); 3161 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3171 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); 3162 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3172 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); 3163 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3173 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); 3164 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3174 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); 3165 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3175 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); 3166 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3176 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); 3167 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
3177 3168
3178 r = (void *) &r6; 3169 r = (void *) &r6;
3179 } else { /* ipv4 */ 3170 } else { /* ipv4 */
3180 struct rtentry32 __user *ur4 = argp; 3171 struct rtentry32 __user *ur4 = argp;
3181 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3172 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
3182 3 * sizeof(struct sockaddr)); 3173 3 * sizeof(struct sockaddr));
3183 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); 3174 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3184 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); 3175 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3185 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); 3176 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3186 ret |= __get_user(r4.rt_window, &(ur4->rt_window)); 3177 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3187 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); 3178 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3188 ret |= __get_user(rtdev, &(ur4->rt_dev)); 3179 ret |= get_user(rtdev, &(ur4->rt_dev));
3189 if (rtdev) { 3180 if (rtdev) {
3190 ret |= copy_from_user(devname, compat_ptr(rtdev), 15); 3181 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
3191 r4.rt_dev = (char __user __force *)devname; 3182 r4.rt_dev = (char __user __force *)devname;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index ed2fdd210c0b..5285ead196c0 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -250,11 +250,11 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
250EXPORT_SYMBOL_GPL(rpcauth_list_flavors); 250EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
251 251
252struct rpc_auth * 252struct rpc_auth *
253rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) 253rpcauth_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
254{ 254{
255 struct rpc_auth *auth; 255 struct rpc_auth *auth;
256 const struct rpc_authops *ops; 256 const struct rpc_authops *ops;
257 u32 flavor = pseudoflavor_to_flavor(pseudoflavor); 257 u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
258 258
259 auth = ERR_PTR(-EINVAL); 259 auth = ERR_PTR(-EINVAL);
260 if (flavor >= RPC_AUTH_MAXFLAVOR) 260 if (flavor >= RPC_AUTH_MAXFLAVOR)
@@ -269,7 +269,7 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
269 goto out; 269 goto out;
270 } 270 }
271 spin_unlock(&rpc_authflavor_lock); 271 spin_unlock(&rpc_authflavor_lock);
272 auth = ops->create(clnt, pseudoflavor); 272 auth = ops->create(args, clnt);
273 module_put(ops->owner); 273 module_put(ops->owner);
274 if (IS_ERR(auth)) 274 if (IS_ERR(auth))
275 return auth; 275 return auth;
@@ -343,6 +343,27 @@ out_nocache:
343EXPORT_SYMBOL_GPL(rpcauth_init_credcache); 343EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
344 344
345/* 345/*
346 * Setup a credential key lifetime timeout notification
347 */
348int
349rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
350{
351 if (!cred->cr_auth->au_ops->key_timeout)
352 return 0;
353 return cred->cr_auth->au_ops->key_timeout(auth, cred);
354}
355EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
356
357bool
358rpcauth_cred_key_to_expire(struct rpc_cred *cred)
359{
360 if (!cred->cr_ops->crkey_to_expire)
361 return false;
362 return cred->cr_ops->crkey_to_expire(cred);
363}
364EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
365
366/*
346 * Destroy a list of credentials 367 * Destroy a list of credentials
347 */ 368 */
348static inline 369static inline
@@ -413,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
413/* 434/*
414 * Remove stale credentials. Avoid sleeping inside the loop. 435 * Remove stale credentials. Avoid sleeping inside the loop.
415 */ 436 */
416static int 437static long
417rpcauth_prune_expired(struct list_head *free, int nr_to_scan) 438rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
418{ 439{
419 spinlock_t *cache_lock; 440 spinlock_t *cache_lock;
420 struct rpc_cred *cred, *next; 441 struct rpc_cred *cred, *next;
421 unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; 442 unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
443 long freed = 0;
422 444
423 list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { 445 list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
424 446
@@ -430,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
430 */ 452 */
431 if (time_in_range(cred->cr_expire, expired, jiffies) && 453 if (time_in_range(cred->cr_expire, expired, jiffies) &&
432 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) 454 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
433 return 0; 455 break;
434 456
435 list_del_init(&cred->cr_lru); 457 list_del_init(&cred->cr_lru);
436 number_cred_unused--; 458 number_cred_unused--;
459 freed++;
437 if (atomic_read(&cred->cr_count) != 0) 460 if (atomic_read(&cred->cr_count) != 0)
438 continue; 461 continue;
439 462
@@ -446,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
446 } 469 }
447 spin_unlock(cache_lock); 470 spin_unlock(cache_lock);
448 } 471 }
449 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; 472 return freed;
450} 473}
451 474
452/* 475/*
453 * Run memory cache shrinker. 476 * Run memory cache shrinker.
454 */ 477 */
455static int 478static unsigned long
456rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) 479rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
480
457{ 481{
458 LIST_HEAD(free); 482 LIST_HEAD(free);
459 int res; 483 unsigned long freed;
460 int nr_to_scan = sc->nr_to_scan;
461 gfp_t gfp_mask = sc->gfp_mask;
462 484
463 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 485 if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
464 return (nr_to_scan == 0) ? 0 : -1; 486 return SHRINK_STOP;
487
488 /* nothing left, don't come back */
465 if (list_empty(&cred_unused)) 489 if (list_empty(&cred_unused))
466 return 0; 490 return SHRINK_STOP;
491
467 spin_lock(&rpc_credcache_lock); 492 spin_lock(&rpc_credcache_lock);
468 res = rpcauth_prune_expired(&free, nr_to_scan); 493 freed = rpcauth_prune_expired(&free, sc->nr_to_scan);
469 spin_unlock(&rpc_credcache_lock); 494 spin_unlock(&rpc_credcache_lock);
470 rpcauth_destroy_credlist(&free); 495 rpcauth_destroy_credlist(&free);
471 return res; 496
497 return freed;
498}
499
500static unsigned long
501rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
502
503{
504 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
472} 505}
473 506
474/* 507/*
@@ -784,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task)
784} 817}
785 818
786static struct shrinker rpc_cred_shrinker = { 819static struct shrinker rpc_cred_shrinker = {
787 .shrink = rpcauth_cache_shrinker, 820 .count_objects = rpcauth_cache_shrink_count,
821 .scan_objects = rpcauth_cache_shrink_scan,
788 .seeks = DEFAULT_SEEKS, 822 .seeks = DEFAULT_SEEKS,
789}; 823};
790 824
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index b6badafc6494..ed04869b2d4f 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -89,6 +89,7 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
89 gcred->acred.uid = acred->uid; 89 gcred->acred.uid = acred->uid;
90 gcred->acred.gid = acred->gid; 90 gcred->acred.gid = acred->gid;
91 gcred->acred.group_info = acred->group_info; 91 gcred->acred.group_info = acred->group_info;
92 gcred->acred.ac_flags = 0;
92 if (gcred->acred.group_info != NULL) 93 if (gcred->acred.group_info != NULL)
93 get_group_info(gcred->acred.group_info); 94 get_group_info(gcred->acred.group_info);
94 gcred->acred.machine_cred = acred->machine_cred; 95 gcred->acred.machine_cred = acred->machine_cred;
@@ -182,11 +183,78 @@ void rpc_destroy_generic_auth(void)
182 rpcauth_destroy_credcache(&generic_auth); 183 rpcauth_destroy_credcache(&generic_auth);
183} 184}
184 185
186/*
187 * Test the current time (now) against the underlying credential key expiry
188 * minus a timeout and setup notification.
189 *
190 * The normal case:
191 * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
192 * the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential
193 * rpc_credops crmatch routine to notify this generic cred when its key
194 * expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0.
195 *
196 * The error case:
197 * If the underlying cred lookup fails, return -EACCES.
198 *
199 * The 'almost' error case:
200 * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
201 * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_KEY_EXPIRE_SOON bit
202 * on the acred ac_flags and return 0.
203 */
204static int
205generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
206{
207 struct auth_cred *acred = &container_of(cred, struct generic_cred,
208 gc_base)->acred;
209 struct rpc_cred *tcred;
210 int ret = 0;
211
212
213 /* Fast track for non crkey_timeout (no key) underlying credentials */
214 if (test_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags))
215 return 0;
216
217 /* Fast track for the normal case */
218 if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
219 return 0;
220
221 /* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */
222 tcred = auth->au_ops->lookup_cred(auth, acred, 0);
223 if (IS_ERR(tcred))
224 return -EACCES;
225
226 if (!tcred->cr_ops->crkey_timeout) {
227 set_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags);
228 ret = 0;
229 goto out_put;
230 }
231
232 /* Test for the almost error case */
233 ret = tcred->cr_ops->crkey_timeout(tcred);
234 if (ret != 0) {
235 set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
236 ret = 0;
237 } else {
238 /* In case underlying cred key has been reset */
239 if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
240 &acred->ac_flags))
241 dprintk("RPC: UID %d Credential key reset\n",
242 from_kuid(&init_user_ns, tcred->cr_uid));
243 /* set up fasttrack for the normal case */
244 set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
245 }
246
247out_put:
248 put_rpccred(tcred);
249 return ret;
250}
251
185static const struct rpc_authops generic_auth_ops = { 252static const struct rpc_authops generic_auth_ops = {
186 .owner = THIS_MODULE, 253 .owner = THIS_MODULE,
187 .au_name = "Generic", 254 .au_name = "Generic",
188 .lookup_cred = generic_lookup_cred, 255 .lookup_cred = generic_lookup_cred,
189 .crcreate = generic_create_cred, 256 .crcreate = generic_create_cred,
257 .key_timeout = generic_key_timeout,
190}; 258};
191 259
192static struct rpc_auth generic_auth = { 260static struct rpc_auth generic_auth = {
@@ -194,9 +262,23 @@ static struct rpc_auth generic_auth = {
194 .au_count = ATOMIC_INIT(0), 262 .au_count = ATOMIC_INIT(0),
195}; 263};
196 264
265static bool generic_key_to_expire(struct rpc_cred *cred)
266{
267 struct auth_cred *acred = &container_of(cred, struct generic_cred,
268 gc_base)->acred;
269 bool ret;
270
271 get_rpccred(cred);
272 ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
273 put_rpccred(cred);
274
275 return ret;
276}
277
197static const struct rpc_credops generic_credops = { 278static const struct rpc_credops generic_credops = {
198 .cr_name = "Generic cred", 279 .cr_name = "Generic cred",
199 .crdestroy = generic_destroy_cred, 280 .crdestroy = generic_destroy_cred,
200 .crbind = generic_bind_cred, 281 .crbind = generic_bind_cred,
201 .crmatch = generic_match, 282 .crmatch = generic_match,
283 .crkey_to_expire = generic_key_to_expire,
202}; 284};
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index fc2f78d6a9b4..fcac5d141717 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -51,6 +51,7 @@
51#include <linux/sunrpc/rpc_pipe_fs.h> 51#include <linux/sunrpc/rpc_pipe_fs.h>
52#include <linux/sunrpc/gss_api.h> 52#include <linux/sunrpc/gss_api.h>
53#include <asm/uaccess.h> 53#include <asm/uaccess.h>
54#include <linux/hashtable.h>
54 55
55#include "../netns.h" 56#include "../netns.h"
56 57
@@ -62,6 +63,9 @@ static const struct rpc_credops gss_nullops;
62#define GSS_RETRY_EXPIRED 5 63#define GSS_RETRY_EXPIRED 5
63static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; 64static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
64 65
66#define GSS_KEY_EXPIRE_TIMEO 240
67static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO;
68
65#ifdef RPC_DEBUG 69#ifdef RPC_DEBUG
66# define RPCDBG_FACILITY RPCDBG_AUTH 70# define RPCDBG_FACILITY RPCDBG_AUTH
67#endif 71#endif
@@ -71,19 +75,33 @@ static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
71 * using integrity (two 4-byte integers): */ 75 * using integrity (two 4-byte integers): */
72#define GSS_VERF_SLACK 100 76#define GSS_VERF_SLACK 100
73 77
78static DEFINE_HASHTABLE(gss_auth_hash_table, 4);
79static DEFINE_SPINLOCK(gss_auth_hash_lock);
80
81struct gss_pipe {
82 struct rpc_pipe_dir_object pdo;
83 struct rpc_pipe *pipe;
84 struct rpc_clnt *clnt;
85 const char *name;
86 struct kref kref;
87};
88
74struct gss_auth { 89struct gss_auth {
75 struct kref kref; 90 struct kref kref;
91 struct hlist_node hash;
76 struct rpc_auth rpc_auth; 92 struct rpc_auth rpc_auth;
77 struct gss_api_mech *mech; 93 struct gss_api_mech *mech;
78 enum rpc_gss_svc service; 94 enum rpc_gss_svc service;
79 struct rpc_clnt *client; 95 struct rpc_clnt *client;
96 struct net *net;
80 /* 97 /*
81 * There are two upcall pipes; dentry[1], named "gssd", is used 98 * There are two upcall pipes; dentry[1], named "gssd", is used
82 * for the new text-based upcall; dentry[0] is named after the 99 * for the new text-based upcall; dentry[0] is named after the
83 * mechanism (for example, "krb5") and exists for 100 * mechanism (for example, "krb5") and exists for
84 * backwards-compatibility with older gssd's. 101 * backwards-compatibility with older gssd's.
85 */ 102 */
86 struct rpc_pipe *pipe[2]; 103 struct gss_pipe *gss_pipe[2];
104 const char *target_name;
87}; 105};
88 106
89/* pipe_version >= 0 if and only if someone has a pipe open. */ 107/* pipe_version >= 0 if and only if someone has a pipe open. */
@@ -294,7 +312,7 @@ static void put_pipe_version(struct net *net)
294static void 312static void
295gss_release_msg(struct gss_upcall_msg *gss_msg) 313gss_release_msg(struct gss_upcall_msg *gss_msg)
296{ 314{
297 struct net *net = rpc_net_ns(gss_msg->auth->client); 315 struct net *net = gss_msg->auth->net;
298 if (!atomic_dec_and_test(&gss_msg->count)) 316 if (!atomic_dec_and_test(&gss_msg->count))
299 return; 317 return;
300 put_pipe_version(net); 318 put_pipe_version(net);
@@ -406,8 +424,8 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
406} 424}
407 425
408static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, 426static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
409 struct rpc_clnt *clnt, 427 const char *service_name,
410 const char *service_name) 428 const char *target_name)
411{ 429{
412 struct gss_api_mech *mech = gss_msg->auth->mech; 430 struct gss_api_mech *mech = gss_msg->auth->mech;
413 char *p = gss_msg->databuf; 431 char *p = gss_msg->databuf;
@@ -417,8 +435,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
417 mech->gm_name, 435 mech->gm_name,
418 from_kuid(&init_user_ns, gss_msg->uid)); 436 from_kuid(&init_user_ns, gss_msg->uid));
419 p += gss_msg->msg.len; 437 p += gss_msg->msg.len;
420 if (clnt->cl_principal) { 438 if (target_name) {
421 len = sprintf(p, "target=%s ", clnt->cl_principal); 439 len = sprintf(p, "target=%s ", target_name);
422 p += len; 440 p += len;
423 gss_msg->msg.len += len; 441 gss_msg->msg.len += len;
424 } 442 }
@@ -439,21 +457,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
439 BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN); 457 BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN);
440} 458}
441 459
442static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
443 struct rpc_clnt *clnt,
444 const char *service_name)
445{
446 struct net *net = rpc_net_ns(clnt);
447 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
448
449 if (sn->pipe_version == 0)
450 gss_encode_v0_msg(gss_msg);
451 else /* pipe_version == 1 */
452 gss_encode_v1_msg(gss_msg, clnt, service_name);
453}
454
455static struct gss_upcall_msg * 460static struct gss_upcall_msg *
456gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, 461gss_alloc_msg(struct gss_auth *gss_auth,
457 kuid_t uid, const char *service_name) 462 kuid_t uid, const char *service_name)
458{ 463{
459 struct gss_upcall_msg *gss_msg; 464 struct gss_upcall_msg *gss_msg;
@@ -462,31 +467,36 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
462 gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); 467 gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
463 if (gss_msg == NULL) 468 if (gss_msg == NULL)
464 return ERR_PTR(-ENOMEM); 469 return ERR_PTR(-ENOMEM);
465 vers = get_pipe_version(rpc_net_ns(clnt)); 470 vers = get_pipe_version(gss_auth->net);
466 if (vers < 0) { 471 if (vers < 0) {
467 kfree(gss_msg); 472 kfree(gss_msg);
468 return ERR_PTR(vers); 473 return ERR_PTR(vers);
469 } 474 }
470 gss_msg->pipe = gss_auth->pipe[vers]; 475 gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe;
471 INIT_LIST_HEAD(&gss_msg->list); 476 INIT_LIST_HEAD(&gss_msg->list);
472 rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); 477 rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
473 init_waitqueue_head(&gss_msg->waitqueue); 478 init_waitqueue_head(&gss_msg->waitqueue);
474 atomic_set(&gss_msg->count, 1); 479 atomic_set(&gss_msg->count, 1);
475 gss_msg->uid = uid; 480 gss_msg->uid = uid;
476 gss_msg->auth = gss_auth; 481 gss_msg->auth = gss_auth;
477 gss_encode_msg(gss_msg, clnt, service_name); 482 switch (vers) {
483 case 0:
484 gss_encode_v0_msg(gss_msg);
485 default:
486 gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
487 };
478 return gss_msg; 488 return gss_msg;
479} 489}
480 490
481static struct gss_upcall_msg * 491static struct gss_upcall_msg *
482gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred) 492gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
483{ 493{
484 struct gss_cred *gss_cred = container_of(cred, 494 struct gss_cred *gss_cred = container_of(cred,
485 struct gss_cred, gc_base); 495 struct gss_cred, gc_base);
486 struct gss_upcall_msg *gss_new, *gss_msg; 496 struct gss_upcall_msg *gss_new, *gss_msg;
487 kuid_t uid = cred->cr_uid; 497 kuid_t uid = cred->cr_uid;
488 498
489 gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal); 499 gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal);
490 if (IS_ERR(gss_new)) 500 if (IS_ERR(gss_new))
491 return gss_new; 501 return gss_new;
492 gss_msg = gss_add_msg(gss_new); 502 gss_msg = gss_add_msg(gss_new);
@@ -527,7 +537,7 @@ gss_refresh_upcall(struct rpc_task *task)
527 537
528 dprintk("RPC: %5u %s for uid %u\n", 538 dprintk("RPC: %5u %s for uid %u\n",
529 task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid)); 539 task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid));
530 gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); 540 gss_msg = gss_setup_upcall(gss_auth, cred);
531 if (PTR_ERR(gss_msg) == -EAGAIN) { 541 if (PTR_ERR(gss_msg) == -EAGAIN) {
532 /* XXX: warning on the first, under the assumption we 542 /* XXX: warning on the first, under the assumption we
533 * shouldn't normally hit this case on a refresh. */ 543 * shouldn't normally hit this case on a refresh. */
@@ -566,7 +576,7 @@ out:
566static inline int 576static inline int
567gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) 577gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
568{ 578{
569 struct net *net = rpc_net_ns(gss_auth->client); 579 struct net *net = gss_auth->net;
570 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); 580 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
571 struct rpc_pipe *pipe; 581 struct rpc_pipe *pipe;
572 struct rpc_cred *cred = &gss_cred->gc_base; 582 struct rpc_cred *cred = &gss_cred->gc_base;
@@ -583,7 +593,7 @@ retry:
583 timeout = 15 * HZ; 593 timeout = 15 * HZ;
584 if (!sn->gssd_running) 594 if (!sn->gssd_running)
585 timeout = HZ >> 2; 595 timeout = HZ >> 2;
586 gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); 596 gss_msg = gss_setup_upcall(gss_auth, cred);
587 if (PTR_ERR(gss_msg) == -EAGAIN) { 597 if (PTR_ERR(gss_msg) == -EAGAIN) {
588 err = wait_event_interruptible_timeout(pipe_version_waitqueue, 598 err = wait_event_interruptible_timeout(pipe_version_waitqueue,
589 sn->pipe_version >= 0, timeout); 599 sn->pipe_version >= 0, timeout);
@@ -797,83 +807,153 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
797 } 807 }
798} 808}
799 809
800static void gss_pipes_dentries_destroy(struct rpc_auth *auth) 810static void gss_pipe_dentry_destroy(struct dentry *dir,
811 struct rpc_pipe_dir_object *pdo)
801{ 812{
802 struct gss_auth *gss_auth; 813 struct gss_pipe *gss_pipe = pdo->pdo_data;
814 struct rpc_pipe *pipe = gss_pipe->pipe;
803 815
804 gss_auth = container_of(auth, struct gss_auth, rpc_auth); 816 if (pipe->dentry != NULL) {
805 if (gss_auth->pipe[0]->dentry) 817 rpc_unlink(pipe->dentry);
806 rpc_unlink(gss_auth->pipe[0]->dentry); 818 pipe->dentry = NULL;
807 if (gss_auth->pipe[1]->dentry) 819 }
808 rpc_unlink(gss_auth->pipe[1]->dentry);
809} 820}
810 821
811static int gss_pipes_dentries_create(struct rpc_auth *auth) 822static int gss_pipe_dentry_create(struct dentry *dir,
823 struct rpc_pipe_dir_object *pdo)
812{ 824{
813 int err; 825 struct gss_pipe *p = pdo->pdo_data;
814 struct gss_auth *gss_auth; 826 struct dentry *dentry;
815 struct rpc_clnt *clnt;
816 827
817 gss_auth = container_of(auth, struct gss_auth, rpc_auth); 828 dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
818 clnt = gss_auth->client; 829 if (IS_ERR(dentry))
819 830 return PTR_ERR(dentry);
820 gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry, 831 p->pipe->dentry = dentry;
821 "gssd",
822 clnt, gss_auth->pipe[1]);
823 if (IS_ERR(gss_auth->pipe[1]->dentry))
824 return PTR_ERR(gss_auth->pipe[1]->dentry);
825 gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry,
826 gss_auth->mech->gm_name,
827 clnt, gss_auth->pipe[0]);
828 if (IS_ERR(gss_auth->pipe[0]->dentry)) {
829 err = PTR_ERR(gss_auth->pipe[0]->dentry);
830 goto err_unlink_pipe_1;
831 }
832 return 0; 832 return 0;
833}
833 834
834err_unlink_pipe_1: 835static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = {
835 rpc_unlink(gss_auth->pipe[1]->dentry); 836 .create = gss_pipe_dentry_create,
836 return err; 837 .destroy = gss_pipe_dentry_destroy,
838};
839
840static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt,
841 const char *name,
842 const struct rpc_pipe_ops *upcall_ops)
843{
844 struct gss_pipe *p;
845 int err = -ENOMEM;
846
847 p = kmalloc(sizeof(*p), GFP_KERNEL);
848 if (p == NULL)
849 goto err;
850 p->pipe = rpc_mkpipe_data(upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
851 if (IS_ERR(p->pipe)) {
852 err = PTR_ERR(p->pipe);
853 goto err_free_gss_pipe;
854 }
855 p->name = name;
856 p->clnt = clnt;
857 kref_init(&p->kref);
858 rpc_init_pipe_dir_object(&p->pdo,
859 &gss_pipe_dir_object_ops,
860 p);
861 return p;
862err_free_gss_pipe:
863 kfree(p);
864err:
865 return ERR_PTR(err);
866}
867
868struct gss_alloc_pdo {
869 struct rpc_clnt *clnt;
870 const char *name;
871 const struct rpc_pipe_ops *upcall_ops;
872};
873
874static int gss_pipe_match_pdo(struct rpc_pipe_dir_object *pdo, void *data)
875{
876 struct gss_pipe *gss_pipe;
877 struct gss_alloc_pdo *args = data;
878
879 if (pdo->pdo_ops != &gss_pipe_dir_object_ops)
880 return 0;
881 gss_pipe = container_of(pdo, struct gss_pipe, pdo);
882 if (strcmp(gss_pipe->name, args->name) != 0)
883 return 0;
884 if (!kref_get_unless_zero(&gss_pipe->kref))
885 return 0;
886 return 1;
837} 887}
838 888
839static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, 889static struct rpc_pipe_dir_object *gss_pipe_alloc_pdo(void *data)
840 struct rpc_auth *auth) 890{
891 struct gss_pipe *gss_pipe;
892 struct gss_alloc_pdo *args = data;
893
894 gss_pipe = gss_pipe_alloc(args->clnt, args->name, args->upcall_ops);
895 if (!IS_ERR(gss_pipe))
896 return &gss_pipe->pdo;
897 return NULL;
898}
899
900static struct gss_pipe *gss_pipe_get(struct rpc_clnt *clnt,
901 const char *name,
902 const struct rpc_pipe_ops *upcall_ops)
841{ 903{
842 struct net *net = rpc_net_ns(clnt); 904 struct net *net = rpc_net_ns(clnt);
843 struct super_block *sb; 905 struct rpc_pipe_dir_object *pdo;
906 struct gss_alloc_pdo args = {
907 .clnt = clnt,
908 .name = name,
909 .upcall_ops = upcall_ops,
910 };
844 911
845 sb = rpc_get_sb_net(net); 912 pdo = rpc_find_or_alloc_pipe_dir_object(net,
846 if (sb) { 913 &clnt->cl_pipedir_objects,
847 if (clnt->cl_dentry) 914 gss_pipe_match_pdo,
848 gss_pipes_dentries_destroy(auth); 915 gss_pipe_alloc_pdo,
849 rpc_put_sb_net(net); 916 &args);
850 } 917 if (pdo != NULL)
918 return container_of(pdo, struct gss_pipe, pdo);
919 return ERR_PTR(-ENOMEM);
851} 920}
852 921
853static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt, 922static void __gss_pipe_free(struct gss_pipe *p)
854 struct rpc_auth *auth)
855{ 923{
924 struct rpc_clnt *clnt = p->clnt;
856 struct net *net = rpc_net_ns(clnt); 925 struct net *net = rpc_net_ns(clnt);
857 struct super_block *sb;
858 int err = 0;
859 926
860 sb = rpc_get_sb_net(net); 927 rpc_remove_pipe_dir_object(net,
861 if (sb) { 928 &clnt->cl_pipedir_objects,
862 if (clnt->cl_dentry) 929 &p->pdo);
863 err = gss_pipes_dentries_create(auth); 930 rpc_destroy_pipe_data(p->pipe);
864 rpc_put_sb_net(net); 931 kfree(p);
865 } 932}
866 return err; 933
934static void __gss_pipe_release(struct kref *kref)
935{
936 struct gss_pipe *p = container_of(kref, struct gss_pipe, kref);
937
938 __gss_pipe_free(p);
939}
940
941static void gss_pipe_free(struct gss_pipe *p)
942{
943 if (p != NULL)
944 kref_put(&p->kref, __gss_pipe_release);
867} 945}
868 946
869/* 947/*
870 * NOTE: we have the opportunity to use different 948 * NOTE: we have the opportunity to use different
871 * parameters based on the input flavor (which must be a pseudoflavor) 949 * parameters based on the input flavor (which must be a pseudoflavor)
872 */ 950 */
873static struct rpc_auth * 951static struct gss_auth *
874gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) 952gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
875{ 953{
954 rpc_authflavor_t flavor = args->pseudoflavor;
876 struct gss_auth *gss_auth; 955 struct gss_auth *gss_auth;
956 struct gss_pipe *gss_pipe;
877 struct rpc_auth * auth; 957 struct rpc_auth * auth;
878 int err = -ENOMEM; /* XXX? */ 958 int err = -ENOMEM; /* XXX? */
879 959
@@ -883,12 +963,20 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
883 return ERR_PTR(err); 963 return ERR_PTR(err);
884 if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) 964 if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
885 goto out_dec; 965 goto out_dec;
966 INIT_HLIST_NODE(&gss_auth->hash);
967 gss_auth->target_name = NULL;
968 if (args->target_name) {
969 gss_auth->target_name = kstrdup(args->target_name, GFP_KERNEL);
970 if (gss_auth->target_name == NULL)
971 goto err_free;
972 }
886 gss_auth->client = clnt; 973 gss_auth->client = clnt;
974 gss_auth->net = get_net(rpc_net_ns(clnt));
887 err = -EINVAL; 975 err = -EINVAL;
888 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); 976 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
889 if (!gss_auth->mech) { 977 if (!gss_auth->mech) {
890 dprintk("RPC: Pseudoflavor %d not found!\n", flavor); 978 dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
891 goto err_free; 979 goto err_put_net;
892 } 980 }
893 gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); 981 gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
894 if (gss_auth->service == 0) 982 if (gss_auth->service == 0)
@@ -901,42 +989,41 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
901 atomic_set(&auth->au_count, 1); 989 atomic_set(&auth->au_count, 1);
902 kref_init(&gss_auth->kref); 990 kref_init(&gss_auth->kref);
903 991
992 err = rpcauth_init_credcache(auth);
993 if (err)
994 goto err_put_mech;
904 /* 995 /*
905 * Note: if we created the old pipe first, then someone who 996 * Note: if we created the old pipe first, then someone who
906 * examined the directory at the right moment might conclude 997 * examined the directory at the right moment might conclude
907 * that we supported only the old pipe. So we instead create 998 * that we supported only the old pipe. So we instead create
908 * the new pipe first. 999 * the new pipe first.
909 */ 1000 */
910 gss_auth->pipe[1] = rpc_mkpipe_data(&gss_upcall_ops_v1, 1001 gss_pipe = gss_pipe_get(clnt, "gssd", &gss_upcall_ops_v1);
911 RPC_PIPE_WAIT_FOR_OPEN); 1002 if (IS_ERR(gss_pipe)) {
912 if (IS_ERR(gss_auth->pipe[1])) { 1003 err = PTR_ERR(gss_pipe);
913 err = PTR_ERR(gss_auth->pipe[1]); 1004 goto err_destroy_credcache;
914 goto err_put_mech;
915 } 1005 }
1006 gss_auth->gss_pipe[1] = gss_pipe;
916 1007
917 gss_auth->pipe[0] = rpc_mkpipe_data(&gss_upcall_ops_v0, 1008 gss_pipe = gss_pipe_get(clnt, gss_auth->mech->gm_name,
918 RPC_PIPE_WAIT_FOR_OPEN); 1009 &gss_upcall_ops_v0);
919 if (IS_ERR(gss_auth->pipe[0])) { 1010 if (IS_ERR(gss_pipe)) {
920 err = PTR_ERR(gss_auth->pipe[0]); 1011 err = PTR_ERR(gss_pipe);
921 goto err_destroy_pipe_1; 1012 goto err_destroy_pipe_1;
922 } 1013 }
923 err = gss_pipes_dentries_create_net(clnt, auth); 1014 gss_auth->gss_pipe[0] = gss_pipe;
924 if (err)
925 goto err_destroy_pipe_0;
926 err = rpcauth_init_credcache(auth);
927 if (err)
928 goto err_unlink_pipes;
929 1015
930 return auth; 1016 return gss_auth;
931err_unlink_pipes:
932 gss_pipes_dentries_destroy_net(clnt, auth);
933err_destroy_pipe_0:
934 rpc_destroy_pipe_data(gss_auth->pipe[0]);
935err_destroy_pipe_1: 1017err_destroy_pipe_1:
936 rpc_destroy_pipe_data(gss_auth->pipe[1]); 1018 gss_pipe_free(gss_auth->gss_pipe[1]);
1019err_destroy_credcache:
1020 rpcauth_destroy_credcache(auth);
937err_put_mech: 1021err_put_mech:
938 gss_mech_put(gss_auth->mech); 1022 gss_mech_put(gss_auth->mech);
1023err_put_net:
1024 put_net(gss_auth->net);
939err_free: 1025err_free:
1026 kfree(gss_auth->target_name);
940 kfree(gss_auth); 1027 kfree(gss_auth);
941out_dec: 1028out_dec:
942 module_put(THIS_MODULE); 1029 module_put(THIS_MODULE);
@@ -946,10 +1033,11 @@ out_dec:
946static void 1033static void
947gss_free(struct gss_auth *gss_auth) 1034gss_free(struct gss_auth *gss_auth)
948{ 1035{
949 gss_pipes_dentries_destroy_net(gss_auth->client, &gss_auth->rpc_auth); 1036 gss_pipe_free(gss_auth->gss_pipe[0]);
950 rpc_destroy_pipe_data(gss_auth->pipe[0]); 1037 gss_pipe_free(gss_auth->gss_pipe[1]);
951 rpc_destroy_pipe_data(gss_auth->pipe[1]);
952 gss_mech_put(gss_auth->mech); 1038 gss_mech_put(gss_auth->mech);
1039 put_net(gss_auth->net);
1040 kfree(gss_auth->target_name);
953 1041
954 kfree(gss_auth); 1042 kfree(gss_auth);
955 module_put(THIS_MODULE); 1043 module_put(THIS_MODULE);
@@ -966,17 +1054,101 @@ gss_free_callback(struct kref *kref)
966static void 1054static void
967gss_destroy(struct rpc_auth *auth) 1055gss_destroy(struct rpc_auth *auth)
968{ 1056{
969 struct gss_auth *gss_auth; 1057 struct gss_auth *gss_auth = container_of(auth,
1058 struct gss_auth, rpc_auth);
970 1059
971 dprintk("RPC: destroying GSS authenticator %p flavor %d\n", 1060 dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
972 auth, auth->au_flavor); 1061 auth, auth->au_flavor);
973 1062
1063 if (hash_hashed(&gss_auth->hash)) {
1064 spin_lock(&gss_auth_hash_lock);
1065 hash_del(&gss_auth->hash);
1066 spin_unlock(&gss_auth_hash_lock);
1067 }
1068
1069 gss_pipe_free(gss_auth->gss_pipe[0]);
1070 gss_auth->gss_pipe[0] = NULL;
1071 gss_pipe_free(gss_auth->gss_pipe[1]);
1072 gss_auth->gss_pipe[1] = NULL;
974 rpcauth_destroy_credcache(auth); 1073 rpcauth_destroy_credcache(auth);
975 1074
976 gss_auth = container_of(auth, struct gss_auth, rpc_auth);
977 kref_put(&gss_auth->kref, gss_free_callback); 1075 kref_put(&gss_auth->kref, gss_free_callback);
978} 1076}
979 1077
1078static struct gss_auth *
1079gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args,
1080 struct rpc_clnt *clnt,
1081 struct gss_auth *new)
1082{
1083 struct gss_auth *gss_auth;
1084 unsigned long hashval = (unsigned long)clnt;
1085
1086 spin_lock(&gss_auth_hash_lock);
1087 hash_for_each_possible(gss_auth_hash_table,
1088 gss_auth,
1089 hash,
1090 hashval) {
1091 if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor)
1092 continue;
1093 if (gss_auth->target_name != args->target_name) {
1094 if (gss_auth->target_name == NULL)
1095 continue;
1096 if (args->target_name == NULL)
1097 continue;
1098 if (strcmp(gss_auth->target_name, args->target_name))
1099 continue;
1100 }
1101 if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
1102 continue;
1103 goto out;
1104 }
1105 if (new)
1106 hash_add(gss_auth_hash_table, &new->hash, hashval);
1107 gss_auth = new;
1108out:
1109 spin_unlock(&gss_auth_hash_lock);
1110 return gss_auth;
1111}
1112
1113static struct gss_auth *
1114gss_create_hashed(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
1115{
1116 struct gss_auth *gss_auth;
1117 struct gss_auth *new;
1118
1119 gss_auth = gss_auth_find_or_add_hashed(args, clnt, NULL);
1120 if (gss_auth != NULL)
1121 goto out;
1122 new = gss_create_new(args, clnt);
1123 if (IS_ERR(new))
1124 return new;
1125 gss_auth = gss_auth_find_or_add_hashed(args, clnt, new);
1126 if (gss_auth != new)
1127 gss_destroy(&new->rpc_auth);
1128out:
1129 return gss_auth;
1130}
1131
1132static struct rpc_auth *
1133gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
1134{
1135 struct gss_auth *gss_auth;
1136 struct rpc_xprt *xprt = rcu_access_pointer(clnt->cl_xprt);
1137
1138 while (clnt != clnt->cl_parent) {
1139 struct rpc_clnt *parent = clnt->cl_parent;
1140 /* Find the original parent for this transport */
1141 if (rcu_access_pointer(parent->cl_xprt) != xprt)
1142 break;
1143 clnt = parent;
1144 }
1145
1146 gss_auth = gss_create_hashed(args, clnt);
1147 if (IS_ERR(gss_auth))
1148 return ERR_CAST(gss_auth);
1149 return &gss_auth->rpc_auth;
1150}
1151
980/* 1152/*
981 * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call 1153 * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
982 * to the server with the GSS control procedure field set to 1154 * to the server with the GSS control procedure field set to
@@ -1126,10 +1298,32 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred)
1126 return err; 1298 return err;
1127} 1299}
1128 1300
1301/*
1302 * Returns -EACCES if GSS context is NULL or will expire within the
1303 * timeout (gss_key_expire_timeo, in seconds)
1304 */
1305static int
1306gss_key_timeout(struct rpc_cred *rc)
1307{
1308 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
1309 unsigned long now = jiffies;
1310 unsigned long expire;
1311
1312 if (gss_cred->gc_ctx == NULL)
1313 return -EACCES;
1314
1315 expire = gss_cred->gc_ctx->gc_expiry - (gss_key_expire_timeo * HZ);
1316
1317 if (time_after(now, expire))
1318 return -EACCES;
1319 return 0;
1320}
1321
1129static int 1322static int
1130gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) 1323gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
1131{ 1324{
1132 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); 1325 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
1326 int ret;
1133 1327
1134 if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) 1328 if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
1135 goto out; 1329 goto out;
@@ -1142,11 +1336,26 @@ out:
1142 if (acred->principal != NULL) { 1336 if (acred->principal != NULL) {
1143 if (gss_cred->gc_principal == NULL) 1337 if (gss_cred->gc_principal == NULL)
1144 return 0; 1338 return 0;
1145 return strcmp(acred->principal, gss_cred->gc_principal) == 0; 1339 ret = strcmp(acred->principal, gss_cred->gc_principal) == 0;
1340 goto check_expire;
1146 } 1341 }
1147 if (gss_cred->gc_principal != NULL) 1342 if (gss_cred->gc_principal != NULL)
1148 return 0; 1343 return 0;
1149 return uid_eq(rc->cr_uid, acred->uid); 1344 ret = uid_eq(rc->cr_uid, acred->uid);
1345
1346check_expire:
1347 if (ret == 0)
1348 return ret;
1349
1350 /* Notify acred users of GSS context expiration timeout */
1351 if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) &&
1352 (gss_key_timeout(rc) != 0)) {
1353 /* test will now be done from generic cred */
1354 test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
1355 /* tell NFS layer that key will expire soon */
1356 set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
1357 }
1358 return ret;
1150} 1359}
1151 1360
1152/* 1361/*
@@ -1292,6 +1501,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
1292 struct xdr_netobj mic; 1501 struct xdr_netobj mic;
1293 u32 flav,len; 1502 u32 flav,len;
1294 u32 maj_stat; 1503 u32 maj_stat;
1504 __be32 *ret = ERR_PTR(-EIO);
1295 1505
1296 dprintk("RPC: %5u %s\n", task->tk_pid, __func__); 1506 dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
1297 1507
@@ -1307,6 +1517,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
1307 mic.data = (u8 *)p; 1517 mic.data = (u8 *)p;
1308 mic.len = len; 1518 mic.len = len;
1309 1519
1520 ret = ERR_PTR(-EACCES);
1310 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); 1521 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
1311 if (maj_stat == GSS_S_CONTEXT_EXPIRED) 1522 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
1312 clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); 1523 clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
@@ -1324,8 +1535,9 @@ gss_validate(struct rpc_task *task, __be32 *p)
1324 return p + XDR_QUADLEN(len); 1535 return p + XDR_QUADLEN(len);
1325out_bad: 1536out_bad:
1326 gss_put_ctx(ctx); 1537 gss_put_ctx(ctx);
1327 dprintk("RPC: %5u %s failed.\n", task->tk_pid, __func__); 1538 dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
1328 return NULL; 1539 PTR_ERR(ret));
1540 return ret;
1329} 1541}
1330 1542
1331static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, 1543static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
@@ -1657,8 +1869,6 @@ static const struct rpc_authops authgss_ops = {
1657 .destroy = gss_destroy, 1869 .destroy = gss_destroy,
1658 .lookup_cred = gss_lookup_cred, 1870 .lookup_cred = gss_lookup_cred,
1659 .crcreate = gss_create_cred, 1871 .crcreate = gss_create_cred,
1660 .pipes_create = gss_pipes_dentries_create,
1661 .pipes_destroy = gss_pipes_dentries_destroy,
1662 .list_pseudoflavors = gss_mech_list_pseudoflavors, 1872 .list_pseudoflavors = gss_mech_list_pseudoflavors,
1663 .info2flavor = gss_mech_info2flavor, 1873 .info2flavor = gss_mech_info2flavor,
1664 .flavor2info = gss_mech_flavor2info, 1874 .flavor2info = gss_mech_flavor2info,
@@ -1675,6 +1885,7 @@ static const struct rpc_credops gss_credops = {
1675 .crvalidate = gss_validate, 1885 .crvalidate = gss_validate,
1676 .crwrap_req = gss_wrap_req, 1886 .crwrap_req = gss_wrap_req,
1677 .crunwrap_resp = gss_unwrap_resp, 1887 .crunwrap_resp = gss_unwrap_resp,
1888 .crkey_timeout = gss_key_timeout,
1678}; 1889};
1679 1890
1680static const struct rpc_credops gss_nullops = { 1891static const struct rpc_credops gss_nullops = {
@@ -1762,5 +1973,12 @@ module_param_named(expired_cred_retry_delay,
1762MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until " 1973MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
1763 "the RPC engine retries an expired credential"); 1974 "the RPC engine retries an expired credential");
1764 1975
1976module_param_named(key_expire_timeo,
1977 gss_key_expire_timeo,
1978 uint, 0644);
1979MODULE_PARM_DESC(key_expire_timeo, "Time (in seconds) at the end of a "
1980 "credential keys lifetime where the NFS layer cleans up "
1981 "prior to key expiration");
1982
1765module_init(init_rpcsec_gss) 1983module_init(init_rpcsec_gss)
1766module_exit(exit_rpcsec_gss) 1984module_exit(exit_rpcsec_gss)
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index af7ffd447fee..f1eb0d16666c 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -213,6 +213,26 @@ static int gssp_call(struct net *net, struct rpc_message *msg)
213 return status; 213 return status;
214} 214}
215 215
216static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
217{
218 int i;
219
220 for (i = 0; i < arg->npages && arg->pages[i]; i++)
221 __free_page(arg->pages[i]);
222}
223
224static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
225{
226 arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE);
227 arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL);
228 /*
229 * XXX: actual pages are allocated by xdr layer in
230 * xdr_partial_copy_from_skb.
231 */
232 if (!arg->pages)
233 return -ENOMEM;
234 return 0;
235}
216 236
217/* 237/*
218 * Public functions 238 * Public functions
@@ -261,10 +281,16 @@ int gssp_accept_sec_context_upcall(struct net *net,
261 arg.context_handle = &ctxh; 281 arg.context_handle = &ctxh;
262 res.output_token->len = GSSX_max_output_token_sz; 282 res.output_token->len = GSSX_max_output_token_sz;
263 283
284 ret = gssp_alloc_receive_pages(&arg);
285 if (ret)
286 return ret;
287
264 /* use nfs/ for targ_name ? */ 288 /* use nfs/ for targ_name ? */
265 289
266 ret = gssp_call(net, &msg); 290 ret = gssp_call(net, &msg);
267 291
292 gssp_free_receive_pages(&arg);
293
268 /* we need to fetch all data even in case of error so 294 /* we need to fetch all data even in case of error so
269 * that we can free special structures if they have been allocated */ 295 * that we can free special structures if they have been allocated */
270 data->major_status = res.status.major_status; 296 data->major_status = res.status.major_status;
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 3c85d1c8a028..f0f78c5f1c7d 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -166,14 +166,15 @@ static int dummy_dec_opt_array(struct xdr_stream *xdr,
166 return 0; 166 return 0;
167} 167}
168 168
169static int get_s32(void **p, void *max, s32 *res) 169static int get_host_u32(struct xdr_stream *xdr, u32 *res)
170{ 170{
171 void *base = *p; 171 __be32 *p;
172 void *next = (void *)((char *)base + sizeof(s32)); 172
173 if (unlikely(next > max || next < base)) 173 p = xdr_inline_decode(xdr, 4);
174 if (!p)
174 return -EINVAL; 175 return -EINVAL;
175 memcpy(res, base, sizeof(s32)); 176 /* Contents of linux creds are all host-endian: */
176 *p = next; 177 memcpy(res, p, sizeof(u32));
177 return 0; 178 return 0;
178} 179}
179 180
@@ -182,9 +183,9 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
182{ 183{
183 u32 length; 184 u32 length;
184 __be32 *p; 185 __be32 *p;
185 void *q, *end; 186 u32 tmp;
186 s32 tmp; 187 u32 N;
187 int N, i, err; 188 int i, err;
188 189
189 p = xdr_inline_decode(xdr, 4); 190 p = xdr_inline_decode(xdr, 4);
190 if (unlikely(p == NULL)) 191 if (unlikely(p == NULL))
@@ -192,33 +193,28 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
192 193
193 length = be32_to_cpup(p); 194 length = be32_to_cpup(p);
194 195
195 /* FIXME: we do not want to use the scratch buffer for this one 196 if (length > (3 + NGROUPS_MAX) * sizeof(u32))
196 * may need to use functions that allows us to access an io vector
197 * directly */
198 p = xdr_inline_decode(xdr, length);
199 if (unlikely(p == NULL))
200 return -ENOSPC; 197 return -ENOSPC;
201 198
202 q = p;
203 end = q + length;
204
205 /* uid */ 199 /* uid */
206 err = get_s32(&q, end, &tmp); 200 err = get_host_u32(xdr, &tmp);
207 if (err) 201 if (err)
208 return err; 202 return err;
209 creds->cr_uid = make_kuid(&init_user_ns, tmp); 203 creds->cr_uid = make_kuid(&init_user_ns, tmp);
210 204
211 /* gid */ 205 /* gid */
212 err = get_s32(&q, end, &tmp); 206 err = get_host_u32(xdr, &tmp);
213 if (err) 207 if (err)
214 return err; 208 return err;
215 creds->cr_gid = make_kgid(&init_user_ns, tmp); 209 creds->cr_gid = make_kgid(&init_user_ns, tmp);
216 210
217 /* number of additional gid's */ 211 /* number of additional gid's */
218 err = get_s32(&q, end, &tmp); 212 err = get_host_u32(xdr, &tmp);
219 if (err) 213 if (err)
220 return err; 214 return err;
221 N = tmp; 215 N = tmp;
216 if ((3 + N) * sizeof(u32) != length)
217 return -EINVAL;
222 creds->cr_group_info = groups_alloc(N); 218 creds->cr_group_info = groups_alloc(N);
223 if (creds->cr_group_info == NULL) 219 if (creds->cr_group_info == NULL)
224 return -ENOMEM; 220 return -ENOMEM;
@@ -226,7 +222,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
226 /* gid's */ 222 /* gid's */
227 for (i = 0; i < N; i++) { 223 for (i = 0; i < N; i++) {
228 kgid_t kgid; 224 kgid_t kgid;
229 err = get_s32(&q, end, &tmp); 225 err = get_host_u32(xdr, &tmp);
230 if (err) 226 if (err)
231 goto out_free_groups; 227 goto out_free_groups;
232 err = -EINVAL; 228 err = -EINVAL;
@@ -784,6 +780,9 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
784 /* arg->options */ 780 /* arg->options */
785 err = dummy_enc_opt_array(xdr, &arg->options); 781 err = dummy_enc_opt_array(xdr, &arg->options);
786 782
783 xdr_inline_pages(&req->rq_rcv_buf,
784 PAGE_SIZE/2 /* pretty arbitrary */,
785 arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
787done: 786done:
788 if (err) 787 if (err)
789 dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err); 788 dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 1c98b27d870c..685a688f3d8a 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -147,6 +147,8 @@ struct gssx_arg_accept_sec_context {
147 struct gssx_cb *input_cb; 147 struct gssx_cb *input_cb;
148 u32 ret_deleg_cred; 148 u32 ret_deleg_cred;
149 struct gssx_option_array options; 149 struct gssx_option_array options;
150 struct page **pages;
151 unsigned int npages;
150}; 152};
151 153
152struct gssx_res_accept_sec_context { 154struct gssx_res_accept_sec_context {
@@ -240,7 +242,8 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
240 2 * GSSX_max_princ_sz + \ 242 2 * GSSX_max_princ_sz + \
241 8 + 8 + 4 + 4 + 4) 243 8 + 8 + 4 + 4 + 4)
242#define GSSX_max_output_token_sz 1024 244#define GSSX_max_output_token_sz 1024
243#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4) 245/* grouplist not included; we allocate separate pages for that: */
246#define GSSX_max_creds_sz (4 + 4 + 4 /* + NGROUPS_MAX*4 */)
244#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \ 247#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
245 GSSX_default_ctx_sz + \ 248 GSSX_default_ctx_sz + \
246 GSSX_max_output_token_sz + \ 249 GSSX_max_output_token_sz + \
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index a5c36c01707b..f0ebe07978a2 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -18,7 +18,7 @@ static struct rpc_auth null_auth;
18static struct rpc_cred null_cred; 18static struct rpc_cred null_cred;
19 19
20static struct rpc_auth * 20static struct rpc_auth *
21nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) 21nul_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
22{ 22{
23 atomic_inc(&null_auth.au_count); 23 atomic_inc(&null_auth.au_count);
24 return &null_auth; 24 return &null_auth;
@@ -88,13 +88,13 @@ nul_validate(struct rpc_task *task, __be32 *p)
88 flavor = ntohl(*p++); 88 flavor = ntohl(*p++);
89 if (flavor != RPC_AUTH_NULL) { 89 if (flavor != RPC_AUTH_NULL) {
90 printk("RPC: bad verf flavor: %u\n", flavor); 90 printk("RPC: bad verf flavor: %u\n", flavor);
91 return NULL; 91 return ERR_PTR(-EIO);
92 } 92 }
93 93
94 size = ntohl(*p++); 94 size = ntohl(*p++);
95 if (size != 0) { 95 if (size != 0) {
96 printk("RPC: bad verf size: %u\n", size); 96 printk("RPC: bad verf size: %u\n", size);
97 return NULL; 97 return ERR_PTR(-EIO);
98 } 98 }
99 99
100 return p; 100 return p;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index dc37021fc3e5..d5d692366294 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -33,7 +33,7 @@ static struct rpc_auth unix_auth;
33static const struct rpc_credops unix_credops; 33static const struct rpc_credops unix_credops;
34 34
35static struct rpc_auth * 35static struct rpc_auth *
36unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) 36unx_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
37{ 37{
38 dprintk("RPC: creating UNIX authenticator for client %p\n", 38 dprintk("RPC: creating UNIX authenticator for client %p\n",
39 clnt); 39 clnt);
@@ -192,13 +192,13 @@ unx_validate(struct rpc_task *task, __be32 *p)
192 flavor != RPC_AUTH_UNIX && 192 flavor != RPC_AUTH_UNIX &&
193 flavor != RPC_AUTH_SHORT) { 193 flavor != RPC_AUTH_SHORT) {
194 printk("RPC: bad verf flavor: %u\n", flavor); 194 printk("RPC: bad verf flavor: %u\n", flavor);
195 return NULL; 195 return ERR_PTR(-EIO);
196 } 196 }
197 197
198 size = ntohl(*p++); 198 size = ntohl(*p++);
199 if (size > RPC_MAX_AUTH_SIZE) { 199 if (size > RPC_MAX_AUTH_SIZE) {
200 printk("RPC: giant verf size: %u\n", size); 200 printk("RPC: giant verf size: %u\n", size);
201 return NULL; 201 return ERR_PTR(-EIO);
202 } 202 }
203 task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2; 203 task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
204 p += (size >> 2); 204 p += (size >> 2);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ecbc4e3d83ad..77479606a971 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -102,12 +102,7 @@ static void rpc_unregister_client(struct rpc_clnt *clnt)
102 102
103static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) 103static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
104{ 104{
105 if (clnt->cl_dentry) { 105 rpc_remove_client_dir(clnt);
106 if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy)
107 clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth);
108 rpc_remove_client_dir(clnt->cl_dentry);
109 }
110 clnt->cl_dentry = NULL;
111} 106}
112 107
113static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) 108static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
@@ -123,10 +118,10 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
123} 118}
124 119
125static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, 120static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
126 struct rpc_clnt *clnt, 121 struct rpc_clnt *clnt)
127 const char *dir_name)
128{ 122{
129 static uint32_t clntid; 123 static uint32_t clntid;
124 const char *dir_name = clnt->cl_program->pipe_dir_name;
130 char name[15]; 125 char name[15];
131 struct dentry *dir, *dentry; 126 struct dentry *dir, *dentry;
132 127
@@ -153,28 +148,35 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
153} 148}
154 149
155static int 150static int
156rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name, 151rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
157 struct super_block *pipefs_sb)
158{ 152{
159 struct dentry *dentry; 153 struct dentry *dentry;
160 154
161 clnt->cl_dentry = NULL; 155 if (clnt->cl_program->pipe_dir_name != NULL) {
162 if (dir_name == NULL) 156 dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
163 return 0; 157 if (IS_ERR(dentry))
164 dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); 158 return PTR_ERR(dentry);
165 if (IS_ERR(dentry)) 159 }
166 return PTR_ERR(dentry);
167 clnt->cl_dentry = dentry;
168 return 0; 160 return 0;
169} 161}
170 162
171static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event) 163static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
172{ 164{
173 if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) || 165 if (clnt->cl_program->pipe_dir_name == NULL)
174 ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
175 return 1;
176 if ((event == RPC_PIPEFS_MOUNT) && atomic_read(&clnt->cl_count) == 0)
177 return 1; 166 return 1;
167
168 switch (event) {
169 case RPC_PIPEFS_MOUNT:
170 if (clnt->cl_pipedir_objects.pdh_dentry != NULL)
171 return 1;
172 if (atomic_read(&clnt->cl_count) == 0)
173 return 1;
174 break;
175 case RPC_PIPEFS_UMOUNT:
176 if (clnt->cl_pipedir_objects.pdh_dentry == NULL)
177 return 1;
178 break;
179 }
178 return 0; 180 return 0;
179} 181}
180 182
@@ -186,18 +188,11 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
186 188
187 switch (event) { 189 switch (event) {
188 case RPC_PIPEFS_MOUNT: 190 case RPC_PIPEFS_MOUNT:
189 dentry = rpc_setup_pipedir_sb(sb, clnt, 191 dentry = rpc_setup_pipedir_sb(sb, clnt);
190 clnt->cl_program->pipe_dir_name);
191 if (!dentry) 192 if (!dentry)
192 return -ENOENT; 193 return -ENOENT;
193 if (IS_ERR(dentry)) 194 if (IS_ERR(dentry))
194 return PTR_ERR(dentry); 195 return PTR_ERR(dentry);
195 clnt->cl_dentry = dentry;
196 if (clnt->cl_auth->au_ops->pipes_create) {
197 err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth);
198 if (err)
199 __rpc_clnt_remove_pipedir(clnt);
200 }
201 break; 196 break;
202 case RPC_PIPEFS_UMOUNT: 197 case RPC_PIPEFS_UMOUNT:
203 __rpc_clnt_remove_pipedir(clnt); 198 __rpc_clnt_remove_pipedir(clnt);
@@ -230,8 +225,6 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
230 225
231 spin_lock(&sn->rpc_client_lock); 226 spin_lock(&sn->rpc_client_lock);
232 list_for_each_entry(clnt, &sn->all_clients, cl_clients) { 227 list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
233 if (clnt->cl_program->pipe_dir_name == NULL)
234 continue;
235 if (rpc_clnt_skip_event(clnt, event)) 228 if (rpc_clnt_skip_event(clnt, event))
236 continue; 229 continue;
237 spin_unlock(&sn->rpc_client_lock); 230 spin_unlock(&sn->rpc_client_lock);
@@ -282,7 +275,10 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
282static int rpc_client_register(const struct rpc_create_args *args, 275static int rpc_client_register(const struct rpc_create_args *args,
283 struct rpc_clnt *clnt) 276 struct rpc_clnt *clnt)
284{ 277{
285 const struct rpc_program *program = args->program; 278 struct rpc_auth_create_args auth_args = {
279 .pseudoflavor = args->authflavor,
280 .target_name = args->client_name,
281 };
286 struct rpc_auth *auth; 282 struct rpc_auth *auth;
287 struct net *net = rpc_net_ns(clnt); 283 struct net *net = rpc_net_ns(clnt);
288 struct super_block *pipefs_sb; 284 struct super_block *pipefs_sb;
@@ -290,7 +286,7 @@ static int rpc_client_register(const struct rpc_create_args *args,
290 286
291 pipefs_sb = rpc_get_sb_net(net); 287 pipefs_sb = rpc_get_sb_net(net);
292 if (pipefs_sb) { 288 if (pipefs_sb) {
293 err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb); 289 err = rpc_setup_pipedir(pipefs_sb, clnt);
294 if (err) 290 if (err)
295 goto out; 291 goto out;
296 } 292 }
@@ -299,7 +295,7 @@ static int rpc_client_register(const struct rpc_create_args *args,
299 if (pipefs_sb) 295 if (pipefs_sb)
300 rpc_put_sb_net(net); 296 rpc_put_sb_net(net);
301 297
302 auth = rpcauth_create(args->authflavor, clnt); 298 auth = rpcauth_create(&auth_args, clnt);
303 if (IS_ERR(auth)) { 299 if (IS_ERR(auth)) {
304 dprintk("RPC: Couldn't create auth handle (flavor %u)\n", 300 dprintk("RPC: Couldn't create auth handle (flavor %u)\n",
305 args->authflavor); 301 args->authflavor);
@@ -317,7 +313,27 @@ out:
317 return err; 313 return err;
318} 314}
319 315
320static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) 316static DEFINE_IDA(rpc_clids);
317
318static int rpc_alloc_clid(struct rpc_clnt *clnt)
319{
320 int clid;
321
322 clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL);
323 if (clid < 0)
324 return clid;
325 clnt->cl_clid = clid;
326 return 0;
327}
328
329static void rpc_free_clid(struct rpc_clnt *clnt)
330{
331 ida_simple_remove(&rpc_clids, clnt->cl_clid);
332}
333
334static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
335 struct rpc_xprt *xprt,
336 struct rpc_clnt *parent)
321{ 337{
322 const struct rpc_program *program = args->program; 338 const struct rpc_program *program = args->program;
323 const struct rpc_version *version; 339 const struct rpc_version *version;
@@ -343,16 +359,20 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
343 clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); 359 clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
344 if (!clnt) 360 if (!clnt)
345 goto out_err; 361 goto out_err;
346 clnt->cl_parent = clnt; 362 clnt->cl_parent = parent ? : clnt;
363
364 err = rpc_alloc_clid(clnt);
365 if (err)
366 goto out_no_clid;
347 367
348 rcu_assign_pointer(clnt->cl_xprt, xprt); 368 rcu_assign_pointer(clnt->cl_xprt, xprt);
349 clnt->cl_procinfo = version->procs; 369 clnt->cl_procinfo = version->procs;
350 clnt->cl_maxproc = version->nrprocs; 370 clnt->cl_maxproc = version->nrprocs;
351 clnt->cl_protname = program->name;
352 clnt->cl_prog = args->prognumber ? : program->number; 371 clnt->cl_prog = args->prognumber ? : program->number;
353 clnt->cl_vers = version->number; 372 clnt->cl_vers = version->number;
354 clnt->cl_stats = program->stats; 373 clnt->cl_stats = program->stats;
355 clnt->cl_metrics = rpc_alloc_iostats(clnt); 374 clnt->cl_metrics = rpc_alloc_iostats(clnt);
375 rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
356 err = -ENOMEM; 376 err = -ENOMEM;
357 if (clnt->cl_metrics == NULL) 377 if (clnt->cl_metrics == NULL)
358 goto out_no_stats; 378 goto out_no_stats;
@@ -372,12 +392,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
372 392
373 clnt->cl_rtt = &clnt->cl_rtt_default; 393 clnt->cl_rtt = &clnt->cl_rtt_default;
374 rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval); 394 rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
375 clnt->cl_principal = NULL;
376 if (args->client_name) {
377 clnt->cl_principal = kstrdup(args->client_name, GFP_KERNEL);
378 if (!clnt->cl_principal)
379 goto out_no_principal;
380 }
381 395
382 atomic_set(&clnt->cl_count, 1); 396 atomic_set(&clnt->cl_count, 1);
383 397
@@ -387,13 +401,15 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
387 err = rpc_client_register(args, clnt); 401 err = rpc_client_register(args, clnt);
388 if (err) 402 if (err)
389 goto out_no_path; 403 goto out_no_path;
404 if (parent)
405 atomic_inc(&parent->cl_count);
390 return clnt; 406 return clnt;
391 407
392out_no_path: 408out_no_path:
393 kfree(clnt->cl_principal);
394out_no_principal:
395 rpc_free_iostats(clnt->cl_metrics); 409 rpc_free_iostats(clnt->cl_metrics);
396out_no_stats: 410out_no_stats:
411 rpc_free_clid(clnt);
412out_no_clid:
397 kfree(clnt); 413 kfree(clnt);
398out_err: 414out_err:
399 rpciod_down(); 415 rpciod_down();
@@ -479,7 +495,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
479 if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) 495 if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
480 xprt->resvport = 0; 496 xprt->resvport = 0;
481 497
482 clnt = rpc_new_client(args, xprt); 498 clnt = rpc_new_client(args, xprt, NULL);
483 if (IS_ERR(clnt)) 499 if (IS_ERR(clnt))
484 return clnt; 500 return clnt;
485 501
@@ -526,15 +542,12 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
526 goto out_err; 542 goto out_err;
527 args->servername = xprt->servername; 543 args->servername = xprt->servername;
528 544
529 new = rpc_new_client(args, xprt); 545 new = rpc_new_client(args, xprt, clnt);
530 if (IS_ERR(new)) { 546 if (IS_ERR(new)) {
531 err = PTR_ERR(new); 547 err = PTR_ERR(new);
532 goto out_err; 548 goto out_err;
533 } 549 }
534 550
535 atomic_inc(&clnt->cl_count);
536 new->cl_parent = clnt;
537
538 /* Turn off autobind on clones */ 551 /* Turn off autobind on clones */
539 new->cl_autobind = 0; 552 new->cl_autobind = 0;
540 new->cl_softrtry = clnt->cl_softrtry; 553 new->cl_softrtry = clnt->cl_softrtry;
@@ -561,7 +574,6 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
561 .prognumber = clnt->cl_prog, 574 .prognumber = clnt->cl_prog,
562 .version = clnt->cl_vers, 575 .version = clnt->cl_vers,
563 .authflavor = clnt->cl_auth->au_flavor, 576 .authflavor = clnt->cl_auth->au_flavor,
564 .client_name = clnt->cl_principal,
565 }; 577 };
566 return __rpc_clone_client(&args, clnt); 578 return __rpc_clone_client(&args, clnt);
567} 579}
@@ -583,7 +595,6 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
583 .prognumber = clnt->cl_prog, 595 .prognumber = clnt->cl_prog,
584 .version = clnt->cl_vers, 596 .version = clnt->cl_vers,
585 .authflavor = flavor, 597 .authflavor = flavor,
586 .client_name = clnt->cl_principal,
587 }; 598 };
588 return __rpc_clone_client(&args, clnt); 599 return __rpc_clone_client(&args, clnt);
589} 600}
@@ -629,7 +640,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt)
629 might_sleep(); 640 might_sleep();
630 641
631 dprintk_rcu("RPC: shutting down %s client for %s\n", 642 dprintk_rcu("RPC: shutting down %s client for %s\n",
632 clnt->cl_protname, 643 clnt->cl_program->name,
633 rcu_dereference(clnt->cl_xprt)->servername); 644 rcu_dereference(clnt->cl_xprt)->servername);
634 645
635 while (!list_empty(&clnt->cl_tasks)) { 646 while (!list_empty(&clnt->cl_tasks)) {
@@ -649,17 +660,17 @@ static void
649rpc_free_client(struct rpc_clnt *clnt) 660rpc_free_client(struct rpc_clnt *clnt)
650{ 661{
651 dprintk_rcu("RPC: destroying %s client for %s\n", 662 dprintk_rcu("RPC: destroying %s client for %s\n",
652 clnt->cl_protname, 663 clnt->cl_program->name,
653 rcu_dereference(clnt->cl_xprt)->servername); 664 rcu_dereference(clnt->cl_xprt)->servername);
654 if (clnt->cl_parent != clnt) 665 if (clnt->cl_parent != clnt)
655 rpc_release_client(clnt->cl_parent); 666 rpc_release_client(clnt->cl_parent);
656 rpc_clnt_remove_pipedir(clnt); 667 rpc_clnt_remove_pipedir(clnt);
657 rpc_unregister_client(clnt); 668 rpc_unregister_client(clnt);
658 rpc_free_iostats(clnt->cl_metrics); 669 rpc_free_iostats(clnt->cl_metrics);
659 kfree(clnt->cl_principal);
660 clnt->cl_metrics = NULL; 670 clnt->cl_metrics = NULL;
661 xprt_put(rcu_dereference_raw(clnt->cl_xprt)); 671 xprt_put(rcu_dereference_raw(clnt->cl_xprt));
662 rpciod_down(); 672 rpciod_down();
673 rpc_free_clid(clnt);
663 kfree(clnt); 674 kfree(clnt);
664} 675}
665 676
@@ -720,7 +731,6 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
720 .prognumber = program->number, 731 .prognumber = program->number,
721 .version = vers, 732 .version = vers,
722 .authflavor = old->cl_auth->au_flavor, 733 .authflavor = old->cl_auth->au_flavor,
723 .client_name = old->cl_principal,
724 }; 734 };
725 struct rpc_clnt *clnt; 735 struct rpc_clnt *clnt;
726 int err; 736 int err;
@@ -1299,7 +1309,7 @@ call_start(struct rpc_task *task)
1299 struct rpc_clnt *clnt = task->tk_client; 1309 struct rpc_clnt *clnt = task->tk_client;
1300 1310
1301 dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, 1311 dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
1302 clnt->cl_protname, clnt->cl_vers, 1312 clnt->cl_program->name, clnt->cl_vers,
1303 rpc_proc_name(task), 1313 rpc_proc_name(task),
1304 (RPC_IS_ASYNC(task) ? "async" : "sync")); 1314 (RPC_IS_ASYNC(task) ? "async" : "sync"));
1305 1315
@@ -1423,9 +1433,9 @@ call_refreshresult(struct rpc_task *task)
1423 return; 1433 return;
1424 case -ETIMEDOUT: 1434 case -ETIMEDOUT:
1425 rpc_delay(task, 3*HZ); 1435 rpc_delay(task, 3*HZ);
1426 case -EKEYEXPIRED:
1427 case -EAGAIN: 1436 case -EAGAIN:
1428 status = -EACCES; 1437 status = -EACCES;
1438 case -EKEYEXPIRED:
1429 if (!task->tk_cred_retry) 1439 if (!task->tk_cred_retry)
1430 break; 1440 break;
1431 task->tk_cred_retry--; 1441 task->tk_cred_retry--;
@@ -1912,7 +1922,7 @@ call_status(struct rpc_task *task)
1912 default: 1922 default:
1913 if (clnt->cl_chatty) 1923 if (clnt->cl_chatty)
1914 printk("%s: RPC call returned error %d\n", 1924 printk("%s: RPC call returned error %d\n",
1915 clnt->cl_protname, -status); 1925 clnt->cl_program->name, -status);
1916 rpc_exit(task, status); 1926 rpc_exit(task, status);
1917 } 1927 }
1918} 1928}
@@ -1943,7 +1953,7 @@ call_timeout(struct rpc_task *task)
1943 if (clnt->cl_chatty) { 1953 if (clnt->cl_chatty) {
1944 rcu_read_lock(); 1954 rcu_read_lock();
1945 printk(KERN_NOTICE "%s: server %s not responding, timed out\n", 1955 printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
1946 clnt->cl_protname, 1956 clnt->cl_program->name,
1947 rcu_dereference(clnt->cl_xprt)->servername); 1957 rcu_dereference(clnt->cl_xprt)->servername);
1948 rcu_read_unlock(); 1958 rcu_read_unlock();
1949 } 1959 }
@@ -1959,7 +1969,7 @@ call_timeout(struct rpc_task *task)
1959 if (clnt->cl_chatty) { 1969 if (clnt->cl_chatty) {
1960 rcu_read_lock(); 1970 rcu_read_lock();
1961 printk(KERN_NOTICE "%s: server %s not responding, still trying\n", 1971 printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
1962 clnt->cl_protname, 1972 clnt->cl_program->name,
1963 rcu_dereference(clnt->cl_xprt)->servername); 1973 rcu_dereference(clnt->cl_xprt)->servername);
1964 rcu_read_unlock(); 1974 rcu_read_unlock();
1965 } 1975 }
@@ -1994,7 +2004,7 @@ call_decode(struct rpc_task *task)
1994 if (clnt->cl_chatty) { 2004 if (clnt->cl_chatty) {
1995 rcu_read_lock(); 2005 rcu_read_lock();
1996 printk(KERN_NOTICE "%s: server %s OK\n", 2006 printk(KERN_NOTICE "%s: server %s OK\n",
1997 clnt->cl_protname, 2007 clnt->cl_program->name,
1998 rcu_dereference(clnt->cl_xprt)->servername); 2008 rcu_dereference(clnt->cl_xprt)->servername);
1999 rcu_read_unlock(); 2009 rcu_read_unlock();
2000 } 2010 }
@@ -2019,7 +2029,7 @@ call_decode(struct rpc_task *task)
2019 goto out_retry; 2029 goto out_retry;
2020 } 2030 }
2021 dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", 2031 dprintk("RPC: %s: too small RPC reply size (%d bytes)\n",
2022 clnt->cl_protname, task->tk_status); 2032 clnt->cl_program->name, task->tk_status);
2023 task->tk_action = call_timeout; 2033 task->tk_action = call_timeout;
2024 goto out_retry; 2034 goto out_retry;
2025 } 2035 }
@@ -2091,7 +2101,8 @@ rpc_verify_header(struct rpc_task *task)
2091 dprintk("RPC: %5u %s: XDR representation not a multiple of" 2101 dprintk("RPC: %5u %s: XDR representation not a multiple of"
2092 " 4 bytes: 0x%x\n", task->tk_pid, __func__, 2102 " 4 bytes: 0x%x\n", task->tk_pid, __func__,
2093 task->tk_rqstp->rq_rcv_buf.len); 2103 task->tk_rqstp->rq_rcv_buf.len);
2094 goto out_eio; 2104 error = -EIO;
2105 goto out_err;
2095 } 2106 }
2096 if ((len -= 3) < 0) 2107 if ((len -= 3) < 0)
2097 goto out_overflow; 2108 goto out_overflow;
@@ -2100,6 +2111,7 @@ rpc_verify_header(struct rpc_task *task)
2100 if ((n = ntohl(*p++)) != RPC_REPLY) { 2111 if ((n = ntohl(*p++)) != RPC_REPLY) {
2101 dprintk("RPC: %5u %s: not an RPC reply: %x\n", 2112 dprintk("RPC: %5u %s: not an RPC reply: %x\n",
2102 task->tk_pid, __func__, n); 2113 task->tk_pid, __func__, n);
2114 error = -EIO;
2103 goto out_garbage; 2115 goto out_garbage;
2104 } 2116 }
2105 2117
@@ -2118,7 +2130,8 @@ rpc_verify_header(struct rpc_task *task)
2118 dprintk("RPC: %5u %s: RPC call rejected, " 2130 dprintk("RPC: %5u %s: RPC call rejected, "
2119 "unknown error: %x\n", 2131 "unknown error: %x\n",
2120 task->tk_pid, __func__, n); 2132 task->tk_pid, __func__, n);
2121 goto out_eio; 2133 error = -EIO;
2134 goto out_err;
2122 } 2135 }
2123 if (--len < 0) 2136 if (--len < 0)
2124 goto out_overflow; 2137 goto out_overflow;
@@ -2163,9 +2176,11 @@ rpc_verify_header(struct rpc_task *task)
2163 task->tk_pid, __func__, n); 2176 task->tk_pid, __func__, n);
2164 goto out_err; 2177 goto out_err;
2165 } 2178 }
2166 if (!(p = rpcauth_checkverf(task, p))) { 2179 p = rpcauth_checkverf(task, p);
2167 dprintk("RPC: %5u %s: auth check failed\n", 2180 if (IS_ERR(p)) {
2168 task->tk_pid, __func__); 2181 error = PTR_ERR(p);
2182 dprintk("RPC: %5u %s: auth check failed with %d\n",
2183 task->tk_pid, __func__, error);
2169 goto out_garbage; /* bad verifier, retry */ 2184 goto out_garbage; /* bad verifier, retry */
2170 } 2185 }
2171 len = p - (__be32 *)iov->iov_base - 1; 2186 len = p - (__be32 *)iov->iov_base - 1;
@@ -2218,8 +2233,6 @@ out_garbage:
2218out_retry: 2233out_retry:
2219 return ERR_PTR(-EAGAIN); 2234 return ERR_PTR(-EAGAIN);
2220 } 2235 }
2221out_eio:
2222 error = -EIO;
2223out_err: 2236out_err:
2224 rpc_exit(task, error); 2237 rpc_exit(task, error);
2225 dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid, 2238 dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
@@ -2291,7 +2304,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
2291 printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n", 2304 printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
2292 task->tk_pid, task->tk_flags, task->tk_status, 2305 task->tk_pid, task->tk_flags, task->tk_status,
2293 clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, 2306 clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
2294 clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), 2307 clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
2295 task->tk_action, rpc_waitq); 2308 task->tk_action, rpc_waitq);
2296} 2309}
2297 2310
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 406859cc68aa..f94567b45bb3 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -409,7 +409,7 @@ rpc_show_info(struct seq_file *m, void *v)
409 rcu_read_lock(); 409 rcu_read_lock();
410 seq_printf(m, "RPC server: %s\n", 410 seq_printf(m, "RPC server: %s\n",
411 rcu_dereference(clnt->cl_xprt)->servername); 411 rcu_dereference(clnt->cl_xprt)->servername);
412 seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, 412 seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_program->name,
413 clnt->cl_prog, clnt->cl_vers); 413 clnt->cl_prog, clnt->cl_vers);
414 seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); 414 seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
415 seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); 415 seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
@@ -480,23 +480,6 @@ static const struct dentry_operations rpc_dentry_operations = {
480 .d_delete = rpc_delete_dentry, 480 .d_delete = rpc_delete_dentry,
481}; 481};
482 482
483/*
484 * Lookup the data. This is trivial - if the dentry didn't already
485 * exist, we know it is negative.
486 */
487static struct dentry *
488rpc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
489{
490 if (dentry->d_name.len > NAME_MAX)
491 return ERR_PTR(-ENAMETOOLONG);
492 d_add(dentry, NULL);
493 return NULL;
494}
495
496static const struct inode_operations rpc_dir_inode_operations = {
497 .lookup = rpc_lookup,
498};
499
500static struct inode * 483static struct inode *
501rpc_get_inode(struct super_block *sb, umode_t mode) 484rpc_get_inode(struct super_block *sb, umode_t mode)
502{ 485{
@@ -509,7 +492,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
509 switch (mode & S_IFMT) { 492 switch (mode & S_IFMT) {
510 case S_IFDIR: 493 case S_IFDIR:
511 inode->i_fop = &simple_dir_operations; 494 inode->i_fop = &simple_dir_operations;
512 inode->i_op = &rpc_dir_inode_operations; 495 inode->i_op = &simple_dir_inode_operations;
513 inc_nlink(inode); 496 inc_nlink(inode);
514 default: 497 default:
515 break; 498 break;
@@ -901,6 +884,159 @@ rpc_unlink(struct dentry *dentry)
901} 884}
902EXPORT_SYMBOL_GPL(rpc_unlink); 885EXPORT_SYMBOL_GPL(rpc_unlink);
903 886
887/**
888 * rpc_init_pipe_dir_head - initialise a struct rpc_pipe_dir_head
889 * @pdh: pointer to struct rpc_pipe_dir_head
890 */
891void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh)
892{
893 INIT_LIST_HEAD(&pdh->pdh_entries);
894 pdh->pdh_dentry = NULL;
895}
896EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_head);
897
898/**
899 * rpc_init_pipe_dir_object - initialise a struct rpc_pipe_dir_object
900 * @pdo: pointer to struct rpc_pipe_dir_object
901 * @pdo_ops: pointer to const struct rpc_pipe_dir_object_ops
902 * @pdo_data: pointer to caller-defined data
903 */
904void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo,
905 const struct rpc_pipe_dir_object_ops *pdo_ops,
906 void *pdo_data)
907{
908 INIT_LIST_HEAD(&pdo->pdo_head);
909 pdo->pdo_ops = pdo_ops;
910 pdo->pdo_data = pdo_data;
911}
912EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_object);
913
914static int
915rpc_add_pipe_dir_object_locked(struct net *net,
916 struct rpc_pipe_dir_head *pdh,
917 struct rpc_pipe_dir_object *pdo)
918{
919 int ret = 0;
920
921 if (pdh->pdh_dentry)
922 ret = pdo->pdo_ops->create(pdh->pdh_dentry, pdo);
923 if (ret == 0)
924 list_add_tail(&pdo->pdo_head, &pdh->pdh_entries);
925 return ret;
926}
927
928static void
929rpc_remove_pipe_dir_object_locked(struct net *net,
930 struct rpc_pipe_dir_head *pdh,
931 struct rpc_pipe_dir_object *pdo)
932{
933 if (pdh->pdh_dentry)
934 pdo->pdo_ops->destroy(pdh->pdh_dentry, pdo);
935 list_del_init(&pdo->pdo_head);
936}
937
938/**
939 * rpc_add_pipe_dir_object - associate a rpc_pipe_dir_object to a directory
940 * @net: pointer to struct net
941 * @pdh: pointer to struct rpc_pipe_dir_head
942 * @pdo: pointer to struct rpc_pipe_dir_object
943 *
944 */
945int
946rpc_add_pipe_dir_object(struct net *net,
947 struct rpc_pipe_dir_head *pdh,
948 struct rpc_pipe_dir_object *pdo)
949{
950 int ret = 0;
951
952 if (list_empty(&pdo->pdo_head)) {
953 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
954
955 mutex_lock(&sn->pipefs_sb_lock);
956 ret = rpc_add_pipe_dir_object_locked(net, pdh, pdo);
957 mutex_unlock(&sn->pipefs_sb_lock);
958 }
959 return ret;
960}
961EXPORT_SYMBOL_GPL(rpc_add_pipe_dir_object);
962
963/**
964 * rpc_remove_pipe_dir_object - remove a rpc_pipe_dir_object from a directory
965 * @net: pointer to struct net
966 * @pdh: pointer to struct rpc_pipe_dir_head
967 * @pdo: pointer to struct rpc_pipe_dir_object
968 *
969 */
970void
971rpc_remove_pipe_dir_object(struct net *net,
972 struct rpc_pipe_dir_head *pdh,
973 struct rpc_pipe_dir_object *pdo)
974{
975 if (!list_empty(&pdo->pdo_head)) {
976 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
977
978 mutex_lock(&sn->pipefs_sb_lock);
979 rpc_remove_pipe_dir_object_locked(net, pdh, pdo);
980 mutex_unlock(&sn->pipefs_sb_lock);
981 }
982}
983EXPORT_SYMBOL_GPL(rpc_remove_pipe_dir_object);
984
985/**
986 * rpc_find_or_alloc_pipe_dir_object
987 * @net: pointer to struct net
988 * @pdh: pointer to struct rpc_pipe_dir_head
989 * @match: match struct rpc_pipe_dir_object to data
990 * @alloc: allocate a new struct rpc_pipe_dir_object
991 * @data: user defined data for match() and alloc()
992 *
993 */
994struct rpc_pipe_dir_object *
995rpc_find_or_alloc_pipe_dir_object(struct net *net,
996 struct rpc_pipe_dir_head *pdh,
997 int (*match)(struct rpc_pipe_dir_object *, void *),
998 struct rpc_pipe_dir_object *(*alloc)(void *),
999 void *data)
1000{
1001 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1002 struct rpc_pipe_dir_object *pdo;
1003
1004 mutex_lock(&sn->pipefs_sb_lock);
1005 list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) {
1006 if (!match(pdo, data))
1007 continue;
1008 goto out;
1009 }
1010 pdo = alloc(data);
1011 if (!pdo)
1012 goto out;
1013 rpc_add_pipe_dir_object_locked(net, pdh, pdo);
1014out:
1015 mutex_unlock(&sn->pipefs_sb_lock);
1016 return pdo;
1017}
1018EXPORT_SYMBOL_GPL(rpc_find_or_alloc_pipe_dir_object);
1019
1020static void
1021rpc_create_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
1022{
1023 struct rpc_pipe_dir_object *pdo;
1024 struct dentry *dir = pdh->pdh_dentry;
1025
1026 list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head)
1027 pdo->pdo_ops->create(dir, pdo);
1028}
1029
1030static void
1031rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
1032{
1033 struct rpc_pipe_dir_object *pdo;
1034 struct dentry *dir = pdh->pdh_dentry;
1035
1036 list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head)
1037 pdo->pdo_ops->destroy(dir, pdo);
1038}
1039
904enum { 1040enum {
905 RPCAUTH_info, 1041 RPCAUTH_info,
906 RPCAUTH_EOF 1042 RPCAUTH_EOF
@@ -941,16 +1077,29 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry,
941 const char *name, 1077 const char *name,
942 struct rpc_clnt *rpc_client) 1078 struct rpc_clnt *rpc_client)
943{ 1079{
944 return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, 1080 struct dentry *ret;
1081
1082 ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
945 rpc_clntdir_populate, rpc_client); 1083 rpc_clntdir_populate, rpc_client);
1084 if (!IS_ERR(ret)) {
1085 rpc_client->cl_pipedir_objects.pdh_dentry = ret;
1086 rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
1087 }
1088 return ret;
946} 1089}
947 1090
948/** 1091/**
949 * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() 1092 * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir()
950 * @dentry: dentry for the pipe 1093 * @rpc_client: rpc_client for the pipe
951 */ 1094 */
952int rpc_remove_client_dir(struct dentry *dentry) 1095int rpc_remove_client_dir(struct rpc_clnt *rpc_client)
953{ 1096{
1097 struct dentry *dentry = rpc_client->cl_pipedir_objects.pdh_dentry;
1098
1099 if (dentry == NULL)
1100 return 0;
1101 rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
1102 rpc_client->cl_pipedir_objects.pdh_dentry = NULL;
954 return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); 1103 return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate);
955} 1104}
956 1105
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 93a7a4e94d80..ff3cc4bf4b24 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -258,7 +258,7 @@ static int rpc_wait_bit_killable(void *word)
258 return 0; 258 return 0;
259} 259}
260 260
261#ifdef RPC_DEBUG 261#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS)
262static void rpc_task_set_debuginfo(struct rpc_task *task) 262static void rpc_task_set_debuginfo(struct rpc_task *task)
263{ 263{
264 static atomic_t rpc_pid; 264 static atomic_t rpc_pid;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 21b75cb08c03..54530490944e 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -188,7 +188,7 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
188 188
189 seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS); 189 seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS);
190 seq_printf(seq, "p/v: %u/%u (%s)\n", 190 seq_printf(seq, "p/v: %u/%u (%s)\n",
191 clnt->cl_prog, clnt->cl_vers, clnt->cl_protname); 191 clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name);
192 192
193 rcu_read_lock(); 193 rcu_read_lock();
194 xprt = rcu_dereference(clnt->cl_xprt); 194 xprt = rcu_dereference(clnt->cl_xprt);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7762b9f8a8b7..9c9caaa5e0d3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -442,7 +442,7 @@ static void svc_tcp_write_space(struct sock *sk)
442{ 442{
443 struct socket *sock = sk->sk_socket; 443 struct socket *sock = sk->sk_socket;
444 444
445 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) 445 if (sk_stream_is_writeable(sk) && sock)
446 clear_bit(SOCK_NOSPACE, &sock->flags); 446 clear_bit(SOCK_NOSPACE, &sock->flags);
447 svc_write_space(sk); 447 svc_write_space(sk);
448} 448}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ddf0602603bd..ee03d35677d9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,6 +47,8 @@
47#include <net/udp.h> 47#include <net/udp.h>
48#include <net/tcp.h> 48#include <net/tcp.h>
49 49
50#include <trace/events/sunrpc.h>
51
50#include "sunrpc.h" 52#include "sunrpc.h"
51 53
52static void xs_close(struct rpc_xprt *xprt); 54static void xs_close(struct rpc_xprt *xprt);
@@ -665,8 +667,10 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
665 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 667 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
666 struct socket *sock = transport->sock; 668 struct socket *sock = transport->sock;
667 669
668 if (sock != NULL) 670 if (sock != NULL) {
669 kernel_sock_shutdown(sock, SHUT_WR); 671 kernel_sock_shutdown(sock, SHUT_WR);
672 trace_rpc_socket_shutdown(xprt, sock);
673 }
670} 674}
671 675
672/** 676/**
@@ -811,6 +815,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
811 815
812 sk->sk_no_check = 0; 816 sk->sk_no_check = 0;
813 817
818 trace_rpc_socket_close(&transport->xprt, sock);
814 sock_release(sock); 819 sock_release(sock);
815} 820}
816 821
@@ -1492,6 +1497,7 @@ static void xs_tcp_state_change(struct sock *sk)
1492 sock_flag(sk, SOCK_ZAPPED), 1497 sock_flag(sk, SOCK_ZAPPED),
1493 sk->sk_shutdown); 1498 sk->sk_shutdown);
1494 1499
1500 trace_rpc_socket_state_change(xprt, sk->sk_socket);
1495 switch (sk->sk_state) { 1501 switch (sk->sk_state) {
1496 case TCP_ESTABLISHED: 1502 case TCP_ESTABLISHED:
1497 spin_lock(&xprt->transport_lock); 1503 spin_lock(&xprt->transport_lock);
@@ -1602,7 +1608,7 @@ static void xs_tcp_write_space(struct sock *sk)
1602 read_lock_bh(&sk->sk_callback_lock); 1608 read_lock_bh(&sk->sk_callback_lock);
1603 1609
1604 /* from net/core/stream.c:sk_stream_write_space */ 1610 /* from net/core/stream.c:sk_stream_write_space */
1605 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) 1611 if (sk_stream_is_writeable(sk))
1606 xs_write_space(sk); 1612 xs_write_space(sk);
1607 1613
1608 read_unlock_bh(&sk->sk_callback_lock); 1614 read_unlock_bh(&sk->sk_callback_lock);
@@ -1896,6 +1902,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
1896 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); 1902 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
1897 1903
1898 status = xs_local_finish_connecting(xprt, sock); 1904 status = xs_local_finish_connecting(xprt, sock);
1905 trace_rpc_socket_connect(xprt, sock, status);
1899 switch (status) { 1906 switch (status) {
1900 case 0: 1907 case 0:
1901 dprintk("RPC: xprt %p connected to %s\n", 1908 dprintk("RPC: xprt %p connected to %s\n",
@@ -2039,6 +2046,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
2039 xprt->address_strings[RPC_DISPLAY_PORT]); 2046 xprt->address_strings[RPC_DISPLAY_PORT]);
2040 2047
2041 xs_udp_finish_connecting(xprt, sock); 2048 xs_udp_finish_connecting(xprt, sock);
2049 trace_rpc_socket_connect(xprt, sock, 0);
2042 status = 0; 2050 status = 0;
2043out: 2051out:
2044 xprt_clear_connecting(xprt); 2052 xprt_clear_connecting(xprt);
@@ -2064,6 +2072,8 @@ static void xs_abort_connection(struct sock_xprt *transport)
2064 memset(&any, 0, sizeof(any)); 2072 memset(&any, 0, sizeof(any));
2065 any.sa_family = AF_UNSPEC; 2073 any.sa_family = AF_UNSPEC;
2066 result = kernel_connect(transport->sock, &any, sizeof(any), 0); 2074 result = kernel_connect(transport->sock, &any, sizeof(any), 0);
2075 trace_rpc_socket_reset_connection(&transport->xprt,
2076 transport->sock, result);
2067 if (!result) 2077 if (!result)
2068 xs_sock_reset_connection_flags(&transport->xprt); 2078 xs_sock_reset_connection_flags(&transport->xprt);
2069 dprintk("RPC: AF_UNSPEC connect return code %d\n", result); 2079 dprintk("RPC: AF_UNSPEC connect return code %d\n", result);
@@ -2194,6 +2204,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2194 xprt->address_strings[RPC_DISPLAY_PORT]); 2204 xprt->address_strings[RPC_DISPLAY_PORT]);
2195 2205
2196 status = xs_tcp_finish_connecting(xprt, sock); 2206 status = xs_tcp_finish_connecting(xprt, sock);
2207 trace_rpc_socket_connect(xprt, sock, status);
2197 dprintk("RPC: %p connect status %d connected %d sock state %d\n", 2208 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
2198 xprt, -status, xprt_connected(xprt), 2209 xprt, -status, xprt_connected(xprt),
2199 sock->sk->sk_state); 2210 sock->sk->sk_state);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c4ce243824bb..86de99ad2976 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1479,7 +1479,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1479 MAX_SKB_FRAGS * PAGE_SIZE); 1479 MAX_SKB_FRAGS * PAGE_SIZE);
1480 1480
1481 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1481 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1482 msg->msg_flags & MSG_DONTWAIT, &err); 1482 msg->msg_flags & MSG_DONTWAIT, &err,
1483 PAGE_ALLOC_COSTLY_ORDER);
1483 if (skb == NULL) 1484 if (skb == NULL)
1484 goto out; 1485 goto out;
1485 1486
@@ -1596,6 +1597,10 @@ out:
1596 return err; 1597 return err;
1597} 1598}
1598 1599
1600/* We use paged skbs for stream sockets, and limit occupancy to 32768
1601 * bytes, and a minimum of a full page.
1602 */
1603#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1599 1604
1600static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, 1605static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1601 struct msghdr *msg, size_t len) 1606 struct msghdr *msg, size_t len)
@@ -1609,6 +1614,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1609 struct scm_cookie tmp_scm; 1614 struct scm_cookie tmp_scm;
1610 bool fds_sent = false; 1615 bool fds_sent = false;
1611 int max_level; 1616 int max_level;
1617 int data_len;
1612 1618
1613 if (NULL == siocb->scm) 1619 if (NULL == siocb->scm)
1614 siocb->scm = &tmp_scm; 1620 siocb->scm = &tmp_scm;
@@ -1635,40 +1641,22 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1635 goto pipe_err; 1641 goto pipe_err;
1636 1642
1637 while (sent < len) { 1643 while (sent < len) {
1638 /* 1644 size = len - sent;
1639 * Optimisation for the fact that under 0.01% of X
1640 * messages typically need breaking up.
1641 */
1642
1643 size = len-sent;
1644 1645
1645 /* Keep two messages in the pipe so it schedules better */ 1646 /* Keep two messages in the pipe so it schedules better */
1646 if (size > ((sk->sk_sndbuf >> 1) - 64)) 1647 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1647 size = (sk->sk_sndbuf >> 1) - 64;
1648 1648
1649 if (size > SKB_MAX_ALLOC) 1649 /* allow fallback to order-0 allocations */
1650 size = SKB_MAX_ALLOC; 1650 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1651
1652 /*
1653 * Grab a buffer
1654 */
1655 1651
1656 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT, 1652 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1657 &err);
1658 1653
1659 if (skb == NULL) 1654 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1655 msg->msg_flags & MSG_DONTWAIT, &err,
1656 get_order(UNIX_SKB_FRAGS_SZ));
1657 if (!skb)
1660 goto out_err; 1658 goto out_err;
1661 1659
1662 /*
1663 * If you pass two values to the sock_alloc_send_skb
1664 * it tries to grab the large buffer with GFP_NOFS
1665 * (which can fail easily), and if it fails grab the
1666 * fallback size buffer which is under a page and will
1667 * succeed. [Alan]
1668 */
1669 size = min_t(int, size, skb_tailroom(skb));
1670
1671
1672 /* Only send the fds in the first buffer */ 1660 /* Only send the fds in the first buffer */
1673 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); 1661 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1674 if (err < 0) { 1662 if (err < 0) {
@@ -1678,7 +1666,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1678 max_level = err + 1; 1666 max_level = err + 1;
1679 fds_sent = true; 1667 fds_sent = true;
1680 1668
1681 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 1669 skb_put(skb, size - data_len);
1670 skb->data_len = data_len;
1671 skb->len = size;
1672 err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov,
1673 sent, size);
1682 if (err) { 1674 if (err) {
1683 kfree_skb(skb); 1675 kfree_skb(skb);
1684 goto out_err; 1676 goto out_err;
@@ -1890,6 +1882,11 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
1890 return timeo; 1882 return timeo;
1891} 1883}
1892 1884
1885static unsigned int unix_skb_len(const struct sk_buff *skb)
1886{
1887 return skb->len - UNIXCB(skb).consumed;
1888}
1889
1893static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, 1890static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1894 struct msghdr *msg, size_t size, 1891 struct msghdr *msg, size_t size,
1895 int flags) 1892 int flags)
@@ -1977,8 +1974,8 @@ again:
1977 } 1974 }
1978 1975
1979 skip = sk_peek_offset(sk, flags); 1976 skip = sk_peek_offset(sk, flags);
1980 while (skip >= skb->len) { 1977 while (skip >= unix_skb_len(skb)) {
1981 skip -= skb->len; 1978 skip -= unix_skb_len(skb);
1982 last = skb; 1979 last = skb;
1983 skb = skb_peek_next(skb, &sk->sk_receive_queue); 1980 skb = skb_peek_next(skb, &sk->sk_receive_queue);
1984 if (!skb) 1981 if (!skb)
@@ -2005,8 +2002,9 @@ again:
2005 sunaddr = NULL; 2002 sunaddr = NULL;
2006 } 2003 }
2007 2004
2008 chunk = min_t(unsigned int, skb->len - skip, size); 2005 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2009 if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) { 2006 if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed + skip,
2007 msg->msg_iov, chunk)) {
2010 if (copied == 0) 2008 if (copied == 0)
2011 copied = -EFAULT; 2009 copied = -EFAULT;
2012 break; 2010 break;
@@ -2016,14 +2014,14 @@ again:
2016 2014
2017 /* Mark read part of skb as used */ 2015 /* Mark read part of skb as used */
2018 if (!(flags & MSG_PEEK)) { 2016 if (!(flags & MSG_PEEK)) {
2019 skb_pull(skb, chunk); 2017 UNIXCB(skb).consumed += chunk;
2020 2018
2021 sk_peek_offset_bwd(sk, chunk); 2019 sk_peek_offset_bwd(sk, chunk);
2022 2020
2023 if (UNIXCB(skb).fp) 2021 if (UNIXCB(skb).fp)
2024 unix_detach_fds(siocb->scm, skb); 2022 unix_detach_fds(siocb->scm, skb);
2025 2023
2026 if (skb->len) 2024 if (unix_skb_len(skb))
2027 break; 2025 break;
2028 2026
2029 skb_unlink(skb, &sk->sk_receive_queue); 2027 skb_unlink(skb, &sk->sk_receive_queue);
@@ -2107,7 +2105,7 @@ long unix_inq_len(struct sock *sk)
2107 if (sk->sk_type == SOCK_STREAM || 2105 if (sk->sk_type == SOCK_STREAM ||
2108 sk->sk_type == SOCK_SEQPACKET) { 2106 sk->sk_type == SOCK_SEQPACKET) {
2109 skb_queue_walk(&sk->sk_receive_queue, skb) 2107 skb_queue_walk(&sk->sk_receive_queue, skb)
2110 amount += skb->len; 2108 amount += unix_skb_len(skb);
2111 } else { 2109 } else {
2112 skb = skb_peek(&sk->sk_receive_queue); 2110 skb = skb_peek(&sk->sk_receive_queue);
2113 if (skb) 2111 if (skb)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 4d9334683f84..545c08b8a1d4 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -96,8 +96,7 @@
96#include <linux/wait.h> 96#include <linux/wait.h>
97#include <linux/workqueue.h> 97#include <linux/workqueue.h>
98#include <net/sock.h> 98#include <net/sock.h>
99 99#include <net/af_vsock.h>
100#include "af_vsock.h"
101 100
102static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); 101static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
103static void vsock_sk_destruct(struct sock *sk); 102static void vsock_sk_destruct(struct sock *sk);
diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h
deleted file mode 100644
index 7d64d3609ec9..000000000000
--- a/net/vmw_vsock/af_vsock.h
+++ /dev/null
@@ -1,175 +0,0 @@
1/*
2 * VMware vSockets Driver
3 *
4 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef __AF_VSOCK_H__
17#define __AF_VSOCK_H__
18
19#include <linux/kernel.h>
20#include <linux/workqueue.h>
21#include <linux/vm_sockets.h>
22
23#include "vsock_addr.h"
24
25#define LAST_RESERVED_PORT 1023
26
27#define vsock_sk(__sk) ((struct vsock_sock *)__sk)
28#define sk_vsock(__vsk) (&(__vsk)->sk)
29
30struct vsock_sock {
31 /* sk must be the first member. */
32 struct sock sk;
33 struct sockaddr_vm local_addr;
34 struct sockaddr_vm remote_addr;
35 /* Links for the global tables of bound and connected sockets. */
36 struct list_head bound_table;
37 struct list_head connected_table;
38 /* Accessed without the socket lock held. This means it can never be
39 * modified outside of socket create or destruct.
40 */
41 bool trusted;
42 bool cached_peer_allow_dgram; /* Dgram communication allowed to
43 * cached peer?
44 */
45 u32 cached_peer; /* Context ID of last dgram destination check. */
46 const struct cred *owner;
47 /* Rest are SOCK_STREAM only. */
48 long connect_timeout;
49 /* Listening socket that this came from. */
50 struct sock *listener;
51 /* Used for pending list and accept queue during connection handshake.
52 * The listening socket is the head for both lists. Sockets created
53 * for connection requests are placed in the pending list until they
54 * are connected, at which point they are put in the accept queue list
55 * so they can be accepted in accept(). If accept() cannot accept the
56 * connection, it is marked as rejected so the cleanup function knows
57 * to clean up the socket.
58 */
59 struct list_head pending_links;
60 struct list_head accept_queue;
61 bool rejected;
62 struct delayed_work dwork;
63 u32 peer_shutdown;
64 bool sent_request;
65 bool ignore_connecting_rst;
66
67 /* Private to transport. */
68 void *trans;
69};
70
71s64 vsock_stream_has_data(struct vsock_sock *vsk);
72s64 vsock_stream_has_space(struct vsock_sock *vsk);
73void vsock_pending_work(struct work_struct *work);
74struct sock *__vsock_create(struct net *net,
75 struct socket *sock,
76 struct sock *parent,
77 gfp_t priority, unsigned short type);
78
79/**** TRANSPORT ****/
80
81struct vsock_transport_recv_notify_data {
82 u64 data1; /* Transport-defined. */
83 u64 data2; /* Transport-defined. */
84 bool notify_on_block;
85};
86
87struct vsock_transport_send_notify_data {
88 u64 data1; /* Transport-defined. */
89 u64 data2; /* Transport-defined. */
90};
91
92struct vsock_transport {
93 /* Initialize/tear-down socket. */
94 int (*init)(struct vsock_sock *, struct vsock_sock *);
95 void (*destruct)(struct vsock_sock *);
96 void (*release)(struct vsock_sock *);
97
98 /* Connections. */
99 int (*connect)(struct vsock_sock *);
100
101 /* DGRAM. */
102 int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
103 int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk,
104 struct msghdr *msg, size_t len, int flags);
105 int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
106 struct iovec *, size_t len);
107 bool (*dgram_allow)(u32 cid, u32 port);
108
109 /* STREAM. */
110 /* TODO: stream_bind() */
111 ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *,
112 size_t len, int flags);
113 ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *,
114 size_t len);
115 s64 (*stream_has_data)(struct vsock_sock *);
116 s64 (*stream_has_space)(struct vsock_sock *);
117 u64 (*stream_rcvhiwat)(struct vsock_sock *);
118 bool (*stream_is_active)(struct vsock_sock *);
119 bool (*stream_allow)(u32 cid, u32 port);
120
121 /* Notification. */
122 int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
123 int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
124 int (*notify_recv_init)(struct vsock_sock *, size_t,
125 struct vsock_transport_recv_notify_data *);
126 int (*notify_recv_pre_block)(struct vsock_sock *, size_t,
127 struct vsock_transport_recv_notify_data *);
128 int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t,
129 struct vsock_transport_recv_notify_data *);
130 int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t,
131 ssize_t, bool, struct vsock_transport_recv_notify_data *);
132 int (*notify_send_init)(struct vsock_sock *,
133 struct vsock_transport_send_notify_data *);
134 int (*notify_send_pre_block)(struct vsock_sock *,
135 struct vsock_transport_send_notify_data *);
136 int (*notify_send_pre_enqueue)(struct vsock_sock *,
137 struct vsock_transport_send_notify_data *);
138 int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t,
139 struct vsock_transport_send_notify_data *);
140
141 /* Shutdown. */
142 int (*shutdown)(struct vsock_sock *, int);
143
144 /* Buffer sizes. */
145 void (*set_buffer_size)(struct vsock_sock *, u64);
146 void (*set_min_buffer_size)(struct vsock_sock *, u64);
147 void (*set_max_buffer_size)(struct vsock_sock *, u64);
148 u64 (*get_buffer_size)(struct vsock_sock *);
149 u64 (*get_min_buffer_size)(struct vsock_sock *);
150 u64 (*get_max_buffer_size)(struct vsock_sock *);
151
152 /* Addressing. */
153 u32 (*get_local_cid)(void);
154};
155
156/**** CORE ****/
157
158int vsock_core_init(const struct vsock_transport *t);
159void vsock_core_exit(void);
160
161/**** UTILS ****/
162
163void vsock_release_pending(struct sock *pending);
164void vsock_add_pending(struct sock *listener, struct sock *pending);
165void vsock_remove_pending(struct sock *listener, struct sock *pending);
166void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
167void vsock_insert_connected(struct vsock_sock *vsk);
168void vsock_remove_bound(struct vsock_sock *vsk);
169void vsock_remove_connected(struct vsock_sock *vsk);
170struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
171struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
172 struct sockaddr_vm *dst);
173void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
174
175#endif /* __AF_VSOCK_H__ */
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index ffc11df02af2..9d6986634e0b 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -34,8 +34,8 @@
34#include <linux/wait.h> 34#include <linux/wait.h>
35#include <linux/workqueue.h> 35#include <linux/workqueue.h>
36#include <net/sock.h> 36#include <net/sock.h>
37#include <net/af_vsock.h>
37 38
38#include "af_vsock.h"
39#include "vmci_transport_notify.h" 39#include "vmci_transport_notify.h"
40 40
41static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); 41static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index fd88ea8924e4..ce6c9623d5f0 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -19,8 +19,8 @@
19#include <linux/vmw_vmci_defs.h> 19#include <linux/vmw_vmci_defs.h>
20#include <linux/vmw_vmci_api.h> 20#include <linux/vmw_vmci_api.h>
21 21
22#include "vsock_addr.h" 22#include <net/vsock_addr.h>
23#include "af_vsock.h" 23#include <net/af_vsock.h>
24 24
25/* If the packet format changes in a release then this should change too. */ 25/* If the packet format changes in a release then this should change too. */
26#define VMCI_TRANSPORT_PACKET_VERSION 1 26#define VMCI_TRANSPORT_PACKET_VERSION 1
diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c
index ec2611b4ea0e..82486ee55eac 100644
--- a/net/vmw_vsock/vsock_addr.c
+++ b/net/vmw_vsock/vsock_addr.c
@@ -17,8 +17,7 @@
17#include <linux/socket.h> 17#include <linux/socket.h>
18#include <linux/stddef.h> 18#include <linux/stddef.h>
19#include <net/sock.h> 19#include <net/sock.h>
20 20#include <net/vsock_addr.h>
21#include "vsock_addr.h"
22 21
23void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port) 22void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port)
24{ 23{
diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h
deleted file mode 100644
index 9ccd5316eac0..000000000000
--- a/net/vmw_vsock/vsock_addr.h
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * VMware vSockets Driver
3 *
4 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _VSOCK_ADDR_H_
17#define _VSOCK_ADDR_H_
18
19#include <linux/vm_sockets.h>
20
21void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port);
22int vsock_addr_validate(const struct sockaddr_vm *addr);
23bool vsock_addr_bound(const struct sockaddr_vm *addr);
24void vsock_addr_unbind(struct sockaddr_vm *addr);
25bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
26 const struct sockaddr_vm *other);
27int vsock_addr_cast(const struct sockaddr *addr, size_t len,
28 struct sockaddr_vm **out_addr);
29
30#endif
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a8c29fa4f1b3..67153964aad2 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -462,6 +462,14 @@ int wiphy_register(struct wiphy *wiphy)
462 return -EINVAL; 462 return -EINVAL;
463#endif 463#endif
464 464
465 if (WARN_ON(wiphy->coalesce &&
466 (!wiphy->coalesce->n_rules ||
467 !wiphy->coalesce->n_patterns) &&
468 (!wiphy->coalesce->pattern_min_len ||
469 wiphy->coalesce->pattern_min_len >
470 wiphy->coalesce->pattern_max_len)))
471 return -EINVAL;
472
465 if (WARN_ON(wiphy->ap_sme_capa && 473 if (WARN_ON(wiphy->ap_sme_capa &&
466 !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME))) 474 !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME)))
467 return -EINVAL; 475 return -EINVAL;
@@ -668,6 +676,7 @@ void wiphy_unregister(struct wiphy *wiphy)
668 rdev_set_wakeup(rdev, false); 676 rdev_set_wakeup(rdev, false);
669#endif 677#endif
670 cfg80211_rdev_free_wowlan(rdev); 678 cfg80211_rdev_free_wowlan(rdev);
679 cfg80211_rdev_free_coalesce(rdev);
671} 680}
672EXPORT_SYMBOL(wiphy_unregister); 681EXPORT_SYMBOL(wiphy_unregister);
673 682
diff --git a/net/wireless/core.h b/net/wireless/core.h
index a6b45bf00f33..9ad43c619c54 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -79,6 +79,8 @@ struct cfg80211_registered_device {
79 /* netlink port which started critical protocol (0 means not started) */ 79 /* netlink port which started critical protocol (0 means not started) */
80 u32 crit_proto_nlportid; 80 u32 crit_proto_nlportid;
81 81
82 struct cfg80211_coalesce *coalesce;
83
82 /* must be last because of the way we do wiphy_priv(), 84 /* must be last because of the way we do wiphy_priv(),
83 * and it should at least be aligned to NETDEV_ALIGN */ 85 * and it should at least be aligned to NETDEV_ALIGN */
84 struct wiphy wiphy __aligned(NETDEV_ALIGN); 86 struct wiphy wiphy __aligned(NETDEV_ALIGN);
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 30c49202ee4d..0553fd4d85ae 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -167,9 +167,12 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
167 * basic rates 167 * basic rates
168 */ 168 */
169 if (!setup->basic_rates) { 169 if (!setup->basic_rates) {
170 enum nl80211_bss_scan_width scan_width;
170 struct ieee80211_supported_band *sband = 171 struct ieee80211_supported_band *sband =
171 rdev->wiphy.bands[setup->chandef.chan->band]; 172 rdev->wiphy.bands[setup->chandef.chan->band];
172 setup->basic_rates = ieee80211_mandatory_rates(sband); 173 scan_width = cfg80211_chandef_to_scan_width(&setup->chandef);
174 setup->basic_rates = ieee80211_mandatory_rates(sband,
175 scan_width);
173 } 176 }
174 177
175 if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) 178 if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef))
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index bfac5e186f57..8d49c1ce3dea 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -621,7 +621,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
621} 621}
622 622
623bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, 623bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
624 const u8 *buf, size_t len, gfp_t gfp) 624 const u8 *buf, size_t len, u32 flags, gfp_t gfp)
625{ 625{
626 struct wiphy *wiphy = wdev->wiphy; 626 struct wiphy *wiphy = wdev->wiphy;
627 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 627 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
@@ -664,7 +664,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
664 /* Indicate the received Action frame to user space */ 664 /* Indicate the received Action frame to user space */
665 if (nl80211_send_mgmt(rdev, wdev, reg->nlportid, 665 if (nl80211_send_mgmt(rdev, wdev, reg->nlportid,
666 freq, sig_mbm, 666 freq, sig_mbm,
667 buf, len, gfp)) 667 buf, len, flags, gfp))
668 continue; 668 continue;
669 669
670 result = true; 670 result = true;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5f6e982cdcf4..af8d84a4a5b2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -349,6 +349,11 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
349 [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, 349 [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
350 .len = IEEE80211_MAX_DATA_LEN }, 350 .len = IEEE80211_MAX_DATA_LEN },
351 [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 }, 351 [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 },
352 [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
353 [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
354 [NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
355 [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 },
356 [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 },
352}; 357};
353 358
354/* policy for the key attributes */ 359/* policy for the key attributes */
@@ -403,6 +408,14 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
403 [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 }, 408 [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 },
404}; 409};
405 410
411/* policy for coalesce rule attributes */
412static const struct nla_policy
413nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
414 [NL80211_ATTR_COALESCE_RULE_DELAY] = { .type = NLA_U32 },
415 [NL80211_ATTR_COALESCE_RULE_CONDITION] = { .type = NLA_U32 },
416 [NL80211_ATTR_COALESCE_RULE_PKT_PATTERN] = { .type = NLA_NESTED },
417};
418
406/* policy for GTK rekey offload attributes */ 419/* policy for GTK rekey offload attributes */
407static const struct nla_policy 420static const struct nla_policy
408nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = { 421nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
@@ -976,7 +989,7 @@ static int nl80211_send_wowlan(struct sk_buff *msg,
976 return -ENOBUFS; 989 return -ENOBUFS;
977 990
978 if (dev->wiphy.wowlan->n_patterns) { 991 if (dev->wiphy.wowlan->n_patterns) {
979 struct nl80211_wowlan_pattern_support pat = { 992 struct nl80211_pattern_support pat = {
980 .max_patterns = dev->wiphy.wowlan->n_patterns, 993 .max_patterns = dev->wiphy.wowlan->n_patterns,
981 .min_pattern_len = dev->wiphy.wowlan->pattern_min_len, 994 .min_pattern_len = dev->wiphy.wowlan->pattern_min_len,
982 .max_pattern_len = dev->wiphy.wowlan->pattern_max_len, 995 .max_pattern_len = dev->wiphy.wowlan->pattern_max_len,
@@ -997,6 +1010,27 @@ static int nl80211_send_wowlan(struct sk_buff *msg,
997} 1010}
998#endif 1011#endif
999 1012
1013static int nl80211_send_coalesce(struct sk_buff *msg,
1014 struct cfg80211_registered_device *dev)
1015{
1016 struct nl80211_coalesce_rule_support rule;
1017
1018 if (!dev->wiphy.coalesce)
1019 return 0;
1020
1021 rule.max_rules = dev->wiphy.coalesce->n_rules;
1022 rule.max_delay = dev->wiphy.coalesce->max_delay;
1023 rule.pat.max_patterns = dev->wiphy.coalesce->n_patterns;
1024 rule.pat.min_pattern_len = dev->wiphy.coalesce->pattern_min_len;
1025 rule.pat.max_pattern_len = dev->wiphy.coalesce->pattern_max_len;
1026 rule.pat.max_pkt_offset = dev->wiphy.coalesce->max_pkt_offset;
1027
1028 if (nla_put(msg, NL80211_ATTR_COALESCE_RULE, sizeof(rule), &rule))
1029 return -ENOBUFS;
1030
1031 return 0;
1032}
1033
1000static int nl80211_send_band_rateinfo(struct sk_buff *msg, 1034static int nl80211_send_band_rateinfo(struct sk_buff *msg,
1001 struct ieee80211_supported_band *sband) 1035 struct ieee80211_supported_band *sband)
1002{ 1036{
@@ -1395,6 +1429,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1395 if (state->split) { 1429 if (state->split) {
1396 CMD(crit_proto_start, CRIT_PROTOCOL_START); 1430 CMD(crit_proto_start, CRIT_PROTOCOL_START);
1397 CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); 1431 CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
1432 if (dev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
1433 CMD(channel_switch, CHANNEL_SWITCH);
1398 } 1434 }
1399 1435
1400#ifdef CONFIG_NL80211_TESTMODE 1436#ifdef CONFIG_NL80211_TESTMODE
@@ -1515,6 +1551,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1515 dev->wiphy.vht_capa_mod_mask)) 1551 dev->wiphy.vht_capa_mod_mask))
1516 goto nla_put_failure; 1552 goto nla_put_failure;
1517 1553
1554 state->split_start++;
1555 break;
1556 case 10:
1557 if (nl80211_send_coalesce(msg, dev))
1558 goto nla_put_failure;
1559
1518 /* done */ 1560 /* done */
1519 state->split_start = 0; 1561 state->split_start = 0;
1520 break; 1562 break;
@@ -5580,6 +5622,111 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
5580 return err; 5622 return err;
5581} 5623}
5582 5624
5625static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
5626{
5627 struct cfg80211_registered_device *rdev = info->user_ptr[0];
5628 struct net_device *dev = info->user_ptr[1];
5629 struct wireless_dev *wdev = dev->ieee80211_ptr;
5630 struct cfg80211_csa_settings params;
5631 /* csa_attrs is defined static to avoid waste of stack size - this
5632 * function is called under RTNL lock, so this should not be a problem.
5633 */
5634 static struct nlattr *csa_attrs[NL80211_ATTR_MAX+1];
5635 u8 radar_detect_width = 0;
5636 int err;
5637
5638 if (!rdev->ops->channel_switch ||
5639 !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH))
5640 return -EOPNOTSUPP;
5641
5642 /* may add IBSS support later */
5643 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
5644 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
5645 return -EOPNOTSUPP;
5646
5647 memset(&params, 0, sizeof(params));
5648
5649 if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
5650 !info->attrs[NL80211_ATTR_CH_SWITCH_COUNT])
5651 return -EINVAL;
5652
5653 /* only important for AP, IBSS and mesh create IEs internally */
5654 if (!info->attrs[NL80211_ATTR_CSA_IES])
5655 return -EINVAL;
5656
5657 /* useless if AP is not running */
5658 if (!wdev->beacon_interval)
5659 return -EINVAL;
5660
5661 params.count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]);
5662
5663 err = nl80211_parse_beacon(info->attrs, &params.beacon_after);
5664 if (err)
5665 return err;
5666
5667 err = nla_parse_nested(csa_attrs, NL80211_ATTR_MAX,
5668 info->attrs[NL80211_ATTR_CSA_IES],
5669 nl80211_policy);
5670 if (err)
5671 return err;
5672
5673 err = nl80211_parse_beacon(csa_attrs, &params.beacon_csa);
5674 if (err)
5675 return err;
5676
5677 if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON])
5678 return -EINVAL;
5679
5680 params.counter_offset_beacon =
5681 nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
5682 if (params.counter_offset_beacon >= params.beacon_csa.tail_len)
5683 return -EINVAL;
5684
5685 /* sanity check - counters should be the same */
5686 if (params.beacon_csa.tail[params.counter_offset_beacon] !=
5687 params.count)
5688 return -EINVAL;
5689
5690 if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) {
5691 params.counter_offset_presp =
5692 nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
5693 if (params.counter_offset_presp >=
5694 params.beacon_csa.probe_resp_len)
5695 return -EINVAL;
5696
5697 if (params.beacon_csa.probe_resp[params.counter_offset_presp] !=
5698 params.count)
5699 return -EINVAL;
5700 }
5701
5702 err = nl80211_parse_chandef(rdev, info, &params.chandef);
5703 if (err)
5704 return err;
5705
5706 if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef))
5707 return -EINVAL;
5708
5709 err = cfg80211_chandef_dfs_required(wdev->wiphy, &params.chandef);
5710 if (err < 0) {
5711 return err;
5712 } else if (err) {
5713 radar_detect_width = BIT(params.chandef.width);
5714 params.radar_required = true;
5715 }
5716
5717 err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
5718 params.chandef.chan,
5719 CHAN_MODE_SHARED,
5720 radar_detect_width);
5721 if (err)
5722 return err;
5723
5724 if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
5725 params.block_tx = true;
5726
5727 return rdev_channel_switch(rdev, dev, &params);
5728}
5729
5583static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, 5730static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
5584 u32 seq, int flags, 5731 u32 seq, int flags,
5585 struct cfg80211_registered_device *rdev, 5732 struct cfg80211_registered_device *rdev,
@@ -5641,6 +5788,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
5641 goto nla_put_failure; 5788 goto nla_put_failure;
5642 if (nla_put_u16(msg, NL80211_BSS_CAPABILITY, res->capability) || 5789 if (nla_put_u16(msg, NL80211_BSS_CAPABILITY, res->capability) ||
5643 nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) || 5790 nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) ||
5791 nla_put_u32(msg, NL80211_BSS_CHAN_WIDTH, res->scan_width) ||
5644 nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO, 5792 nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO,
5645 jiffies_to_msecs(jiffies - intbss->ts))) 5793 jiffies_to_msecs(jiffies - intbss->ts)))
5646 goto nla_put_failure; 5794 goto nla_put_failure;
@@ -6321,6 +6469,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
6321 return -EINVAL; 6469 return -EINVAL;
6322 6470
6323 switch (ibss.chandef.width) { 6471 switch (ibss.chandef.width) {
6472 case NL80211_CHAN_WIDTH_5:
6473 case NL80211_CHAN_WIDTH_10:
6324 case NL80211_CHAN_WIDTH_20_NOHT: 6474 case NL80211_CHAN_WIDTH_20_NOHT:
6325 break; 6475 break;
6326 case NL80211_CHAN_WIDTH_20: 6476 case NL80211_CHAN_WIDTH_20:
@@ -6348,6 +6498,19 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
6348 return err; 6498 return err;
6349 } 6499 }
6350 6500
6501 if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
6502 memcpy(&ibss.ht_capa_mask,
6503 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
6504 sizeof(ibss.ht_capa_mask));
6505
6506 if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
6507 if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
6508 return -EINVAL;
6509 memcpy(&ibss.ht_capa,
6510 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
6511 sizeof(ibss.ht_capa));
6512 }
6513
6351 if (info->attrs[NL80211_ATTR_MCAST_RATE] && 6514 if (info->attrs[NL80211_ATTR_MCAST_RATE] &&
6352 !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate, 6515 !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate,
6353 nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) 6516 nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE])))
@@ -6430,19 +6593,30 @@ static struct genl_multicast_group nl80211_testmode_mcgrp = {
6430static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) 6593static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
6431{ 6594{
6432 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 6595 struct cfg80211_registered_device *rdev = info->user_ptr[0];
6596 struct wireless_dev *wdev =
6597 __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs);
6433 int err; 6598 int err;
6434 6599
6600 if (!rdev->ops->testmode_cmd)
6601 return -EOPNOTSUPP;
6602
6603 if (IS_ERR(wdev)) {
6604 err = PTR_ERR(wdev);
6605 if (err != -EINVAL)
6606 return err;
6607 wdev = NULL;
6608 } else if (wdev->wiphy != &rdev->wiphy) {
6609 return -EINVAL;
6610 }
6611
6435 if (!info->attrs[NL80211_ATTR_TESTDATA]) 6612 if (!info->attrs[NL80211_ATTR_TESTDATA])
6436 return -EINVAL; 6613 return -EINVAL;
6437 6614
6438 err = -EOPNOTSUPP; 6615 rdev->testmode_info = info;
6439 if (rdev->ops->testmode_cmd) { 6616 err = rdev_testmode_cmd(rdev, wdev,
6440 rdev->testmode_info = info;
6441 err = rdev_testmode_cmd(rdev,
6442 nla_data(info->attrs[NL80211_ATTR_TESTDATA]), 6617 nla_data(info->attrs[NL80211_ATTR_TESTDATA]),
6443 nla_len(info->attrs[NL80211_ATTR_TESTDATA])); 6618 nla_len(info->attrs[NL80211_ATTR_TESTDATA]));
6444 rdev->testmode_info = NULL; 6619 rdev->testmode_info = NULL;
6445 }
6446 6620
6447 return err; 6621 return err;
6448} 6622}
@@ -7404,14 +7578,12 @@ static int nl80211_set_cqm_txe(struct genl_info *info,
7404 u32 rate, u32 pkts, u32 intvl) 7578 u32 rate, u32 pkts, u32 intvl)
7405{ 7579{
7406 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 7580 struct cfg80211_registered_device *rdev = info->user_ptr[0];
7407 struct wireless_dev *wdev;
7408 struct net_device *dev = info->user_ptr[1]; 7581 struct net_device *dev = info->user_ptr[1];
7582 struct wireless_dev *wdev = dev->ieee80211_ptr;
7409 7583
7410 if (rate > 100 || intvl > NL80211_CQM_TXE_MAX_INTVL) 7584 if (rate > 100 || intvl > NL80211_CQM_TXE_MAX_INTVL)
7411 return -EINVAL; 7585 return -EINVAL;
7412 7586
7413 wdev = dev->ieee80211_ptr;
7414
7415 if (!rdev->ops->set_cqm_txe_config) 7587 if (!rdev->ops->set_cqm_txe_config)
7416 return -EOPNOTSUPP; 7588 return -EOPNOTSUPP;
7417 7589
@@ -7426,13 +7598,15 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
7426 s32 threshold, u32 hysteresis) 7598 s32 threshold, u32 hysteresis)
7427{ 7599{
7428 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 7600 struct cfg80211_registered_device *rdev = info->user_ptr[0];
7429 struct wireless_dev *wdev;
7430 struct net_device *dev = info->user_ptr[1]; 7601 struct net_device *dev = info->user_ptr[1];
7602 struct wireless_dev *wdev = dev->ieee80211_ptr;
7431 7603
7432 if (threshold > 0) 7604 if (threshold > 0)
7433 return -EINVAL; 7605 return -EINVAL;
7434 7606
7435 wdev = dev->ieee80211_ptr; 7607 /* disabling - hysteresis should also be zero then */
7608 if (threshold == 0)
7609 hysteresis = 0;
7436 7610
7437 if (!rdev->ops->set_cqm_rssi_config) 7611 if (!rdev->ops->set_cqm_rssi_config)
7438 return -EOPNOTSUPP; 7612 return -EOPNOTSUPP;
@@ -7451,36 +7625,33 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
7451 int err; 7625 int err;
7452 7626
7453 cqm = info->attrs[NL80211_ATTR_CQM]; 7627 cqm = info->attrs[NL80211_ATTR_CQM];
7454 if (!cqm) { 7628 if (!cqm)
7455 err = -EINVAL; 7629 return -EINVAL;
7456 goto out;
7457 }
7458 7630
7459 err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm, 7631 err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm,
7460 nl80211_attr_cqm_policy); 7632 nl80211_attr_cqm_policy);
7461 if (err) 7633 if (err)
7462 goto out; 7634 return err;
7463 7635
7464 if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] && 7636 if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
7465 attrs[NL80211_ATTR_CQM_RSSI_HYST]) { 7637 attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
7466 s32 threshold; 7638 s32 threshold = nla_get_s32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
7467 u32 hysteresis; 7639 u32 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
7468 threshold = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
7469 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
7470 err = nl80211_set_cqm_rssi(info, threshold, hysteresis);
7471 } else if (attrs[NL80211_ATTR_CQM_TXE_RATE] &&
7472 attrs[NL80211_ATTR_CQM_TXE_PKTS] &&
7473 attrs[NL80211_ATTR_CQM_TXE_INTVL]) {
7474 u32 rate, pkts, intvl;
7475 rate = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_RATE]);
7476 pkts = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_PKTS]);
7477 intvl = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_INTVL]);
7478 err = nl80211_set_cqm_txe(info, rate, pkts, intvl);
7479 } else
7480 err = -EINVAL;
7481 7640
7482out: 7641 return nl80211_set_cqm_rssi(info, threshold, hysteresis);
7483 return err; 7642 }
7643
7644 if (attrs[NL80211_ATTR_CQM_TXE_RATE] &&
7645 attrs[NL80211_ATTR_CQM_TXE_PKTS] &&
7646 attrs[NL80211_ATTR_CQM_TXE_INTVL]) {
7647 u32 rate = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_RATE]);
7648 u32 pkts = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_PKTS]);
7649 u32 intvl = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_INTVL]);
7650
7651 return nl80211_set_cqm_txe(info, rate, pkts, intvl);
7652 }
7653
7654 return -EINVAL;
7484} 7655}
7485 7656
7486static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) 7657static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
@@ -7596,12 +7767,11 @@ static int nl80211_send_wowlan_patterns(struct sk_buff *msg,
7596 if (!nl_pat) 7767 if (!nl_pat)
7597 return -ENOBUFS; 7768 return -ENOBUFS;
7598 pat_len = wowlan->patterns[i].pattern_len; 7769 pat_len = wowlan->patterns[i].pattern_len;
7599 if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, 7770 if (nla_put(msg, NL80211_PKTPAT_MASK, DIV_ROUND_UP(pat_len, 8),
7600 DIV_ROUND_UP(pat_len, 8),
7601 wowlan->patterns[i].mask) || 7771 wowlan->patterns[i].mask) ||
7602 nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, 7772 nla_put(msg, NL80211_PKTPAT_PATTERN, pat_len,
7603 pat_len, wowlan->patterns[i].pattern) || 7773 wowlan->patterns[i].pattern) ||
7604 nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, 7774 nla_put_u32(msg, NL80211_PKTPAT_OFFSET,
7605 wowlan->patterns[i].pkt_offset)) 7775 wowlan->patterns[i].pkt_offset))
7606 return -ENOBUFS; 7776 return -ENOBUFS;
7607 nla_nest_end(msg, nl_pat); 7777 nla_nest_end(msg, nl_pat);
@@ -7942,7 +8112,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
7942 struct nlattr *pat; 8112 struct nlattr *pat;
7943 int n_patterns = 0; 8113 int n_patterns = 0;
7944 int rem, pat_len, mask_len, pkt_offset; 8114 int rem, pat_len, mask_len, pkt_offset;
7945 struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT]; 8115 struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
7946 8116
7947 nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], 8117 nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
7948 rem) 8118 rem)
@@ -7961,26 +8131,25 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
7961 8131
7962 nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], 8132 nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
7963 rem) { 8133 rem) {
7964 nla_parse(pat_tb, MAX_NL80211_WOWLAN_PKTPAT, 8134 nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
7965 nla_data(pat), nla_len(pat), NULL); 8135 nla_len(pat), NULL);
7966 err = -EINVAL; 8136 err = -EINVAL;
7967 if (!pat_tb[NL80211_WOWLAN_PKTPAT_MASK] || 8137 if (!pat_tb[NL80211_PKTPAT_MASK] ||
7968 !pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]) 8138 !pat_tb[NL80211_PKTPAT_PATTERN])
7969 goto error; 8139 goto error;
7970 pat_len = nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]); 8140 pat_len = nla_len(pat_tb[NL80211_PKTPAT_PATTERN]);
7971 mask_len = DIV_ROUND_UP(pat_len, 8); 8141 mask_len = DIV_ROUND_UP(pat_len, 8);
7972 if (nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]) != 8142 if (nla_len(pat_tb[NL80211_PKTPAT_MASK]) != mask_len)
7973 mask_len)
7974 goto error; 8143 goto error;
7975 if (pat_len > wowlan->pattern_max_len || 8144 if (pat_len > wowlan->pattern_max_len ||
7976 pat_len < wowlan->pattern_min_len) 8145 pat_len < wowlan->pattern_min_len)
7977 goto error; 8146 goto error;
7978 8147
7979 if (!pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]) 8148 if (!pat_tb[NL80211_PKTPAT_OFFSET])
7980 pkt_offset = 0; 8149 pkt_offset = 0;
7981 else 8150 else
7982 pkt_offset = nla_get_u32( 8151 pkt_offset = nla_get_u32(
7983 pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]); 8152 pat_tb[NL80211_PKTPAT_OFFSET]);
7984 if (pkt_offset > wowlan->max_pkt_offset) 8153 if (pkt_offset > wowlan->max_pkt_offset)
7985 goto error; 8154 goto error;
7986 new_triggers.patterns[i].pkt_offset = pkt_offset; 8155 new_triggers.patterns[i].pkt_offset = pkt_offset;
@@ -7994,11 +8163,11 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
7994 new_triggers.patterns[i].pattern = 8163 new_triggers.patterns[i].pattern =
7995 new_triggers.patterns[i].mask + mask_len; 8164 new_triggers.patterns[i].mask + mask_len;
7996 memcpy(new_triggers.patterns[i].mask, 8165 memcpy(new_triggers.patterns[i].mask,
7997 nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]), 8166 nla_data(pat_tb[NL80211_PKTPAT_MASK]),
7998 mask_len); 8167 mask_len);
7999 new_triggers.patterns[i].pattern_len = pat_len; 8168 new_triggers.patterns[i].pattern_len = pat_len;
8000 memcpy(new_triggers.patterns[i].pattern, 8169 memcpy(new_triggers.patterns[i].pattern,
8001 nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]), 8170 nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
8002 pat_len); 8171 pat_len);
8003 i++; 8172 i++;
8004 } 8173 }
@@ -8037,6 +8206,264 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
8037} 8206}
8038#endif 8207#endif
8039 8208
8209static int nl80211_send_coalesce_rules(struct sk_buff *msg,
8210 struct cfg80211_registered_device *rdev)
8211{
8212 struct nlattr *nl_pats, *nl_pat, *nl_rule, *nl_rules;
8213 int i, j, pat_len;
8214 struct cfg80211_coalesce_rules *rule;
8215
8216 if (!rdev->coalesce->n_rules)
8217 return 0;
8218
8219 nl_rules = nla_nest_start(msg, NL80211_ATTR_COALESCE_RULE);
8220 if (!nl_rules)
8221 return -ENOBUFS;
8222
8223 for (i = 0; i < rdev->coalesce->n_rules; i++) {
8224 nl_rule = nla_nest_start(msg, i + 1);
8225 if (!nl_rule)
8226 return -ENOBUFS;
8227
8228 rule = &rdev->coalesce->rules[i];
8229 if (nla_put_u32(msg, NL80211_ATTR_COALESCE_RULE_DELAY,
8230 rule->delay))
8231 return -ENOBUFS;
8232
8233 if (nla_put_u32(msg, NL80211_ATTR_COALESCE_RULE_CONDITION,
8234 rule->condition))
8235 return -ENOBUFS;
8236
8237 nl_pats = nla_nest_start(msg,
8238 NL80211_ATTR_COALESCE_RULE_PKT_PATTERN);
8239 if (!nl_pats)
8240 return -ENOBUFS;
8241
8242 for (j = 0; j < rule->n_patterns; j++) {
8243 nl_pat = nla_nest_start(msg, j + 1);
8244 if (!nl_pat)
8245 return -ENOBUFS;
8246 pat_len = rule->patterns[j].pattern_len;
8247 if (nla_put(msg, NL80211_PKTPAT_MASK,
8248 DIV_ROUND_UP(pat_len, 8),
8249 rule->patterns[j].mask) ||
8250 nla_put(msg, NL80211_PKTPAT_PATTERN, pat_len,
8251 rule->patterns[j].pattern) ||
8252 nla_put_u32(msg, NL80211_PKTPAT_OFFSET,
8253 rule->patterns[j].pkt_offset))
8254 return -ENOBUFS;
8255 nla_nest_end(msg, nl_pat);
8256 }
8257 nla_nest_end(msg, nl_pats);
8258 nla_nest_end(msg, nl_rule);
8259 }
8260 nla_nest_end(msg, nl_rules);
8261
8262 return 0;
8263}
8264
8265static int nl80211_get_coalesce(struct sk_buff *skb, struct genl_info *info)
8266{
8267 struct cfg80211_registered_device *rdev = info->user_ptr[0];
8268 struct sk_buff *msg;
8269 void *hdr;
8270
8271 if (!rdev->wiphy.coalesce)
8272 return -EOPNOTSUPP;
8273
8274 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
8275 if (!msg)
8276 return -ENOMEM;
8277
8278 hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
8279 NL80211_CMD_GET_COALESCE);
8280 if (!hdr)
8281 goto nla_put_failure;
8282
8283 if (rdev->coalesce && nl80211_send_coalesce_rules(msg, rdev))
8284 goto nla_put_failure;
8285
8286 genlmsg_end(msg, hdr);
8287 return genlmsg_reply(msg, info);
8288
8289nla_put_failure:
8290 nlmsg_free(msg);
8291 return -ENOBUFS;
8292}
8293
8294void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev)
8295{
8296 struct cfg80211_coalesce *coalesce = rdev->coalesce;
8297 int i, j;
8298 struct cfg80211_coalesce_rules *rule;
8299
8300 if (!coalesce)
8301 return;
8302
8303 for (i = 0; i < coalesce->n_rules; i++) {
8304 rule = &coalesce->rules[i];
8305 for (j = 0; j < rule->n_patterns; j++)
8306 kfree(rule->patterns[j].mask);
8307 kfree(rule->patterns);
8308 }
8309 kfree(coalesce->rules);
8310 kfree(coalesce);
8311 rdev->coalesce = NULL;
8312}
8313
8314static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
8315 struct nlattr *rule,
8316 struct cfg80211_coalesce_rules *new_rule)
8317{
8318 int err, i;
8319 const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce;
8320 struct nlattr *tb[NUM_NL80211_ATTR_COALESCE_RULE], *pat;
8321 int rem, pat_len, mask_len, pkt_offset, n_patterns = 0;
8322 struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
8323
8324 err = nla_parse(tb, NL80211_ATTR_COALESCE_RULE_MAX, nla_data(rule),
8325 nla_len(rule), nl80211_coalesce_policy);
8326 if (err)
8327 return err;
8328
8329 if (tb[NL80211_ATTR_COALESCE_RULE_DELAY])
8330 new_rule->delay =
8331 nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_DELAY]);
8332 if (new_rule->delay > coalesce->max_delay)
8333 return -EINVAL;
8334
8335 if (tb[NL80211_ATTR_COALESCE_RULE_CONDITION])
8336 new_rule->condition =
8337 nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_CONDITION]);
8338 if (new_rule->condition != NL80211_COALESCE_CONDITION_MATCH &&
8339 new_rule->condition != NL80211_COALESCE_CONDITION_NO_MATCH)
8340 return -EINVAL;
8341
8342 if (!tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN])
8343 return -EINVAL;
8344
8345 nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
8346 rem)
8347 n_patterns++;
8348 if (n_patterns > coalesce->n_patterns)
8349 return -EINVAL;
8350
8351 new_rule->patterns = kcalloc(n_patterns, sizeof(new_rule->patterns[0]),
8352 GFP_KERNEL);
8353 if (!new_rule->patterns)
8354 return -ENOMEM;
8355
8356 new_rule->n_patterns = n_patterns;
8357 i = 0;
8358
8359 nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
8360 rem) {
8361 nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
8362 nla_len(pat), NULL);
8363 if (!pat_tb[NL80211_PKTPAT_MASK] ||
8364 !pat_tb[NL80211_PKTPAT_PATTERN])
8365 return -EINVAL;
8366 pat_len = nla_len(pat_tb[NL80211_PKTPAT_PATTERN]);
8367 mask_len = DIV_ROUND_UP(pat_len, 8);
8368 if (nla_len(pat_tb[NL80211_PKTPAT_MASK]) != mask_len)
8369 return -EINVAL;
8370 if (pat_len > coalesce->pattern_max_len ||
8371 pat_len < coalesce->pattern_min_len)
8372 return -EINVAL;
8373
8374 if (!pat_tb[NL80211_PKTPAT_OFFSET])
8375 pkt_offset = 0;
8376 else
8377 pkt_offset = nla_get_u32(pat_tb[NL80211_PKTPAT_OFFSET]);
8378 if (pkt_offset > coalesce->max_pkt_offset)
8379 return -EINVAL;
8380 new_rule->patterns[i].pkt_offset = pkt_offset;
8381
8382 new_rule->patterns[i].mask =
8383 kmalloc(mask_len + pat_len, GFP_KERNEL);
8384 if (!new_rule->patterns[i].mask)
8385 return -ENOMEM;
8386 new_rule->patterns[i].pattern =
8387 new_rule->patterns[i].mask + mask_len;
8388 memcpy(new_rule->patterns[i].mask,
8389 nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len);
8390 new_rule->patterns[i].pattern_len = pat_len;
8391 memcpy(new_rule->patterns[i].pattern,
8392 nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len);
8393 i++;
8394 }
8395
8396 return 0;
8397}
8398
8399static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
8400{
8401 struct cfg80211_registered_device *rdev = info->user_ptr[0];
8402 const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce;
8403 struct cfg80211_coalesce new_coalesce = {};
8404 struct cfg80211_coalesce *n_coalesce;
8405 int err, rem_rule, n_rules = 0, i, j;
8406 struct nlattr *rule;
8407 struct cfg80211_coalesce_rules *tmp_rule;
8408
8409 if (!rdev->wiphy.coalesce || !rdev->ops->set_coalesce)
8410 return -EOPNOTSUPP;
8411
8412 if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) {
8413 cfg80211_rdev_free_coalesce(rdev);
8414 rdev->ops->set_coalesce(&rdev->wiphy, NULL);
8415 return 0;
8416 }
8417
8418 nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE],
8419 rem_rule)
8420 n_rules++;
8421 if (n_rules > coalesce->n_rules)
8422 return -EINVAL;
8423
8424 new_coalesce.rules = kcalloc(n_rules, sizeof(new_coalesce.rules[0]),
8425 GFP_KERNEL);
8426 if (!new_coalesce.rules)
8427 return -ENOMEM;
8428
8429 new_coalesce.n_rules = n_rules;
8430 i = 0;
8431
8432 nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE],
8433 rem_rule) {
8434 err = nl80211_parse_coalesce_rule(rdev, rule,
8435 &new_coalesce.rules[i]);
8436 if (err)
8437 goto error;
8438
8439 i++;
8440 }
8441
8442 err = rdev->ops->set_coalesce(&rdev->wiphy, &new_coalesce);
8443 if (err)
8444 goto error;
8445
8446 n_coalesce = kmemdup(&new_coalesce, sizeof(new_coalesce), GFP_KERNEL);
8447 if (!n_coalesce) {
8448 err = -ENOMEM;
8449 goto error;
8450 }
8451 cfg80211_rdev_free_coalesce(rdev);
8452 rdev->coalesce = n_coalesce;
8453
8454 return 0;
8455error:
8456 for (i = 0; i < new_coalesce.n_rules; i++) {
8457 tmp_rule = &new_coalesce.rules[i];
8458 for (j = 0; j < tmp_rule->n_patterns; j++)
8459 kfree(tmp_rule->patterns[j].mask);
8460 kfree(tmp_rule->patterns);
8461 }
8462 kfree(new_coalesce.rules);
8463
8464 return err;
8465}
8466
8040static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) 8467static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
8041{ 8468{
8042 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 8469 struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -9043,7 +9470,30 @@ static struct genl_ops nl80211_ops[] = {
9043 .flags = GENL_ADMIN_PERM, 9470 .flags = GENL_ADMIN_PERM,
9044 .internal_flags = NL80211_FLAG_NEED_WDEV_UP | 9471 .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
9045 NL80211_FLAG_NEED_RTNL, 9472 NL80211_FLAG_NEED_RTNL,
9046 } 9473 },
9474 {
9475 .cmd = NL80211_CMD_GET_COALESCE,
9476 .doit = nl80211_get_coalesce,
9477 .policy = nl80211_policy,
9478 .internal_flags = NL80211_FLAG_NEED_WIPHY |
9479 NL80211_FLAG_NEED_RTNL,
9480 },
9481 {
9482 .cmd = NL80211_CMD_SET_COALESCE,
9483 .doit = nl80211_set_coalesce,
9484 .policy = nl80211_policy,
9485 .flags = GENL_ADMIN_PERM,
9486 .internal_flags = NL80211_FLAG_NEED_WIPHY |
9487 NL80211_FLAG_NEED_RTNL,
9488 },
9489 {
9490 .cmd = NL80211_CMD_CHANNEL_SWITCH,
9491 .doit = nl80211_channel_switch,
9492 .policy = nl80211_policy,
9493 .flags = GENL_ADMIN_PERM,
9494 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
9495 NL80211_FLAG_NEED_RTNL,
9496 },
9047}; 9497};
9048 9498
9049static struct genl_multicast_group nl80211_mlme_mcgrp = { 9499static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -10000,7 +10450,7 @@ EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
10000int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, 10450int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
10001 struct wireless_dev *wdev, u32 nlportid, 10451 struct wireless_dev *wdev, u32 nlportid,
10002 int freq, int sig_dbm, 10452 int freq, int sig_dbm,
10003 const u8 *buf, size_t len, gfp_t gfp) 10453 const u8 *buf, size_t len, u32 flags, gfp_t gfp)
10004{ 10454{
10005 struct net_device *netdev = wdev->netdev; 10455 struct net_device *netdev = wdev->netdev;
10006 struct sk_buff *msg; 10456 struct sk_buff *msg;
@@ -10023,7 +10473,9 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
10023 nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) || 10473 nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) ||
10024 (sig_dbm && 10474 (sig_dbm &&
10025 nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) || 10475 nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
10026 nla_put(msg, NL80211_ATTR_FRAME, len, buf)) 10476 nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
10477 (flags &&
10478 nla_put_u32(msg, NL80211_ATTR_RXMGMT_FLAGS, flags)))
10027 goto nla_put_failure; 10479 goto nla_put_failure;
10028 10480
10029 genlmsg_end(msg, hdr); 10481 genlmsg_end(msg, hdr);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index a4073e808c13..2c0f2b3c07cb 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -66,7 +66,7 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
66int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, 66int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
67 struct wireless_dev *wdev, u32 nlpid, 67 struct wireless_dev *wdev, u32 nlpid,
68 int freq, int sig_dbm, 68 int freq, int sig_dbm,
69 const u8 *buf, size_t len, gfp_t gfp); 69 const u8 *buf, size_t len, u32 flags, gfp_t gfp);
70 70
71void 71void
72nl80211_radar_notify(struct cfg80211_registered_device *rdev, 72nl80211_radar_notify(struct cfg80211_registered_device *rdev,
@@ -74,4 +74,6 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
74 enum nl80211_radar_event event, 74 enum nl80211_radar_event event,
75 struct net_device *netdev, gfp_t gfp); 75 struct net_device *netdev, gfp_t gfp);
76 76
77void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev);
78
77#endif /* __NET_WIRELESS_NL80211_H */ 79#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 9f15f0ac824d..37ce9fdfe934 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -516,11 +516,12 @@ static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev)
516 516
517#ifdef CONFIG_NL80211_TESTMODE 517#ifdef CONFIG_NL80211_TESTMODE
518static inline int rdev_testmode_cmd(struct cfg80211_registered_device *rdev, 518static inline int rdev_testmode_cmd(struct cfg80211_registered_device *rdev,
519 struct wireless_dev *wdev,
519 void *data, int len) 520 void *data, int len)
520{ 521{
521 int ret; 522 int ret;
522 trace_rdev_testmode_cmd(&rdev->wiphy); 523 trace_rdev_testmode_cmd(&rdev->wiphy, wdev);
523 ret = rdev->ops->testmode_cmd(&rdev->wiphy, data, len); 524 ret = rdev->ops->testmode_cmd(&rdev->wiphy, wdev, data, len);
524 trace_rdev_return_int(&rdev->wiphy, ret); 525 trace_rdev_return_int(&rdev->wiphy, ret);
525 return ret; 526 return ret;
526} 527}
@@ -923,4 +924,16 @@ static inline void rdev_crit_proto_stop(struct cfg80211_registered_device *rdev,
923 trace_rdev_return_void(&rdev->wiphy); 924 trace_rdev_return_void(&rdev->wiphy);
924} 925}
925 926
927static inline int rdev_channel_switch(struct cfg80211_registered_device *rdev,
928 struct net_device *dev,
929 struct cfg80211_csa_settings *params)
930{
931 int ret;
932
933 trace_rdev_channel_switch(&rdev->wiphy, dev, params);
934 ret = rdev->ops->channel_switch(&rdev->wiphy, dev, params);
935 trace_rdev_return_int(&rdev->wiphy, ret);
936 return ret;
937}
938
926#endif /* __CFG80211_RDEV_OPS */ 939#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ae8c186b50d6..eeb71480f1af 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -465,10 +465,6 @@ static int cmp_bss(struct cfg80211_bss *a,
465 } 465 }
466 } 466 }
467 467
468 /*
469 * we can't use compare_ether_addr here since we need a < > operator.
470 * The binary return value of compare_ether_addr isn't enough
471 */
472 r = memcmp(a->bssid, b->bssid, sizeof(a->bssid)); 468 r = memcmp(a->bssid, b->bssid, sizeof(a->bssid));
473 if (r) 469 if (r)
474 return r; 470 return r;
@@ -651,6 +647,8 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev,
651 continue; 647 continue;
652 if (bss->pub.channel != new->pub.channel) 648 if (bss->pub.channel != new->pub.channel)
653 continue; 649 continue;
650 if (bss->pub.scan_width != new->pub.scan_width)
651 continue;
654 if (rcu_access_pointer(bss->pub.beacon_ies)) 652 if (rcu_access_pointer(bss->pub.beacon_ies))
655 continue; 653 continue;
656 ies = rcu_access_pointer(bss->pub.ies); 654 ies = rcu_access_pointer(bss->pub.ies);
@@ -870,11 +868,12 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
870 868
871/* Returned bss is reference counted and must be cleaned up appropriately. */ 869/* Returned bss is reference counted and must be cleaned up appropriately. */
872struct cfg80211_bss* 870struct cfg80211_bss*
873cfg80211_inform_bss(struct wiphy *wiphy, 871cfg80211_inform_bss_width(struct wiphy *wiphy,
874 struct ieee80211_channel *channel, 872 struct ieee80211_channel *channel,
875 const u8 *bssid, u64 tsf, u16 capability, 873 enum nl80211_bss_scan_width scan_width,
876 u16 beacon_interval, const u8 *ie, size_t ielen, 874 const u8 *bssid, u64 tsf, u16 capability,
877 s32 signal, gfp_t gfp) 875 u16 beacon_interval, const u8 *ie, size_t ielen,
876 s32 signal, gfp_t gfp)
878{ 877{
879 struct cfg80211_bss_ies *ies; 878 struct cfg80211_bss_ies *ies;
880 struct cfg80211_internal_bss tmp = {}, *res; 879 struct cfg80211_internal_bss tmp = {}, *res;
@@ -892,6 +891,7 @@ cfg80211_inform_bss(struct wiphy *wiphy,
892 891
893 memcpy(tmp.pub.bssid, bssid, ETH_ALEN); 892 memcpy(tmp.pub.bssid, bssid, ETH_ALEN);
894 tmp.pub.channel = channel; 893 tmp.pub.channel = channel;
894 tmp.pub.scan_width = scan_width;
895 tmp.pub.signal = signal; 895 tmp.pub.signal = signal;
896 tmp.pub.beacon_interval = beacon_interval; 896 tmp.pub.beacon_interval = beacon_interval;
897 tmp.pub.capability = capability; 897 tmp.pub.capability = capability;
@@ -924,14 +924,15 @@ cfg80211_inform_bss(struct wiphy *wiphy,
924 /* cfg80211_bss_update gives us a referenced result */ 924 /* cfg80211_bss_update gives us a referenced result */
925 return &res->pub; 925 return &res->pub;
926} 926}
927EXPORT_SYMBOL(cfg80211_inform_bss); 927EXPORT_SYMBOL(cfg80211_inform_bss_width);
928 928
929/* Returned bss is reference counted and must be cleaned up appropriately. */ 929/* Returned bss is reference counted and must be cleaned up appropriately. */
930struct cfg80211_bss * 930struct cfg80211_bss *
931cfg80211_inform_bss_frame(struct wiphy *wiphy, 931cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
932 struct ieee80211_channel *channel, 932 struct ieee80211_channel *channel,
933 struct ieee80211_mgmt *mgmt, size_t len, 933 enum nl80211_bss_scan_width scan_width,
934 s32 signal, gfp_t gfp) 934 struct ieee80211_mgmt *mgmt, size_t len,
935 s32 signal, gfp_t gfp)
935{ 936{
936 struct cfg80211_internal_bss tmp = {}, *res; 937 struct cfg80211_internal_bss tmp = {}, *res;
937 struct cfg80211_bss_ies *ies; 938 struct cfg80211_bss_ies *ies;
@@ -941,7 +942,8 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
941 BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != 942 BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
942 offsetof(struct ieee80211_mgmt, u.beacon.variable)); 943 offsetof(struct ieee80211_mgmt, u.beacon.variable));
943 944
944 trace_cfg80211_inform_bss_frame(wiphy, channel, mgmt, len, signal); 945 trace_cfg80211_inform_bss_width_frame(wiphy, channel, scan_width, mgmt,
946 len, signal);
945 947
946 if (WARN_ON(!mgmt)) 948 if (WARN_ON(!mgmt))
947 return NULL; 949 return NULL;
@@ -976,6 +978,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
976 978
977 memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN); 979 memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN);
978 tmp.pub.channel = channel; 980 tmp.pub.channel = channel;
981 tmp.pub.scan_width = scan_width;
979 tmp.pub.signal = signal; 982 tmp.pub.signal = signal;
980 tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); 983 tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
981 tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); 984 tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
@@ -991,7 +994,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
991 /* cfg80211_bss_update gives us a referenced result */ 994 /* cfg80211_bss_update gives us a referenced result */
992 return &res->pub; 995 return &res->pub;
993} 996}
994EXPORT_SYMBOL(cfg80211_inform_bss_frame); 997EXPORT_SYMBOL(cfg80211_inform_bss_width_frame);
995 998
996void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) 999void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
997{ 1000{
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index a23253e06358..9ee6bc1a7610 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -30,7 +30,8 @@ static ssize_t name ## _show(struct device *dev, \
30 char *buf) \ 30 char *buf) \
31{ \ 31{ \
32 return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \ 32 return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \
33} 33} \
34static DEVICE_ATTR_RO(name)
34 35
35SHOW_FMT(index, "%d", wiphy_idx); 36SHOW_FMT(index, "%d", wiphy_idx);
36SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); 37SHOW_FMT(macaddress, "%pM", wiphy.perm_addr);
@@ -42,7 +43,7 @@ static ssize_t name_show(struct device *dev,
42 struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy; 43 struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy;
43 return sprintf(buf, "%s\n", dev_name(&wiphy->dev)); 44 return sprintf(buf, "%s\n", dev_name(&wiphy->dev));
44} 45}
45 46static DEVICE_ATTR_RO(name);
46 47
47static ssize_t addresses_show(struct device *dev, 48static ssize_t addresses_show(struct device *dev,
48 struct device_attribute *attr, 49 struct device_attribute *attr,
@@ -60,15 +61,17 @@ static ssize_t addresses_show(struct device *dev,
60 61
61 return buf - start; 62 return buf - start;
62} 63}
63 64static DEVICE_ATTR_RO(addresses);
64static struct device_attribute ieee80211_dev_attrs[] = { 65
65 __ATTR_RO(index), 66static struct attribute *ieee80211_attrs[] = {
66 __ATTR_RO(macaddress), 67 &dev_attr_index.attr,
67 __ATTR_RO(address_mask), 68 &dev_attr_macaddress.attr,
68 __ATTR_RO(addresses), 69 &dev_attr_address_mask.attr,
69 __ATTR_RO(name), 70 &dev_attr_addresses.attr,
70 {} 71 &dev_attr_name.attr,
72 NULL,
71}; 73};
74ATTRIBUTE_GROUPS(ieee80211);
72 75
73static void wiphy_dev_release(struct device *dev) 76static void wiphy_dev_release(struct device *dev)
74{ 77{
@@ -146,7 +149,7 @@ struct class ieee80211_class = {
146 .name = "ieee80211", 149 .name = "ieee80211",
147 .owner = THIS_MODULE, 150 .owner = THIS_MODULE,
148 .dev_release = wiphy_dev_release, 151 .dev_release = wiphy_dev_release,
149 .dev_attrs = ieee80211_dev_attrs, 152 .dev_groups = ieee80211_groups,
150 .dev_uevent = wiphy_uevent, 153 .dev_uevent = wiphy_uevent,
151#ifdef CONFIG_PM 154#ifdef CONFIG_PM
152 .suspend = wiphy_suspend, 155 .suspend = wiphy_suspend,
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index e1534baf2ebb..ba5f0d6614d5 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1293,15 +1293,17 @@ TRACE_EVENT(rdev_return_int_int,
1293 1293
1294#ifdef CONFIG_NL80211_TESTMODE 1294#ifdef CONFIG_NL80211_TESTMODE
1295TRACE_EVENT(rdev_testmode_cmd, 1295TRACE_EVENT(rdev_testmode_cmd,
1296 TP_PROTO(struct wiphy *wiphy), 1296 TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
1297 TP_ARGS(wiphy), 1297 TP_ARGS(wiphy, wdev),
1298 TP_STRUCT__entry( 1298 TP_STRUCT__entry(
1299 WIPHY_ENTRY 1299 WIPHY_ENTRY
1300 WDEV_ENTRY
1300 ), 1301 ),
1301 TP_fast_assign( 1302 TP_fast_assign(
1302 WIPHY_ASSIGN; 1303 WIPHY_ASSIGN;
1304 WDEV_ASSIGN;
1303 ), 1305 ),
1304 TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG) 1306 TP_printk(WIPHY_PR_FMT WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
1305); 1307);
1306 1308
1307TRACE_EVENT(rdev_testmode_dump, 1309TRACE_EVENT(rdev_testmode_dump,
@@ -1841,6 +1843,39 @@ TRACE_EVENT(rdev_crit_proto_stop,
1841 WIPHY_PR_ARG, WDEV_PR_ARG) 1843 WIPHY_PR_ARG, WDEV_PR_ARG)
1842); 1844);
1843 1845
1846TRACE_EVENT(rdev_channel_switch,
1847 TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
1848 struct cfg80211_csa_settings *params),
1849 TP_ARGS(wiphy, netdev, params),
1850 TP_STRUCT__entry(
1851 WIPHY_ENTRY
1852 NETDEV_ENTRY
1853 CHAN_DEF_ENTRY
1854 __field(u16, counter_offset_beacon)
1855 __field(u16, counter_offset_presp)
1856 __field(bool, radar_required)
1857 __field(bool, block_tx)
1858 __field(u8, count)
1859 ),
1860 TP_fast_assign(
1861 WIPHY_ASSIGN;
1862 NETDEV_ASSIGN;
1863 CHAN_DEF_ASSIGN(&params->chandef);
1864 __entry->counter_offset_beacon = params->counter_offset_beacon;
1865 __entry->counter_offset_presp = params->counter_offset_presp;
1866 __entry->radar_required = params->radar_required;
1867 __entry->block_tx = params->block_tx;
1868 __entry->count = params->count;
1869 ),
1870 TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
1871 ", block_tx: %d, count: %u, radar_required: %d"
1872 ", counter offsets (beacon/presp): %u/%u",
1873 WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
1874 __entry->block_tx, __entry->count, __entry->radar_required,
1875 __entry->counter_offset_beacon,
1876 __entry->counter_offset_presp)
1877);
1878
1844/************************************************************* 1879/*************************************************************
1845 * cfg80211 exported functions traces * 1880 * cfg80211 exported functions traces *
1846 *************************************************************/ 1881 *************************************************************/
@@ -2391,26 +2426,30 @@ TRACE_EVENT(cfg80211_get_bss,
2391 __entry->capa_mask, __entry->capa_val) 2426 __entry->capa_mask, __entry->capa_val)
2392); 2427);
2393 2428
2394TRACE_EVENT(cfg80211_inform_bss_frame, 2429TRACE_EVENT(cfg80211_inform_bss_width_frame,
2395 TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel, 2430 TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel,
2431 enum nl80211_bss_scan_width scan_width,
2396 struct ieee80211_mgmt *mgmt, size_t len, 2432 struct ieee80211_mgmt *mgmt, size_t len,
2397 s32 signal), 2433 s32 signal),
2398 TP_ARGS(wiphy, channel, mgmt, len, signal), 2434 TP_ARGS(wiphy, channel, scan_width, mgmt, len, signal),
2399 TP_STRUCT__entry( 2435 TP_STRUCT__entry(
2400 WIPHY_ENTRY 2436 WIPHY_ENTRY
2401 CHAN_ENTRY 2437 CHAN_ENTRY
2438 __field(enum nl80211_bss_scan_width, scan_width)
2402 __dynamic_array(u8, mgmt, len) 2439 __dynamic_array(u8, mgmt, len)
2403 __field(s32, signal) 2440 __field(s32, signal)
2404 ), 2441 ),
2405 TP_fast_assign( 2442 TP_fast_assign(
2406 WIPHY_ASSIGN; 2443 WIPHY_ASSIGN;
2407 CHAN_ASSIGN(channel); 2444 CHAN_ASSIGN(channel);
2445 __entry->scan_width = scan_width;
2408 if (mgmt) 2446 if (mgmt)
2409 memcpy(__get_dynamic_array(mgmt), mgmt, len); 2447 memcpy(__get_dynamic_array(mgmt), mgmt, len);
2410 __entry->signal = signal; 2448 __entry->signal = signal;
2411 ), 2449 ),
2412 TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "signal: %d", 2450 TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d",
2413 WIPHY_PR_ARG, CHAN_PR_ARG, __entry->signal) 2451 WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
2452 __entry->signal)
2414); 2453);
2415 2454
2416DECLARE_EVENT_CLASS(cfg80211_bss_evt, 2455DECLARE_EVENT_CLASS(cfg80211_bss_evt,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 74458b7f61eb..ce090c1c5e4f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -33,7 +33,8 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
33} 33}
34EXPORT_SYMBOL(ieee80211_get_response_rate); 34EXPORT_SYMBOL(ieee80211_get_response_rate);
35 35
36u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband) 36u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
37 enum nl80211_bss_scan_width scan_width)
37{ 38{
38 struct ieee80211_rate *bitrates; 39 struct ieee80211_rate *bitrates;
39 u32 mandatory_rates = 0; 40 u32 mandatory_rates = 0;
@@ -43,10 +44,15 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband)
43 if (WARN_ON(!sband)) 44 if (WARN_ON(!sband))
44 return 1; 45 return 1;
45 46
46 if (sband->band == IEEE80211_BAND_2GHZ) 47 if (sband->band == IEEE80211_BAND_2GHZ) {
47 mandatory_flag = IEEE80211_RATE_MANDATORY_B; 48 if (scan_width == NL80211_BSS_CHAN_WIDTH_5 ||
48 else 49 scan_width == NL80211_BSS_CHAN_WIDTH_10)
50 mandatory_flag = IEEE80211_RATE_MANDATORY_G;
51 else
52 mandatory_flag = IEEE80211_RATE_MANDATORY_B;
53 } else {
49 mandatory_flag = IEEE80211_RATE_MANDATORY_A; 54 mandatory_flag = IEEE80211_RATE_MANDATORY_A;
55 }
50 56
51 bitrates = sband->bitrates; 57 bitrates = sband->bitrates;
52 for (i = 0; i < sband->n_bitrates; i++) 58 for (i = 0; i < sband->n_bitrates; i++)
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 66c638730c7a..b8253250d723 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -156,6 +156,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
156 case X25_FAC_CALLING_AE: 156 case X25_FAC_CALLING_AE:
157 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) 157 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
158 return -1; 158 return -1;
159 if (p[2] > X25_MAX_AE_LEN)
160 return -1;
159 dte_facs->calling_len = p[2]; 161 dte_facs->calling_len = p[2];
160 memcpy(dte_facs->calling_ae, &p[3], p[1] - 1); 162 memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
161 *vc_fac_mask |= X25_MASK_CALLING_AE; 163 *vc_fac_mask |= X25_MASK_CALLING_AE;
@@ -163,6 +165,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
163 case X25_FAC_CALLED_AE: 165 case X25_FAC_CALLED_AE:
164 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) 166 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
165 return -1; 167 return -1;
168 if (p[2] > X25_MAX_AE_LEN)
169 return -1;
166 dte_facs->called_len = p[2]; 170 dte_facs->called_len = p[2];
167 memcpy(dte_facs->called_ae, &p[3], p[1] - 1); 171 memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
168 *vc_fac_mask |= X25_MASK_CALLED_AE; 172 *vc_fac_mask |= X25_MASK_CALLED_AE;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f77c371ea72b..ed38d5d81f9e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -308,7 +308,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
308{ 308{
309 BUG_ON(!policy->walk.dead); 309 BUG_ON(!policy->walk.dead);
310 310
311 if (del_timer(&policy->timer)) 311 if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
312 BUG(); 312 BUG();
313 313
314 security_xfrm_policy_free(policy->security); 314 security_xfrm_policy_free(policy->security);
@@ -658,7 +658,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
658 xfrm_pol_hold(policy); 658 xfrm_pol_hold(policy);
659 net->xfrm.policy_count[dir]++; 659 net->xfrm.policy_count[dir]++;
660 atomic_inc(&flow_cache_genid); 660 atomic_inc(&flow_cache_genid);
661 rt_genid_bump(net); 661
662 /* After previous checking, family can either be AF_INET or AF_INET6 */
663 if (policy->family == AF_INET)
664 rt_genid_bump_ipv4(net);
665 else
666 rt_genid_bump_ipv6(net);
667
662 if (delpol) { 668 if (delpol) {
663 xfrm_policy_requeue(delpol, policy); 669 xfrm_policy_requeue(delpol, policy);
664 __xfrm_policy_unlink(delpol, dir); 670 __xfrm_policy_unlink(delpol, dir);
@@ -2119,8 +2125,6 @@ restart:
2119 * have the xfrm_state's. We need to wait for KM to 2125 * have the xfrm_state's. We need to wait for KM to
2120 * negotiate new SA's or bail out with error.*/ 2126 * negotiate new SA's or bail out with error.*/
2121 if (net->xfrm.sysctl_larval_drop) { 2127 if (net->xfrm.sysctl_larval_drop) {
2122 /* EREMOTE tells the caller to generate
2123 * a one-shot blackhole route. */
2124 dst_release(dst); 2128 dst_release(dst);
2125 xfrm_pols_put(pols, drop_pols); 2129 xfrm_pols_put(pols, drop_pols);
2126 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); 2130 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 54c0acd29468..b9c3f9e943a9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -496,7 +496,8 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
496 INIT_HLIST_NODE(&x->bydst); 496 INIT_HLIST_NODE(&x->bydst);
497 INIT_HLIST_NODE(&x->bysrc); 497 INIT_HLIST_NODE(&x->bysrc);
498 INIT_HLIST_NODE(&x->byspi); 498 INIT_HLIST_NODE(&x->byspi);
499 tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_REALTIME, HRTIMER_MODE_ABS); 499 tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
500 CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
500 setup_timer(&x->rtimer, xfrm_replay_timer_handler, 501 setup_timer(&x->rtimer, xfrm_replay_timer_handler,
501 (unsigned long)x); 502 (unsigned long)x);
502 x->curlft.add_time = get_seconds(); 503 x->curlft.add_time = get_seconds();
@@ -987,11 +988,13 @@ void xfrm_state_insert(struct xfrm_state *x)
987EXPORT_SYMBOL(xfrm_state_insert); 988EXPORT_SYMBOL(xfrm_state_insert);
988 989
989/* xfrm_state_lock is held */ 990/* xfrm_state_lock is held */
990static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, 991static struct xfrm_state *__find_acq_core(struct net *net,
992 const struct xfrm_mark *m,
991 unsigned short family, u8 mode, 993 unsigned short family, u8 mode,
992 u32 reqid, u8 proto, 994 u32 reqid, u8 proto,
993 const xfrm_address_t *daddr, 995 const xfrm_address_t *daddr,
994 const xfrm_address_t *saddr, int create) 996 const xfrm_address_t *saddr,
997 int create)
995{ 998{
996 unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); 999 unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
997 struct xfrm_state *x; 1000 struct xfrm_state *x;
@@ -1396,9 +1399,9 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1396EXPORT_SYMBOL(xfrm_state_lookup_byaddr); 1399EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1397 1400
1398struct xfrm_state * 1401struct xfrm_state *
1399xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto, 1402xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
1400 const xfrm_address_t *daddr, const xfrm_address_t *saddr, 1403 u8 proto, const xfrm_address_t *daddr,
1401 int create, unsigned short family) 1404 const xfrm_address_t *saddr, int create, unsigned short family)
1402{ 1405{
1403 struct xfrm_state *x; 1406 struct xfrm_state *x;
1404 1407