From c49dc9008b1c641a86837297df7c90cef070571b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 24 Jan 2013 09:40:00 +0300 Subject: cfg80211: off by one in ieee80211_bss() We do a: sprintf(buf, " Last beacon: %ums ago", elapsed_jiffies_msecs(bss->ts)); elapsed_jiffies_msecs() can return a 10 digit number so "buf" needs to be 31 characters long. Signed-off-by: Dan Carpenter Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 01592d7d4789..45f1618c8e23 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1358,7 +1358,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, &iwe, IW_EV_UINT_LEN); } - buf = kmalloc(30, GFP_ATOMIC); + buf = kmalloc(31, GFP_ATOMIC); if (buf) { memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; -- cgit v1.2.2 From 80d84ef3ff1ddc7a829c58980a9dd566a8af5203 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 22 Jan 2013 05:13:48 +0000 Subject: l2tp: prevent l2tp_tunnel_delete racing with userspace close If a tunnel socket is created by userspace, l2tp hooks the socket destructor in order to clean up resources if userspace closes the socket or crashes. It also caches a pointer to the struct sock for use in the data path and in the netlink interface. While it is safe to use the cached sock pointer in the data path, where the skb references keep the socket alive, it is not safe to use it elsewhere as such access introduces a race with userspace closing the socket. In particular, l2tp_tunnel_delete is prone to oopsing if a multithreaded userspace application closes a socket at the same time as sending a netlink delete command for the tunnel. This patch fixes this oops by forcing l2tp_tunnel_delete to explicitly look up a tunnel socket held by userspace using sockfd_lookup(). Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 76 ++++++++++++++++++++++++++++++++++++++++++++-------- net/l2tp/l2tp_core.h | 5 +++- 2 files changed, 69 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1a9f3723c13c..06389d5ff120 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -168,6 +168,51 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) } +/* Lookup the tunnel socket, possibly involving the fs code if the socket is + * owned by userspace. A struct sock returned from this function must be + * released using l2tp_tunnel_sock_put once you're done with it. + */ +struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) +{ + int err = 0; + struct socket *sock = NULL; + struct sock *sk = NULL; + + if (!tunnel) + goto out; + + if (tunnel->fd >= 0) { + /* Socket is owned by userspace, who might be in the process + * of closing it. Look the socket up using the fd to ensure + * consistency. + */ + sock = sockfd_lookup(tunnel->fd, &err); + if (sock) + sk = sock->sk; + } else { + /* Socket is owned by kernelspace */ + sk = tunnel->sock; + } + +out: + return sk; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_lookup); + +/* Drop a reference to a tunnel socket obtained via. l2tp_tunnel_sock_put */ +void l2tp_tunnel_sock_put(struct sock *sk) +{ + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel) { + if (tunnel->fd >= 0) { + /* Socket is owned by userspace */ + sockfd_put(sk->sk_socket); + } + sock_put(sk); + } +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); + /* Lookup a session by id in the global session list */ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) @@ -1607,6 +1652,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 tunnel->old_sk_destruct = sk->sk_destruct; sk->sk_destruct = &l2tp_tunnel_destruct; tunnel->sock = sk; + tunnel->fd = fd; lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock"); sk->sk_allocation = GFP_ATOMIC; @@ -1642,24 +1688,32 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { - int err = 0; - struct socket *sock = tunnel->sock ? tunnel->sock->sk_socket : NULL; + int err = -EBADF; + struct socket *sock = NULL; + struct sock *sk = NULL; + + sk = l2tp_tunnel_sock_lookup(tunnel); + if (!sk) + goto out; + + sock = sk->sk_socket; + BUG_ON(!sock); /* Force the tunnel socket to close. This will eventually * cause the tunnel to be deleted via the normal socket close * mechanisms when userspace closes the tunnel socket. */ - if (sock != NULL) { - err = inet_shutdown(sock, 2); + err = inet_shutdown(sock, 2); - /* If the tunnel's socket was created by the kernel, - * close the socket here since the socket was not - * created by userspace. - */ - if (sock->file == NULL) - err = inet_release(sock); - } + /* If the tunnel's socket was created by the kernel, + * close the socket here since the socket was not + * created by userspace. + */ + if (sock->file == NULL) + err = inet_release(sock); + l2tp_tunnel_sock_put(sk); +out: return err; } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 56d583e083a7..e62204cad4fe 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -188,7 +188,8 @@ struct l2tp_tunnel { int (*recv_payload_hook)(struct sk_buff *skb); void (*old_sk_destruct)(struct sock *); struct sock *sock; /* Parent socket */ - int fd; + int fd; /* Parent fd, if tunnel socket + * was created by userspace */ uint8_t priv[0]; /* private data */ }; @@ -228,6 +229,8 @@ out: return tunnel; } +extern struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel); +extern void l2tp_tunnel_sock_put(struct sock *sk); extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); -- cgit v1.2.2 From a13d3104710184ecc43edc35a25ae8092058463f Mon Sep 17 00:00:00 2001 From: Johannes Naab Date: Wed, 23 Jan 2013 11:36:51 +0000 Subject: netem: fix delay calculation in rate extension The delay calculation with the rate extension introduces in v3.3 does not properly work, if other packets are still queued for transmission. For the delay calculation to work, both delay types (latency and delay introduces by rate limitation) have to be handled differently. The latency delay for a packet can overlap with the delay of other packets. The delay introduced by the rate however is separate, and can only start, once all other rate-introduced delays finished. Latency delay is from same distribution for each packet, rate delay depends on the packet size. .: latency delay -: rate delay x: additional delay we have to wait since another packet is currently transmitted .....---- Packet 1 .....xx------ Packet 2 .....------ Packet 3 ^^^^^ latency stacks ^^ rate delay doesn't stack ^^ latency stacks -----> time When a packet is enqueued, we first consider the latency delay. If other packets are already queued, we can reduce the latency delay until the last packet in the queue is send, however the latency delay cannot be <0, since this would mean that the rate is overcommitted. The new reference point is the time at which the last packet will be send. To find the time, when the packet should be send, the rate introduces delay has to be added on top of that. Signed-off-by: Johannes Naab Acked-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 298c0ddfb57e..3d2acc7a9c80 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -438,18 +438,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->rate) { struct sk_buff_head *list = &sch->q; - delay += packet_len_2_sched_time(skb->len, q); - if (!skb_queue_empty(list)) { /* - * Last packet in queue is reference point (now). - * First packet in queue is already in flight, - * calculate this time bonus and substract + * Last packet in queue is reference point (now), + * calculate this time bonus and subtract * from delay. */ - delay -= now - netem_skb_cb(skb_peek(list))->time_to_send; + delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now; + delay = max_t(psched_tdiff_t, 0, delay); now = netem_skb_cb(skb_peek_tail(list))->time_to_send; } + + delay += packet_len_2_sched_time(skb->len, q); } cb->time_to_send = now + delay; -- cgit v1.2.2 From 604dfd6efc9b79bce432f2394791708d8e8f6efc Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 27 Jan 2013 21:14:08 +0000 Subject: pktgen: correctly handle failures when adding a device The return value of pktgen_add_device() is not checked, so even if we fail to add some device, for example, non-exist one, we still see "OK:...". This patch fixes it. After this patch, I got: # echo "add_device non-exist" > /proc/net/pktgen/kpktgend_0 -bash: echo: write error: No such device # cat /proc/net/pktgen/kpktgend_0 Running: Stopped: Result: ERROR: can not add device non-exist # echo "add_device eth0" > /proc/net/pktgen/kpktgend_0 # cat /proc/net/pktgen/kpktgend_0 Running: Stopped: eth0 Result: OK: add_device=eth0 (Candidate for -stable) Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/pktgen.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b29dacf900f9..e6e1cbe863f5 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1781,10 +1781,13 @@ static ssize_t pktgen_thread_write(struct file *file, return -EFAULT; i += len; mutex_lock(&pktgen_thread_lock); - pktgen_add_device(t, f); + ret = pktgen_add_device(t, f); mutex_unlock(&pktgen_thread_lock); - ret = count; - sprintf(pg_result, "OK: add_device=%s", f); + if (!ret) { + ret = count; + sprintf(pg_result, "OK: add_device=%s", f); + } else + sprintf(pg_result, "ERROR: can not add device %s", f); goto out; } -- cgit v1.2.2 From 5e98a36ed4bf6ea396170e3af0dd4fcbe51d772f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Mon, 28 Jan 2013 10:44:29 +0000 Subject: ipv6 addrconf: Fix interface identifiers of 802.15.4 devices. The "Universal/Local" (U/L) bit must be complmented according to RFC4944 and RFC2464. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 420e56326384..1b5d8cb9b123 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1660,6 +1660,7 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) if (dev->addr_len != IEEE802154_ADDR_LEN) return -1; memcpy(eui, dev->dev_addr, 8); + eui[0] ^= 2; return 0; } -- cgit v1.2.2 From 2aeef18d37aa8c0bfca169d4ede1790d972bf649 Mon Sep 17 00:00:00 2001 From: Nivedita Singhvi Date: Mon, 28 Jan 2013 17:52:37 +0000 Subject: tcp: Increment LISTENOVERFLOW and LISTENDROPS in tcp_v4_conn_request() We drop a connection request if the accept backlog is full and there are sufficient packets in the syn queue to warrant starting drops. Increment the appropriate counters so this isn't silent, for accurate stats and help in debugging. This patch assumes LINUX_MIB_LISTENDROPS is a superset of/includes the counter LINUX_MIB_LISTENOVERFLOWS. Signed-off-by: Nivedita Singhvi Acked-by: Vijay Subramanian Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 70b09ef2463b..629937d514eb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1500,8 +1500,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * clogging syn queue with openreqs with exponentially increasing * timeout. */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto drop; + } req = inet_reqsk_alloc(&tcp_request_sock_ops); if (!req) -- cgit v1.2.2 From bd30e947207e2ea0ff2c08f5b4a03025ddce48d3 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 29 Jan 2013 22:26:08 +0000 Subject: ipv6: do not create neighbor entries for local delivery They will be created at output, if ever needed. This avoids creating empty neighbor entries when TPROXYing/Forwarding packets for addresses that are not even directly reachable. Note that IPv4 already handles it this way. No neighbor entries are created for local input. Tested by myself and customer. Signed-off-by: Jiri Pirko Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e229a3bc345d..363d8b7772e8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -928,7 +928,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!rt->n && !(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_LOCAL))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); -- cgit v1.2.2 From 73df66f8b1926c59cbc83000af6bf37ecc5509dd Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 31 Jan 2013 01:02:24 +0000 Subject: ipv6: rename datagram_send_ctl and datagram_recv_ctl The datagram_*_ctl functions in net/ipv6/datagram.c are IPv6-specific. Since datagram_send_ctl is publicly exported it should be appropriately named to reflect the fact that it's for IPv6 only. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 15 ++++++++------- net/ipv6/ip6_flowlabel.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 6 +++--- net/ipv6/raw.c | 6 +++--- net/ipv6/udp.c | 6 +++--- net/l2tp/l2tp_ip6.c | 4 ++-- net/sunrpc/svcsock.c | 2 +- 7 files changed, 22 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 8edf2601065a..06fd2730838b 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -380,7 +380,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_ctl(sk, msg, skb); if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = IP6CB(skb)->iif; } else { @@ -468,7 +468,8 @@ out: } -int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) +int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet6_skb_parm *opt = IP6CB(skb); @@ -598,10 +599,10 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) return 0; } -int datagram_send_ctl(struct net *net, struct sock *sk, - struct msghdr *msg, struct flowi6 *fl6, - struct ipv6_txoptions *opt, - int *hlimit, int *tclass, int *dontfrag) +int ip6_datagram_send_ctl(struct net *net, struct sock *sk, + struct msghdr *msg, struct flowi6 *fl6, + struct ipv6_txoptions *opt, + int *hlimit, int *tclass, int *dontfrag) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -871,4 +872,4 @@ int datagram_send_ctl(struct net *net, struct sock *sk, exit_f: return err; } -EXPORT_SYMBOL_GPL(datagram_send_ctl); +EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 29124b7a04c8..d6de4b447250 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -365,8 +365,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, msg.msg_control = (void*)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); - err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk, - &junk, &junk); + err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, + &junk, &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ee94d31c9d4d..d1e2e8ef29c5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -476,8 +476,8 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk, - &junk); + retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, + &junk, &junk); if (retv) goto done; update: @@ -1002,7 +1002,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, release_sock(sk); if (skb) { - int err = datagram_recv_ctl(sk, &msg, skb); + int err = ip6_datagram_recv_ctl(sk, &msg, skb); kfree_skb(skb); if (err) return err; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6cd29b1e8b92..70fa81449997 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -507,7 +507,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sock_recv_ts_and_drops(msg, sk, skb); if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_ctl(sk, msg, skb); err = copied; if (flags & MSG_TRUNC) @@ -822,8 +822,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index dfaa29b8b293..fb083295ff0b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -443,7 +443,7 @@ try_again: ip_cmsg_recv(msg, skb); } else { if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_ctl(sk, msg, skb); } err = copied; @@ -1153,8 +1153,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 927547171bc7..2316947ee772 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -554,8 +554,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0a148c9d2a5c..0f679df7d072 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -465,7 +465,7 @@ static int svc_udp_get_dest_address4(struct svc_rqst *rqstp, } /* - * See net/ipv6/datagram.c : datagram_recv_ctl + * See net/ipv6/datagram.c : ip6_datagram_recv_ctl */ static int svc_udp_get_dest_address6(struct svc_rqst *rqstp, struct cmsghdr *cmh) -- cgit v1.2.2 From 8e72d37eb304d9ec5dfb51bc2d83e900b79ee764 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 31 Jan 2013 01:02:25 +0000 Subject: ipv6: export ip6_datagram_recv_ctl ip6_datagram_recv_ctl and ip6_datagram_send_ctl are used for handling IPv6 ancillary data. Since ip6_datagram_send_ctl is already publicly exported for use in modules, ip6_datagram_recv_ctl should also be available to support ancillary data in the receive path. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 06fd2730838b..7a778b9a7b85 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -598,6 +598,7 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, } return 0; } +EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl); int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, -- cgit v1.2.2 From 700163db3de397a7557831e1eb9b8ce60e55590a Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 31 Jan 2013 01:02:26 +0000 Subject: l2tp: correctly handle ancillary data in the ip6 recv path l2tp_ip6 is incorrectly using the IPv4-specific ip_cmsg_recv to handle ancillary data. This means that socket options such as IPV6_RECVPKTINFO are not honoured in userspace. Convert l2tp_ip6 to use the IPv6-specific handler. Ref: net/ipv6/udp.c Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: Chris Elston Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip6.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 2316947ee772..8ee4a86ae996 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -646,7 +646,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { - struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)msg->msg_name; size_t copied = 0; int err = -EOPNOTSUPP; @@ -688,8 +688,8 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, lsa->l2tp_scope_id = IP6CB(skb)->iif; } - if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + if (np->rxopt.all) + ip6_datagram_recv_ctl(sk, msg, skb); if (flags & MSG_TRUNC) copied = skb->len; -- cgit v1.2.2 From 66555e92fb7a619188c02cceae4bbc414f15f96d Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 31 Jan 2013 11:16:46 -0800 Subject: tcp: detect SYN/data drop when F-RTO is disabled On receiving the SYN-ACK, Fast Open checks icsk_retransmit for SYN retransmission to detect SYN/data drops. But if F-RTO is disabled, icsk_retransmit is reset at step D of tcp_fastretrans_alert() ( under tcp_ack()) before tcp_rcv_fastopen_synack(). The fix is to use total_retrans instead which accounts for SYN retransmission regardless the use of F-RTO. Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 18f97ca76b00..8aca4ee95ff9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5649,8 +5649,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, * the remote receives only the retransmitted (regular) SYNs: either * the original SYN-data or the corresponding SYN-ACK is lost. */ - syn_drop = (cookie->len <= 0 && data && - inet_csk(sk)->icsk_retransmits); + syn_drop = (cookie->len <= 0 && data && tp->total_retrans); tcp_fastopen_cache_set(sk, mss, cookie, syn_drop); -- cgit v1.2.2 From 973ec449bb4f2b8c514bacbcb4d9506fc31c8ce3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 2 Feb 2013 05:23:16 +0000 Subject: tcp: fix an infinite loop in tcp_slow_start() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 9dc274151a548 (tcp: fix ABC in tcp_slow_start()), a nul snd_cwnd triggers an infinite loop in tcp_slow_start() Avoid this infinite loop and log a one time error for further analysis. FRTO code is suspected to cause this bug. Reported-by: Pasi Kärkkäinen Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_cong.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 291f2ed7cc31..cdf2e707bb10 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -310,6 +310,12 @@ void tcp_slow_start(struct tcp_sock *tp) { int cnt; /* increase in packets */ unsigned int delta = 0; + u32 snd_cwnd = tp->snd_cwnd; + + if (unlikely(!snd_cwnd)) { + pr_err_once("snd_cwnd is nul, please report this bug.\n"); + snd_cwnd = 1U; + } /* RFC3465: ABC Slow start * Increase only after a full MSS of bytes is acked @@ -324,7 +330,7 @@ void tcp_slow_start(struct tcp_sock *tp) if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ else - cnt = tp->snd_cwnd; /* exponential increase */ + cnt = snd_cwnd; /* exponential increase */ /* RFC3465: ABC * We MAY increase by 2 if discovered delayed ack @@ -334,11 +340,11 @@ void tcp_slow_start(struct tcp_sock *tp) tp->bytes_acked = 0; tp->snd_cwnd_cnt += cnt; - while (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - tp->snd_cwnd_cnt -= tp->snd_cwnd; + while (tp->snd_cwnd_cnt >= snd_cwnd) { + tp->snd_cwnd_cnt -= snd_cwnd; delta++; } - tp->snd_cwnd = min(tp->snd_cwnd + delta, tp->snd_cwnd_clamp); + tp->snd_cwnd = min(snd_cwnd + delta, tp->snd_cwnd_clamp); } EXPORT_SYMBOL_GPL(tcp_slow_start); -- cgit v1.2.2 From 2e5f421211ff76c17130b4597bc06df4eeead24f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 Feb 2013 09:13:05 +0000 Subject: tcp: frto should not set snd_cwnd to 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9dc274151a548 (tcp: fix ABC in tcp_slow_start()) uncovered a bug in FRTO code : tcp_process_frto() is setting snd_cwnd to 0 if the number of in flight packets is 0. As Neal pointed out, if no packet is in flight we lost our chance to disambiguate whether a loss timeout was spurious. We should assume it was a proper loss. Reported-by: Pasi Kärkkäinen Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: Ilpo Järvinen Cc: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8aca4ee95ff9..680c4224ed96 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3484,7 +3484,8 @@ static bool tcp_process_frto(struct sock *sk, int flag) ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) tp->undo_marker = 0; - if (!before(tp->snd_una, tp->frto_highmark)) { + if (!before(tp->snd_una, tp->frto_highmark) || + !tcp_packets_in_flight(tp)) { tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); return true; } -- cgit v1.2.2 From 92df9b217ee2392024483ba5b85a88d92d60f3c1 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 1 Feb 2013 15:18:49 +0000 Subject: net: Fix inner_network_header assignment in skb-copy. Use correct inner offset to set inner_network_offset. Found by inspection. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a9a2ae3e2213..32443ebc3e89 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -683,7 +683,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header = old->network_header; new->mac_header = old->mac_header; new->inner_transport_header = old->inner_transport_header; - new->inner_network_header = old->inner_transport_header; + new->inner_network_header = old->inner_network_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; -- cgit v1.2.2 From 9665d5d62487e8e7b1f546c00e11107155384b9a Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 1 Feb 2013 07:21:41 +0000 Subject: packet: fix leakage of tx_ring memory When releasing a packet socket, the routine packet_set_ring() is reused to free rings instead of allocating them. But when calling it for the first time, it fills req->tp_block_nr with the value of rb->pg_vec_len which in the second invocation makes it bail out since req->tp_block_nr is greater zero but req->tp_block_size is zero. This patch solves the problem by passing a zeroed auto-variable to packet_set_ring() upon each invocation from packet_release(). As far as I can tell, this issue exists even since 69e3c75 (net: TX_RING and packet mmap), i.e. the original inclusion of TX ring support into af_packet, but applies only to sockets with both RX and TX ring allocated, which is probably why this was unnoticed all the time. Signed-off-by: Phil Sutter Cc: Johann Baudy Cc: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/packet/af_packet.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e639645e8fec..c111bd0e083a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2361,13 +2361,15 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); - memset(&req_u, 0, sizeof(req_u)); - - if (po->rx_ring.pg_vec) + if (po->rx_ring.pg_vec) { + memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); + } - if (po->tx_ring.pg_vec) + if (po->tx_ring.pg_vec) { + memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); + } fanout_release(sk); -- cgit v1.2.2 From 848bf15f361c7c22da7998c81d50ed3dffbc827d Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Thu, 31 Jan 2013 08:24:06 +0000 Subject: tcp: Update MIB counters for drops This patch updates LINUX_MIB_LISTENDROPS in tcp_v4_conn_request() and tcp_v4_err(). tcp_v4_conn_request() in particular can drop SYNs for various reasons which are not currently tracked. Signed-off-by: Vijay Subramanian Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 629937d514eb..eadb693eef55 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -496,6 +496,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) * errors returned from accept(). */ inet_csk_reqsk_queue_drop(sk, req, prev); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto out; case TCP_SYN_SENT: @@ -1502,7 +1503,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto drop; } @@ -1669,6 +1669,7 @@ drop_and_release: drop_and_free: reqsk_free(req); drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; } EXPORT_SYMBOL(tcp_v4_conn_request); -- cgit v1.2.2 From 5f1e942cb45d06968b0ce94472d97014e0e1fdc9 Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Thu, 31 Jan 2013 08:24:19 +0000 Subject: tcp: ipv6: Update MIB counters for drops This patch updates LINUX_MIB_LISTENDROPS and LINUX_MIB_LISTENOVERFLOWS in tcp_v6_conn_request() and tcp_v6_err(). tcp_v6_conn_request() in particular can drop SYNs for various reasons which are not currently tracked. Signed-off-by: Vijay Subramanian Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 93825dd3a7c0..4f43537197ef 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -423,6 +423,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } inet_csk_reqsk_queue_drop(sk, req, prev); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto out; case TCP_SYN_SENT: @@ -958,8 +959,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; + } req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (req == NULL) @@ -1108,6 +1111,7 @@ drop_and_release: drop_and_free: reqsk_free(req); drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; /* don't send reset */ } -- cgit v1.2.2