diff options
author | Matthew Dawson <matthew@mjdsystems.ca> | 2017-08-18 15:04:54 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-08-18 18:12:54 -0400 |
commit | a0917e0bc6efc05834c0c1eafebd579a9c75e6e9 (patch) | |
tree | 31f8b60d8e7a0a20943057d7345fdca3558e5ed8 | |
parent | 2110ba58303f0c2a03360c5f81fbe67ed312e7b9 (diff) |
datagram: When peeking datagrams with offset < 0 don't skip empty skbs
Due to commit e6afc8ace6dd5cef5e812f26c72579da8806f5ac ("udp: remove
headers from UDP packets before queueing"), when udp packets are being
peeked the requested extra offset is always 0 as there is no need to skip
the udp header. However, when the offset is 0 and the next skb is
of length 0, it is only returned once. The behaviour can be seen with
the following python script:
from socket import *;
f=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0);
g=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0);
f.bind(('::', 0));
addr=('::1', f.getsockname()[1]);
g.sendto(b'', addr)
g.sendto(b'b', addr)
print(f.recvfrom(10, MSG_PEEK));
print(f.recvfrom(10, MSG_PEEK));
Where the expected output should be the empty string twice.
Instead, make sk_peek_offset return negative values, and pass those values
to __skb_try_recv_datagram/__skb_try_recv_from_queue. If the passed offset
to __skb_try_recv_from_queue is negative, the checked skb is never skipped.
__skb_try_recv_from_queue will then ensure the offset is reset back to 0
if a peek is requested without an offset, unless no packets are found.
Also simplify the if condition in __skb_try_recv_from_queue. If _off is
greater then 0, and off is greater then or equal to skb->len, then
(_off || skb->len) must always be true assuming skb->len >= 0 is always
true.
Also remove a redundant check around a call to sk_peek_offset in af_unix.c,
as it double checked if MSG_PEEK was set in the flags.
V2:
- Moved the negative fixup into __skb_try_recv_from_queue, and remove now
redundant checks
- Fix peeking in udp{,v6}_recvmsg to report the right value when the
offset is 0
V3:
- Marked new branch in __skb_try_recv_from_queue as unlikely.
Signed-off-by: Matthew Dawson <matthew@mjdsystems.ca>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sock.h | 4 | ||||
-rw-r--r-- | net/core/datagram.c | 12 | ||||
-rw-r--r-- | net/ipv4/udp.c | 3 | ||||
-rw-r--r-- | net/ipv6/udp.c | 3 | ||||
-rw-r--r-- | net/unix/af_unix.c | 5 |
5 files changed, 15 insertions, 12 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 7c0632c7e870..aeeec62992ca 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -507,9 +507,7 @@ int sk_set_peek_off(struct sock *sk, int val); | |||
507 | static inline int sk_peek_offset(struct sock *sk, int flags) | 507 | static inline int sk_peek_offset(struct sock *sk, int flags) |
508 | { | 508 | { |
509 | if (unlikely(flags & MSG_PEEK)) { | 509 | if (unlikely(flags & MSG_PEEK)) { |
510 | s32 off = READ_ONCE(sk->sk_peek_off); | 510 | return READ_ONCE(sk->sk_peek_off); |
511 | if (off >= 0) | ||
512 | return off; | ||
513 | } | 511 | } |
514 | 512 | ||
515 | return 0; | 513 | return 0; |
diff --git a/net/core/datagram.c b/net/core/datagram.c index ee5647bd91b3..a21ca8dee5ea 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c | |||
@@ -169,14 +169,20 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, | |||
169 | int *peeked, int *off, int *err, | 169 | int *peeked, int *off, int *err, |
170 | struct sk_buff **last) | 170 | struct sk_buff **last) |
171 | { | 171 | { |
172 | bool peek_at_off = false; | ||
172 | struct sk_buff *skb; | 173 | struct sk_buff *skb; |
173 | int _off = *off; | 174 | int _off = 0; |
175 | |||
176 | if (unlikely(flags & MSG_PEEK && *off >= 0)) { | ||
177 | peek_at_off = true; | ||
178 | _off = *off; | ||
179 | } | ||
174 | 180 | ||
175 | *last = queue->prev; | 181 | *last = queue->prev; |
176 | skb_queue_walk(queue, skb) { | 182 | skb_queue_walk(queue, skb) { |
177 | if (flags & MSG_PEEK) { | 183 | if (flags & MSG_PEEK) { |
178 | if (_off >= skb->len && (skb->len || _off || | 184 | if (peek_at_off && _off >= skb->len && |
179 | skb->peeked)) { | 185 | (_off || skb->peeked)) { |
180 | _off -= skb->len; | 186 | _off -= skb->len; |
181 | continue; | 187 | continue; |
182 | } | 188 | } |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a7c804f73990..cd1d044a7fa5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -1574,7 +1574,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, | |||
1574 | return ip_recv_error(sk, msg, len, addr_len); | 1574 | return ip_recv_error(sk, msg, len, addr_len); |
1575 | 1575 | ||
1576 | try_again: | 1576 | try_again: |
1577 | peeking = off = sk_peek_offset(sk, flags); | 1577 | peeking = flags & MSG_PEEK; |
1578 | off = sk_peek_offset(sk, flags); | ||
1578 | skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); | 1579 | skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); |
1579 | if (!skb) | 1580 | if (!skb) |
1580 | return err; | 1581 | return err; |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 578142b7ca3e..20039c8501eb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -362,7 +362,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, | |||
362 | return ipv6_recv_rxpmtu(sk, msg, len, addr_len); | 362 | return ipv6_recv_rxpmtu(sk, msg, len, addr_len); |
363 | 363 | ||
364 | try_again: | 364 | try_again: |
365 | peeking = off = sk_peek_offset(sk, flags); | 365 | peeking = flags & MSG_PEEK; |
366 | off = sk_peek_offset(sk, flags); | ||
366 | skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); | 367 | skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); |
367 | if (!skb) | 368 | if (!skb) |
368 | return err; | 369 | return err; |
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7b52a380d710..be8982b4f8c0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -2304,10 +2304,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, | |||
2304 | */ | 2304 | */ |
2305 | mutex_lock(&u->iolock); | 2305 | mutex_lock(&u->iolock); |
2306 | 2306 | ||
2307 | if (flags & MSG_PEEK) | 2307 | skip = max(sk_peek_offset(sk, flags), 0); |
2308 | skip = sk_peek_offset(sk, flags); | ||
2309 | else | ||
2310 | skip = 0; | ||
2311 | 2308 | ||
2312 | do { | 2309 | do { |
2313 | int chunk; | 2310 | int chunk; |