diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 40 |
1 files changed, 25 insertions, 15 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e457c7ab2e28..2aa69c8ae60c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -536,13 +536,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
536 | { | 536 | { |
537 | struct tcp_sock *tp = tcp_sk(sk); | 537 | struct tcp_sock *tp = tcp_sk(sk); |
538 | int answ; | 538 | int answ; |
539 | bool slow; | ||
539 | 540 | ||
540 | switch (cmd) { | 541 | switch (cmd) { |
541 | case SIOCINQ: | 542 | case SIOCINQ: |
542 | if (sk->sk_state == TCP_LISTEN) | 543 | if (sk->sk_state == TCP_LISTEN) |
543 | return -EINVAL; | 544 | return -EINVAL; |
544 | 545 | ||
545 | lock_sock(sk); | 546 | slow = lock_sock_fast(sk); |
546 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) | 547 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) |
547 | answ = 0; | 548 | answ = 0; |
548 | else if (sock_flag(sk, SOCK_URGINLINE) || | 549 | else if (sock_flag(sk, SOCK_URGINLINE) || |
@@ -557,7 +558,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
557 | answ--; | 558 | answ--; |
558 | } else | 559 | } else |
559 | answ = tp->urg_seq - tp->copied_seq; | 560 | answ = tp->urg_seq - tp->copied_seq; |
560 | release_sock(sk); | 561 | unlock_sock_fast(sk, slow); |
561 | break; | 562 | break; |
562 | case SIOCATMARK: | 563 | case SIOCATMARK: |
563 | answ = tp->urg_data && tp->urg_seq == tp->copied_seq; | 564 | answ = tp->urg_data && tp->urg_seq == tp->copied_seq; |
@@ -1427,12 +1428,12 @@ static void tcp_service_net_dma(struct sock *sk, bool wait) | |||
1427 | } | 1428 | } |
1428 | #endif | 1429 | #endif |
1429 | 1430 | ||
1430 | static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) | 1431 | static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) |
1431 | { | 1432 | { |
1432 | struct sk_buff *skb; | 1433 | struct sk_buff *skb; |
1433 | u32 offset; | 1434 | u32 offset; |
1434 | 1435 | ||
1435 | skb_queue_walk(&sk->sk_receive_queue, skb) { | 1436 | while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { |
1436 | offset = seq - TCP_SKB_CB(skb)->seq; | 1437 | offset = seq - TCP_SKB_CB(skb)->seq; |
1437 | if (tcp_hdr(skb)->syn) | 1438 | if (tcp_hdr(skb)->syn) |
1438 | offset--; | 1439 | offset--; |
@@ -1440,6 +1441,11 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) | |||
1440 | *off = offset; | 1441 | *off = offset; |
1441 | return skb; | 1442 | return skb; |
1442 | } | 1443 | } |
1444 | /* This looks weird, but this can happen if TCP collapsing | ||
1445 | * split a fat GRO packet, while we released socket lock | ||
1446 | * in skb_splice_bits() | ||
1447 | */ | ||
1448 | sk_eat_skb(sk, skb, false); | ||
1443 | } | 1449 | } |
1444 | return NULL; | 1450 | return NULL; |
1445 | } | 1451 | } |
@@ -1481,7 +1487,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1481 | break; | 1487 | break; |
1482 | } | 1488 | } |
1483 | used = recv_actor(desc, skb, offset, len); | 1489 | used = recv_actor(desc, skb, offset, len); |
1484 | if (used < 0) { | 1490 | if (used <= 0) { |
1485 | if (!copied) | 1491 | if (!copied) |
1486 | copied = used; | 1492 | copied = used; |
1487 | break; | 1493 | break; |
@@ -1490,15 +1496,19 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1490 | copied += used; | 1496 | copied += used; |
1491 | offset += used; | 1497 | offset += used; |
1492 | } | 1498 | } |
1493 | /* | 1499 | /* If recv_actor drops the lock (e.g. TCP splice |
1494 | * If recv_actor drops the lock (e.g. TCP splice | ||
1495 | * receive) the skb pointer might be invalid when | 1500 | * receive) the skb pointer might be invalid when |
1496 | * getting here: tcp_collapse might have deleted it | 1501 | * getting here: tcp_collapse might have deleted it |
1497 | * while aggregating skbs from the socket queue. | 1502 | * while aggregating skbs from the socket queue. |
1498 | */ | 1503 | */ |
1499 | skb = tcp_recv_skb(sk, seq-1, &offset); | 1504 | skb = tcp_recv_skb(sk, seq - 1, &offset); |
1500 | if (!skb || (offset+1 != skb->len)) | 1505 | if (!skb) |
1501 | break; | 1506 | break; |
1507 | /* TCP coalescing might have appended data to the skb. | ||
1508 | * Try to splice more frags | ||
1509 | */ | ||
1510 | if (offset + 1 != skb->len) | ||
1511 | continue; | ||
1502 | } | 1512 | } |
1503 | if (tcp_hdr(skb)->fin) { | 1513 | if (tcp_hdr(skb)->fin) { |
1504 | sk_eat_skb(sk, skb, false); | 1514 | sk_eat_skb(sk, skb, false); |
@@ -1515,8 +1525,10 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1515 | tcp_rcv_space_adjust(sk); | 1525 | tcp_rcv_space_adjust(sk); |
1516 | 1526 | ||
1517 | /* Clean up data we have read: This will do ACK frames. */ | 1527 | /* Clean up data we have read: This will do ACK frames. */ |
1518 | if (copied > 0) | 1528 | if (copied > 0) { |
1529 | tcp_recv_skb(sk, seq, &offset); | ||
1519 | tcp_cleanup_rbuf(sk, copied); | 1530 | tcp_cleanup_rbuf(sk, copied); |
1531 | } | ||
1520 | return copied; | 1532 | return copied; |
1521 | } | 1533 | } |
1522 | EXPORT_SYMBOL(tcp_read_sock); | 1534 | EXPORT_SYMBOL(tcp_read_sock); |
@@ -2300,7 +2312,7 @@ void tcp_sock_destruct(struct sock *sk) | |||
2300 | 2312 | ||
2301 | static inline bool tcp_can_repair_sock(const struct sock *sk) | 2313 | static inline bool tcp_can_repair_sock(const struct sock *sk) |
2302 | { | 2314 | { |
2303 | return capable(CAP_NET_ADMIN) && | 2315 | return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && |
2304 | ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); | 2316 | ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); |
2305 | } | 2317 | } |
2306 | 2318 | ||
@@ -3586,8 +3598,7 @@ void __init tcp_init(void) | |||
3586 | alloc_large_system_hash("TCP established", | 3598 | alloc_large_system_hash("TCP established", |
3587 | sizeof(struct inet_ehash_bucket), | 3599 | sizeof(struct inet_ehash_bucket), |
3588 | thash_entries, | 3600 | thash_entries, |
3589 | (totalram_pages >= 128 * 1024) ? | 3601 | 17, /* one slot per 128 KB of memory */ |
3590 | 13 : 15, | ||
3591 | 0, | 3602 | 0, |
3592 | NULL, | 3603 | NULL, |
3593 | &tcp_hashinfo.ehash_mask, | 3604 | &tcp_hashinfo.ehash_mask, |
@@ -3603,8 +3614,7 @@ void __init tcp_init(void) | |||
3603 | alloc_large_system_hash("TCP bind", | 3614 | alloc_large_system_hash("TCP bind", |
3604 | sizeof(struct inet_bind_hashbucket), | 3615 | sizeof(struct inet_bind_hashbucket), |
3605 | tcp_hashinfo.ehash_mask + 1, | 3616 | tcp_hashinfo.ehash_mask + 1, |
3606 | (totalram_pages >= 128 * 1024) ? | 3617 | 17, /* one slot per 128 KB of memory */ |
3607 | 13 : 15, | ||
3608 | 0, | 3618 | 0, |
3609 | &tcp_hashinfo.bhash_size, | 3619 | &tcp_hashinfo.bhash_size, |
3610 | NULL, | 3620 | NULL, |