author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
commit | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch) | |
tree | a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /net/ipv4/tcp.c | |
parent | 406089d01562f1e2bf9f089fd7637009ebaad589 (diff) |
Patched in Tegra support.
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 891 |
1 file changed, 328 insertions, 563 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2aa69c8ae60..09ced58e6a5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
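
Taken together, the hunks below move `net/ipv4/tcp.c` off its upstream state (left-hand line numbers) onto an older, Android-patched base (right-hand line numbers): TCP Fast Open, the `TCP_REPAIR` socket-repair interface, TSQ output limiting, and the per-task `page_frag` allocator all disappear, while the `<linux/uid_stat.h>` per-UID traffic-accounting hooks return.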
@@ -245,8 +245,6 @@ | |||
245 | * TCP_CLOSE socket is finished | 245 | * TCP_CLOSE socket is finished |
246 | */ | 246 | */ |
247 | 247 | ||
248 | #define pr_fmt(fmt) "TCP: " fmt | ||
249 | |||
250 | #include <linux/kernel.h> | 248 | #include <linux/kernel.h> |
251 | #include <linux/module.h> | 249 | #include <linux/module.h> |
252 | #include <linux/types.h> | 250 | #include <linux/types.h> |
@@ -268,12 +266,15 @@ | |||
268 | #include <linux/crypto.h> | 266 | #include <linux/crypto.h> |
269 | #include <linux/time.h> | 267 | #include <linux/time.h> |
270 | #include <linux/slab.h> | 268 | #include <linux/slab.h> |
269 | #include <linux/uid_stat.h> | ||
271 | 270 | ||
272 | #include <net/icmp.h> | 271 | #include <net/icmp.h> |
273 | #include <net/inet_common.h> | ||
274 | #include <net/tcp.h> | 272 | #include <net/tcp.h> |
275 | #include <net/xfrm.h> | 273 | #include <net/xfrm.h> |
276 | #include <net/ip.h> | 274 | #include <net/ip.h> |
275 | #include <net/ip6_route.h> | ||
276 | #include <net/ipv6.h> | ||
277 | #include <net/transp_v6.h> | ||
277 | #include <net/netdma.h> | 278 | #include <net/netdma.h> |
278 | #include <net/sock.h> | 279 | #include <net/sock.h> |
279 | 280 | ||
@@ -285,9 +286,11 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; | |||
285 | struct percpu_counter tcp_orphan_count; | 286 | struct percpu_counter tcp_orphan_count; |
286 | EXPORT_SYMBOL_GPL(tcp_orphan_count); | 287 | EXPORT_SYMBOL_GPL(tcp_orphan_count); |
287 | 288 | ||
289 | long sysctl_tcp_mem[3] __read_mostly; | ||
288 | int sysctl_tcp_wmem[3] __read_mostly; | 290 | int sysctl_tcp_wmem[3] __read_mostly; |
289 | int sysctl_tcp_rmem[3] __read_mostly; | 291 | int sysctl_tcp_rmem[3] __read_mostly; |
290 | 292 | ||
293 | EXPORT_SYMBOL(sysctl_tcp_mem); | ||
291 | EXPORT_SYMBOL(sysctl_tcp_rmem); | 294 | EXPORT_SYMBOL(sysctl_tcp_rmem); |
292 | EXPORT_SYMBOL(sysctl_tcp_wmem); | 295 | EXPORT_SYMBOL(sysctl_tcp_wmem); |
293 | 296 | ||
@@ -364,72 +367,6 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) | |||
364 | return period; | 367 | return period; |
365 | } | 368 | } |
366 | 369 | ||
367 | /* Address-family independent initialization for a tcp_sock. | ||
368 | * | ||
369 | * NOTE: A lot of things set to zero explicitly by call to | ||
370 | * sk_alloc() so need not be done here. | ||
371 | */ | ||
372 | void tcp_init_sock(struct sock *sk) | ||
373 | { | ||
374 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
375 | struct tcp_sock *tp = tcp_sk(sk); | ||
376 | |||
377 | skb_queue_head_init(&tp->out_of_order_queue); | ||
378 | tcp_init_xmit_timers(sk); | ||
379 | tcp_prequeue_init(tp); | ||
380 | INIT_LIST_HEAD(&tp->tsq_node); | ||
381 | |||
382 | icsk->icsk_rto = TCP_TIMEOUT_INIT; | ||
383 | tp->mdev = TCP_TIMEOUT_INIT; | ||
384 | |||
385 | /* So many TCP implementations out there (incorrectly) count the | ||
386 | * initial SYN frame in their delayed-ACK and congestion control | ||
387 | * algorithms that we must have the following bandaid to talk | ||
388 | * efficiently to them. -DaveM | ||
389 | */ | ||
390 | tp->snd_cwnd = TCP_INIT_CWND; | ||
391 | |||
392 | /* See draft-stevens-tcpca-spec-01 for discussion of the | ||
393 | * initialization of these values. | ||
394 | */ | ||
395 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
396 | tp->snd_cwnd_clamp = ~0; | ||
397 | tp->mss_cache = TCP_MSS_DEFAULT; | ||
398 | |||
399 | tp->reordering = sysctl_tcp_reordering; | ||
400 | tcp_enable_early_retrans(tp); | ||
401 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; | ||
402 | |||
403 | sk->sk_state = TCP_CLOSE; | ||
404 | |||
405 | sk->sk_write_space = sk_stream_write_space; | ||
406 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | ||
407 | |||
408 | icsk->icsk_sync_mss = tcp_sync_mss; | ||
409 | |||
410 | /* TCP Cookie Transactions */ | ||
411 | if (sysctl_tcp_cookie_size > 0) { | ||
412 | /* Default, cookies without s_data_payload. */ | ||
413 | tp->cookie_values = | ||
414 | kzalloc(sizeof(*tp->cookie_values), | ||
415 | sk->sk_allocation); | ||
416 | if (tp->cookie_values != NULL) | ||
417 | kref_init(&tp->cookie_values->kref); | ||
418 | } | ||
419 | /* Presumed zeroed, in order of appearance: | ||
420 | * cookie_in_always, cookie_out_never, | ||
421 | * s_data_constant, s_data_in, s_data_out | ||
422 | */ | ||
423 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; | ||
424 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | ||
425 | |||
426 | local_bh_disable(); | ||
427 | sock_update_memcg(sk); | ||
428 | sk_sockets_allocated_inc(sk); | ||
429 | local_bh_enable(); | ||
430 | } | ||
431 | EXPORT_SYMBOL(tcp_init_sock); | ||
432 | |||
433 | /* | 370 | /* |
434 | * Wait for a TCP event. | 371 | * Wait for a TCP event. |
435 | * | 372 | * |
@@ -441,7 +378,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
441 | { | 378 | { |
442 | unsigned int mask; | 379 | unsigned int mask; |
443 | struct sock *sk = sock->sk; | 380 | struct sock *sk = sock->sk; |
444 | const struct tcp_sock *tp = tcp_sk(sk); | 381 | struct tcp_sock *tp = tcp_sk(sk); |
445 | 382 | ||
446 | sock_poll_wait(file, sk_sleep(sk), wait); | 383 | sock_poll_wait(file, sk_sleep(sk), wait); |
447 | if (sk->sk_state == TCP_LISTEN) | 384 | if (sk->sk_state == TCP_LISTEN) |
@@ -486,9 +423,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
486 | if (sk->sk_shutdown & RCV_SHUTDOWN) | 423 | if (sk->sk_shutdown & RCV_SHUTDOWN) |
487 | mask |= POLLIN | POLLRDNORM | POLLRDHUP; | 424 | mask |= POLLIN | POLLRDNORM | POLLRDHUP; |
488 | 425 | ||
489 | /* Connected or passive Fast Open socket? */ | 426 | /* Connected? */ |
490 | if (sk->sk_state != TCP_SYN_SENT && | 427 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
491 | (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) { | ||
492 | int target = sock_rcvlowat(sk, 0, INT_MAX); | 428 | int target = sock_rcvlowat(sk, 0, INT_MAX); |
493 | 429 | ||
494 | if (tp->urg_seq == tp->copied_seq && | 430 | if (tp->urg_seq == tp->copied_seq && |
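
The right-hand column swaps the Fast Open-aware test (left) back to the classic state-bitmask idiom. For reference, a minimal sketch of that idiom; the enum values mirror the kernel's `TCP_*` states, but this is illustration, not kernel code:

```c
/* Each TCPF_* constant is the bit for one TCP_* state, so a single
 * AND tests membership in a whole set of states at once. */
enum { TCP_ESTABLISHED = 1, TCP_SYN_SENT, TCP_SYN_RECV };

#define TCPF_SYN_SENT	(1 << TCP_SYN_SENT)
#define TCPF_SYN_RECV	(1 << TCP_SYN_RECV)

/* True in every state except SYN_SENT and SYN_RECV. */
static int state_past_handshake(int sk_state)
{
	return (1 << sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV);
}
```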
@@ -536,29 +472,30 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
536 | { | 472 | { |
537 | struct tcp_sock *tp = tcp_sk(sk); | 473 | struct tcp_sock *tp = tcp_sk(sk); |
538 | int answ; | 474 | int answ; |
539 | bool slow; | ||
540 | 475 | ||
541 | switch (cmd) { | 476 | switch (cmd) { |
542 | case SIOCINQ: | 477 | case SIOCINQ: |
543 | if (sk->sk_state == TCP_LISTEN) | 478 | if (sk->sk_state == TCP_LISTEN) |
544 | return -EINVAL; | 479 | return -EINVAL; |
545 | 480 | ||
546 | slow = lock_sock_fast(sk); | 481 | lock_sock(sk); |
547 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) | 482 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) |
548 | answ = 0; | 483 | answ = 0; |
549 | else if (sock_flag(sk, SOCK_URGINLINE) || | 484 | else if (sock_flag(sk, SOCK_URGINLINE) || |
550 | !tp->urg_data || | 485 | !tp->urg_data || |
551 | before(tp->urg_seq, tp->copied_seq) || | 486 | before(tp->urg_seq, tp->copied_seq) || |
552 | !before(tp->urg_seq, tp->rcv_nxt)) { | 487 | !before(tp->urg_seq, tp->rcv_nxt)) { |
488 | struct sk_buff *skb; | ||
553 | 489 | ||
554 | answ = tp->rcv_nxt - tp->copied_seq; | 490 | answ = tp->rcv_nxt - tp->copied_seq; |
555 | 491 | ||
556 | /* Subtract 1, if FIN was received */ | 492 | /* Subtract 1, if FIN is in queue. */ |
557 | if (answ && sock_flag(sk, SOCK_DONE)) | 493 | skb = skb_peek_tail(&sk->sk_receive_queue); |
558 | answ--; | 494 | if (answ && skb) |
495 | answ -= tcp_hdr(skb)->fin; | ||
559 | } else | 496 | } else |
560 | answ = tp->urg_seq - tp->copied_seq; | 497 | answ = tp->urg_seq - tp->copied_seq; |
561 | unlock_sock_fast(sk, slow); | 498 | release_sock(sk); |
562 | break; | 499 | break; |
563 | case SIOCATMARK: | 500 | case SIOCATMARK: |
564 | answ = tp->urg_data && tp->urg_seq == tp->copied_seq; | 501 | answ = tp->urg_data && tp->urg_seq == tp->copied_seq; |
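
What the SIOCINQ branch computes is visible from userspace as the FIONREAD-style byte count. A minimal sketch of the caller's side, assuming only standard headers:

```c
/* How many bytes are ready to read on a TCP socket. On Linux,
 * SIOCINQ is the same ioctl as FIONREAD. */
#include <sys/ioctl.h>
#include <linux/sockios.h>	/* SIOCINQ */

int tcp_pending_bytes(int fd)
{
	int n = 0;

	if (ioctl(fd, SIOCINQ, &n) < 0)
		return -1;
	return n;	/* roughly rcv_nxt - copied_seq, minus a queued FIN */
}
```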
@@ -591,11 +528,11 @@ EXPORT_SYMBOL(tcp_ioctl); | |||
591 | 528 | ||
592 | static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) | 529 | static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) |
593 | { | 530 | { |
594 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; | 531 | TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; |
595 | tp->pushed_seq = tp->write_seq; | 532 | tp->pushed_seq = tp->write_seq; |
596 | } | 533 | } |
597 | 534 | ||
598 | static inline bool forced_push(const struct tcp_sock *tp) | 535 | static inline int forced_push(struct tcp_sock *tp) |
599 | { | 536 | { |
600 | return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); | 537 | return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); |
601 | } | 538 | } |
@@ -607,7 +544,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) | |||
607 | 544 | ||
608 | skb->csum = 0; | 545 | skb->csum = 0; |
609 | tcb->seq = tcb->end_seq = tp->write_seq; | 546 | tcb->seq = tcb->end_seq = tp->write_seq; |
610 | tcb->tcp_flags = TCPHDR_ACK; | 547 | tcb->flags = TCPHDR_ACK; |
611 | tcb->sacked = 0; | 548 | tcb->sacked = 0; |
612 | skb_header_release(skb); | 549 | skb_header_release(skb); |
613 | tcp_add_write_queue_tail(sk, skb); | 550 | tcp_add_write_queue_tail(sk, skb); |
@@ -768,12 +705,11 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) | |||
768 | skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); | 705 | skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); |
769 | if (skb) { | 706 | if (skb) { |
770 | if (sk_wmem_schedule(sk, skb->truesize)) { | 707 | if (sk_wmem_schedule(sk, skb->truesize)) { |
771 | skb_reserve(skb, sk->sk_prot->max_header); | ||
772 | /* | 708 | /* |
773 | * Make sure that we have exactly size bytes | 709 | * Make sure that we have exactly size bytes |
774 | * available to the caller, no more, no less. | 710 | * available to the caller, no more, no less. |
775 | */ | 711 | */ |
776 | skb->avail_size = size; | 712 | skb_reserve(skb, skb_tailroom(skb) - size); |
777 | return skb; | 713 | return skb; |
778 | } | 714 | } |
779 | __kfree_skb(skb); | 715 | __kfree_skb(skb); |
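
Both sides of this hunk aim at the same invariant — the caller sees exactly `size` bytes of room — but the right-hand column derives it from the tailroom: reserving `skb_tailroom(skb) - size` shrinks the tailroom to precisely `size`. A userspace model of that arithmetic, with a hypothetical struct for illustration only:

```c
#include <assert.h>

/* Toy skb: reserve(n) advances data and tail together, as the real
 * skb_reserve() does, so the tailroom shrinks by n. */
struct model_skb { unsigned int data, tail, end; };

static unsigned int tailroom(const struct model_skb *s)
{
	return s->end - s->tail;
}

static void reserve(struct model_skb *s, unsigned int n)
{
	s->data += n;
	s->tail += n;
}

int main(void)
{
	struct model_skb skb = { .data = 0, .tail = 0, .end = 4096 };
	unsigned int size = 1460;

	reserve(&skb, tailroom(&skb) - size);
	assert(tailroom(&skb) == size);	/* exactly `size` bytes remain */
	return 0;
}
```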
@@ -798,10 +734,6 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, | |||
798 | inet_csk(sk)->icsk_ext_hdr_len - | 734 | inet_csk(sk)->icsk_ext_hdr_len - |
799 | tp->tcp_header_len); | 735 | tp->tcp_header_len); |
800 | 736 | ||
801 | /* TSQ : try to have two TSO segments in flight */ | ||
802 | xmit_size_goal = min_t(u32, xmit_size_goal, | ||
803 | sysctl_tcp_limit_output_bytes >> 1); | ||
804 | |||
805 | xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); | 737 | xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); |
806 | 738 | ||
807 | /* We try hard to avoid divides here */ | 739 | /* We try hard to avoid divides here */ |
@@ -811,9 +743,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, | |||
811 | old_size_goal + mss_now > xmit_size_goal)) { | 743 | old_size_goal + mss_now > xmit_size_goal)) { |
812 | xmit_size_goal = old_size_goal; | 744 | xmit_size_goal = old_size_goal; |
813 | } else { | 745 | } else { |
814 | tp->xmit_size_goal_segs = | 746 | tp->xmit_size_goal_segs = xmit_size_goal / mss_now; |
815 | min_t(u16, xmit_size_goal / mss_now, | ||
816 | sk->sk_gso_max_segs); | ||
817 | xmit_size_goal = tp->xmit_size_goal_segs * mss_now; | 747 | xmit_size_goal = tp->xmit_size_goal_segs * mss_now; |
818 | } | 748 | } |
819 | } | 749 | } |
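
The surviving else branch rounds the goal down to a whole number of MSS-sized segments (the left column additionally capped the segment count at `sk->sk_gso_max_segs`, which this change drops). Worked numbers, purely illustrative:

```c
#include <stdio.h>

int main(void)
{
	unsigned int mss_now = 1448;			/* a typical MSS */
	unsigned int xmit_size_goal = 65535;
	unsigned int segs = xmit_size_goal / mss_now;	/* 45: partial segment dropped */

	xmit_size_goal = segs * mss_now;		/* 45 * 1448 = 65160 bytes */
	printf("%u segments, %u bytes\n", segs, xmit_size_goal);
	return 0;
}
```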
@@ -831,8 +761,8 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) | |||
831 | return mss_now; | 761 | return mss_now; |
832 | } | 762 | } |
833 | 763 | ||
834 | static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, | 764 | static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, |
835 | size_t size, int flags) | 765 | size_t psize, int flags) |
836 | { | 766 | { |
837 | struct tcp_sock *tp = tcp_sk(sk); | 767 | struct tcp_sock *tp = tcp_sk(sk); |
838 | int mss_now, size_goal; | 768 | int mss_now, size_goal; |
@@ -840,15 +770,10 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, | |||
840 | ssize_t copied; | 770 | ssize_t copied; |
841 | long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | 771 | long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
842 | 772 | ||
843 | /* Wait for a connection to finish. One exception is TCP Fast Open | 773 | /* Wait for a connection to finish. */ |
844 | * (passive side) where data is allowed to be sent before a connection | 774 | if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) |
845 | * is fully established. | ||
846 | */ | ||
847 | if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && | ||
848 | !tcp_passive_fastopen(sk)) { | ||
849 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) | 775 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) |
850 | goto out_err; | 776 | goto out_err; |
851 | } | ||
852 | 777 | ||
853 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 778 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
854 | 779 | ||
@@ -859,10 +784,12 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, | |||
859 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 784 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
860 | goto out_err; | 785 | goto out_err; |
861 | 786 | ||
862 | while (size > 0) { | 787 | while (psize > 0) { |
863 | struct sk_buff *skb = tcp_write_queue_tail(sk); | 788 | struct sk_buff *skb = tcp_write_queue_tail(sk); |
864 | int copy, i; | 789 | struct page *page = pages[poffset / PAGE_SIZE]; |
865 | bool can_coalesce; | 790 | int copy, i, can_coalesce; |
791 | int offset = poffset % PAGE_SIZE; | ||
792 | int size = min_t(size_t, psize, PAGE_SIZE - offset); | ||
866 | 793 | ||
867 | if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { | 794 | if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { |
868 | new_segment: | 795 | new_segment: |
@@ -890,7 +817,7 @@ new_segment: | |||
890 | goto wait_for_memory; | 817 | goto wait_for_memory; |
891 | 818 | ||
892 | if (can_coalesce) { | 819 | if (can_coalesce) { |
893 | skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); | 820 | skb_shinfo(skb)->frags[i - 1].size += copy; |
894 | } else { | 821 | } else { |
895 | get_page(page); | 822 | get_page(page); |
896 | skb_fill_page_desc(skb, i, page, offset, copy); | 823 | skb_fill_page_desc(skb, i, page, offset, copy); |
@@ -907,11 +834,11 @@ new_segment: | |||
907 | skb_shinfo(skb)->gso_segs = 0; | 834 | skb_shinfo(skb)->gso_segs = 0; |
908 | 835 | ||
909 | if (!copied) | 836 | if (!copied) |
910 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; | 837 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; |
911 | 838 | ||
912 | copied += copy; | 839 | copied += copy; |
913 | offset += copy; | 840 | poffset += copy; |
914 | if (!(size -= copy)) | 841 | if (!(psize -= copy)) |
915 | goto out; | 842 | goto out; |
916 | 843 | ||
917 | if (skb->len < size_goal || (flags & MSG_OOB)) | 844 | if (skb->len < size_goal || (flags & MSG_OOB)) |
@@ -927,7 +854,8 @@ new_segment: | |||
927 | wait_for_sndbuf: | 854 | wait_for_sndbuf: |
928 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 855 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
929 | wait_for_memory: | 856 | wait_for_memory: |
930 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | 857 | if (copied) |
858 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | ||
931 | 859 | ||
932 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 860 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
933 | goto do_error; | 861 | goto do_error; |
@@ -936,7 +864,7 @@ wait_for_memory: | |||
936 | } | 864 | } |
937 | 865 | ||
938 | out: | 866 | out: |
939 | if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) | 867 | if (copied) |
940 | tcp_push(sk, flags, mss_now, tp->nonagle); | 868 | tcp_push(sk, flags, mss_now, tp->nonagle); |
941 | return copied; | 869 | return copied; |
942 | 870 | ||
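
The right-hand `do_tcp_sendpages()` takes an array of pages plus a flat byte offset and walks it page by page, splitting `psize` at page boundaries. A userspace model of the walk (PAGE_SIZE fixed at 4096 for the example):

```c
#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long poffset = 6000;	/* flat byte offset into the array */
	unsigned long psize = 9000;	/* bytes left to send */

	while (psize > 0) {
		unsigned long page_idx = poffset / PAGE_SIZE;	/* which page */
		unsigned long offset = poffset % PAGE_SIZE;	/* where in it */
		unsigned long chunk = psize < PAGE_SIZE - offset
				      ? psize : PAGE_SIZE - offset;

		printf("page %lu, offset %lu, %lu bytes\n",
		       page_idx, offset, chunk);
		poffset += chunk;	/* the real code advances by `copy` */
		psize -= chunk;
	}
	return 0;
}
```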
@@ -958,24 +886,24 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, | |||
958 | flags); | 886 | flags); |
959 | 887 | ||
960 | lock_sock(sk); | 888 | lock_sock(sk); |
961 | res = do_tcp_sendpages(sk, page, offset, size, flags); | 889 | res = do_tcp_sendpages(sk, &page, offset, size, flags); |
962 | release_sock(sk); | 890 | release_sock(sk); |
963 | return res; | 891 | return res; |
964 | } | 892 | } |
965 | EXPORT_SYMBOL(tcp_sendpage); | 893 | EXPORT_SYMBOL(tcp_sendpage); |
966 | 894 | ||
967 | static inline int select_size(const struct sock *sk, bool sg) | 895 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) |
896 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) | ||
897 | |||
898 | static inline int select_size(struct sock *sk, int sg) | ||
968 | { | 899 | { |
969 | const struct tcp_sock *tp = tcp_sk(sk); | 900 | struct tcp_sock *tp = tcp_sk(sk); |
970 | int tmp = tp->mss_cache; | 901 | int tmp = tp->mss_cache; |
971 | 902 | ||
972 | if (sg) { | 903 | if (sg) { |
973 | if (sk_can_gso(sk)) { | 904 | if (sk_can_gso(sk)) |
974 | /* Small frames wont use a full page: | 905 | tmp = 0; |
975 | * Payload will immediately follow tcp header. | 906 | else { |
976 | */ | ||
977 | tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); | ||
978 | } else { | ||
979 | int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); | 907 | int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); |
980 | 908 | ||
981 | if (tmp >= pgbreak && | 909 | if (tmp >= pgbreak && |
@@ -987,86 +915,27 @@ static inline int select_size(const struct sock *sk, bool sg) | |||
987 | return tmp; | 915 | return tmp; |
988 | } | 916 | } |
989 | 917 | ||
990 | void tcp_free_fastopen_req(struct tcp_sock *tp) | ||
991 | { | ||
992 | if (tp->fastopen_req != NULL) { | ||
993 | kfree(tp->fastopen_req); | ||
994 | tp->fastopen_req = NULL; | ||
995 | } | ||
996 | } | ||
997 | |||
998 | static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) | ||
999 | { | ||
1000 | struct tcp_sock *tp = tcp_sk(sk); | ||
1001 | int err, flags; | ||
1002 | |||
1003 | if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) | ||
1004 | return -EOPNOTSUPP; | ||
1005 | if (tp->fastopen_req != NULL) | ||
1006 | return -EALREADY; /* Another Fast Open is in progress */ | ||
1007 | |||
1008 | tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), | ||
1009 | sk->sk_allocation); | ||
1010 | if (unlikely(tp->fastopen_req == NULL)) | ||
1011 | return -ENOBUFS; | ||
1012 | tp->fastopen_req->data = msg; | ||
1013 | |||
1014 | flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; | ||
1015 | err = __inet_stream_connect(sk->sk_socket, msg->msg_name, | ||
1016 | msg->msg_namelen, flags); | ||
1017 | *size = tp->fastopen_req->copied; | ||
1018 | tcp_free_fastopen_req(tp); | ||
1019 | return err; | ||
1020 | } | ||
1021 | |||
1022 | int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 918 | int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
1023 | size_t size) | 919 | size_t size) |
1024 | { | 920 | { |
1025 | struct iovec *iov; | 921 | struct iovec *iov; |
1026 | struct tcp_sock *tp = tcp_sk(sk); | 922 | struct tcp_sock *tp = tcp_sk(sk); |
1027 | struct sk_buff *skb; | 923 | struct sk_buff *skb; |
1028 | int iovlen, flags, err, copied = 0; | 924 | int iovlen, flags; |
1029 | int mss_now = 0, size_goal, copied_syn = 0, offset = 0; | 925 | int mss_now, size_goal; |
1030 | bool sg; | 926 | int sg, err, copied; |
1031 | long timeo; | 927 | long timeo; |
1032 | 928 | ||
1033 | lock_sock(sk); | 929 | lock_sock(sk); |
1034 | 930 | ||
1035 | flags = msg->msg_flags; | 931 | flags = msg->msg_flags; |
1036 | if (flags & MSG_FASTOPEN) { | ||
1037 | err = tcp_sendmsg_fastopen(sk, msg, &copied_syn); | ||
1038 | if (err == -EINPROGRESS && copied_syn > 0) | ||
1039 | goto out; | ||
1040 | else if (err) | ||
1041 | goto out_err; | ||
1042 | offset = copied_syn; | ||
1043 | } | ||
1044 | |||
1045 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | 932 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
1046 | 933 | ||
1047 | /* Wait for a connection to finish. One exception is TCP Fast Open | 934 | /* Wait for a connection to finish. */ |
1048 | * (passive side) where data is allowed to be sent before a connection | 935 | if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) |
1049 | * is fully established. | ||
1050 | */ | ||
1051 | if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && | ||
1052 | !tcp_passive_fastopen(sk)) { | ||
1053 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) | 936 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) |
1054 | goto do_error; | ||
1055 | } | ||
1056 | |||
1057 | if (unlikely(tp->repair)) { | ||
1058 | if (tp->repair_queue == TCP_RECV_QUEUE) { | ||
1059 | copied = tcp_send_rcvq(sk, msg, size); | ||
1060 | goto out; | ||
1061 | } | ||
1062 | |||
1063 | err = -EINVAL; | ||
1064 | if (tp->repair_queue == TCP_NO_QUEUE) | ||
1065 | goto out_err; | 937 | goto out_err; |
1066 | 938 | ||
1067 | /* 'common' sending to sendq */ | ||
1068 | } | ||
1069 | |||
1070 | /* This should be in poll */ | 939 | /* This should be in poll */ |
1071 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 940 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
1072 | 941 | ||
@@ -1081,22 +950,13 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1081 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 950 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
1082 | goto out_err; | 951 | goto out_err; |
1083 | 952 | ||
1084 | sg = !!(sk->sk_route_caps & NETIF_F_SG); | 953 | sg = sk->sk_route_caps & NETIF_F_SG; |
1085 | 954 | ||
1086 | while (--iovlen >= 0) { | 955 | while (--iovlen >= 0) { |
1087 | size_t seglen = iov->iov_len; | 956 | size_t seglen = iov->iov_len; |
1088 | unsigned char __user *from = iov->iov_base; | 957 | unsigned char __user *from = iov->iov_base; |
1089 | 958 | ||
1090 | iov++; | 959 | iov++; |
1091 | if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ | ||
1092 | if (offset >= seglen) { | ||
1093 | offset -= seglen; | ||
1094 | continue; | ||
1095 | } | ||
1096 | seglen -= offset; | ||
1097 | from += offset; | ||
1098 | offset = 0; | ||
1099 | } | ||
1100 | 960 | ||
1101 | while (seglen > 0) { | 961 | while (seglen > 0) { |
1102 | int copy = 0; | 962 | int copy = 0; |
@@ -1139,54 +999,86 @@ new_segment: | |||
1139 | copy = seglen; | 999 | copy = seglen; |
1140 | 1000 | ||
1141 | /* Where to copy to? */ | 1001 | /* Where to copy to? */ |
1142 | if (skb_availroom(skb) > 0) { | 1002 | if (skb_tailroom(skb) > 0) { |
1143 | /* We have some space in skb head. Superb! */ | 1003 | /* We have some space in skb head. Superb! */ |
1144 | copy = min_t(int, copy, skb_availroom(skb)); | 1004 | if (copy > skb_tailroom(skb)) |
1005 | copy = skb_tailroom(skb); | ||
1145 | err = skb_add_data_nocache(sk, skb, from, copy); | 1006 | err = skb_add_data_nocache(sk, skb, from, copy); |
1146 | if (err) | 1007 | if (err) |
1147 | goto do_fault; | 1008 | goto do_fault; |
1148 | } else { | 1009 | } else { |
1149 | bool merge = true; | 1010 | int merge = 0; |
1150 | int i = skb_shinfo(skb)->nr_frags; | 1011 | int i = skb_shinfo(skb)->nr_frags; |
1151 | struct page_frag *pfrag = sk_page_frag(sk); | 1012 | struct page *page = TCP_PAGE(sk); |
1152 | 1013 | int off = TCP_OFF(sk); | |
1153 | if (!sk_page_frag_refill(sk, pfrag)) | 1014 | |
1154 | goto wait_for_memory; | 1015 | if (skb_can_coalesce(skb, i, page, off) && |
1155 | 1016 | off != PAGE_SIZE) { | |
1156 | if (!skb_can_coalesce(skb, i, pfrag->page, | 1017 | /* We can extend the last page |
1157 | pfrag->offset)) { | 1018 | * fragment. */ |
1158 | if (i == MAX_SKB_FRAGS || !sg) { | 1019 | merge = 1; |
1159 | tcp_mark_push(tp, skb); | 1020 | } else if (i == MAX_SKB_FRAGS || !sg) { |
1160 | goto new_segment; | 1021 | /* Need to add new fragment and cannot |
1022 | * do this because interface is non-SG, | ||
1023 | * or because all the page slots are | ||
1024 | * busy. */ | ||
1025 | tcp_mark_push(tp, skb); | ||
1026 | goto new_segment; | ||
1027 | } else if (page) { | ||
1028 | if (off == PAGE_SIZE) { | ||
1029 | put_page(page); | ||
1030 | TCP_PAGE(sk) = page = NULL; | ||
1031 | off = 0; | ||
1161 | } | 1032 | } |
1162 | merge = false; | 1033 | } else |
1163 | } | 1034 | off = 0; |
1164 | 1035 | ||
1165 | copy = min_t(int, copy, pfrag->size - pfrag->offset); | 1036 | if (copy > PAGE_SIZE - off) |
1037 | copy = PAGE_SIZE - off; | ||
1166 | 1038 | ||
1167 | if (!sk_wmem_schedule(sk, copy)) | 1039 | if (!sk_wmem_schedule(sk, copy)) |
1168 | goto wait_for_memory; | 1040 | goto wait_for_memory; |
1169 | 1041 | ||
1042 | if (!page) { | ||
1043 | /* Allocate new cache page. */ | ||
1044 | if (!(page = sk_stream_alloc_page(sk))) | ||
1045 | goto wait_for_memory; | ||
1046 | } | ||
1047 | |||
1048 | /* Time to copy data. We are close to | ||
1049 | * the end! */ | ||
1170 | err = skb_copy_to_page_nocache(sk, from, skb, | 1050 | err = skb_copy_to_page_nocache(sk, from, skb, |
1171 | pfrag->page, | 1051 | page, off, copy); |
1172 | pfrag->offset, | 1052 | if (err) { |
1173 | copy); | 1053 | /* If this page was new, give it to the |
1174 | if (err) | 1054 | * socket so it does not get leaked. |
1055 | */ | ||
1056 | if (!TCP_PAGE(sk)) { | ||
1057 | TCP_PAGE(sk) = page; | ||
1058 | TCP_OFF(sk) = 0; | ||
1059 | } | ||
1175 | goto do_error; | 1060 | goto do_error; |
1061 | } | ||
1176 | 1062 | ||
1177 | /* Update the skb. */ | 1063 | /* Update the skb. */ |
1178 | if (merge) { | 1064 | if (merge) { |
1179 | skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); | 1065 | skb_shinfo(skb)->frags[i - 1].size += |
1066 | copy; | ||
1180 | } else { | 1067 | } else { |
1181 | skb_fill_page_desc(skb, i, pfrag->page, | 1068 | skb_fill_page_desc(skb, i, page, off, copy); |
1182 | pfrag->offset, copy); | 1069 | if (TCP_PAGE(sk)) { |
1183 | get_page(pfrag->page); | 1070 | get_page(page); |
1071 | } else if (off + copy < PAGE_SIZE) { | ||
1072 | get_page(page); | ||
1073 | TCP_PAGE(sk) = page; | ||
1074 | } | ||
1184 | } | 1075 | } |
1185 | pfrag->offset += copy; | 1076 | |
1077 | TCP_OFF(sk) = off + copy; | ||
1186 | } | 1078 | } |
1187 | 1079 | ||
1188 | if (!copied) | 1080 | if (!copied) |
1189 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; | 1081 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; |
1190 | 1082 | ||
1191 | tp->write_seq += copy; | 1083 | tp->write_seq += copy; |
1192 | TCP_SKB_CB(skb)->end_seq += copy; | 1084 | TCP_SKB_CB(skb)->end_seq += copy; |
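
The right-hand column reinstates the per-socket page cache (`sk->sk_sndmsg_page` / `sk->sk_sndmsg_off`, via the `TCP_PAGE`/`TCP_OFF` macros) where the left column used the per-task `page_frag`. Either way, the pivot is whether the new bytes can be merged into the skb's last page fragment; a userspace model of that test, with hypothetical types:

```c
#include <stdbool.h>
#include <stddef.h>

/* Mirror of the skb_can_coalesce() idea: appending is possible only
 * if the last fragment is on the same page and the new bytes start
 * exactly where that fragment ends. */
struct frag { const void *page; size_t off, size; };

static bool can_coalesce(const struct frag *frags, int nr_frags,
			 const void *page, size_t off)
{
	const struct frag *last;

	if (nr_frags == 0)
		return false;
	last = &frags[nr_frags - 1];
	return last->page == page && last->off + last->size == off;
}
```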
@@ -1197,7 +1089,7 @@ new_segment: | |||
1197 | if ((seglen -= copy) == 0 && iovlen == 0) | 1089 | if ((seglen -= copy) == 0 && iovlen == 0) |
1198 | goto out; | 1090 | goto out; |
1199 | 1091 | ||
1200 | if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) | 1092 | if (skb->len < max || (flags & MSG_OOB)) |
1201 | continue; | 1093 | continue; |
1202 | 1094 | ||
1203 | if (forced_push(tp)) { | 1095 | if (forced_push(tp)) { |
@@ -1224,7 +1116,10 @@ out: | |||
1224 | if (copied) | 1116 | if (copied) |
1225 | tcp_push(sk, flags, mss_now, tp->nonagle); | 1117 | tcp_push(sk, flags, mss_now, tp->nonagle); |
1226 | release_sock(sk); | 1118 | release_sock(sk); |
1227 | return copied + copied_syn; | 1119 | |
1120 | if (copied > 0) | ||
1121 | uid_stat_tcp_snd(current_uid(), copied); | ||
1122 | return copied; | ||
1228 | 1123 | ||
1229 | do_fault: | 1124 | do_fault: |
1230 | if (!skb->len) { | 1125 | if (!skb->len) { |
@@ -1237,7 +1132,7 @@ do_fault: | |||
1237 | } | 1132 | } |
1238 | 1133 | ||
1239 | do_error: | 1134 | do_error: |
1240 | if (copied + copied_syn) | 1135 | if (copied) |
1241 | goto out; | 1136 | goto out; |
1242 | out_err: | 1137 | out_err: |
1243 | err = sk_stream_error(sk, flags, err); | 1138 | err = sk_stream_error(sk, flags, err); |
@@ -1295,24 +1190,6 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) | |||
1295 | return -EAGAIN; | 1190 | return -EAGAIN; |
1296 | } | 1191 | } |
1297 | 1192 | ||
1298 | static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) | ||
1299 | { | ||
1300 | struct sk_buff *skb; | ||
1301 | int copied = 0, err = 0; | ||
1302 | |||
1303 | /* XXX -- need to support SO_PEEK_OFF */ | ||
1304 | |||
1305 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
1306 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len); | ||
1307 | if (err) | ||
1308 | break; | ||
1309 | |||
1310 | copied += skb->len; | ||
1311 | } | ||
1312 | |||
1313 | return err ?: copied; | ||
1314 | } | ||
1315 | |||
1316 | /* Clean up the receive buffer for full frames taken by the user, | 1193 | /* Clean up the receive buffer for full frames taken by the user, |
1317 | * then send an ACK if necessary. COPIED is the number of bytes | 1194 | * then send an ACK if necessary. COPIED is the number of bytes |
1318 | * tcp_recvmsg has given to the user so far, it speeds up the | 1195 | * tcp_recvmsg has given to the user so far, it speeds up the |
@@ -1322,13 +1199,15 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) | |||
1322 | void tcp_cleanup_rbuf(struct sock *sk, int copied) | 1199 | void tcp_cleanup_rbuf(struct sock *sk, int copied) |
1323 | { | 1200 | { |
1324 | struct tcp_sock *tp = tcp_sk(sk); | 1201 | struct tcp_sock *tp = tcp_sk(sk); |
1325 | bool time_to_ack = false; | 1202 | int time_to_ack = 0; |
1326 | 1203 | ||
1204 | #if TCP_DEBUG | ||
1327 | struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); | 1205 | struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); |
1328 | 1206 | ||
1329 | WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), | 1207 | WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), |
1330 | "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", | 1208 | "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", |
1331 | tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); | 1209 | tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); |
1210 | #endif | ||
1332 | 1211 | ||
1333 | if (inet_csk_ack_scheduled(sk)) { | 1212 | if (inet_csk_ack_scheduled(sk)) { |
1334 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1213 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -1348,7 +1227,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) | |||
1348 | ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && | 1227 | ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && |
1349 | !icsk->icsk_ack.pingpong)) && | 1228 | !icsk->icsk_ack.pingpong)) && |
1350 | !atomic_read(&sk->sk_rmem_alloc))) | 1229 | !atomic_read(&sk->sk_rmem_alloc))) |
1351 | time_to_ack = true; | 1230 | time_to_ack = 1; |
1352 | } | 1231 | } |
1353 | 1232 | ||
1354 | /* We send an ACK if we can now advertise a non-zero window | 1233 | /* We send an ACK if we can now advertise a non-zero window |
@@ -1370,7 +1249,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) | |||
1370 | * "Lots" means "at least twice" here. | 1249 | * "Lots" means "at least twice" here. |
1371 | */ | 1250 | */ |
1372 | if (new_window && new_window >= 2 * rcv_window_now) | 1251 | if (new_window && new_window >= 2 * rcv_window_now) |
1373 | time_to_ack = true; | 1252 | time_to_ack = 1; |
1374 | } | 1253 | } |
1375 | } | 1254 | } |
1376 | if (time_to_ack) | 1255 | if (time_to_ack) |
@@ -1428,12 +1307,12 @@ static void tcp_service_net_dma(struct sock *sk, bool wait) | |||
1428 | } | 1307 | } |
1429 | #endif | 1308 | #endif |
1430 | 1309 | ||
1431 | static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) | 1310 | static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) |
1432 | { | 1311 | { |
1433 | struct sk_buff *skb; | 1312 | struct sk_buff *skb; |
1434 | u32 offset; | 1313 | u32 offset; |
1435 | 1314 | ||
1436 | while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { | 1315 | skb_queue_walk(&sk->sk_receive_queue, skb) { |
1437 | offset = seq - TCP_SKB_CB(skb)->seq; | 1316 | offset = seq - TCP_SKB_CB(skb)->seq; |
1438 | if (tcp_hdr(skb)->syn) | 1317 | if (tcp_hdr(skb)->syn) |
1439 | offset--; | 1318 | offset--; |
@@ -1441,11 +1320,6 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) | |||
1441 | *off = offset; | 1320 | *off = offset; |
1442 | return skb; | 1321 | return skb; |
1443 | } | 1322 | } |
1444 | /* This looks weird, but this can happen if TCP collapsing | ||
1445 | * splitted a fat GRO packet, while we released socket lock | ||
1446 | * in skb_splice_bits() | ||
1447 | */ | ||
1448 | sk_eat_skb(sk, skb, false); | ||
1449 | } | 1323 | } |
1450 | return NULL; | 1324 | return NULL; |
1451 | } | 1325 | } |
@@ -1487,7 +1361,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1487 | break; | 1361 | break; |
1488 | } | 1362 | } |
1489 | used = recv_actor(desc, skb, offset, len); | 1363 | used = recv_actor(desc, skb, offset, len); |
1490 | if (used <= 0) { | 1364 | if (used < 0) { |
1491 | if (!copied) | 1365 | if (!copied) |
1492 | copied = used; | 1366 | copied = used; |
1493 | break; | 1367 | break; |
@@ -1496,26 +1370,22 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1496 | copied += used; | 1370 | copied += used; |
1497 | offset += used; | 1371 | offset += used; |
1498 | } | 1372 | } |
1499 | /* If recv_actor drops the lock (e.g. TCP splice | 1373 | /* |
1374 | * If recv_actor drops the lock (e.g. TCP splice | ||
1500 | * receive) the skb pointer might be invalid when | 1375 | * receive) the skb pointer might be invalid when |
1501 | * getting here: tcp_collapse might have deleted it | 1376 | * getting here: tcp_collapse might have deleted it |
1502 | * while aggregating skbs from the socket queue. | 1377 | * while aggregating skbs from the socket queue. |
1503 | */ | 1378 | */ |
1504 | skb = tcp_recv_skb(sk, seq - 1, &offset); | 1379 | skb = tcp_recv_skb(sk, seq-1, &offset); |
1505 | if (!skb) | 1380 | if (!skb || (offset+1 != skb->len)) |
1506 | break; | 1381 | break; |
1507 | /* TCP coalescing might have appended data to the skb. | ||
1508 | * Try to splice more frags | ||
1509 | */ | ||
1510 | if (offset + 1 != skb->len) | ||
1511 | continue; | ||
1512 | } | 1382 | } |
1513 | if (tcp_hdr(skb)->fin) { | 1383 | if (tcp_hdr(skb)->fin) { |
1514 | sk_eat_skb(sk, skb, false); | 1384 | sk_eat_skb(sk, skb, 0); |
1515 | ++seq; | 1385 | ++seq; |
1516 | break; | 1386 | break; |
1517 | } | 1387 | } |
1518 | sk_eat_skb(sk, skb, false); | 1388 | sk_eat_skb(sk, skb, 0); |
1519 | if (!desc->count) | 1389 | if (!desc->count) |
1520 | break; | 1390 | break; |
1521 | tp->copied_seq = seq; | 1391 | tp->copied_seq = seq; |
@@ -1526,9 +1396,10 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1526 | 1396 | ||
1527 | /* Clean up data we have read: This will do ACK frames. */ | 1397 | /* Clean up data we have read: This will do ACK frames. */ |
1528 | if (copied > 0) { | 1398 | if (copied > 0) { |
1529 | tcp_recv_skb(sk, seq, &offset); | ||
1530 | tcp_cleanup_rbuf(sk, copied); | 1399 | tcp_cleanup_rbuf(sk, copied); |
1400 | uid_stat_tcp_rcv(current_uid(), copied); | ||
1531 | } | 1401 | } |
1402 | |||
1532 | return copied; | 1403 | return copied; |
1533 | } | 1404 | } |
1534 | EXPORT_SYMBOL(tcp_read_sock); | 1405 | EXPORT_SYMBOL(tcp_read_sock); |
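
`tcp_read_sock()` hands each queued skb to a caller-supplied `recv_actor`; this is the hook that splice and the in-kernel sunrpc code use to consume TCP data without `recvmsg()`. A sketch of the callback contract, assuming the standard `sk_read_actor_t` signature:

```c
/* Consume up to desc->count bytes without copying them anywhere.
 * Returning how many bytes were used advances the walk; a negative
 * value aborts it (the newer, left-hand code also stops on 0). */
static int count_actor(read_descriptor_t *desc, struct sk_buff *skb,
		       unsigned int offset, size_t len)
{
	size_t want = min_t(size_t, len, desc->count);

	desc->count -= want;	/* tcp_read_sock() stops when this hits 0 */
	return want;
}
```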
@@ -1553,7 +1424,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1553 | int target; /* Read at least this many bytes */ | 1424 | int target; /* Read at least this many bytes */ |
1554 | long timeo; | 1425 | long timeo; |
1555 | struct task_struct *user_recv = NULL; | 1426 | struct task_struct *user_recv = NULL; |
1556 | bool copied_early = false; | 1427 | int copied_early = 0; |
1557 | struct sk_buff *skb; | 1428 | struct sk_buff *skb; |
1558 | u32 urg_hole = 0; | 1429 | u32 urg_hole = 0; |
1559 | 1430 | ||
@@ -1569,21 +1440,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1569 | if (flags & MSG_OOB) | 1440 | if (flags & MSG_OOB) |
1570 | goto recv_urg; | 1441 | goto recv_urg; |
1571 | 1442 | ||
1572 | if (unlikely(tp->repair)) { | ||
1573 | err = -EPERM; | ||
1574 | if (!(flags & MSG_PEEK)) | ||
1575 | goto out; | ||
1576 | |||
1577 | if (tp->repair_queue == TCP_SEND_QUEUE) | ||
1578 | goto recv_sndq; | ||
1579 | |||
1580 | err = -EINVAL; | ||
1581 | if (tp->repair_queue == TCP_NO_QUEUE) | ||
1582 | goto out; | ||
1583 | |||
1584 | /* 'common' recv queue MSG_PEEK-ing */ | ||
1585 | } | ||
1586 | |||
1587 | seq = &tp->copied_seq; | 1443 | seq = &tp->copied_seq; |
1588 | if (flags & MSG_PEEK) { | 1444 | if (flags & MSG_PEEK) { |
1589 | peek_seq = tp->copied_seq; | 1445 | peek_seq = tp->copied_seq; |
@@ -1604,7 +1460,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1604 | if ((available < target) && | 1460 | if ((available < target) && |
1605 | (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && | 1461 | (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && |
1606 | !sysctl_tcp_low_latency && | 1462 | !sysctl_tcp_low_latency && |
1607 | net_dma_find_channel()) { | 1463 | dma_find_channel(DMA_MEMCPY)) { |
1608 | preempt_enable_no_resched(); | 1464 | preempt_enable_no_resched(); |
1609 | tp->ucopy.pinned_list = | 1465 | tp->ucopy.pinned_list = |
1610 | dma_pin_iovec_pages(msg->msg_iov, len); | 1466 | dma_pin_iovec_pages(msg->msg_iov, len); |
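
The two lookups are close relatives: upstream's `net_dma_find_channel()` is essentially the right-hand column's `dma_find_channel(DMA_MEMCPY)` plus an alignment check on the returned channel. Paraphrased as a sketch, not the verbatim upstream source:

```c
struct dma_chan *net_dma_find_channel(void)
{
	struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);

	/* net_dma requires byte-granular copy alignment */
	if (chan && !is_dma_copy_aligned(chan->device, 1, 1, 1))
		return NULL;

	return chan;
}
```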
@@ -1745,14 +1601,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1745 | } | 1601 | } |
1746 | 1602 | ||
1747 | #ifdef CONFIG_NET_DMA | 1603 | #ifdef CONFIG_NET_DMA |
1748 | if (tp->ucopy.dma_chan) { | 1604 | if (tp->ucopy.dma_chan) |
1749 | if (tp->rcv_wnd == 0 && | 1605 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); |
1750 | !skb_queue_empty(&sk->sk_async_wait_queue)) { | ||
1751 | tcp_service_net_dma(sk, true); | ||
1752 | tcp_cleanup_rbuf(sk, copied); | ||
1753 | } else | ||
1754 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
1755 | } | ||
1756 | #endif | 1606 | #endif |
1757 | if (copied >= target) { | 1607 | if (copied >= target) { |
1758 | /* Do not sleep, just process backlog. */ | 1608 | /* Do not sleep, just process backlog. */ |
@@ -1791,9 +1641,9 @@ do_prequeue: | |||
1791 | } | 1641 | } |
1792 | if ((flags & MSG_PEEK) && | 1642 | if ((flags & MSG_PEEK) && |
1793 | (peek_seq - copied - urg_hole != tp->copied_seq)) { | 1643 | (peek_seq - copied - urg_hole != tp->copied_seq)) { |
1794 | net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n", | 1644 | if (net_ratelimit()) |
1795 | current->comm, | 1645 | printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", |
1796 | task_pid_nr(current)); | 1646 | current->comm, task_pid_nr(current)); |
1797 | peek_seq = tp->copied_seq; | 1647 | peek_seq = tp->copied_seq; |
1798 | } | 1648 | } |
1799 | continue; | 1649 | continue; |
@@ -1825,7 +1675,7 @@ do_prequeue: | |||
1825 | if (!(flags & MSG_TRUNC)) { | 1675 | if (!(flags & MSG_TRUNC)) { |
1826 | #ifdef CONFIG_NET_DMA | 1676 | #ifdef CONFIG_NET_DMA |
1827 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) | 1677 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) |
1828 | tp->ucopy.dma_chan = net_dma_find_channel(); | 1678 | tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); |
1829 | 1679 | ||
1830 | if (tp->ucopy.dma_chan) { | 1680 | if (tp->ucopy.dma_chan) { |
1831 | tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( | 1681 | tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( |
@@ -1835,8 +1685,7 @@ do_prequeue: | |||
1835 | 1685 | ||
1836 | if (tp->ucopy.dma_cookie < 0) { | 1686 | if (tp->ucopy.dma_cookie < 0) { |
1837 | 1687 | ||
1838 | pr_alert("%s: dma_cookie < 0\n", | 1688 | printk(KERN_ALERT "dma_cookie < 0\n"); |
1839 | __func__); | ||
1840 | 1689 | ||
1841 | /* Exception. Bailout! */ | 1690 | /* Exception. Bailout! */ |
1842 | if (!copied) | 1691 | if (!copied) |
@@ -1847,7 +1696,7 @@ do_prequeue: | |||
1847 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | 1696 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); |
1848 | 1697 | ||
1849 | if ((offset + used) == skb->len) | 1698 | if ((offset + used) == skb->len) |
1850 | copied_early = true; | 1699 | copied_early = 1; |
1851 | 1700 | ||
1852 | } else | 1701 | } else |
1853 | #endif | 1702 | #endif |
@@ -1881,7 +1730,7 @@ skip_copy: | |||
1881 | goto found_fin_ok; | 1730 | goto found_fin_ok; |
1882 | if (!(flags & MSG_PEEK)) { | 1731 | if (!(flags & MSG_PEEK)) { |
1883 | sk_eat_skb(sk, skb, copied_early); | 1732 | sk_eat_skb(sk, skb, copied_early); |
1884 | copied_early = false; | 1733 | copied_early = 0; |
1885 | } | 1734 | } |
1886 | continue; | 1735 | continue; |
1887 | 1736 | ||
@@ -1890,7 +1739,7 @@ skip_copy: | |||
1890 | ++*seq; | 1739 | ++*seq; |
1891 | if (!(flags & MSG_PEEK)) { | 1740 | if (!(flags & MSG_PEEK)) { |
1892 | sk_eat_skb(sk, skb, copied_early); | 1741 | sk_eat_skb(sk, skb, copied_early); |
1893 | copied_early = false; | 1742 | copied_early = 0; |
1894 | } | 1743 | } |
1895 | break; | 1744 | break; |
1896 | } while (len > 0); | 1745 | } while (len > 0); |
@@ -1932,6 +1781,9 @@ skip_copy: | |||
1932 | tcp_cleanup_rbuf(sk, copied); | 1781 | tcp_cleanup_rbuf(sk, copied); |
1933 | 1782 | ||
1934 | release_sock(sk); | 1783 | release_sock(sk); |
1784 | |||
1785 | if (copied > 0) | ||
1786 | uid_stat_tcp_rcv(current_uid(), copied); | ||
1935 | return copied; | 1787 | return copied; |
1936 | 1788 | ||
1937 | out: | 1789 | out: |
@@ -1940,10 +1792,8 @@ out: | |||
1940 | 1792 | ||
1941 | recv_urg: | 1793 | recv_urg: |
1942 | err = tcp_recv_urg(sk, msg, len, flags); | 1794 | err = tcp_recv_urg(sk, msg, len, flags); |
1943 | goto out; | 1795 | if (err > 0) |
1944 | 1796 | uid_stat_tcp_rcv(current_uid(), err); | |
1945 | recv_sndq: | ||
1946 | err = tcp_peek_sndq(sk, msg, len); | ||
1947 | goto out; | 1797 | goto out; |
1948 | } | 1798 | } |
1949 | EXPORT_SYMBOL(tcp_recvmsg); | 1799 | EXPORT_SYMBOL(tcp_recvmsg); |
@@ -2041,20 +1891,6 @@ void tcp_shutdown(struct sock *sk, int how) | |||
2041 | } | 1891 | } |
2042 | EXPORT_SYMBOL(tcp_shutdown); | 1892 | EXPORT_SYMBOL(tcp_shutdown); |
2043 | 1893 | ||
2044 | bool tcp_check_oom(struct sock *sk, int shift) | ||
2045 | { | ||
2046 | bool too_many_orphans, out_of_socket_memory; | ||
2047 | |||
2048 | too_many_orphans = tcp_too_many_orphans(sk, shift); | ||
2049 | out_of_socket_memory = tcp_out_of_memory(sk); | ||
2050 | |||
2051 | if (too_many_orphans) | ||
2052 | net_info_ratelimited("too many orphaned sockets\n"); | ||
2053 | if (out_of_socket_memory) | ||
2054 | net_info_ratelimited("out of memory -- consider tuning tcp_mem\n"); | ||
2055 | return too_many_orphans || out_of_socket_memory; | ||
2056 | } | ||
2057 | |||
2058 | void tcp_close(struct sock *sk, long timeout) | 1894 | void tcp_close(struct sock *sk, long timeout) |
2059 | { | 1895 | { |
2060 | struct sk_buff *skb; | 1896 | struct sk_buff *skb; |
@@ -2097,9 +1933,7 @@ void tcp_close(struct sock *sk, long timeout) | |||
2097 | * advertise a zero window, then kill -9 the FTP client, wheee... | 1933 | * advertise a zero window, then kill -9 the FTP client, wheee... |
2098 | * Note: timeout is always zero in such a case. | 1934 | * Note: timeout is always zero in such a case. |
2099 | */ | 1935 | */ |
2100 | if (unlikely(tcp_sk(sk)->repair)) { | 1936 | if (data_was_unread) { |
2101 | sk->sk_prot->disconnect(sk, 0); | ||
2102 | } else if (data_was_unread) { | ||
2103 | /* Unread data was tossed, zap the connection. */ | 1937 | /* Unread data was tossed, zap the connection. */ |
2104 | NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); | 1938 | NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); |
2105 | tcp_set_state(sk, TCP_CLOSE); | 1939 | tcp_set_state(sk, TCP_CLOSE); |
@@ -2133,10 +1967,6 @@ void tcp_close(struct sock *sk, long timeout) | |||
2133 | * they look as CLOSING or LAST_ACK for Linux) | 1967 | * they look as CLOSING or LAST_ACK for Linux) |
2134 | * Probably, I missed some more holelets. | 1968 | * Probably, I missed some more holelets. |
2135 | * --ANK | 1969 | * --ANK |
2136 | * XXX (TFO) - To start off we don't support SYN+ACK+FIN | ||
2137 | * in a single packet! (May consider it later but will | ||
2138 | * probably need API support or TCP_CORK SYN-ACK until | ||
2139 | * data is written and socket is closed.) | ||
2140 | */ | 1970 | */ |
2141 | tcp_send_fin(sk); | 1971 | tcp_send_fin(sk); |
2142 | } | 1972 | } |
@@ -2200,7 +2030,10 @@ adjudge_to_death: | |||
2200 | } | 2030 | } |
2201 | if (sk->sk_state != TCP_CLOSE) { | 2031 | if (sk->sk_state != TCP_CLOSE) { |
2202 | sk_mem_reclaim(sk); | 2032 | sk_mem_reclaim(sk); |
2203 | if (tcp_check_oom(sk, 0)) { | 2033 | if (tcp_too_many_orphans(sk, 0)) { |
2034 | if (net_ratelimit()) | ||
2035 | printk(KERN_INFO "TCP: too many of orphaned " | ||
2036 | "sockets\n"); | ||
2204 | tcp_set_state(sk, TCP_CLOSE); | 2037 | tcp_set_state(sk, TCP_CLOSE); |
2205 | tcp_send_active_reset(sk, GFP_ATOMIC); | 2038 | tcp_send_active_reset(sk, GFP_ATOMIC); |
2206 | NET_INC_STATS_BH(sock_net(sk), | 2039 | NET_INC_STATS_BH(sock_net(sk), |
@@ -2208,16 +2041,8 @@ adjudge_to_death: | |||
2208 | } | 2041 | } |
2209 | } | 2042 | } |
2210 | 2043 | ||
2211 | if (sk->sk_state == TCP_CLOSE) { | 2044 | if (sk->sk_state == TCP_CLOSE) |
2212 | struct request_sock *req = tcp_sk(sk)->fastopen_rsk; | ||
2213 | /* We could get here with a non-NULL req if the socket is | ||
2214 | * aborted (e.g., closed with unread data) before 3WHS | ||
2215 | * finishes. | ||
2216 | */ | ||
2217 | if (req != NULL) | ||
2218 | reqsk_fastopen_remove(sk, req, false); | ||
2219 | inet_csk_destroy_sock(sk); | 2045 | inet_csk_destroy_sock(sk); |
2220 | } | ||
2221 | /* Otherwise, socket is reprieved until protocol close. */ | 2046 | /* Otherwise, socket is reprieved until protocol close. */ |
2222 | 2047 | ||
2223 | out: | 2048 | out: |
@@ -2229,7 +2054,7 @@ EXPORT_SYMBOL(tcp_close); | |||
2229 | 2054 | ||
2230 | /* These states need RST on ABORT according to RFC793 */ | 2055 | /* These states need RST on ABORT according to RFC793 */ |
2231 | 2056 | ||
2232 | static inline bool tcp_need_reset(int state) | 2057 | static inline int tcp_need_reset(int state) |
2233 | { | 2058 | { |
2234 | return (1 << state) & | 2059 | return (1 << state) & |
2235 | (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | | 2060 | (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | |
@@ -2250,8 +2075,6 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2250 | /* ABORT function of RFC793 */ | 2075 | /* ABORT function of RFC793 */ |
2251 | if (old_state == TCP_LISTEN) { | 2076 | if (old_state == TCP_LISTEN) { |
2252 | inet_csk_listen_stop(sk); | 2077 | inet_csk_listen_stop(sk); |
2253 | } else if (unlikely(tp->repair)) { | ||
2254 | sk->sk_err = ECONNABORTED; | ||
2255 | } else if (tcp_need_reset(old_state) || | 2078 | } else if (tcp_need_reset(old_state) || |
2256 | (tp->snd_nxt != tp->write_seq && | 2079 | (tp->snd_nxt != tp->write_seq && |
2257 | (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { | 2080 | (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { |
@@ -2303,68 +2126,6 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2303 | } | 2126 | } |
2304 | EXPORT_SYMBOL(tcp_disconnect); | 2127 | EXPORT_SYMBOL(tcp_disconnect); |
2305 | 2128 | ||
2306 | void tcp_sock_destruct(struct sock *sk) | ||
2307 | { | ||
2308 | inet_sock_destruct(sk); | ||
2309 | |||
2310 | kfree(inet_csk(sk)->icsk_accept_queue.fastopenq); | ||
2311 | } | ||
2312 | |||
2313 | static inline bool tcp_can_repair_sock(const struct sock *sk) | ||
2314 | { | ||
2315 | return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && | ||
2316 | ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); | ||
2317 | } | ||
2318 | |||
2319 | static int tcp_repair_options_est(struct tcp_sock *tp, | ||
2320 | struct tcp_repair_opt __user *optbuf, unsigned int len) | ||
2321 | { | ||
2322 | struct tcp_repair_opt opt; | ||
2323 | |||
2324 | while (len >= sizeof(opt)) { | ||
2325 | if (copy_from_user(&opt, optbuf, sizeof(opt))) | ||
2326 | return -EFAULT; | ||
2327 | |||
2328 | optbuf++; | ||
2329 | len -= sizeof(opt); | ||
2330 | |||
2331 | switch (opt.opt_code) { | ||
2332 | case TCPOPT_MSS: | ||
2333 | tp->rx_opt.mss_clamp = opt.opt_val; | ||
2334 | break; | ||
2335 | case TCPOPT_WINDOW: | ||
2336 | { | ||
2337 | u16 snd_wscale = opt.opt_val & 0xFFFF; | ||
2338 | u16 rcv_wscale = opt.opt_val >> 16; | ||
2339 | |||
2340 | if (snd_wscale > 14 || rcv_wscale > 14) | ||
2341 | return -EFBIG; | ||
2342 | |||
2343 | tp->rx_opt.snd_wscale = snd_wscale; | ||
2344 | tp->rx_opt.rcv_wscale = rcv_wscale; | ||
2345 | tp->rx_opt.wscale_ok = 1; | ||
2346 | } | ||
2347 | break; | ||
2348 | case TCPOPT_SACK_PERM: | ||
2349 | if (opt.opt_val != 0) | ||
2350 | return -EINVAL; | ||
2351 | |||
2352 | tp->rx_opt.sack_ok |= TCP_SACK_SEEN; | ||
2353 | if (sysctl_tcp_fack) | ||
2354 | tcp_enable_fack(tp); | ||
2355 | break; | ||
2356 | case TCPOPT_TIMESTAMP: | ||
2357 | if (opt.opt_val != 0) | ||
2358 | return -EINVAL; | ||
2359 | |||
2360 | tp->rx_opt.tstamp_ok = 1; | ||
2361 | break; | ||
2362 | } | ||
2363 | } | ||
2364 | |||
2365 | return 0; | ||
2366 | } | ||
2367 | |||
2368 | /* | 2129 | /* |
2369 | * Socket option code for TCP. | 2130 | * Socket option code for TCP. |
2370 | */ | 2131 | */ |
@@ -2535,55 +2296,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2535 | err = -EINVAL; | 2296 | err = -EINVAL; |
2536 | else | 2297 | else |
2537 | tp->thin_dupack = val; | 2298 | tp->thin_dupack = val; |
2538 | if (tp->thin_dupack) | ||
2539 | tcp_disable_early_retrans(tp); | ||
2540 | break; | ||
2541 | |||
2542 | case TCP_REPAIR: | ||
2543 | if (!tcp_can_repair_sock(sk)) | ||
2544 | err = -EPERM; | ||
2545 | else if (val == 1) { | ||
2546 | tp->repair = 1; | ||
2547 | sk->sk_reuse = SK_FORCE_REUSE; | ||
2548 | tp->repair_queue = TCP_NO_QUEUE; | ||
2549 | } else if (val == 0) { | ||
2550 | tp->repair = 0; | ||
2551 | sk->sk_reuse = SK_NO_REUSE; | ||
2552 | tcp_send_window_probe(sk); | ||
2553 | } else | ||
2554 | err = -EINVAL; | ||
2555 | |||
2556 | break; | ||
2557 | |||
2558 | case TCP_REPAIR_QUEUE: | ||
2559 | if (!tp->repair) | ||
2560 | err = -EPERM; | ||
2561 | else if (val < TCP_QUEUES_NR) | ||
2562 | tp->repair_queue = val; | ||
2563 | else | ||
2564 | err = -EINVAL; | ||
2565 | break; | ||
2566 | |||
2567 | case TCP_QUEUE_SEQ: | ||
2568 | if (sk->sk_state != TCP_CLOSE) | ||
2569 | err = -EPERM; | ||
2570 | else if (tp->repair_queue == TCP_SEND_QUEUE) | ||
2571 | tp->write_seq = val; | ||
2572 | else if (tp->repair_queue == TCP_RECV_QUEUE) | ||
2573 | tp->rcv_nxt = val; | ||
2574 | else | ||
2575 | err = -EINVAL; | ||
2576 | break; | ||
2577 | |||
2578 | case TCP_REPAIR_OPTIONS: | ||
2579 | if (!tp->repair) | ||
2580 | err = -EINVAL; | ||
2581 | else if (sk->sk_state == TCP_ESTABLISHED) | ||
2582 | err = tcp_repair_options_est(tp, | ||
2583 | (struct tcp_repair_opt __user *)optval, | ||
2584 | optlen); | ||
2585 | else | ||
2586 | err = -EPERM; | ||
2587 | break; | 2299 | break; |
2588 | 2300 | ||
2589 | case TCP_CORK: | 2301 | case TCP_CORK: |
@@ -2698,18 +2410,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2698 | /* Cap the max timeout in ms TCP will retry/retrans | 2410 | /* Cap the max timeout in ms TCP will retry/retrans |
2699 | * before giving up and aborting (ETIMEDOUT) a connection. | 2411 | * before giving up and aborting (ETIMEDOUT) a connection. |
2700 | */ | 2412 | */ |
2701 | if (val < 0) | 2413 | icsk->icsk_user_timeout = msecs_to_jiffies(val); |
2702 | err = -EINVAL; | ||
2703 | else | ||
2704 | icsk->icsk_user_timeout = msecs_to_jiffies(val); | ||
2705 | break; | ||
2706 | |||
2707 | case TCP_FASTOPEN: | ||
2708 | if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | | ||
2709 | TCPF_LISTEN))) | ||
2710 | err = fastopen_init_queue(sk, val); | ||
2711 | else | ||
2712 | err = -EINVAL; | ||
2713 | break; | 2414 | break; |
2714 | default: | 2415 | default: |
2715 | err = -ENOPROTOOPT; | 2416 | err = -ENOPROTOOPT; |
@@ -2723,7 +2424,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2723 | int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | 2424 | int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, |
2724 | unsigned int optlen) | 2425 | unsigned int optlen) |
2725 | { | 2426 | { |
2726 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2427 | struct inet_connection_sock *icsk = inet_csk(sk); |
2727 | 2428 | ||
2728 | if (level != SOL_TCP) | 2429 | if (level != SOL_TCP) |
2729 | return icsk->icsk_af_ops->setsockopt(sk, level, optname, | 2430 | return icsk->icsk_af_ops->setsockopt(sk, level, optname, |
@@ -2745,9 +2446,9 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); | |||
2745 | #endif | 2446 | #endif |
2746 | 2447 | ||
2747 | /* Return information about state of tcp endpoint in API format. */ | 2448 | /* Return information about state of tcp endpoint in API format. */ |
2748 | void tcp_get_info(const struct sock *sk, struct tcp_info *info) | 2449 | void tcp_get_info(struct sock *sk, struct tcp_info *info) |
2749 | { | 2450 | { |
2750 | const struct tcp_sock *tp = tcp_sk(sk); | 2451 | struct tcp_sock *tp = tcp_sk(sk); |
2751 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2452 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2752 | u32 now = tcp_time_stamp; | 2453 | u32 now = tcp_time_stamp; |
2753 | 2454 | ||
@@ -2769,12 +2470,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) | |||
2769 | info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; | 2470 | info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; |
2770 | } | 2471 | } |
2771 | 2472 | ||
2772 | if (tp->ecn_flags & TCP_ECN_OK) | 2473 | if (tp->ecn_flags&TCP_ECN_OK) |
2773 | info->tcpi_options |= TCPI_OPT_ECN; | 2474 | info->tcpi_options |= TCPI_OPT_ECN; |
2774 | if (tp->ecn_flags & TCP_ECN_SEEN) | ||
2775 | info->tcpi_options |= TCPI_OPT_ECN_SEEN; | ||
2776 | if (tp->syn_data_acked) | ||
2777 | info->tcpi_options |= TCPI_OPT_SYN_DATA; | ||
2778 | 2475 | ||
2779 | info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); | 2476 | info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); |
2780 | info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); | 2477 | info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); |
@@ -2832,8 +2529,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2832 | val = tp->mss_cache; | 2529 | val = tp->mss_cache; |
2833 | if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) | 2530 | if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) |
2834 | val = tp->rx_opt.user_mss; | 2531 | val = tp->rx_opt.user_mss; |
2835 | if (tp->repair) | ||
2836 | val = tp->rx_opt.mss_clamp; | ||
2837 | break; | 2532 | break; |
2838 | case TCP_NODELAY: | 2533 | case TCP_NODELAY: |
2839 | val = !!(tp->nonagle&TCP_NAGLE_OFF); | 2534 | val = !!(tp->nonagle&TCP_NAGLE_OFF); |
@@ -2936,26 +2631,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2936 | val = tp->thin_dupack; | 2631 | val = tp->thin_dupack; |
2937 | break; | 2632 | break; |
2938 | 2633 | ||
2939 | case TCP_REPAIR: | ||
2940 | val = tp->repair; | ||
2941 | break; | ||
2942 | |||
2943 | case TCP_REPAIR_QUEUE: | ||
2944 | if (tp->repair) | ||
2945 | val = tp->repair_queue; | ||
2946 | else | ||
2947 | return -EINVAL; | ||
2948 | break; | ||
2949 | |||
2950 | case TCP_QUEUE_SEQ: | ||
2951 | if (tp->repair_queue == TCP_SEND_QUEUE) | ||
2952 | val = tp->write_seq; | ||
2953 | else if (tp->repair_queue == TCP_RECV_QUEUE) | ||
2954 | val = tp->rcv_nxt; | ||
2955 | else | ||
2956 | return -EINVAL; | ||
2957 | break; | ||
2958 | |||
2959 | case TCP_USER_TIMEOUT: | 2634 | case TCP_USER_TIMEOUT: |
2960 | val = jiffies_to_msecs(icsk->icsk_user_timeout); | 2635 | val = jiffies_to_msecs(icsk->icsk_user_timeout); |
2961 | break; | 2636 | break; |
@@ -2994,12 +2669,11 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | |||
2994 | EXPORT_SYMBOL(compat_tcp_getsockopt); | 2669 | EXPORT_SYMBOL(compat_tcp_getsockopt); |
2995 | #endif | 2670 | #endif |
2996 | 2671 | ||
2997 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, | 2672 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) |
2998 | netdev_features_t features) | ||
2999 | { | 2673 | { |
3000 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2674 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
3001 | struct tcphdr *th; | 2675 | struct tcphdr *th; |
3002 | unsigned int thlen; | 2676 | unsigned thlen; |
3003 | unsigned int seq; | 2677 | unsigned int seq; |
3004 | __be32 delta; | 2678 | __be32 delta; |
3005 | unsigned int oldlen; | 2679 | unsigned int oldlen; |
@@ -3198,25 +2872,26 @@ EXPORT_SYMBOL(tcp_gro_complete); | |||
3198 | 2872 | ||
3199 | #ifdef CONFIG_TCP_MD5SIG | 2873 | #ifdef CONFIG_TCP_MD5SIG |
3200 | static unsigned long tcp_md5sig_users; | 2874 | static unsigned long tcp_md5sig_users; |
3201 | static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; | 2875 | static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; |
3202 | static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); | 2876 | static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); |
3203 | 2877 | ||
3204 | static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) | 2878 | static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) |
3205 | { | 2879 | { |
3206 | int cpu; | 2880 | int cpu; |
3207 | |||
3208 | for_each_possible_cpu(cpu) { | 2881 | for_each_possible_cpu(cpu) { |
3209 | struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); | 2882 | struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu); |
3210 | 2883 | if (p) { | |
3211 | if (p->md5_desc.tfm) | 2884 | if (p->md5_desc.tfm) |
3212 | crypto_free_hash(p->md5_desc.tfm); | 2885 | crypto_free_hash(p->md5_desc.tfm); |
2886 | kfree(p); | ||
2887 | } | ||
3213 | } | 2888 | } |
3214 | free_percpu(pool); | 2889 | free_percpu(pool); |
3215 | } | 2890 | } |
3216 | 2891 | ||
3217 | void tcp_free_md5sig_pool(void) | 2892 | void tcp_free_md5sig_pool(void) |
3218 | { | 2893 | { |
3219 | struct tcp_md5sig_pool __percpu *pool = NULL; | 2894 | struct tcp_md5sig_pool * __percpu *pool = NULL; |
3220 | 2895 | ||
3221 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2896 | spin_lock_bh(&tcp_md5sig_pool_lock); |
3222 | if (--tcp_md5sig_users == 0) { | 2897 | if (--tcp_md5sig_users == 0) { |
@@ -3229,24 +2904,30 @@ void tcp_free_md5sig_pool(void) | |||
3229 | } | 2904 | } |
3230 | EXPORT_SYMBOL(tcp_free_md5sig_pool); | 2905 | EXPORT_SYMBOL(tcp_free_md5sig_pool); |
3231 | 2906 | ||
3232 | static struct tcp_md5sig_pool __percpu * | 2907 | static struct tcp_md5sig_pool * __percpu * |
3233 | __tcp_alloc_md5sig_pool(struct sock *sk) | 2908 | __tcp_alloc_md5sig_pool(struct sock *sk) |
3234 | { | 2909 | { |
3235 | int cpu; | 2910 | int cpu; |
3236 | struct tcp_md5sig_pool __percpu *pool; | 2911 | struct tcp_md5sig_pool * __percpu *pool; |
3237 | 2912 | ||
3238 | pool = alloc_percpu(struct tcp_md5sig_pool); | 2913 | pool = alloc_percpu(struct tcp_md5sig_pool *); |
3239 | if (!pool) | 2914 | if (!pool) |
3240 | return NULL; | 2915 | return NULL; |
3241 | 2916 | ||
3242 | for_each_possible_cpu(cpu) { | 2917 | for_each_possible_cpu(cpu) { |
2918 | struct tcp_md5sig_pool *p; | ||
3243 | struct crypto_hash *hash; | 2919 | struct crypto_hash *hash; |
3244 | 2920 | ||
2921 | p = kzalloc(sizeof(*p), sk->sk_allocation); | ||
2922 | if (!p) | ||
2923 | goto out_free; | ||
2924 | *per_cpu_ptr(pool, cpu) = p; | ||
2925 | |||
3245 | hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); | 2926 | hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); |
3246 | if (!hash || IS_ERR(hash)) | 2927 | if (!hash || IS_ERR(hash)) |
3247 | goto out_free; | 2928 | goto out_free; |
3248 | 2929 | ||
3249 | per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; | 2930 | p->md5_desc.tfm = hash; |
3250 | } | 2931 | } |
3251 | return pool; | 2932 | return pool; |
3252 | out_free: | 2933 | out_free: |
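This hunk trades alloc_percpu() of the struct itself for a per-cpu array of pointers that must each be kzalloc()'d, which is why the free path above dereferences *per_cpu_ptr() first. A kernel-side sketch of the two layouts (illustrative only; layout_demo is hypothetical and error handling is elided):

#include <linux/percpu.h>
#include <linux/slab.h>
#include <net/tcp.h>

static struct tcp_md5sig_pool __percpu *pool_embedded;   /* one struct per cpu */
static struct tcp_md5sig_pool * __percpu *pool_indirect; /* one pointer per cpu */

static void layout_demo(void)
{
	int cpu;

	pool_embedded = alloc_percpu(struct tcp_md5sig_pool);
	pool_indirect = alloc_percpu(struct tcp_md5sig_pool *);

	for_each_possible_cpu(cpu) {
		/* embedded: storage already exists in the per-cpu area */
		struct tcp_md5sig_pool *e = per_cpu_ptr(pool_embedded, cpu);
		/* indirect: the slot holds a pointer that starts out NULL */
		struct tcp_md5sig_pool *i = *per_cpu_ptr(pool_indirect, cpu);

		(void)e;
		(void)i;
	}
}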
@@ -3254,16 +2935,16 @@ out_free: | |||
3254 | return NULL; | 2935 | return NULL; |
3255 | } | 2936 | } |
3256 | 2937 | ||
3257 | struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) | 2938 | struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) |
3258 | { | 2939 | { |
3259 | struct tcp_md5sig_pool __percpu *pool; | 2940 | struct tcp_md5sig_pool * __percpu *pool; |
3260 | bool alloc = false; | 2941 | int alloc = 0; |
3261 | 2942 | ||
3262 | retry: | 2943 | retry: |
3263 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2944 | spin_lock_bh(&tcp_md5sig_pool_lock); |
3264 | pool = tcp_md5sig_pool; | 2945 | pool = tcp_md5sig_pool; |
3265 | if (tcp_md5sig_users++ == 0) { | 2946 | if (tcp_md5sig_users++ == 0) { |
3266 | alloc = true; | 2947 | alloc = 1; |
3267 | spin_unlock_bh(&tcp_md5sig_pool_lock); | 2948 | spin_unlock_bh(&tcp_md5sig_pool_lock); |
3268 | } else if (!pool) { | 2949 | } else if (!pool) { |
3269 | tcp_md5sig_users--; | 2950 | tcp_md5sig_users--; |
@@ -3275,7 +2956,7 @@ retry: | |||
3275 | 2956 | ||
3276 | if (alloc) { | 2957 | if (alloc) { |
3277 | /* we cannot hold spinlock here because this may sleep. */ | 2958 | /* we cannot hold spinlock here because this may sleep. */ |
3278 | struct tcp_md5sig_pool __percpu *p; | 2959 | struct tcp_md5sig_pool * __percpu *p; |
3279 | 2960 | ||
3280 | p = __tcp_alloc_md5sig_pool(sk); | 2961 | p = __tcp_alloc_md5sig_pool(sk); |
3281 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2962 | spin_lock_bh(&tcp_md5sig_pool_lock); |
@@ -3308,7 +2989,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); | |||
3308 | */ | 2989 | */ |
3309 | struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) | 2990 | struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) |
3310 | { | 2991 | { |
3311 | struct tcp_md5sig_pool __percpu *p; | 2992 | struct tcp_md5sig_pool * __percpu *p; |
3312 | 2993 | ||
3313 | local_bh_disable(); | 2994 | local_bh_disable(); |
3314 | 2995 | ||
@@ -3319,7 +3000,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) | |||
3319 | spin_unlock(&tcp_md5sig_pool_lock); | 3000 | spin_unlock(&tcp_md5sig_pool_lock); |
3320 | 3001 | ||
3321 | if (p) | 3002 | if (p) |
3322 | return this_cpu_ptr(p); | 3003 | return *this_cpu_ptr(p); |
3323 | 3004 | ||
3324 | local_bh_enable(); | 3005 | local_bh_enable(); |
3325 | return NULL; | 3006 | return NULL; |
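Either layout is consumed through the same bracket: tcp_get_md5sig_pool() returns with bottom halves disabled on success, so every caller must pair it with tcp_put_md5sig_pool(). A sketch of that calling convention (hash_one_header is a hypothetical helper):

#include <linux/errno.h>
#include <net/tcp.h>

static int hash_one_header(struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp = tcp_get_md5sig_pool();
	int err = -ENOMEM;

	if (hp) {
		err = tcp_md5_hash_header(hp, th);
		tcp_put_md5sig_pool();	/* re-enables bottom halves */
	}
	return err;
}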
@@ -3334,32 +3015,30 @@ void tcp_put_md5sig_pool(void) | |||
3334 | EXPORT_SYMBOL(tcp_put_md5sig_pool); | 3015 | EXPORT_SYMBOL(tcp_put_md5sig_pool); |
3335 | 3016 | ||
3336 | int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, | 3017 | int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, |
3337 | const struct tcphdr *th) | 3018 | struct tcphdr *th) |
3338 | { | 3019 | { |
3339 | struct scatterlist sg; | 3020 | struct scatterlist sg; |
3340 | struct tcphdr hdr; | ||
3341 | int err; | 3021 | int err; |
3342 | 3022 | ||
3343 | /* We are not allowed to change tcphdr, make a local copy */ | 3023 | __sum16 old_checksum = th->check; |
3344 | memcpy(&hdr, th, sizeof(hdr)); | 3024 | th->check = 0; |
3345 | hdr.check = 0; | ||
3346 | |||
3347 | /* options aren't included in the hash */ | 3025 | /* options aren't included in the hash */ |
3348 | sg_init_one(&sg, &hdr, sizeof(hdr)); | 3026 | sg_init_one(&sg, th, sizeof(struct tcphdr)); |
3349 | err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr)); | 3027 | err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr)); |
3028 | th->check = old_checksum; | ||
3350 | return err; | 3029 | return err; |
3351 | } | 3030 | } |
3352 | EXPORT_SYMBOL(tcp_md5_hash_header); | 3031 | EXPORT_SYMBOL(tcp_md5_hash_header); |
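The left column's tcp_md5_hash_header() takes a const header and hashes a stack copy with the checksum zeroed, where the right column temporarily scribbles on the shared header and restores it. A sketch of the copy idiom against the old crypto_hash API (hash_header_copy is hypothetical):

#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/string.h>
#include <linux/tcp.h>

static int hash_header_copy(struct hash_desc *desc, const struct tcphdr *th)
{
	struct scatterlist sg;
	struct tcphdr hdr;

	/* Hash a private copy so the caller's (possibly shared) header
	 * is never modified, even transiently. */
	memcpy(&hdr, th, sizeof(hdr));
	hdr.check = 0;			/* the checksum is excluded from the hash */
	sg_init_one(&sg, &hdr, sizeof(hdr));
	return crypto_hash_update(desc, &sg, sizeof(hdr));
}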
3353 | 3032 | ||
3354 | int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | 3033 | int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, |
3355 | const struct sk_buff *skb, unsigned int header_len) | 3034 | struct sk_buff *skb, unsigned header_len) |
3356 | { | 3035 | { |
3357 | struct scatterlist sg; | 3036 | struct scatterlist sg; |
3358 | const struct tcphdr *tp = tcp_hdr(skb); | 3037 | const struct tcphdr *tp = tcp_hdr(skb); |
3359 | struct hash_desc *desc = &hp->md5_desc; | 3038 | struct hash_desc *desc = &hp->md5_desc; |
3360 | unsigned int i; | 3039 | unsigned i; |
3361 | const unsigned int head_data_len = skb_headlen(skb) > header_len ? | 3040 | const unsigned head_data_len = skb_headlen(skb) > header_len ? |
3362 | skb_headlen(skb) - header_len : 0; | 3041 | skb_headlen(skb) - header_len : 0; |
3363 | const struct skb_shared_info *shi = skb_shinfo(skb); | 3042 | const struct skb_shared_info *shi = skb_shinfo(skb); |
3364 | struct sk_buff *frag_iter; | 3043 | struct sk_buff *frag_iter; |
3365 | 3044 | ||
@@ -3371,9 +3050,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
3371 | 3050 | ||
3372 | for (i = 0; i < shi->nr_frags; ++i) { | 3051 | for (i = 0; i < shi->nr_frags; ++i) { |
3373 | const struct skb_frag_struct *f = &shi->frags[i]; | 3052 | const struct skb_frag_struct *f = &shi->frags[i]; |
3374 | struct page *page = skb_frag_page(f); | 3053 | sg_set_page(&sg, f->page, f->size, f->page_offset); |
3375 | sg_set_page(&sg, page, skb_frag_size(f), f->page_offset); | 3054 | if (crypto_hash_update(desc, &sg, f->size)) |
3376 | if (crypto_hash_update(desc, &sg, skb_frag_size(f))) | ||
3377 | return 1; | 3055 | return 1; |
3378 | } | 3056 | } |
3379 | 3057 | ||
@@ -3385,7 +3063,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
3385 | } | 3063 | } |
3386 | EXPORT_SYMBOL(tcp_md5_hash_skb_data); | 3064 | EXPORT_SYMBOL(tcp_md5_hash_skb_data); |
3387 | 3065 | ||
3388 | int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) | 3066 | int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) |
3389 | { | 3067 | { |
3390 | struct scatterlist sg; | 3068 | struct scatterlist sg; |
3391 | 3069 | ||
@@ -3396,7 +3074,8 @@ EXPORT_SYMBOL(tcp_md5_hash_key); | |||
3396 | 3074 | ||
3397 | #endif | 3075 | #endif |
3398 | 3076 | ||
3399 | /* Each Responder maintains up to two secret values concurrently for | 3077 | /** |
3078 | * Each Responder maintains up to two secret values concurrently for | ||
3400 | * efficient secret rollover. Each secret value has 4 states: | 3079 | * efficient secret rollover. Each secret value has 4 states: |
3401 | * | 3080 | * |
3402 | * Generating. (tcp_secret_generating != tcp_secret_primary) | 3081 | * Generating. (tcp_secret_generating != tcp_secret_primary) |
@@ -3526,15 +3205,11 @@ EXPORT_SYMBOL(tcp_cookie_generator); | |||
3526 | 3205 | ||
3527 | void tcp_done(struct sock *sk) | 3206 | void tcp_done(struct sock *sk) |
3528 | { | 3207 | { |
3529 | struct request_sock *req = tcp_sk(sk)->fastopen_rsk; | ||
3530 | |||
3531 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) | 3208 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) |
3532 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); | 3209 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); |
3533 | 3210 | ||
3534 | tcp_set_state(sk, TCP_CLOSE); | 3211 | tcp_set_state(sk, TCP_CLOSE); |
3535 | tcp_clear_xmit_timers(sk); | 3212 | tcp_clear_xmit_timers(sk); |
3536 | if (req != NULL) | ||
3537 | reqsk_fastopen_remove(sk, req, false); | ||
3538 | 3213 | ||
3539 | sk->sk_shutdown = SHUTDOWN_MASK; | 3214 | sk->sk_shutdown = SHUTDOWN_MASK; |
3540 | 3215 | ||
@@ -3550,34 +3225,18 @@ extern struct tcp_congestion_ops tcp_reno; | |||
3550 | static __initdata unsigned long thash_entries; | 3225 | static __initdata unsigned long thash_entries; |
3551 | static int __init set_thash_entries(char *str) | 3226 | static int __init set_thash_entries(char *str) |
3552 | { | 3227 | { |
3553 | ssize_t ret; | ||
3554 | |||
3555 | if (!str) | 3228 | if (!str) |
3556 | return 0; | 3229 | return 0; |
3557 | 3230 | thash_entries = simple_strtoul(str, &str, 0); | |
3558 | ret = kstrtoul(str, 0, &thash_entries); | ||
3559 | if (ret) | ||
3560 | return 0; | ||
3561 | |||
3562 | return 1; | 3231 | return 1; |
3563 | } | 3232 | } |
3564 | __setup("thash_entries=", set_thash_entries); | 3233 | __setup("thash_entries=", set_thash_entries); |
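The left column swaps simple_strtoul() for kstrtoul(), which reports failure instead of silently stopping at the first non-digit. A self-contained sketch of the stricter pattern (the demo_entries= parameter and its handler are hypothetical):

#include <linux/init.h>
#include <linux/kernel.h>

static unsigned long demo_entries __initdata;

static int __init set_demo_entries(char *str)
{
	if (!str)
		return 0;
	/* kstrtoul() returns -EINVAL/-ERANGE on malformed input; treat as unset. */
	if (kstrtoul(str, 0, &demo_entries))
		return 0;
	return 1;
}
__setup("demo_entries=", set_demo_entries);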
3565 | 3234 | ||
3566 | void tcp_init_mem(struct net *net) | ||
3567 | { | ||
3568 | unsigned long limit = nr_free_buffer_pages() / 8; | ||
3569 | limit = max(limit, 128UL); | ||
3570 | net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; | ||
3571 | net->ipv4.sysctl_tcp_mem[1] = limit; | ||
3572 | net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2; | ||
3573 | } | ||
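A worked example of tcp_init_mem()'s sizing, assuming 1 GiB of free buffer pages at a 4 KiB page size (all figures in pages):

	nr_free_buffer_pages() = 262144
	limit  = max(262144 / 8, 128) = 32768
	mem[0] = 32768 / 4 * 3        = 24576   (below this, no memory pressure)
	mem[1] = 32768                          (pressure threshold)
	mem[2] = 24576 * 2            = 49152   (hard limit)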
3574 | |||
3575 | void __init tcp_init(void) | 3235 | void __init tcp_init(void) |
3576 | { | 3236 | { |
3577 | struct sk_buff *skb = NULL; | 3237 | struct sk_buff *skb = NULL; |
3578 | unsigned long limit; | 3238 | unsigned long limit; |
3579 | int max_rshare, max_wshare, cnt; | 3239 | int i, max_share, cnt; |
3580 | unsigned int i; | ||
3581 | unsigned long jiffy = jiffies; | 3240 | unsigned long jiffy = jiffies; |
3582 | 3241 | ||
3583 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); | 3242 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); |
@@ -3598,11 +3257,11 @@ void __init tcp_init(void) | |||
3598 | alloc_large_system_hash("TCP established", | 3257 | alloc_large_system_hash("TCP established", |
3599 | sizeof(struct inet_ehash_bucket), | 3258 | sizeof(struct inet_ehash_bucket), |
3600 | thash_entries, | 3259 | thash_entries, |
3601 | 17, /* one slot per 128 KB of memory */ | 3260 | (totalram_pages >= 128 * 1024) ? |
3261 | 13 : 15, | ||
3602 | 0, | 3262 | 0, |
3603 | NULL, | 3263 | NULL, |
3604 | &tcp_hashinfo.ehash_mask, | 3264 | &tcp_hashinfo.ehash_mask, |
3605 | 0, | ||
3606 | thash_entries ? 0 : 512 * 1024); | 3265 | thash_entries ? 0 : 512 * 1024); |
3607 | for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) { | 3266 | for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) { |
3608 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); | 3267 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); |
@@ -3614,13 +3273,13 @@ void __init tcp_init(void) | |||
3614 | alloc_large_system_hash("TCP bind", | 3273 | alloc_large_system_hash("TCP bind", |
3615 | sizeof(struct inet_bind_hashbucket), | 3274 | sizeof(struct inet_bind_hashbucket), |
3616 | tcp_hashinfo.ehash_mask + 1, | 3275 | tcp_hashinfo.ehash_mask + 1, |
3617 | 17, /* one slot per 128 KB of memory */ | 3276 | (totalram_pages >= 128 * 1024) ? |
3277 | 13 : 15, | ||
3618 | 0, | 3278 | 0, |
3619 | &tcp_hashinfo.bhash_size, | 3279 | &tcp_hashinfo.bhash_size, |
3620 | NULL, | 3280 | NULL, |
3621 | 0, | ||
3622 | 64 * 1024); | 3281 | 64 * 1024); |
3623 | tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size; | 3282 | tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size; |
3624 | for (i = 0; i < tcp_hashinfo.bhash_size; i++) { | 3283 | for (i = 0; i < tcp_hashinfo.bhash_size; i++) { |
3625 | spin_lock_init(&tcp_hashinfo.bhash[i].lock); | 3284 | spin_lock_init(&tcp_hashinfo.bhash[i].lock); |
3626 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); | 3285 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); |
@@ -3633,24 +3292,27 @@ void __init tcp_init(void) | |||
3633 | sysctl_tcp_max_orphans = cnt / 2; | 3292 | sysctl_tcp_max_orphans = cnt / 2; |
3634 | sysctl_max_syn_backlog = max(128, cnt / 256); | 3293 | sysctl_max_syn_backlog = max(128, cnt / 256); |
3635 | 3294 | ||
3636 | tcp_init_mem(&init_net); | 3295 | limit = nr_free_buffer_pages() / 8; |
3296 | limit = max(limit, 128UL); | ||
3297 | sysctl_tcp_mem[0] = limit / 4 * 3; | ||
3298 | sysctl_tcp_mem[1] = limit; | ||
3299 | sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; | ||
3300 | |||
3637 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ | 3301 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ |
3638 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); | 3302 | limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); |
3639 | max_wshare = min(4UL*1024*1024, limit); | 3303 | max_share = min(4UL*1024*1024, limit); |
3640 | max_rshare = min(6UL*1024*1024, limit); | ||
3641 | 3304 | ||
3642 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; | 3305 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; |
3643 | sysctl_tcp_wmem[1] = 16*1024; | 3306 | sysctl_tcp_wmem[1] = 16*1024; |
3644 | sysctl_tcp_wmem[2] = max(64*1024, max_wshare); | 3307 | sysctl_tcp_wmem[2] = max(64*1024, max_share); |
3645 | 3308 | ||
3646 | sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; | 3309 | sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; |
3647 | sysctl_tcp_rmem[1] = 87380; | 3310 | sysctl_tcp_rmem[1] = 87380; |
3648 | sysctl_tcp_rmem[2] = max(87380, max_rshare); | 3311 | sysctl_tcp_rmem[2] = max(87380, max_share); |
3649 | 3312 | ||
3650 | pr_info("Hash tables configured (established %u bind %u)\n", | 3313 | printk(KERN_INFO "TCP: Hash tables configured " |
3651 | tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); | 3314 | "(established %u bind %u)\n", |
3652 | 3315 | tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); | |
3653 | tcp_metrics_init(); | ||
3654 | 3316 | ||
3655 | tcp_register_congestion_control(&tcp_reno); | 3317 | tcp_register_congestion_control(&tcp_reno); |
3656 | 3318 | ||
@@ -3662,5 +3324,108 @@ void __init tcp_init(void) | |||
3662 | tcp_secret_primary = &tcp_secret_one; | 3324 | tcp_secret_primary = &tcp_secret_one; |
3663 | tcp_secret_retiring = &tcp_secret_two; | 3325 | tcp_secret_retiring = &tcp_secret_two; |
3664 | tcp_secret_secondary = &tcp_secret_two; | 3326 | tcp_secret_secondary = &tcp_secret_two; |
3665 | tcp_tasklet_init(); | 3327 | } |
3328 | |||
3329 | static int tcp_is_local(struct net *net, __be32 addr) { | ||
3330 | struct rtable *rt; | ||
3331 | struct flowi4 fl4 = { .daddr = addr }; | ||
3332 | rt = ip_route_output_key(net, &fl4); | ||
3333 | if (IS_ERR_OR_NULL(rt)) | ||
3334 | return 0; | ||
3335 | return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK); | ||
3336 | } | ||
3337 | |||
3338 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
3339 | static int tcp_is_local6(struct net *net, struct in6_addr *addr) { | ||
3340 | struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0); | ||
3341 | return rt6 && rt6->rt6i_dev && (rt6->rt6i_dev->flags & IFF_LOOPBACK); | ||
3342 | } | ||
3343 | #endif | ||
3344 | |||
3345 | /* | ||
3346 | * tcp_nuke_addr - destroy all sockets on the given local address | ||
3347 | * If the local address is the unspecified address (0.0.0.0 or ::), destroy | ||
3348 | * all sockets bound to local addresses that are not configured. | ||
3349 | */ | ||
3350 | int tcp_nuke_addr(struct net *net, struct sockaddr *addr) | ||
3351 | { | ||
3352 | int family = addr->sa_family; | ||
3353 | unsigned int bucket; | ||
3354 | |||
3355 | struct in_addr *in; | ||
3356 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
3357 | struct in6_addr *in6; | ||
3358 | #endif | ||
3359 | if (family == AF_INET) { | ||
3360 | in = &((struct sockaddr_in *)addr)->sin_addr; | ||
3361 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
3362 | } else if (family == AF_INET6) { | ||
3363 | in6 = &((struct sockaddr_in6 *)addr)->sin6_addr; | ||
3364 | #endif | ||
3365 | } else { | ||
3366 | return -EAFNOSUPPORT; | ||
3367 | } | ||
3368 | |||
3369 | for (bucket = 0; bucket <= tcp_hashinfo.ehash_mask; bucket++) { | ||
3370 | struct hlist_nulls_node *node; | ||
3371 | struct sock *sk; | ||
3372 | spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket); | ||
3373 | |||
3374 | restart: | ||
3375 | spin_lock_bh(lock); | ||
3376 | sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) { | ||
3377 | struct inet_sock *inet = inet_sk(sk); | ||
3378 | |||
3379 | if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT) | ||
3380 | continue; | ||
3381 | if (sock_flag(sk, SOCK_DEAD)) | ||
3382 | continue; | ||
3383 | |||
3384 | if (family == AF_INET) { | ||
3385 | __be32 s4 = inet->inet_rcv_saddr; | ||
3386 | if (s4 == LOOPBACK4_IPV6) | ||
3387 | continue; | ||
3388 | |||
3389 | if (in->s_addr != s4 && | ||
3390 | !(in->s_addr == INADDR_ANY && | ||
3391 | !tcp_is_local(net, s4))) | ||
3392 | continue; | ||
3393 | } | ||
3394 | |||
3395 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
3396 | if (family == AF_INET6) { | ||
3397 | struct in6_addr *s6; | ||
3398 | if (!inet->pinet6) | ||
3399 | continue; | ||
3400 | |||
3401 | s6 = &inet->pinet6->rcv_saddr; | ||
3402 | if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED) | ||
3403 | continue; | ||
3404 | |||
3405 | if (!ipv6_addr_equal(in6, s6) && | ||
3406 | !(ipv6_addr_equal(in6, &in6addr_any) && | ||
3407 | !tcp_is_local6(net, s6))) | ||
3408 | continue; | ||
3409 | } | ||
3410 | #endif | ||
3411 | |||
3412 | sock_hold(sk); | ||
3413 | spin_unlock_bh(lock); | ||
3414 | |||
3415 | local_bh_disable(); | ||
3416 | bh_lock_sock(sk); | ||
3417 | sk->sk_err = ETIMEDOUT; | ||
3418 | sk->sk_error_report(sk); | ||
3419 | |||
3420 | tcp_done(sk); | ||
3421 | bh_unlock_sock(sk); | ||
3422 | local_bh_enable(); | ||
3423 | sock_put(sk); | ||
3424 | |||
3425 | goto restart; | ||
3426 | } | ||
3427 | spin_unlock_bh(lock); | ||
3428 | } | ||
3429 | |||
3430 | return 0; | ||
3666 | } | 3431 | } |
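tcp_nuke_addr() has no in-tree caller in this hunk; on Android kernels carrying this patch it is typically reached through the out-of-tree SIOCKILLADDR ioctl. A userspace sketch under that assumption (the ioctl number and kill_unconfigured_tcp are assumptions, not part of this diff):

#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

#ifndef SIOCKILLADDR
#define SIOCKILLADDR 0x8939	/* assumed value from Android kernel headers */
#endif

static int kill_unconfigured_tcp(const char *ifname)
{
	struct ifreq ifr;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int rc;

	if (fd < 0)
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);
	sin->sin_family = AF_INET;
	/* The unspecified address asks the kernel to nuke every socket
	 * whose local address is no longer configured. */
	sin->sin_addr.s_addr = INADDR_ANY;
	rc = ioctl(fd, SIOCKILLADDR, &ifr);
	close(fd);
	return rc;
}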