-rw-r--r--	include/linux/tcp.h   |  4
-rw-r--r--	include/net/tcp.h     |  2
-rw-r--r--	net/ipv4/tcp_output.c | 46
-rw-r--r--	net/ipv4/tcp_timer.c  | 70
4 files changed, 71 insertions(+), 51 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 9febfb685c33..2761856987b2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -515,7 +515,9 @@ struct tcp_sock {
 enum tsq_flags {
 	TSQ_THROTTLED,
 	TSQ_QUEUED,
-	TSQ_OWNED,		   /* tcp_tasklet_func() found socket was locked */
+	TCP_TSQ_DEFERRED,	   /* tcp_tasklet_func() found socket was owned */
+	TCP_WRITE_TIMER_DEFERRED,  /* tcp_write_timer() found socket was owned */
+	TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index bc7c134ec054..e19124b84cd2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -350,6 +350,8 @@ extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
 			size_t size, int flags);
 extern void tcp_release_cb(struct sock *sk);
+extern void tcp_write_timer_handler(struct sock *sk);
+extern void tcp_delack_timer_handler(struct sock *sk);
 extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 const struct tcphdr *th, unsigned int len);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 27a32acfdb62..950aebfd9967 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -837,6 +837,13 @@ struct tsq_tasklet {
 };
 static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
 
+static void tcp_tsq_handler(struct sock *sk)
+{
+	if ((1 << sk->sk_state) &
+	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
+	     TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
+		tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+}
 /*
  * One tasklest per cpu tries to send more skbs.
  * We run in tasklet context but need to disable irqs when
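The state test factored out into tcp_tsq_handler() relies on the kernel convention that each TCPF_* flag equals (1 << TCP_*) for the matching state, so a single AND checks membership in a whole set of states. A minimal standalone sketch of the idiom; the state numbering follows include/net/tcp_states.h, but the helper can_xmit() is invented for illustration:

#include <stdio.h>

/* Illustrative subset; the kernel defines these in include/net/tcp_states.h,
 * with each TCPF_x defined as (1 << TCP_x). */
enum { TCP_ESTABLISHED = 1, TCP_FIN_WAIT1 = 4, TCP_CLOSE = 7,
       TCP_CLOSE_WAIT = 8, TCP_LAST_ACK = 9, TCP_CLOSING = 11 };

#define TCPF_ESTABLISHED (1 << TCP_ESTABLISHED)
#define TCPF_FIN_WAIT1   (1 << TCP_FIN_WAIT1)
#define TCPF_CLOSE_WAIT  (1 << TCP_CLOSE_WAIT)
#define TCPF_LAST_ACK    (1 << TCP_LAST_ACK)
#define TCPF_CLOSING     (1 << TCP_CLOSING)

/* One AND tests membership in the whole "may still transmit" set. */
static int can_xmit(int state)
{
	return (1 << state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 |
			       TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK);
}

int main(void)
{
	printf("established: %d, close: %d\n",
	       !!can_xmit(TCP_ESTABLISHED), !!can_xmit(TCP_CLOSE));
	return 0;
}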
@@ -864,16 +871,10 @@ static void tcp_tasklet_func(unsigned long data)
 		bh_lock_sock(sk);
 
 		if (!sock_owned_by_user(sk)) {
-			if ((1 << sk->sk_state) &
-			    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 |
-			     TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
-				tcp_write_xmit(sk,
-					       tcp_current_mss(sk),
-					       0, 0,
-					       GFP_ATOMIC);
+			tcp_tsq_handler(sk);
 		} else {
 			/* defer the work to tcp_release_cb() */
-			set_bit(TSQ_OWNED, &tp->tsq_flags);
+			set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
 		}
 		bh_unlock_sock(sk);
 
@@ -882,6 +883,9 @@ static void tcp_tasklet_func(unsigned long data) | |||
882 | } | 883 | } |
883 | } | 884 | } |
884 | 885 | ||
886 | #define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ | ||
887 | (1UL << TCP_WRITE_TIMER_DEFERRED) | \ | ||
888 | (1UL << TCP_DELACK_TIMER_DEFERRED)) | ||
885 | /** | 889 | /** |
886 | * tcp_release_cb - tcp release_sock() callback | 890 | * tcp_release_cb - tcp release_sock() callback |
887 | * @sk: socket | 891 | * @sk: socket |
@@ -892,16 +896,24 @@ static void tcp_tasklet_func(unsigned long data)
 void tcp_release_cb(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned long flags, nflags;
 
-	if (test_and_clear_bit(TSQ_OWNED, &tp->tsq_flags)) {
-		if ((1 << sk->sk_state) &
-		    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 |
-		     TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
-			tcp_write_xmit(sk,
-				       tcp_current_mss(sk),
-				       0, 0,
-				       GFP_ATOMIC);
-	}
+	/* perform an atomic operation only if at least one flag is set */
+	do {
+		flags = tp->tsq_flags;
+		if (!(flags & TCP_DEFERRED_ALL))
+			return;
+		nflags = flags & ~TCP_DEFERRED_ALL;
+	} while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
+
+	if (flags & (1UL << TCP_TSQ_DEFERRED))
+		tcp_tsq_handler(sk);
+
+	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED))
+		tcp_write_timer_handler(sk);
+
+	if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
+		tcp_delack_timer_handler(sk);
 }
 EXPORT_SYMBOL(tcp_release_cb);
 
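The cmpxchg() loop in tcp_release_cb() snapshots and clears every deferred bit in one atomic step, so a timer firing on another CPU can never have its bit lost between the test and the clear, and the fast path performs no atomic write at all when nothing was deferred. A rough userspace model of the same pattern, assuming C11 atomics in place of the kernel's cmpxchg() and invented flag names:

#include <stdatomic.h>
#include <stdio.h>

#define TSQ_DEFERRED          (1UL << 0)
#define WRITE_TIMER_DEFERRED  (1UL << 1)
#define DELACK_TIMER_DEFERRED (1UL << 2)
#define DEFERRED_ALL (TSQ_DEFERRED | WRITE_TIMER_DEFERRED | DELACK_TIMER_DEFERRED)

static _Atomic unsigned long tsq_flags;

static void release_cb(void)
{
	unsigned long flags, nflags;

	/* Snapshot-and-clear: retry if another thread changed the word. */
	do {
		flags = atomic_load(&tsq_flags);
		if (!(flags & DEFERRED_ALL))
			return;			/* fast path: no write */
		nflags = flags & ~DEFERRED_ALL;
	} while (!atomic_compare_exchange_weak(&tsq_flags, &flags, nflags));

	/* Dispatch on the private snapshot, after the atomic section. */
	if (flags & TSQ_DEFERRED)
		puts("run tsq handler");
	if (flags & WRITE_TIMER_DEFERRED)
		puts("run retransmit timer handler");
	if (flags & DELACK_TIMER_DEFERRED)
		puts("run delayed-ack timer handler");
}

int main(void)
{
	atomic_fetch_or(&tsq_flags, WRITE_TIMER_DEFERRED);
	release_cb();
	return 0;
}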
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index e911e6c523ec..6df36ad55a38 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -32,17 +32,6 @@ int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
 int sysctl_tcp_orphan_retries __read_mostly;
 int sysctl_tcp_thin_linear_timeouts __read_mostly;
 
-static void tcp_write_timer(unsigned long);
-static void tcp_delack_timer(unsigned long);
-static void tcp_keepalive_timer (unsigned long data);
-
-void tcp_init_xmit_timers(struct sock *sk)
-{
-	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
-				  &tcp_keepalive_timer);
-}
-EXPORT_SYMBOL(tcp_init_xmit_timers);
-
 static void tcp_write_err(struct sock *sk)
 {
 	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
@@ -205,21 +194,11 @@ static int tcp_write_timeout(struct sock *sk)
 	return 0;
 }
 
-static void tcp_delack_timer(unsigned long data)
+void tcp_delack_timer_handler(struct sock *sk)
 {
-	struct sock *sk = (struct sock *)data;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk)) {
-		/* Try again later. */
-		icsk->icsk_ack.blocked = 1;
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
-		sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
-		goto out_unlock;
-	}
-
 	sk_mem_reclaim_partial(sk);
 
 	if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
@@ -260,7 +239,21 @@ static void tcp_delack_timer(unsigned long data)
 out:
 	if (sk_under_memory_pressure(sk))
 		sk_mem_reclaim(sk);
-out_unlock:
+}
+
+static void tcp_delack_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk)) {
+		tcp_delack_timer_handler(sk);
+	} else {
+		inet_csk(sk)->icsk_ack.blocked = 1;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
+		/* delegate our work to tcp_release_cb() */
+		set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+	}
 	bh_unlock_sock(sk);
 	sock_put(sk);
 }
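tcp_delack_timer() and tcp_write_timer() (below) now share one shape: from BH context, run the handler at once if user space does not own the socket, otherwise set a bit for tcp_release_cb() to consume. A loose userspace analogue, with a pthread trylock standing in for the kernel's bh_lock_sock()/sock_owned_by_user() pair (all names hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t sock_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic unsigned long deferred_flags;
#define DELACK_TIMER_DEFERRED (1UL << 2)

static void delack_handler(void) { puts("delayed-ack work"); }

/* Timer callback: run now if the "socket" is free, else defer. */
static void delack_timer(void)
{
	if (pthread_mutex_trylock(&sock_lock) == 0) {
		delack_handler();
		pthread_mutex_unlock(&sock_lock);
	} else {
		/* The owner picks this up in its release callback. */
		atomic_fetch_or(&deferred_flags, DELACK_TIMER_DEFERRED);
	}
}

/* Called by the lock owner on its way out, mirroring tcp_release_cb(). */
static void release_sock_cb(void)
{
	unsigned long flags = atomic_exchange(&deferred_flags, 0);

	if (flags & DELACK_TIMER_DEFERRED)
		delack_handler();
}

int main(void)
{
	delack_timer();			/* lock free: runs immediately */
	pthread_mutex_lock(&sock_lock);
	delack_timer();			/* lock held: defers */
	pthread_mutex_unlock(&sock_lock);
	release_sock_cb();		/* runs the deferred work */
	return 0;
}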
@@ -450,19 +443,11 @@ out_reset_timer:
 out:;
 }
 
-static void tcp_write_timer(unsigned long data)
+void tcp_write_timer_handler(struct sock *sk)
 {
-	struct sock *sk = (struct sock *)data;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int event;
 
-	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk)) {
-		/* Try again later */
-		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20));
-		goto out_unlock;
-	}
-
 	if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
 		goto out;
 
@@ -485,7 +470,19 @@ static void tcp_write_timer(unsigned long data)
 
 out:
 	sk_mem_reclaim(sk);
-out_unlock:
+}
+
+static void tcp_write_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk)) {
+		tcp_write_timer_handler(sk);
+	} else {
+		/* delegate our work to tcp_release_cb() */
+		set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+	}
 	bh_unlock_sock(sk);
 	sock_put(sk);
 }
@@ -602,3 +599,10 @@ out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
 }
+
+void tcp_init_xmit_timers(struct sock *sk)
+{
+	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
+				  &tcp_keepalive_timer);
+}
+EXPORT_SYMBOL(tcp_init_xmit_timers);
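Moving tcp_init_xmit_timers() below the timer callbacks is what lets the patch drop the three forward declarations: in C, each function name must be declared before its address is taken. The registration itself is ordinary function-pointer plumbing, as in this reduced sketch (struct and names invented; the kernel's inet_csk_init_xmit_timers() wires the callbacks into real per-socket timers):

#include <stdio.h>

struct timers {
	void (*retransmit)(unsigned long);
	void (*delack)(unsigned long);
	void (*keepalive)(unsigned long);
};

static struct timers tcp_timers;

/* Stand-in for inet_csk_init_xmit_timers(): records the callbacks. */
static void init_xmit_timers(void (*rtx)(unsigned long),
			     void (*dack)(unsigned long),
			     void (*ka)(unsigned long))
{
	tcp_timers.retransmit = rtx;
	tcp_timers.delack = dack;
	tcp_timers.keepalive = ka;
}

static void write_timer(unsigned long data)     { printf("rtx %lu\n", data); }
static void delack_timer(unsigned long data)    { printf("dack %lu\n", data); }
static void keepalive_timer(unsigned long data) { printf("ka %lu\n", data); }

/* Defined after the handlers, so no forward declarations are needed. */
int main(void)
{
	init_xmit_timers(write_timer, delack_timer, keepalive_timer);
	tcp_timers.retransmit(1UL);
	return 0;
}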