Diffstat (limited to 'net/ipv4/tcp.c')
| -rw-r--r-- | net/ipv4/tcp.c | 305 |
1 file changed, 180 insertions(+), 125 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b0a26bb25e2e..f115ea68a4ef 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
| @@ -265,6 +265,7 @@ | |||
| 265 | #include <linux/err.h> | 265 | #include <linux/err.h> |
| 266 | #include <linux/crypto.h> | 266 | #include <linux/crypto.h> |
| 267 | #include <linux/time.h> | 267 | #include <linux/time.h> |
| 268 | #include <linux/slab.h> | ||
| 268 | 269 | ||
| 269 | #include <net/icmp.h> | 270 | #include <net/icmp.h> |
| 270 | #include <net/tcp.h> | 271 | #include <net/tcp.h> |
| @@ -314,7 +315,6 @@ struct tcp_splice_state { | |||
| 314 | * is strict, actions are advisory and have some latency. | 315 | * is strict, actions are advisory and have some latency. |
| 315 | */ | 316 | */ |
| 316 | int tcp_memory_pressure __read_mostly; | 317 | int tcp_memory_pressure __read_mostly; |
| 317 | |||
| 318 | EXPORT_SYMBOL(tcp_memory_pressure); | 318 | EXPORT_SYMBOL(tcp_memory_pressure); |
| 319 | 319 | ||
| 320 | void tcp_enter_memory_pressure(struct sock *sk) | 320 | void tcp_enter_memory_pressure(struct sock *sk) |
| @@ -324,7 +324,6 @@ void tcp_enter_memory_pressure(struct sock *sk) | |||
| 324 | tcp_memory_pressure = 1; | 324 | tcp_memory_pressure = 1; |
| 325 | } | 325 | } |
| 326 | } | 326 | } |
| 327 | |||
| 328 | EXPORT_SYMBOL(tcp_enter_memory_pressure); | 327 | EXPORT_SYMBOL(tcp_enter_memory_pressure); |
| 329 | 328 | ||
| 330 | /* Convert seconds to retransmits based on initial and max timeout */ | 329 | /* Convert seconds to retransmits based on initial and max timeout */ |
| @@ -377,7 +376,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
| 377 | struct sock *sk = sock->sk; | 376 | struct sock *sk = sock->sk; |
| 378 | struct tcp_sock *tp = tcp_sk(sk); | 377 | struct tcp_sock *tp = tcp_sk(sk); |
| 379 | 378 | ||
| 380 | sock_poll_wait(file, sk->sk_sleep, wait); | 379 | sock_poll_wait(file, sk_sleep(sk), wait); |
| 381 | if (sk->sk_state == TCP_LISTEN) | 380 | if (sk->sk_state == TCP_LISTEN) |
| 382 | return inet_csk_listen_poll(sk); | 381 | return inet_csk_listen_poll(sk); |
| 383 | 382 | ||
| @@ -387,8 +386,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
| 387 | */ | 386 | */ |
| 388 | 387 | ||
| 389 | mask = 0; | 388 | mask = 0; |
| 390 | if (sk->sk_err) | ||
| 391 | mask = POLLERR; | ||
| 392 | 389 | ||
| 393 | /* | 390 | /* |
| 394 | * POLLHUP is certainly not done right. But poll() doesn't | 391 | * POLLHUP is certainly not done right. But poll() doesn't |
| @@ -429,7 +426,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
| 429 | if (tp->urg_seq == tp->copied_seq && | 426 | if (tp->urg_seq == tp->copied_seq && |
| 430 | !sock_flag(sk, SOCK_URGINLINE) && | 427 | !sock_flag(sk, SOCK_URGINLINE) && |
| 431 | tp->urg_data) | 428 | tp->urg_data) |
| 432 | target--; | 429 | target++; |
| 433 | 430 | ||
| 434 | /* Potential race condition. If read of tp below will | 431 | /* Potential race condition. If read of tp below will |
| 435 | * escape above sk->sk_state, we can be illegally awaken | 432 | * escape above sk->sk_state, we can be illegally awaken |
| @@ -452,13 +449,20 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
| 452 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) | 449 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) |
| 453 | mask |= POLLOUT | POLLWRNORM; | 450 | mask |= POLLOUT | POLLWRNORM; |
| 454 | } | 451 | } |
| 455 | } | 452 | } else |
| 453 | mask |= POLLOUT | POLLWRNORM; | ||
| 456 | 454 | ||
| 457 | if (tp->urg_data & TCP_URG_VALID) | 455 | if (tp->urg_data & TCP_URG_VALID) |
| 458 | mask |= POLLPRI; | 456 | mask |= POLLPRI; |
| 459 | } | 457 | } |
| 458 | /* This barrier is coupled with smp_wmb() in tcp_reset() */ | ||
| 459 | smp_rmb(); | ||
| 460 | if (sk->sk_err) | ||
| 461 | mask |= POLLERR; | ||
| 462 | |||
| 460 | return mask; | 463 | return mask; |
| 461 | } | 464 | } |
| 465 | EXPORT_SYMBOL(tcp_poll); | ||
| 462 | 466 | ||
| 463 | int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | 467 | int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) |
| 464 | { | 468 | { |
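Note on the tcp_poll() hunk above: moving the sk->sk_err test below smp_rmb() only works because tcp_reset() publishes the error with a paired smp_wmb(). A minimal sketch of that publish/observe pattern, with an invented struct standing in for the real socket fields:

/* Illustrative only: conn, reset_side() and poll_side() are made-up
 * names; the barrier pairing is the point. */
struct conn {
	int err;	/* written by the resetting side first */
	int state;	/* observed by pollers */
};

static void reset_side(struct conn *c)
{
	c->err = ECONNRESET;	/* publish the error code... */
	smp_wmb();		/* ...strictly before the state change */
	c->state = TCP_CLOSE;
}

static int poll_side(const struct conn *c)
{
	int mask = 0;

	if (c->state == TCP_CLOSE)
		mask |= POLLHUP;
	smp_rmb();		/* pairs with smp_wmb() in reset_side() */
	if (c->err)		/* cannot miss err if state was seen */
		mask |= POLLERR;
	return mask;
}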
| @@ -507,10 +511,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
| 507 | 511 | ||
| 508 | return put_user(answ, (int __user *)arg); | 512 | return put_user(answ, (int __user *)arg); |
| 509 | } | 513 | } |
| 514 | EXPORT_SYMBOL(tcp_ioctl); | ||
| 510 | 515 | ||
| 511 | static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) | 516 | static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) |
| 512 | { | 517 | { |
| 513 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 518 | TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; |
| 514 | tp->pushed_seq = tp->write_seq; | 519 | tp->pushed_seq = tp->write_seq; |
| 515 | } | 520 | } |
| 516 | 521 | ||
| @@ -526,7 +531,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) | |||
| 526 | 531 | ||
| 527 | skb->csum = 0; | 532 | skb->csum = 0; |
| 528 | tcb->seq = tcb->end_seq = tp->write_seq; | 533 | tcb->seq = tcb->end_seq = tp->write_seq; |
| 529 | tcb->flags = TCPCB_FLAG_ACK; | 534 | tcb->flags = TCPHDR_ACK; |
| 530 | tcb->sacked = 0; | 535 | tcb->sacked = 0; |
| 531 | skb_header_release(skb); | 536 | skb_header_release(skb); |
| 532 | tcp_add_write_queue_tail(sk, skb); | 537 | tcp_add_write_queue_tail(sk, skb); |
| @@ -536,8 +541,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) | |||
| 536 | tp->nonagle &= ~TCP_NAGLE_PUSH; | 541 | tp->nonagle &= ~TCP_NAGLE_PUSH; |
| 537 | } | 542 | } |
| 538 | 543 | ||
| 539 | static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, | 544 | static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) |
| 540 | struct sk_buff *skb) | ||
| 541 | { | 545 | { |
| 542 | if (flags & MSG_OOB) | 546 | if (flags & MSG_OOB) |
| 543 | tp->snd_up = tp->write_seq; | 547 | tp->snd_up = tp->write_seq; |
| @@ -546,13 +550,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, | |||
| 546 | static inline void tcp_push(struct sock *sk, int flags, int mss_now, | 550 | static inline void tcp_push(struct sock *sk, int flags, int mss_now, |
| 547 | int nonagle) | 551 | int nonagle) |
| 548 | { | 552 | { |
| 549 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 550 | |||
| 551 | if (tcp_send_head(sk)) { | 553 | if (tcp_send_head(sk)) { |
| 552 | struct sk_buff *skb = tcp_write_queue_tail(sk); | 554 | struct tcp_sock *tp = tcp_sk(sk); |
| 555 | |||
| 553 | if (!(flags & MSG_MORE) || forced_push(tp)) | 556 | if (!(flags & MSG_MORE) || forced_push(tp)) |
| 554 | tcp_mark_push(tp, skb); | 557 | tcp_mark_push(tp, tcp_write_queue_tail(sk)); |
| 555 | tcp_mark_urg(tp, flags, skb); | 558 | |
| 559 | tcp_mark_urg(tp, flags); | ||
| 556 | __tcp_push_pending_frames(sk, mss_now, | 560 | __tcp_push_pending_frames(sk, mss_now, |
| 557 | (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); | 561 | (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); |
| 558 | } | 562 | } |
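The tcp_mark_urg()/tcp_push() cleanup above drops the unused skb argument; both paths are driven purely by the sendmsg flags. From userspace the same flags look like this (a sketch, assuming fd is a connected TCP socket):

#include <string.h>
#include <sys/socket.h>

static void demo_push_flags(int fd)
{
	static const char a[] = "first chunk, ";
	static const char b[] = "second chunk\n";

	/* MSG_MORE: tcp_push() treats this as corking and skips the
	 * PSH mark until the final, uncorked write. */
	send(fd, a, sizeof(a) - 1, MSG_MORE);
	send(fd, b, sizeof(b) - 1, 0);

	/* MSG_OOB: tcp_mark_urg() advances snd_up, so this byte goes
	 * out as TCP urgent data. */
	send(fd, "!", 1, MSG_OOB);
}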
| @@ -608,6 +612,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, | |||
| 608 | ssize_t spliced; | 612 | ssize_t spliced; |
| 609 | int ret; | 613 | int ret; |
| 610 | 614 | ||
| 615 | sock_rps_record_flow(sk); | ||
| 611 | /* | 616 | /* |
| 612 | * We can't seek on a socket input | 617 | * We can't seek on a socket input |
| 613 | */ | 618 | */ |
| @@ -675,6 +680,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, | |||
| 675 | 680 | ||
| 676 | return ret; | 681 | return ret; |
| 677 | } | 682 | } |
| 683 | EXPORT_SYMBOL(tcp_splice_read); | ||
| 678 | 684 | ||
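For context, the sock_rps_record_flow() call added above sits on the receive path that userspace reaches through splice(2). A minimal consumer of that path, error handling trimmed:

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Move up to len bytes from a connected TCP socket into file_fd via a
 * pipe; the socket side of each splice() lands in tcp_splice_read(). */
static ssize_t sock_to_file(int sock_fd, int file_fd, size_t len)
{
	int p[2];
	ssize_t n, total = 0;

	if (pipe(p) < 0)
		return -1;
	while ((size_t)total < len &&
	       (n = splice(sock_fd, NULL, p[1], NULL, len - total,
			   SPLICE_F_MOVE)) > 0) {
		if (splice(p[0], NULL, file_fd, NULL, n, SPLICE_F_MOVE) < 0)
			break;
		total += n;
	}
	close(p[0]);
	close(p[1]);
	return total;
}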
| 679 | struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) | 685 | struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) |
| 680 | { | 686 | { |
| @@ -815,7 +821,7 @@ new_segment: | |||
| 815 | skb_shinfo(skb)->gso_segs = 0; | 821 | skb_shinfo(skb)->gso_segs = 0; |
| 816 | 822 | ||
| 817 | if (!copied) | 823 | if (!copied) |
| 818 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; | 824 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; |
| 819 | 825 | ||
| 820 | copied += copy; | 826 | copied += copy; |
| 821 | poffset += copy; | 827 | poffset += copy; |
| @@ -856,15 +862,15 @@ out_err: | |||
| 856 | return sk_stream_error(sk, flags, err); | 862 | return sk_stream_error(sk, flags, err); |
| 857 | } | 863 | } |
| 858 | 864 | ||
| 859 | ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, | 865 | int tcp_sendpage(struct sock *sk, struct page *page, int offset, |
| 860 | size_t size, int flags) | 866 | size_t size, int flags) |
| 861 | { | 867 | { |
| 862 | ssize_t res; | 868 | ssize_t res; |
| 863 | struct sock *sk = sock->sk; | ||
| 864 | 869 | ||
| 865 | if (!(sk->sk_route_caps & NETIF_F_SG) || | 870 | if (!(sk->sk_route_caps & NETIF_F_SG) || |
| 866 | !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) | 871 | !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) |
| 867 | return sock_no_sendpage(sock, page, offset, size, flags); | 872 | return sock_no_sendpage(sk->sk_socket, page, offset, size, |
| 873 | flags); | ||
| 868 | 874 | ||
| 869 | lock_sock(sk); | 875 | lock_sock(sk); |
| 870 | TCP_CHECK_TIMER(sk); | 876 | TCP_CHECK_TIMER(sk); |
| @@ -873,16 +879,17 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, | |||
| 873 | release_sock(sk); | 879 | release_sock(sk); |
| 874 | return res; | 880 | return res; |
| 875 | } | 881 | } |
| 882 | EXPORT_SYMBOL(tcp_sendpage); | ||
| 876 | 883 | ||
| 877 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) | 884 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) |
| 878 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) | 885 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) |
| 879 | 886 | ||
| 880 | static inline int select_size(struct sock *sk) | 887 | static inline int select_size(struct sock *sk, int sg) |
| 881 | { | 888 | { |
| 882 | struct tcp_sock *tp = tcp_sk(sk); | 889 | struct tcp_sock *tp = tcp_sk(sk); |
| 883 | int tmp = tp->mss_cache; | 890 | int tmp = tp->mss_cache; |
| 884 | 891 | ||
| 885 | if (sk->sk_route_caps & NETIF_F_SG) { | 892 | if (sg) { |
| 886 | if (sk_can_gso(sk)) | 893 | if (sk_can_gso(sk)) |
| 887 | tmp = 0; | 894 | tmp = 0; |
| 888 | else { | 895 | else { |
| @@ -897,16 +904,15 @@ static inline int select_size(struct sock *sk) | |||
| 897 | return tmp; | 904 | return tmp; |
| 898 | } | 905 | } |
| 899 | 906 | ||
| 900 | int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | 907 | int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
| 901 | size_t size) | 908 | size_t size) |
| 902 | { | 909 | { |
| 903 | struct sock *sk = sock->sk; | ||
| 904 | struct iovec *iov; | 910 | struct iovec *iov; |
| 905 | struct tcp_sock *tp = tcp_sk(sk); | 911 | struct tcp_sock *tp = tcp_sk(sk); |
| 906 | struct sk_buff *skb; | 912 | struct sk_buff *skb; |
| 907 | int iovlen, flags; | 913 | int iovlen, flags; |
| 908 | int mss_now, size_goal; | 914 | int mss_now, size_goal; |
| 909 | int err, copied; | 915 | int sg, err, copied; |
| 910 | long timeo; | 916 | long timeo; |
| 911 | 917 | ||
| 912 | lock_sock(sk); | 918 | lock_sock(sk); |
| @@ -934,8 +940,10 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | |||
| 934 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 940 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
| 935 | goto out_err; | 941 | goto out_err; |
| 936 | 942 | ||
| 943 | sg = sk->sk_route_caps & NETIF_F_SG; | ||
| 944 | |||
| 937 | while (--iovlen >= 0) { | 945 | while (--iovlen >= 0) { |
| 938 | int seglen = iov->iov_len; | 946 | size_t seglen = iov->iov_len; |
| 939 | unsigned char __user *from = iov->iov_base; | 947 | unsigned char __user *from = iov->iov_base; |
| 940 | 948 | ||
| 941 | iov++; | 949 | iov++; |
| @@ -959,8 +967,9 @@ new_segment: | |||
| 959 | if (!sk_stream_memory_free(sk)) | 967 | if (!sk_stream_memory_free(sk)) |
| 960 | goto wait_for_sndbuf; | 968 | goto wait_for_sndbuf; |
| 961 | 969 | ||
| 962 | skb = sk_stream_alloc_skb(sk, select_size(sk), | 970 | skb = sk_stream_alloc_skb(sk, |
| 963 | sk->sk_allocation); | 971 | select_size(sk, sg), |
| 972 | sk->sk_allocation); | ||
| 964 | if (!skb) | 973 | if (!skb) |
| 965 | goto wait_for_memory; | 974 | goto wait_for_memory; |
| 966 | 975 | ||
| @@ -997,9 +1006,7 @@ new_segment: | |||
| 997 | /* We can extend the last page | 1006 | /* We can extend the last page |
| 998 | * fragment. */ | 1007 | * fragment. */ |
| 999 | merge = 1; | 1008 | merge = 1; |
| 1000 | } else if (i == MAX_SKB_FRAGS || | 1009 | } else if (i == MAX_SKB_FRAGS || !sg) { |
| 1001 | (!i && | ||
| 1002 | !(sk->sk_route_caps & NETIF_F_SG))) { | ||
| 1003 | /* Need to add new fragment and cannot | 1010 | /* Need to add new fragment and cannot |
| 1004 | * do this because interface is non-SG, | 1011 | * do this because interface is non-SG, |
| 1005 | * or because all the page slots are | 1012 | * or because all the page slots are |
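In outline, the rewritten branch reduces tcp_sendmsg()'s per-chunk placement choice to three outcomes. A paraphrase with invented enum names (the real code inlines this test in the copy loop):

enum placement { MERGE_LAST_FRAG, ADD_NEW_FRAG, START_NEW_SEGMENT };

/* sg is the cached (sk->sk_route_caps & NETIF_F_SG) test taken once
 * at the top of tcp_sendmsg(). */
static enum placement choose_placement(const struct sk_buff *skb, int sg,
				       const struct page *page, int off)
{
	int i = skb_shinfo(skb)->nr_frags;

	if (i) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];

		if (page == frag->page &&
		    off == frag->page_offset + frag->size)
			return MERGE_LAST_FRAG;	/* extend last fragment */
	}
	if (i == MAX_SKB_FRAGS || !sg)
		return START_NEW_SEGMENT;	/* slots full or non-SG dev */
	return ADD_NEW_FRAG;			/* hang a new page fragment */
}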
| @@ -1060,7 +1067,7 @@ new_segment: | |||
| 1060 | } | 1067 | } |
| 1061 | 1068 | ||
| 1062 | if (!copied) | 1069 | if (!copied) |
| 1063 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; | 1070 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; |
| 1064 | 1071 | ||
| 1065 | tp->write_seq += copy; | 1072 | tp->write_seq += copy; |
| 1066 | TCP_SKB_CB(skb)->end_seq += copy; | 1073 | TCP_SKB_CB(skb)->end_seq += copy; |
| @@ -1120,6 +1127,7 @@ out_err: | |||
| 1120 | release_sock(sk); | 1127 | release_sock(sk); |
| 1121 | return err; | 1128 | return err; |
| 1122 | } | 1129 | } |
| 1130 | EXPORT_SYMBOL(tcp_sendmsg); | ||
| 1123 | 1131 | ||
| 1124 | /* | 1132 | /* |
| 1125 | * Handle reading urgent data. BSD has very simple semantics for | 1133 | * Handle reading urgent data. BSD has very simple semantics for |
| @@ -1254,6 +1262,39 @@ static void tcp_prequeue_process(struct sock *sk) | |||
| 1254 | tp->ucopy.memory = 0; | 1262 | tp->ucopy.memory = 0; |
| 1255 | } | 1263 | } |
| 1256 | 1264 | ||
| 1265 | #ifdef CONFIG_NET_DMA | ||
| 1266 | static void tcp_service_net_dma(struct sock *sk, bool wait) | ||
| 1267 | { | ||
| 1268 | dma_cookie_t done, used; | ||
| 1269 | dma_cookie_t last_issued; | ||
| 1270 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 1271 | |||
| 1272 | if (!tp->ucopy.dma_chan) | ||
| 1273 | return; | ||
| 1274 | |||
| 1275 | last_issued = tp->ucopy.dma_cookie; | ||
| 1276 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
| 1277 | |||
| 1278 | do { | ||
| 1279 | if (dma_async_memcpy_complete(tp->ucopy.dma_chan, | ||
| 1280 | last_issued, &done, | ||
| 1281 | &used) == DMA_SUCCESS) { | ||
| 1282 | /* Safe to free early-copied skbs now */ | ||
| 1283 | __skb_queue_purge(&sk->sk_async_wait_queue); | ||
| 1284 | break; | ||
| 1285 | } else { | ||
| 1286 | struct sk_buff *skb; | ||
| 1287 | while ((skb = skb_peek(&sk->sk_async_wait_queue)) && | ||
| 1288 | (dma_async_is_complete(skb->dma_cookie, done, | ||
| 1289 | used) == DMA_SUCCESS)) { | ||
| 1290 | __skb_dequeue(&sk->sk_async_wait_queue); | ||
| 1291 | kfree_skb(skb); | ||
| 1292 | } | ||
| 1293 | } | ||
| 1294 | } while (wait); | ||
| 1295 | } | ||
| 1296 | #endif | ||
| 1297 | |||
| 1257 | static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) | 1298 | static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) |
| 1258 | { | 1299 | { |
| 1259 | struct sk_buff *skb; | 1300 | struct sk_buff *skb; |
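tcp_service_net_dma() above centralizes one drain pattern: kick the channel, then free early-copied skbs in completion order, once (wait=false) or until everything issued has landed (wait=true). The same shape in schematic form, with hypothetical engine_*/queue_* helpers standing in for the dmaengine and skb-queue calls:

/* Hypothetical API, shown for shape only. */
static void drain_async_copies(struct engine *eng, struct queue *q, bool wait)
{
	engine_kick(eng);			/* issue pending descriptors */
	do {
		if (engine_done(eng, eng->last_issued)) {
			queue_purge(q);		/* everything landed */
			break;
		}
		/* partial progress: release only completed buffers */
		while (!queue_empty(q) && buffer_done(eng, queue_head(q)))
			buffer_free(queue_pop(q));
	} while (wait);
}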
| @@ -1335,6 +1376,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
| 1335 | sk_eat_skb(sk, skb, 0); | 1376 | sk_eat_skb(sk, skb, 0); |
| 1336 | if (!desc->count) | 1377 | if (!desc->count) |
| 1337 | break; | 1378 | break; |
| 1379 | tp->copied_seq = seq; | ||
| 1338 | } | 1380 | } |
| 1339 | tp->copied_seq = seq; | 1381 | tp->copied_seq = seq; |
| 1340 | 1382 | ||
| @@ -1345,6 +1387,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
| 1345 | tcp_cleanup_rbuf(sk, copied); | 1387 | tcp_cleanup_rbuf(sk, copied); |
| 1346 | return copied; | 1388 | return copied; |
| 1347 | } | 1389 | } |
| 1390 | EXPORT_SYMBOL(tcp_read_sock); | ||
| 1348 | 1391 | ||
| 1349 | /* | 1392 | /* |
| 1350 | * This routine copies from a sock struct into the user buffer. | 1393 | * This routine copies from a sock struct into the user buffer. |
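The copied_seq fix in the tcp_read_sock() hunk above matters to every caller, since the actor may consume less than it is offered. A sketch of the calling convention (the actor name is invented; real users include splice and storage drivers):

static int count_bytes_actor(read_descriptor_t *desc, struct sk_buff *skb,
			     unsigned int offset, size_t len)
{
	size_t want = min_t(size_t, len, desc->count);

	desc->count -= want;	/* tcp_read_sock() stops at zero */
	return want;		/* bytes consumed on this call */
}

/* Caller must hold the socket lock. */
static void drain_budget(struct sock *sk, size_t budget)
{
	read_descriptor_t desc = { .count = budget };

	tcp_read_sock(sk, &desc, count_bytes_actor);
}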
| @@ -1546,6 +1589,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 1546 | /* __ Set realtime policy in scheduler __ */ | 1589 | /* __ Set realtime policy in scheduler __ */ |
| 1547 | } | 1590 | } |
| 1548 | 1591 | ||
| 1592 | #ifdef CONFIG_NET_DMA | ||
| 1593 | if (tp->ucopy.dma_chan) | ||
| 1594 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
| 1595 | #endif | ||
| 1549 | if (copied >= target) { | 1596 | if (copied >= target) { |
| 1550 | /* Do not sleep, just process backlog. */ | 1597 | /* Do not sleep, just process backlog. */ |
| 1551 | release_sock(sk); | 1598 | release_sock(sk); |
| @@ -1554,6 +1601,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 1554 | sk_wait_data(sk, &timeo); | 1601 | sk_wait_data(sk, &timeo); |
| 1555 | 1602 | ||
| 1556 | #ifdef CONFIG_NET_DMA | 1603 | #ifdef CONFIG_NET_DMA |
| 1604 | tcp_service_net_dma(sk, false); /* Don't block */ | ||
| 1557 | tp->ucopy.wakeup = 0; | 1605 | tp->ucopy.wakeup = 0; |
| 1558 | #endif | 1606 | #endif |
| 1559 | 1607 | ||
| @@ -1633,6 +1681,9 @@ do_prequeue: | |||
| 1633 | copied = -EFAULT; | 1681 | copied = -EFAULT; |
| 1634 | break; | 1682 | break; |
| 1635 | } | 1683 | } |
| 1684 | |||
| 1685 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
| 1686 | |||
| 1636 | if ((offset + used) == skb->len) | 1687 | if ((offset + used) == skb->len) |
| 1637 | copied_early = 1; | 1688 | copied_early = 1; |
| 1638 | 1689 | ||
| @@ -1702,27 +1753,9 @@ skip_copy: | |||
| 1702 | } | 1753 | } |
| 1703 | 1754 | ||
| 1704 | #ifdef CONFIG_NET_DMA | 1755 | #ifdef CONFIG_NET_DMA |
| 1705 | if (tp->ucopy.dma_chan) { | 1756 | tcp_service_net_dma(sk, true); /* Wait for queue to drain */ |
| 1706 | dma_cookie_t done, used; | 1757 | tp->ucopy.dma_chan = NULL; |
| 1707 | |||
| 1708 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | ||
| 1709 | |||
| 1710 | while (dma_async_memcpy_complete(tp->ucopy.dma_chan, | ||
| 1711 | tp->ucopy.dma_cookie, &done, | ||
| 1712 | &used) == DMA_IN_PROGRESS) { | ||
| 1713 | /* do partial cleanup of sk_async_wait_queue */ | ||
| 1714 | while ((skb = skb_peek(&sk->sk_async_wait_queue)) && | ||
| 1715 | (dma_async_is_complete(skb->dma_cookie, done, | ||
| 1716 | used) == DMA_SUCCESS)) { | ||
| 1717 | __skb_dequeue(&sk->sk_async_wait_queue); | ||
| 1718 | kfree_skb(skb); | ||
| 1719 | } | ||
| 1720 | } | ||
| 1721 | 1758 | ||
| 1722 | /* Safe to free early-copied skbs now */ | ||
| 1723 | __skb_queue_purge(&sk->sk_async_wait_queue); | ||
| 1724 | tp->ucopy.dma_chan = NULL; | ||
| 1725 | } | ||
| 1726 | if (tp->ucopy.pinned_list) { | 1759 | if (tp->ucopy.pinned_list) { |
| 1727 | dma_unpin_iovec_pages(tp->ucopy.pinned_list); | 1760 | dma_unpin_iovec_pages(tp->ucopy.pinned_list); |
| 1728 | tp->ucopy.pinned_list = NULL; | 1761 | tp->ucopy.pinned_list = NULL; |
| @@ -1749,6 +1782,7 @@ recv_urg: | |||
| 1749 | err = tcp_recv_urg(sk, msg, len, flags); | 1782 | err = tcp_recv_urg(sk, msg, len, flags); |
| 1750 | goto out; | 1783 | goto out; |
| 1751 | } | 1784 | } |
| 1785 | EXPORT_SYMBOL(tcp_recvmsg); | ||
| 1752 | 1786 | ||
| 1753 | void tcp_set_state(struct sock *sk, int state) | 1787 | void tcp_set_state(struct sock *sk, int state) |
| 1754 | { | 1788 | { |
| @@ -1841,6 +1875,7 @@ void tcp_shutdown(struct sock *sk, int how) | |||
| 1841 | tcp_send_fin(sk); | 1875 | tcp_send_fin(sk); |
| 1842 | } | 1876 | } |
| 1843 | } | 1877 | } |
| 1878 | EXPORT_SYMBOL(tcp_shutdown); | ||
| 1844 | 1879 | ||
| 1845 | void tcp_close(struct sock *sk, long timeout) | 1880 | void tcp_close(struct sock *sk, long timeout) |
| 1846 | { | 1881 | { |
| @@ -1873,6 +1908,10 @@ void tcp_close(struct sock *sk, long timeout) | |||
| 1873 | 1908 | ||
| 1874 | sk_mem_reclaim(sk); | 1909 | sk_mem_reclaim(sk); |
| 1875 | 1910 | ||
| 1911 | /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */ | ||
| 1912 | if (sk->sk_state == TCP_CLOSE) | ||
| 1913 | goto adjudge_to_death; | ||
| 1914 | |||
| 1876 | /* As outlined in RFC 2525, section 2.17, we send a RST here because | 1915 | /* As outlined in RFC 2525, section 2.17, we send a RST here because |
| 1877 | * data was lost. To witness the awful effects of the old behavior of | 1916 | * data was lost. To witness the awful effects of the old behavior of |
| 1878 | * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk | 1917 | * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk |
| @@ -1976,11 +2015,8 @@ adjudge_to_death: | |||
| 1976 | } | 2015 | } |
| 1977 | } | 2016 | } |
| 1978 | if (sk->sk_state != TCP_CLOSE) { | 2017 | if (sk->sk_state != TCP_CLOSE) { |
| 1979 | int orphan_count = percpu_counter_read_positive( | ||
| 1980 | sk->sk_prot->orphan_count); | ||
| 1981 | |||
| 1982 | sk_mem_reclaim(sk); | 2018 | sk_mem_reclaim(sk); |
| 1983 | if (tcp_too_many_orphans(sk, orphan_count)) { | 2019 | if (tcp_too_many_orphans(sk, 0)) { |
| 1984 | if (net_ratelimit()) | 2020 | if (net_ratelimit()) |
| 1985 | printk(KERN_INFO "TCP: too many of orphaned " | 2021 | printk(KERN_INFO "TCP: too many of orphaned " |
| 1986 | "sockets\n"); | 2022 | "sockets\n"); |
| @@ -2000,6 +2036,7 @@ out: | |||
| 2000 | local_bh_enable(); | 2036 | local_bh_enable(); |
| 2001 | sock_put(sk); | 2037 | sock_put(sk); |
| 2002 | } | 2038 | } |
| 2039 | EXPORT_SYMBOL(tcp_close); | ||
| 2003 | 2040 | ||
| 2004 | /* These states need RST on ABORT according to RFC793 */ | 2041 | /* These states need RST on ABORT according to RFC793 */ |
| 2005 | 2042 | ||
| @@ -2073,6 +2110,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
| 2073 | sk->sk_error_report(sk); | 2110 | sk->sk_error_report(sk); |
| 2074 | return err; | 2111 | return err; |
| 2075 | } | 2112 | } |
| 2113 | EXPORT_SYMBOL(tcp_disconnect); | ||
| 2076 | 2114 | ||
| 2077 | /* | 2115 | /* |
| 2078 | * Socket option code for TCP. | 2116 | * Socket option code for TCP. |
| @@ -2150,6 +2188,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2150 | GFP_KERNEL); | 2188 | GFP_KERNEL); |
| 2151 | if (cvp == NULL) | 2189 | if (cvp == NULL) |
| 2152 | return -ENOMEM; | 2190 | return -ENOMEM; |
| 2191 | |||
| 2192 | kref_init(&cvp->kref); | ||
| 2153 | } | 2193 | } |
| 2154 | lock_sock(sk); | 2194 | lock_sock(sk); |
| 2155 | tp->rx_opt.cookie_in_always = | 2195 | tp->rx_opt.cookie_in_always = |
| @@ -2164,12 +2204,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2164 | */ | 2204 | */ |
| 2165 | kref_put(&tp->cookie_values->kref, | 2205 | kref_put(&tp->cookie_values->kref, |
| 2166 | tcp_cookie_values_release); | 2206 | tcp_cookie_values_release); |
| 2167 | kref_init(&cvp->kref); | ||
| 2168 | tp->cookie_values = cvp; | ||
| 2169 | } else { | 2207 | } else { |
| 2170 | cvp = tp->cookie_values; | 2208 | cvp = tp->cookie_values; |
| 2171 | } | 2209 | } |
| 2172 | } | 2210 | } |
| 2211 | |||
| 2173 | if (cvp != NULL) { | 2212 | if (cvp != NULL) { |
| 2174 | cvp->cookie_desired = ctd.tcpct_cookie_desired; | 2213 | cvp->cookie_desired = ctd.tcpct_cookie_desired; |
| 2175 | 2214 | ||
| @@ -2183,6 +2222,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2183 | cvp->s_data_desired = ctd.tcpct_s_data_desired; | 2222 | cvp->s_data_desired = ctd.tcpct_s_data_desired; |
| 2184 | cvp->s_data_constant = 0; /* false */ | 2223 | cvp->s_data_constant = 0; /* false */ |
| 2185 | } | 2224 | } |
| 2225 | |||
| 2226 | tp->cookie_values = cvp; | ||
| 2186 | } | 2227 | } |
| 2187 | release_sock(sk); | 2228 | release_sock(sk); |
| 2188 | return err; | 2229 | return err; |
| @@ -2190,7 +2231,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2190 | default: | 2231 | default: |
| 2191 | /* fallthru */ | 2232 | /* fallthru */ |
| 2192 | break; | 2233 | break; |
| 2193 | }; | 2234 | } |
| 2194 | 2235 | ||
| 2195 | if (optlen < sizeof(int)) | 2236 | if (optlen < sizeof(int)) |
| 2196 | return -EINVAL; | 2237 | return -EINVAL; |
| @@ -2229,6 +2270,20 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2229 | } | 2270 | } |
| 2230 | break; | 2271 | break; |
| 2231 | 2272 | ||
| 2273 | case TCP_THIN_LINEAR_TIMEOUTS: | ||
| 2274 | if (val < 0 || val > 1) | ||
| 2275 | err = -EINVAL; | ||
| 2276 | else | ||
| 2277 | tp->thin_lto = val; | ||
| 2278 | break; | ||
| 2279 | |||
| 2280 | case TCP_THIN_DUPACK: | ||
| 2281 | if (val < 0 || val > 1) | ||
| 2282 | err = -EINVAL; | ||
| 2283 | else | ||
| 2284 | tp->thin_dupack = val; | ||
| 2285 | break; | ||
| 2286 | |||
| 2232 | case TCP_CORK: | 2287 | case TCP_CORK: |
| 2233 | /* When set indicates to always queue non-full frames. | 2288 | /* When set indicates to always queue non-full frames. |
| 2234 | * Later the user clears this option and we transmit | 2289 | * Later the user clears this option and we transmit |
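The two new thin-stream options added above are plain booleans. From userspace, assuming a libc whose headers carry the current linux/tcp.h definitions:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Opt a thin interactive stream out of exponential RTO backoff and
 * into fast retransmit after a single duplicate ACK. */
static int enable_thin_stream(int fd)
{
	int one = 1;

	if (setsockopt(fd, IPPROTO_TCP, TCP_THIN_LINEAR_TIMEOUTS,
		       &one, sizeof(one)) < 0)
		return -1;
	return setsockopt(fd, IPPROTO_TCP, TCP_THIN_DUPACK,
			  &one, sizeof(one));
}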
| @@ -2259,7 +2314,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2259 | if (sock_flag(sk, SOCK_KEEPOPEN) && | 2314 | if (sock_flag(sk, SOCK_KEEPOPEN) && |
| 2260 | !((1 << sk->sk_state) & | 2315 | !((1 << sk->sk_state) & |
| 2261 | (TCPF_CLOSE | TCPF_LISTEN))) { | 2316 | (TCPF_CLOSE | TCPF_LISTEN))) { |
| 2262 | __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; | 2317 | u32 elapsed = keepalive_time_elapsed(tp); |
| 2263 | if (tp->keepalive_time > elapsed) | 2318 | if (tp->keepalive_time > elapsed) |
| 2264 | elapsed = tp->keepalive_time - elapsed; | 2319 | elapsed = tp->keepalive_time - elapsed; |
| 2265 | else | 2320 | else |
| @@ -2357,6 +2412,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
| 2357 | optval, optlen); | 2412 | optval, optlen); |
| 2358 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); | 2413 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); |
| 2359 | } | 2414 | } |
| 2415 | EXPORT_SYMBOL(tcp_setsockopt); | ||
| 2360 | 2416 | ||
| 2361 | #ifdef CONFIG_COMPAT | 2417 | #ifdef CONFIG_COMPAT |
| 2362 | int compat_tcp_setsockopt(struct sock *sk, int level, int optname, | 2418 | int compat_tcp_setsockopt(struct sock *sk, int level, int optname, |
| @@ -2367,7 +2423,6 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, | |||
| 2367 | optval, optlen); | 2423 | optval, optlen); |
| 2368 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); | 2424 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); |
| 2369 | } | 2425 | } |
| 2370 | |||
| 2371 | EXPORT_SYMBOL(compat_tcp_setsockopt); | 2426 | EXPORT_SYMBOL(compat_tcp_setsockopt); |
| 2372 | #endif | 2427 | #endif |
| 2373 | 2428 | ||
| @@ -2433,7 +2488,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) | |||
| 2433 | 2488 | ||
| 2434 | info->tcpi_total_retrans = tp->total_retrans; | 2489 | info->tcpi_total_retrans = tp->total_retrans; |
| 2435 | } | 2490 | } |
| 2436 | |||
| 2437 | EXPORT_SYMBOL_GPL(tcp_get_info); | 2491 | EXPORT_SYMBOL_GPL(tcp_get_info); |
| 2438 | 2492 | ||
| 2439 | static int do_tcp_getsockopt(struct sock *sk, int level, | 2493 | static int do_tcp_getsockopt(struct sock *sk, int level, |
| @@ -2551,6 +2605,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
| 2551 | return -EFAULT; | 2605 | return -EFAULT; |
| 2552 | return 0; | 2606 | return 0; |
| 2553 | } | 2607 | } |
| 2608 | case TCP_THIN_LINEAR_TIMEOUTS: | ||
| 2609 | val = tp->thin_lto; | ||
| 2610 | break; | ||
| 2611 | case TCP_THIN_DUPACK: | ||
| 2612 | val = tp->thin_dupack; | ||
| 2613 | break; | ||
| 2554 | default: | 2614 | default: |
| 2555 | return -ENOPROTOOPT; | 2615 | return -ENOPROTOOPT; |
| 2556 | } | 2616 | } |
| @@ -2572,6 +2632,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
| 2572 | optval, optlen); | 2632 | optval, optlen); |
| 2573 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); | 2633 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); |
| 2574 | } | 2634 | } |
| 2635 | EXPORT_SYMBOL(tcp_getsockopt); | ||
| 2575 | 2636 | ||
| 2576 | #ifdef CONFIG_COMPAT | 2637 | #ifdef CONFIG_COMPAT |
| 2577 | int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | 2638 | int compat_tcp_getsockopt(struct sock *sk, int level, int optname, |
| @@ -2582,7 +2643,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | |||
| 2582 | optval, optlen); | 2643 | optval, optlen); |
| 2583 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); | 2644 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); |
| 2584 | } | 2645 | } |
| 2585 | |||
| 2586 | EXPORT_SYMBOL(compat_tcp_getsockopt); | 2646 | EXPORT_SYMBOL(compat_tcp_getsockopt); |
| 2587 | #endif | 2647 | #endif |
| 2588 | 2648 | ||
| @@ -2682,7 +2742,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
| 2682 | struct tcphdr *th2; | 2742 | struct tcphdr *th2; |
| 2683 | unsigned int len; | 2743 | unsigned int len; |
| 2684 | unsigned int thlen; | 2744 | unsigned int thlen; |
| 2685 | unsigned int flags; | 2745 | __be32 flags; |
| 2686 | unsigned int mss = 1; | 2746 | unsigned int mss = 1; |
| 2687 | unsigned int hlen; | 2747 | unsigned int hlen; |
| 2688 | unsigned int off; | 2748 | unsigned int off; |
| @@ -2732,10 +2792,10 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
| 2732 | 2792 | ||
| 2733 | found: | 2793 | found: |
| 2734 | flush = NAPI_GRO_CB(p)->flush; | 2794 | flush = NAPI_GRO_CB(p)->flush; |
| 2735 | flush |= flags & TCP_FLAG_CWR; | 2795 | flush |= (__force int)(flags & TCP_FLAG_CWR); |
| 2736 | flush |= (flags ^ tcp_flag_word(th2)) & | 2796 | flush |= (__force int)((flags ^ tcp_flag_word(th2)) & |
| 2737 | ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); | 2797 | ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); |
| 2738 | flush |= th->ack_seq ^ th2->ack_seq; | 2798 | flush |= (__force int)(th->ack_seq ^ th2->ack_seq); |
| 2739 | for (i = sizeof(*th); i < thlen; i += 4) | 2799 | for (i = sizeof(*th); i < thlen; i += 4) |
| 2740 | flush |= *(u32 *)((u8 *)th + i) ^ | 2800 | flush |= *(u32 *)((u8 *)th + i) ^ |
| 2741 | *(u32 *)((u8 *)th2 + i); | 2801 | *(u32 *)((u8 *)th2 + i); |
| @@ -2756,8 +2816,9 @@ found: | |||
| 2756 | 2816 | ||
| 2757 | out_check_final: | 2817 | out_check_final: |
| 2758 | flush = len < mss; | 2818 | flush = len < mss; |
| 2759 | flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | | 2819 | flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | |
| 2760 | TCP_FLAG_SYN | TCP_FLAG_FIN); | 2820 | TCP_FLAG_RST | TCP_FLAG_SYN | |
| 2821 | TCP_FLAG_FIN)); | ||
| 2761 | 2822 | ||
| 2762 | if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) | 2823 | if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) |
| 2763 | pp = head; | 2824 | pp = head; |
| @@ -2788,10 +2849,10 @@ EXPORT_SYMBOL(tcp_gro_complete); | |||
| 2788 | 2849 | ||
| 2789 | #ifdef CONFIG_TCP_MD5SIG | 2850 | #ifdef CONFIG_TCP_MD5SIG |
| 2790 | static unsigned long tcp_md5sig_users; | 2851 | static unsigned long tcp_md5sig_users; |
| 2791 | static struct tcp_md5sig_pool **tcp_md5sig_pool; | 2852 | static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; |
| 2792 | static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); | 2853 | static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); |
| 2793 | 2854 | ||
| 2794 | static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) | 2855 | static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) |
| 2795 | { | 2856 | { |
| 2796 | int cpu; | 2857 | int cpu; |
| 2797 | for_each_possible_cpu(cpu) { | 2858 | for_each_possible_cpu(cpu) { |
| @@ -2800,7 +2861,6 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) | |||
| 2800 | if (p->md5_desc.tfm) | 2861 | if (p->md5_desc.tfm) |
| 2801 | crypto_free_hash(p->md5_desc.tfm); | 2862 | crypto_free_hash(p->md5_desc.tfm); |
| 2802 | kfree(p); | 2863 | kfree(p); |
| 2803 | p = NULL; | ||
| 2804 | } | 2864 | } |
| 2805 | } | 2865 | } |
| 2806 | free_percpu(pool); | 2866 | free_percpu(pool); |
| @@ -2808,7 +2868,7 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) | |||
| 2808 | 2868 | ||
| 2809 | void tcp_free_md5sig_pool(void) | 2869 | void tcp_free_md5sig_pool(void) |
| 2810 | { | 2870 | { |
| 2811 | struct tcp_md5sig_pool **pool = NULL; | 2871 | struct tcp_md5sig_pool * __percpu *pool = NULL; |
| 2812 | 2872 | ||
| 2813 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2873 | spin_lock_bh(&tcp_md5sig_pool_lock); |
| 2814 | if (--tcp_md5sig_users == 0) { | 2874 | if (--tcp_md5sig_users == 0) { |
| @@ -2819,13 +2879,13 @@ void tcp_free_md5sig_pool(void) | |||
| 2819 | if (pool) | 2879 | if (pool) |
| 2820 | __tcp_free_md5sig_pool(pool); | 2880 | __tcp_free_md5sig_pool(pool); |
| 2821 | } | 2881 | } |
| 2822 | |||
| 2823 | EXPORT_SYMBOL(tcp_free_md5sig_pool); | 2882 | EXPORT_SYMBOL(tcp_free_md5sig_pool); |
| 2824 | 2883 | ||
| 2825 | static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk) | 2884 | static struct tcp_md5sig_pool * __percpu * |
| 2885 | __tcp_alloc_md5sig_pool(struct sock *sk) | ||
| 2826 | { | 2886 | { |
| 2827 | int cpu; | 2887 | int cpu; |
| 2828 | struct tcp_md5sig_pool **pool; | 2888 | struct tcp_md5sig_pool * __percpu *pool; |
| 2829 | 2889 | ||
| 2830 | pool = alloc_percpu(struct tcp_md5sig_pool *); | 2890 | pool = alloc_percpu(struct tcp_md5sig_pool *); |
| 2831 | if (!pool) | 2891 | if (!pool) |
| @@ -2852,9 +2912,9 @@ out_free: | |||
| 2852 | return NULL; | 2912 | return NULL; |
| 2853 | } | 2913 | } |
| 2854 | 2914 | ||
| 2855 | struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk) | 2915 | struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) |
| 2856 | { | 2916 | { |
| 2857 | struct tcp_md5sig_pool **pool; | 2917 | struct tcp_md5sig_pool * __percpu *pool; |
| 2858 | int alloc = 0; | 2918 | int alloc = 0; |
| 2859 | 2919 | ||
| 2860 | retry: | 2920 | retry: |
| @@ -2873,7 +2933,9 @@ retry: | |||
| 2873 | 2933 | ||
| 2874 | if (alloc) { | 2934 | if (alloc) { |
| 2875 | /* we cannot hold spinlock here because this may sleep. */ | 2935 | /* we cannot hold spinlock here because this may sleep. */ |
| 2876 | struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk); | 2936 | struct tcp_md5sig_pool * __percpu *p; |
| 2937 | |||
| 2938 | p = __tcp_alloc_md5sig_pool(sk); | ||
| 2877 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2939 | spin_lock_bh(&tcp_md5sig_pool_lock); |
| 2878 | if (!p) { | 2940 | if (!p) { |
| 2879 | tcp_md5sig_users--; | 2941 | tcp_md5sig_users--; |
| @@ -2892,28 +2954,42 @@ retry: | |||
| 2892 | } | 2954 | } |
| 2893 | return pool; | 2955 | return pool; |
| 2894 | } | 2956 | } |
| 2895 | |||
| 2896 | EXPORT_SYMBOL(tcp_alloc_md5sig_pool); | 2957 | EXPORT_SYMBOL(tcp_alloc_md5sig_pool); |
| 2897 | 2958 | ||
| 2898 | struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) | 2959 | |
| 2960 | /** | ||
| 2961 | * tcp_get_md5sig_pool - get md5sig_pool for this user | ||
| 2962 | * | ||
| 2963 | * We use percpu structure, so if we succeed, we exit with preemption | ||
| 2964 | * and BH disabled, to make sure another thread or softirq handling | ||
| 2965 | * wont try to get same context. | ||
| 2966 | */ | ||
| 2967 | struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) | ||
| 2899 | { | 2968 | { |
| 2900 | struct tcp_md5sig_pool **p; | 2969 | struct tcp_md5sig_pool * __percpu *p; |
| 2901 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2970 | |
| 2971 | local_bh_disable(); | ||
| 2972 | |||
| 2973 | spin_lock(&tcp_md5sig_pool_lock); | ||
| 2902 | p = tcp_md5sig_pool; | 2974 | p = tcp_md5sig_pool; |
| 2903 | if (p) | 2975 | if (p) |
| 2904 | tcp_md5sig_users++; | 2976 | tcp_md5sig_users++; |
| 2905 | spin_unlock_bh(&tcp_md5sig_pool_lock); | 2977 | spin_unlock(&tcp_md5sig_pool_lock); |
| 2906 | return (p ? *per_cpu_ptr(p, cpu) : NULL); | 2978 | |
| 2907 | } | 2979 | if (p) |
| 2980 | return *this_cpu_ptr(p); | ||
| 2908 | 2981 | ||
| 2909 | EXPORT_SYMBOL(__tcp_get_md5sig_pool); | 2982 | local_bh_enable(); |
| 2983 | return NULL; | ||
| 2984 | } | ||
| 2985 | EXPORT_SYMBOL(tcp_get_md5sig_pool); | ||
| 2910 | 2986 | ||
| 2911 | void __tcp_put_md5sig_pool(void) | 2987 | void tcp_put_md5sig_pool(void) |
| 2912 | { | 2988 | { |
| 2989 | local_bh_enable(); | ||
| 2913 | tcp_free_md5sig_pool(); | 2990 | tcp_free_md5sig_pool(); |
| 2914 | } | 2991 | } |
| 2915 | 2992 | EXPORT_SYMBOL(tcp_put_md5sig_pool); | |
| 2916 | EXPORT_SYMBOL(__tcp_put_md5sig_pool); | ||
| 2917 | 2993 | ||
| 2918 | int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, | 2994 | int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, |
| 2919 | struct tcphdr *th) | 2995 | struct tcphdr *th) |
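The reworked pool API above turns every call site into a strict bracket: tcp_get_md5sig_pool() returns with BH disabled, and tcp_put_md5sig_pool() re-enables BH and drops the user count. A sketch of the expected caller shape:

static int hash_one_header(struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	int err;

	hp = tcp_get_md5sig_pool();	/* BH now disabled on success */
	if (!hp)
		return -ENOMEM;		/* failure path re-enabled BH */

	err = tcp_md5_hash_header(hp, th);

	tcp_put_md5sig_pool();		/* re-enable BH, drop the ref */
	return err;
}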
| @@ -2929,7 +3005,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, | |||
| 2929 | th->check = old_checksum; | 3005 | th->check = old_checksum; |
| 2930 | return err; | 3006 | return err; |
| 2931 | } | 3007 | } |
| 2932 | |||
| 2933 | EXPORT_SYMBOL(tcp_md5_hash_header); | 3008 | EXPORT_SYMBOL(tcp_md5_hash_header); |
| 2934 | 3009 | ||
| 2935 | int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | 3010 | int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, |
| @@ -2942,6 +3017,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
| 2942 | const unsigned head_data_len = skb_headlen(skb) > header_len ? | 3017 | const unsigned head_data_len = skb_headlen(skb) > header_len ? |
| 2943 | skb_headlen(skb) - header_len : 0; | 3018 | skb_headlen(skb) - header_len : 0; |
| 2944 | const struct skb_shared_info *shi = skb_shinfo(skb); | 3019 | const struct skb_shared_info *shi = skb_shinfo(skb); |
| 3020 | struct sk_buff *frag_iter; | ||
| 2945 | 3021 | ||
| 2946 | sg_init_table(&sg, 1); | 3022 | sg_init_table(&sg, 1); |
| 2947 | 3023 | ||
| @@ -2956,9 +3032,12 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
| 2956 | return 1; | 3032 | return 1; |
| 2957 | } | 3033 | } |
| 2958 | 3034 | ||
| 3035 | skb_walk_frags(skb, frag_iter) | ||
| 3036 | if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) | ||
| 3037 | return 1; | ||
| 3038 | |||
| 2959 | return 0; | 3039 | return 0; |
| 2960 | } | 3040 | } |
| 2961 | |||
| 2962 | EXPORT_SYMBOL(tcp_md5_hash_skb_data); | 3041 | EXPORT_SYMBOL(tcp_md5_hash_skb_data); |
| 2963 | 3042 | ||
| 2964 | int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) | 3043 | int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) |
| @@ -2968,7 +3047,6 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) | |||
| 2968 | sg_init_one(&sg, key->key, key->keylen); | 3047 | sg_init_one(&sg, key->key, key->keylen); |
| 2969 | return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); | 3048 | return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); |
| 2970 | } | 3049 | } |
| 2971 | |||
| 2972 | EXPORT_SYMBOL(tcp_md5_hash_key); | 3050 | EXPORT_SYMBOL(tcp_md5_hash_key); |
| 2973 | 3051 | ||
| 2974 | #endif | 3052 | #endif |
| @@ -3135,7 +3213,7 @@ void __init tcp_init(void) | |||
| 3135 | { | 3213 | { |
| 3136 | struct sk_buff *skb = NULL; | 3214 | struct sk_buff *skb = NULL; |
| 3137 | unsigned long nr_pages, limit; | 3215 | unsigned long nr_pages, limit; |
| 3138 | int order, i, max_share; | 3216 | int i, max_share, cnt; |
| 3139 | unsigned long jiffy = jiffies; | 3217 | unsigned long jiffy = jiffies; |
| 3140 | 3218 | ||
| 3141 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); | 3219 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); |
| @@ -3184,22 +3262,12 @@ void __init tcp_init(void) | |||
| 3184 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); | 3262 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); |
| 3185 | } | 3263 | } |
| 3186 | 3264 | ||
| 3187 | /* Try to be a bit smarter and adjust defaults depending | 3265 | |
| 3188 | * on available memory. | 3266 | cnt = tcp_hashinfo.ehash_mask + 1; |
| 3189 | */ | 3267 | |
| 3190 | for (order = 0; ((1 << order) << PAGE_SHIFT) < | 3268 | tcp_death_row.sysctl_max_tw_buckets = cnt / 2; |
| 3191 | (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); | 3269 | sysctl_tcp_max_orphans = cnt / 2; |
| 3192 | order++) | 3270 | sysctl_max_syn_backlog = max(128, cnt / 256); |
| 3193 | ; | ||
| 3194 | if (order >= 4) { | ||
| 3195 | tcp_death_row.sysctl_max_tw_buckets = 180000; | ||
| 3196 | sysctl_tcp_max_orphans = 4096 << (order - 4); | ||
| 3197 | sysctl_max_syn_backlog = 1024; | ||
| 3198 | } else if (order < 3) { | ||
| 3199 | tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); | ||
| 3200 | sysctl_tcp_max_orphans >>= (3 - order); | ||
| 3201 | sysctl_max_syn_backlog = 128; | ||
| 3202 | } | ||
| 3203 | 3271 | ||
| 3204 | /* Set the pressure threshold to be a fraction of global memory that | 3272 | /* Set the pressure threshold to be a fraction of global memory that |
| 3205 | * is up to 1/2 at 256 MB, decreasing toward zero with the amount of | 3273 | * is up to 1/2 at 256 MB, decreasing toward zero with the amount of |
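The replaced heuristic above now derives all three defaults directly from the established-hash size. A standalone demo of the arithmetic:

#include <stdio.h>

/* cnt corresponds to tcp_hashinfo.ehash_mask + 1, the ehash bucket count. */
static void show_defaults(unsigned int cnt)
{
	unsigned int backlog = cnt / 256 < 128 ? 128 : cnt / 256;

	printf("ehash=%u tw_buckets=%u max_orphans=%u syn_backlog=%u\n",
	       cnt, cnt / 2, cnt / 2, backlog);
}

int main(void)
{
	show_defaults(8192);	/* small machine */
	show_defaults(524288);	/* large server */
	return 0;
}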
| @@ -3240,16 +3308,3 @@ void __init tcp_init(void) | |||
| 3240 | tcp_secret_retiring = &tcp_secret_two; | 3308 | tcp_secret_retiring = &tcp_secret_two; |
| 3241 | tcp_secret_secondary = &tcp_secret_two; | 3309 | tcp_secret_secondary = &tcp_secret_two; |
| 3242 | } | 3310 | } |
| 3243 | |||
| 3244 | EXPORT_SYMBOL(tcp_close); | ||
| 3245 | EXPORT_SYMBOL(tcp_disconnect); | ||
| 3246 | EXPORT_SYMBOL(tcp_getsockopt); | ||
| 3247 | EXPORT_SYMBOL(tcp_ioctl); | ||
| 3248 | EXPORT_SYMBOL(tcp_poll); | ||
| 3249 | EXPORT_SYMBOL(tcp_read_sock); | ||
| 3250 | EXPORT_SYMBOL(tcp_recvmsg); | ||
| 3251 | EXPORT_SYMBOL(tcp_sendmsg); | ||
| 3252 | EXPORT_SYMBOL(tcp_splice_read); | ||
| 3253 | EXPORT_SYMBOL(tcp_sendpage); | ||
| 3254 | EXPORT_SYMBOL(tcp_setsockopt); | ||
| 3255 | EXPORT_SYMBOL(tcp_shutdown); | ||
