Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--	net/ipv4/tcp.c	115
1 files changed, 87 insertions, 28 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fc54a48fde1e..0b491bf03db4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,8 +5,6 @@
  *
  * Implementation of the Transmission Control Protocol(TCP).
  *
- * Version: $Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -255,11 +253,14 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/skbuff.h>
+#include <linux/scatterlist.h>
 #include <linux/splice.h>
 #include <linux/net.h>
 #include <linux/socket.h>
 #include <linux/random.h>
 #include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
 #include <linux/cache.h>
 #include <linux/err.h>
 #include <linux/crypto.h>
@@ -276,8 +277,6 @@
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
-DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
-
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
 
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -315,10 +314,10 @@ int tcp_memory_pressure __read_mostly;
 
 EXPORT_SYMBOL(tcp_memory_pressure);
 
-void tcp_enter_memory_pressure(void)
+void tcp_enter_memory_pressure(struct sock *sk)
 {
 	if (!tcp_memory_pressure) {
-		NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES);
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
 		tcp_memory_pressure = 1;
 	}
 }
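The new struct sock argument is what lets each counter bump resolve the right network namespace: the old code incremented a single global MIB array, while the per-netns world keeps one copy per struct net and derives it from the socket (the DEFINE_SNMP_STAT removal above is the other half of that move). A minimal userspace sketch of the idea; the struct layouts and sock_net() body here are illustrative assumptions, not the kernel's real definitions:

/* Sketch only: models the global -> per-netns counter move.
 * Kernel names are mimicked, but these definitions are assumptions. */
#include <stdio.h>

enum { LINUX_MIB_TCPMEMORYPRESSURES, LINUX_MIB_MAX };

struct net  { unsigned long mib[LINUX_MIB_MAX]; };  /* one copy per namespace */
struct sock { struct net *sk_net; };

static struct net *sock_net(const struct sock *sk) { return sk->sk_net; }

/* After the patch: the socket says whose counters to bump. */
static void enter_memory_pressure(struct sock *sk)
{
	sock_net(sk)->mib[LINUX_MIB_TCPMEMORYPRESSURES]++;
}

int main(void)
{
	struct net init_net = { {0} }, other_net = { {0} };
	struct sock a = { &init_net }, b = { &other_net }, c = { &other_net };

	enter_memory_pressure(&a);
	enter_memory_pressure(&b);
	enter_memory_pressure(&c);
	printf("init_net=%lu other_net=%lu\n",
	       init_net.mib[LINUX_MIB_TCPMEMORYPRESSURES],
	       other_net.mib[LINUX_MIB_TCPMEMORYPRESSURES]);  /* 1 and 2 */
	return 0;
}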
@@ -343,8 +342,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		return inet_csk_listen_poll(sk);
 
 	/* Socket is not locked. We are protected from async events
-	   by poll logic and correct handling of state changes
-	   made by another threads is impossible in any case.
+	 * by poll logic and correct handling of state changes
+	 * made by other threads is impossible in any case.
 	 */
 
 	mask = 0;
@@ -370,10 +369,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	 * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP
 	 * if and only if shutdown has been made in both directions.
 	 * Actually, it is interesting to look how Solaris and DUX
-	 * solve this dilemma. I would prefer, if PULLHUP were maskable,
+	 * solve this dilemma. I would prefer, if POLLHUP were maskable,
 	 * then we could set it on SND_SHUTDOWN. BTW examples given
 	 * in Stevens' books assume exactly this behaviour, it explains
-	 * why PULLHUP is incompatible with POLLOUT.	--ANK
+	 * why POLLHUP is incompatible with POLLOUT.	--ANK
 	 *
 	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
 	 * blocking on fresh not-connected or disconnected socket. --ANK
@@ -648,7 +647,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 		}
 		__kfree_skb(skb);
 	} else {
-		sk->sk_prot->enter_memory_pressure();
+		sk->sk_prot->enter_memory_pressure(sk);
 		sk_stream_moderate_sndbuf(sk);
 	}
 	return NULL;
@@ -1152,7 +1151,7 @@ static void tcp_prequeue_process(struct sock *sk)
 	struct sk_buff *skb;
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	NET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED);
+	NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
 
 	/* RX process wants to run with disabled BHs, though it is not
 	 * necessary */
@@ -1206,7 +1205,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		return -ENOTCONN;
 	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
 		if (offset < skb->len) {
-			size_t used, len;
+			int used;
+			size_t len;
 
 			len = skb->len - offset;
 			/* Stop reading if we hit a patch of urgent data */
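The used/len split is more than style: the recv_actor callback whose result lands in used returns an int and can be negative on error, and a negative value stored in a size_t wraps to a huge unsigned number that sails past any "used <= len" style check. A standalone illustration of the pitfall (the failing callback is a made-up stand-in):

/* Demonstrates why a possibly-negative return value must not be
 * stored in an unsigned size_t.  Standalone sketch, not kernel code. */
#include <stdio.h>

static int recv_actor_stub(void) { return -11; /* e.g. -EAGAIN */ }

int main(void)
{
	size_t len = 100;

	size_t used_bad = recv_actor_stub();  /* -11 wraps to SIZE_MAX - 10 */
	if (used_bad > len)
		printf("error lost: used looks like %zu\n", used_bad);

	int used_good = recv_actor_stub();    /* keeps its sign */
	if (used_good < 0)
		printf("error seen: %d\n", used_good);
	return 0;
}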
@@ -1473,7 +1473,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			/* __ Restore normal policy in scheduler __ */
 
 			if ((chunk = len - tp->ucopy.len) != 0) {
-				NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
+				NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
 				len -= chunk;
 				copied += chunk;
 			}
@@ -1484,7 +1484,7 @@ do_prequeue:
 				tcp_prequeue_process(sk);
 
 				if ((chunk = len - tp->ucopy.len) != 0) {
-					NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+					NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
 					len -= chunk;
 					copied += chunk;
 				}
@@ -1599,7 +1599,7 @@ skip_copy:
 		tcp_prequeue_process(sk);
 
 		if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
-			NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+			NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
 			len -= chunk;
 			copied += chunk;
 		}
@@ -1666,12 +1666,12 @@ void tcp_set_state(struct sock *sk, int state)
 	switch (state) {
 	case TCP_ESTABLISHED:
 		if (oldstate != TCP_ESTABLISHED)
-			TCP_INC_STATS(TCP_MIB_CURRESTAB);
+			TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
 		break;
 
 	case TCP_CLOSE:
 		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
-			TCP_INC_STATS(TCP_MIB_ESTABRESETS);
+			TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
 
 		sk->sk_prot->unhash(sk);
 		if (inet_csk(sk)->icsk_bind_hash &&
@@ -1680,7 +1680,7 @@ void tcp_set_state(struct sock *sk, int state)
 		/* fall through */
 	default:
 		if (oldstate==TCP_ESTABLISHED)
-			TCP_DEC_STATS(TCP_MIB_CURRESTAB);
+			TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
 	}
 
 	/* Change state AFTER socket is unhashed to avoid closed
@@ -1791,13 +1791,13 @@ void tcp_close(struct sock *sk, long timeout)
 	 */
 	if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
-		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
+		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
 		tcp_set_state(sk, TCP_CLOSE);
 		tcp_send_active_reset(sk, GFP_KERNEL);
 	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
 		/* Check zero linger _after_ checking for unread data. */
 		sk->sk_prot->disconnect(sk, 0);
-		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
+		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
 	} else if (tcp_close_state(sk)) {
 		/* We FIN if the application ate all the data before
 		 * zapping the connection.
@@ -1869,7 +1869,8 @@ adjudge_to_death:
 	if (tp->linger2 < 0) {
 		tcp_set_state(sk, TCP_CLOSE);
 		tcp_send_active_reset(sk, GFP_ATOMIC);
-		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
+		NET_INC_STATS_BH(sock_net(sk),
+				LINUX_MIB_TCPABORTONLINGER);
 	} else {
 		const int tmo = tcp_fin_time(sk);
 
@@ -1891,7 +1892,8 @@ adjudge_to_death:
 				       "sockets\n");
 			tcp_set_state(sk, TCP_CLOSE);
 			tcp_send_active_reset(sk, GFP_ATOMIC);
-			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+			NET_INC_STATS_BH(sock_net(sk),
+					LINUX_MIB_TCPABORTONMEMORY);
 		}
 	}
 
@@ -2586,12 +2588,69 @@ void __tcp_put_md5sig_pool(void)
 }
 
 EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+
+int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
+			struct tcphdr *th)
+{
+	struct scatterlist sg;
+	int err;
+
+	__sum16 old_checksum = th->check;
+	th->check = 0;
+	/* options aren't included in the hash */
+	sg_init_one(&sg, th, sizeof(struct tcphdr));
+	err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr));
+	th->check = old_checksum;
+	return err;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_header);
+
+int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
+			  struct sk_buff *skb, unsigned header_len)
+{
+	struct scatterlist sg;
+	const struct tcphdr *tp = tcp_hdr(skb);
+	struct hash_desc *desc = &hp->md5_desc;
+	unsigned i;
+	const unsigned head_data_len = skb_headlen(skb) > header_len ?
+				       skb_headlen(skb) - header_len : 0;
+	const struct skb_shared_info *shi = skb_shinfo(skb);
+
+	sg_init_table(&sg, 1);
+
+	sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
+	if (crypto_hash_update(desc, &sg, head_data_len))
+		return 1;
+
+	for (i = 0; i < shi->nr_frags; ++i) {
+		const struct skb_frag_struct *f = &shi->frags[i];
+		sg_set_page(&sg, f->page, f->size, f->page_offset);
+		if (crypto_hash_update(desc, &sg, f->size))
+			return 1;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_skb_data);
+
+int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
+{
+	struct scatterlist sg;
+
+	sg_init_one(&sg, key->key, key->keylen);
+	return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_key);
+
 #endif
 
 void tcp_done(struct sock *sk)
 {
 	if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
-		TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
 	tcp_set_state(sk, TCP_CLOSE);
 	tcp_clear_xmit_timers(sk);
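The three tcp_md5_hash_* helpers added in the hunk above are meant to be chained by a family-specific signature routine between crypto_hash_init() and crypto_hash_final() on the per-CPU pool. A sketch of such a caller, modeled on the IPv4 path; the pseudo-header step is family-specific and only hinted at in a comment, and details like the exact error handling are assumptions compressed from that context (this compiles only in-tree):

/* Hedged sketch of a caller chaining the new helpers; modeled on the
 * IPv4 signature path, with the family-specific pseudo-header step
 * elided.  Not the literal kernel function. */
static int tcp_md5_hash_skb_sketch(u8 *md5_hash, struct tcp_md5sig_key *key,
				   struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	struct tcphdr *th = tcp_hdr(skb);

	hp = tcp_get_md5sig_pool();		/* grab the per-CPU pool */
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	/* ... family-specific pseudo-header hashing goes here ... */
	if (tcp_md5_hash_header(hp, th))	/* header, checksum zeroed */
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))	/* payload */
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))		/* shared secret last */
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);		/* MD5 digest is 16 bytes */
	return 1;
}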
@@ -2620,7 +2679,7 @@ __setup("thash_entries=", set_thash_entries);
 void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
-	unsigned long limit;
+	unsigned long nr_pages, limit;
 	int order, i, max_share;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -2689,8 +2748,9 @@ void __init tcp_init(void)
 	 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
 	 * memory, with a floor of 128 pages.
 	 */
-	limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
-	limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+	nr_pages = totalram_pages - totalhigh_pages;
+	limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+	limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
 	limit = max(limit, 128UL);
 	sysctl_tcp_mem[0] = limit / 4 * 3;
 	sysctl_tcp_mem[1] = limit;
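A worked example of the new sizing, assuming 4 KiB pages (PAGE_SHIFT = 12) and 1 GiB of non-highmem RAM: nr_pages is 262144; the first line clamps it at 65536 pages (256 MB) and shifts down to 256; the second yields (256 * 1024) >> 1 = 131072 pages, so tcp_mem[0] and tcp_mem[1] come out as 98304 and 131072 pages. The same arithmetic as a standalone check:

/* Mirrors the tcp_init() sizing above for one assumed configuration:
 * PAGE_SHIFT = 12 (4 KiB pages), 1 GiB of non-highmem RAM. */
#include <stdio.h>

#define PAGE_SHIFT 12UL

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long nr_pages = 1UL << (30 - PAGE_SHIFT);  /* 262144 */
	unsigned long limit;

	limit = min_ul(nr_pages, 1UL << (28 - PAGE_SHIFT)) >> (20 - PAGE_SHIFT);
	limit = (limit * (nr_pages >> (20 - PAGE_SHIFT))) >> (PAGE_SHIFT - 11);
	if (limit < 128UL)
		limit = 128UL;

	/* prints: tcp_mem[0]=98304 tcp_mem[1]=131072 (in pages) */
	printf("tcp_mem[0]=%lu tcp_mem[1]=%lu (in pages)\n",
	       limit / 4 * 3, limit);
	return 0;
}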
@@ -2727,4 +2787,3 @@ EXPORT_SYMBOL(tcp_splice_read);
 EXPORT_SYMBOL(tcp_sendpage);
 EXPORT_SYMBOL(tcp_setsockopt);
 EXPORT_SYMBOL(tcp_shutdown);
-EXPORT_SYMBOL(tcp_statistics);