Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--	net/ipv4/tcp.c	115
1 file changed, 87 insertions(+), 28 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fc54a48fde1e..0b491bf03db4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,8 +5,6 @@
  *
  * Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -255,11 +253,14 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/skbuff.h>
+#include <linux/scatterlist.h>
 #include <linux/splice.h>
 #include <linux/net.h>
 #include <linux/socket.h>
 #include <linux/random.h>
 #include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
 #include <linux/cache.h>
 #include <linux/err.h>
 #include <linux/crypto.h>
@@ -276,8 +277,6 @@
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
-DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
-
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
 
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
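The hunk above removes the global tcp_statistics MIB: the counters move into per-network-namespace storage, which is why every statistics macro later in this diff gains a net argument derived from sock_net(sk). A minimal sketch of what the namespaced macros plausibly expand to (the struct net member names are assumptions, not shown in this diff):

	/* Assumed expansion; the mib.* field names are illustrative only. */
	#define TCP_INC_STATS(net, field) \
		SNMP_INC_STATS((net)->mib.tcp_statistics, (field))
	#define NET_INC_STATS(net, field) \
		SNMP_INC_STATS((net)->mib.net_statistics, (field))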
@@ -315,10 +314,10 @@ int tcp_memory_pressure __read_mostly;
 
 EXPORT_SYMBOL(tcp_memory_pressure);
 
-void tcp_enter_memory_pressure(void)
+void tcp_enter_memory_pressure(struct sock *sk)
 {
 	if (!tcp_memory_pressure) {
-		NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES);
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
 		tcp_memory_pressure = 1;
 	}
 }
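tcp_enter_memory_pressure() now takes the socket because it is reached through the protocol operations table, and the socket is the only way to resolve the right namespace via sock_net(). A sketch of the wiring, assuming the usual struct proto initializer in tcp_ipv4.c (only the relevant member is shown):

	/* Sketch: every protocol's handler must now take the socket, since
	 * the call site in the sk_stream_alloc_skb hunk below goes through
	 * sk->sk_prot.  Other members elided.
	 */
	struct proto tcp_prot = {
		.name			= "TCP",
		.enter_memory_pressure	= tcp_enter_memory_pressure,
		/* ... */
	};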
@@ -343,8 +342,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		return inet_csk_listen_poll(sk);
 
 	/* Socket is not locked. We are protected from async events
-	   by poll logic and correct handling of state changes
-	   made by another threads is impossible in any case.
+	 * by poll logic and correct handling of state changes
+	 * made by other threads is impossible in any case.
 	 */
 
 	mask = 0;
@@ -370,10 +369,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	 * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP
 	 * if and only if shutdown has been made in both directions.
 	 * Actually, it is interesting to look how Solaris and DUX
-	 * solve this dilemma. I would prefer, if PULLHUP were maskable,
+	 * solve this dilemma. I would prefer, if POLLHUP were maskable,
 	 * then we could set it on SND_SHUTDOWN. BTW examples given
 	 * in Stevens' books assume exactly this behaviour, it explains
-	 * why PULLHUP is incompatible with POLLOUT.	--ANK
+	 * why POLLHUP is incompatible with POLLOUT.	--ANK
 	 *
 	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
 	 * blocking on fresh not-connected or disconnected socket. --ANK
@@ -648,7 +647,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 		}
 		__kfree_skb(skb);
 	} else {
-		sk->sk_prot->enter_memory_pressure();
+		sk->sk_prot->enter_memory_pressure(sk);
 		sk_stream_moderate_sndbuf(sk);
 	}
 	return NULL;
@@ -1152,7 +1151,7 @@ static void tcp_prequeue_process(struct sock *sk)
 	struct sk_buff *skb;
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	NET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED);
+	NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
 
 	/* RX process wants to run with disabled BHs, though it is not
 	 * necessary */
@@ -1206,7 +1205,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		return -ENOTCONN;
 	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
 		if (offset < skb->len) {
-			size_t used, len;
+			int used;
+			size_t len;
 
 			len = skb->len - offset;
 			/* Stop reading if we hit a patch of urgent data */
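Splitting `size_t used, len;` matters because `used` receives the return value of the recv_actor callback, which can be a negative errno; stored in an unsigned size_t it would wrap to a huge positive count and sail past any sign check. A two-line illustration of the trap, using nothing beyond standard C conversion rules:

	size_t u = -EPIPE;	/* wraps: u becomes a huge positive value */
	int s = -EPIPE;		/* stays negative and is detectable       */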
@@ -1473,7 +1473,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			/* __ Restore normal policy in scheduler __ */
 
 			if ((chunk = len - tp->ucopy.len) != 0) {
-				NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
+				NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
 				len -= chunk;
 				copied += chunk;
 			}
@@ -1484,7 +1484,7 @@ do_prequeue:
 				tcp_prequeue_process(sk);
 
 				if ((chunk = len - tp->ucopy.len) != 0) {
-					NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+					NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
 					len -= chunk;
 					copied += chunk;
 				}
@@ -1599,7 +1599,7 @@ skip_copy:
 		tcp_prequeue_process(sk);
 
 		if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
-			NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+			NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
 			len -= chunk;
 			copied += chunk;
 		}
@@ -1666,12 +1666,12 @@ void tcp_set_state(struct sock *sk, int state)
 	switch (state) {
 	case TCP_ESTABLISHED:
 		if (oldstate != TCP_ESTABLISHED)
-			TCP_INC_STATS(TCP_MIB_CURRESTAB);
+			TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
 		break;
 
 	case TCP_CLOSE:
 		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
-			TCP_INC_STATS(TCP_MIB_ESTABRESETS);
+			TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
 
 		sk->sk_prot->unhash(sk);
 		if (inet_csk(sk)->icsk_bind_hash &&
@@ -1680,7 +1680,7 @@ void tcp_set_state(struct sock *sk, int state)
 		/* fall through */
 	default:
 		if (oldstate==TCP_ESTABLISHED)
-			TCP_DEC_STATS(TCP_MIB_CURRESTAB);
+			TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
 	}
 
 	/* Change state AFTER socket is unhashed to avoid closed
@@ -1791,13 +1791,13 @@ void tcp_close(struct sock *sk, long timeout)
 	 */
 	if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
-		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
+		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
 		tcp_set_state(sk, TCP_CLOSE);
 		tcp_send_active_reset(sk, GFP_KERNEL);
 	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
 		/* Check zero linger _after_ checking for unread data. */
 		sk->sk_prot->disconnect(sk, 0);
-		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
+		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
 	} else if (tcp_close_state(sk)) {
 		/* We FIN if the application ate all the data before
 		 * zapping the connection.
@@ -1869,7 +1869,8 @@ adjudge_to_death:
 		if (tp->linger2 < 0) {
 			tcp_set_state(sk, TCP_CLOSE);
 			tcp_send_active_reset(sk, GFP_ATOMIC);
-			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
+			NET_INC_STATS_BH(sock_net(sk),
+					LINUX_MIB_TCPABORTONLINGER);
 		} else {
 			const int tmo = tcp_fin_time(sk);
 
@@ -1891,7 +1892,8 @@ adjudge_to_death:
 			       "sockets\n");
 			tcp_set_state(sk, TCP_CLOSE);
 			tcp_send_active_reset(sk, GFP_ATOMIC);
-			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPABORTONMEMORY);
 		}
 	}
 
@@ -2586,12 +2588,69 @@ void __tcp_put_md5sig_pool(void)
 }
 
 EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+
+int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
+			struct tcphdr *th)
+{
+	struct scatterlist sg;
+	int err;
+
+	__sum16 old_checksum = th->check;
+	th->check = 0;
+	/* options aren't included in the hash */
+	sg_init_one(&sg, th, sizeof(struct tcphdr));
+	err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr));
+	th->check = old_checksum;
+	return err;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_header);
+
+int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
+			  struct sk_buff *skb, unsigned header_len)
+{
+	struct scatterlist sg;
+	const struct tcphdr *tp = tcp_hdr(skb);
+	struct hash_desc *desc = &hp->md5_desc;
+	unsigned i;
+	const unsigned head_data_len = skb_headlen(skb) > header_len ?
+				       skb_headlen(skb) - header_len : 0;
+	const struct skb_shared_info *shi = skb_shinfo(skb);
+
+	sg_init_table(&sg, 1);
+
+	sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
+	if (crypto_hash_update(desc, &sg, head_data_len))
+		return 1;
+
+	for (i = 0; i < shi->nr_frags; ++i) {
+		const struct skb_frag_struct *f = &shi->frags[i];
+		sg_set_page(&sg, f->page, f->size, f->page_offset);
+		if (crypto_hash_update(desc, &sg, f->size))
+			return 1;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_skb_data);
+
+int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
+{
+	struct scatterlist sg;
+
+	sg_init_one(&sg, key->key, key->keylen);
+	return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_key);
+
 #endif
 
 void tcp_done(struct sock *sk)
 {
 	if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
-		TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
 	tcp_set_state(sk, TCP_CLOSE);
 	tcp_clear_xmit_timers(sk);
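The three helpers added above (tcp_md5_hash_header, tcp_md5_hash_skb_data, tcp_md5_hash_key) are building blocks meant to be composed by the address-family code when signing a segment. A hedged sketch of such a caller follows; the pool accessors and crypto_hash_* calls exist in this era's kernel, but the function name and the commented pseudo-header step are assumptions, since each address family supplies its own:

	static int example_md5_hash_skb(u8 *md5_hash, struct tcp_md5sig_key *key,
					struct sk_buff *skb)
	{
		struct tcp_md5sig_pool *hp;
		struct hash_desc *desc;
		struct tcphdr *th = tcp_hdr(skb);

		hp = tcp_get_md5sig_pool();
		if (!hp)
			return 1;
		desc = &hp->md5_desc;

		if (crypto_hash_init(desc))
			goto clear_hash;
		/* an address-family pseudo-header step would go here */
		if (tcp_md5_hash_header(hp, th))	/* checksum zeroed inside */
			goto clear_hash;
		if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
			goto clear_hash;
		if (tcp_md5_hash_key(hp, key))		/* the key is hashed last */
			goto clear_hash;
		if (crypto_hash_final(desc, md5_hash))
			goto clear_hash;

		tcp_put_md5sig_pool();
		return 0;

	clear_hash:
		tcp_put_md5sig_pool();
		memset(md5_hash, 0, 16);	/* MD5 digest is 16 bytes */
		return 1;
	}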
@@ -2620,7 +2679,7 @@ __setup("thash_entries=", set_thash_entries);
 void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
-	unsigned long limit;
+	unsigned long nr_pages, limit;
 	int order, i, max_share;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -2689,8 +2748,9 @@ void __init tcp_init(void)
 	 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
 	 * memory, with a floor of 128 pages.
 	 */
-	limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
-	limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+	nr_pages = totalram_pages - totalhigh_pages;
+	limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+	limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
 	limit = max(limit, 128UL);
 	sysctl_tcp_mem[0] = limit / 4 * 3;
 	sysctl_tcp_mem[1] = limit;
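The sizing now starts from low memory only (totalram_pages - totalhigh_pages), since socket buffers are allocated from low memory; the old nr_all_pages figure overstated the budget on 32-bit machines with large highmem. Worked through for a machine with 4 KiB pages (PAGE_SHIFT == 12) and 256 MB of low memory:

	/* nr_pages = 256 MB / 4 KiB          = 65536 pages
	 * limit = min(65536, 1UL << 16) >> 8 = 256   (low memory in MB,
	 *                                             capped at 256)
	 * limit = (256 * (65536 >> 8)) >> 1  = 32768 pages = 128 MB
	 * limit = max(32768, 128UL)          = 32768 pages
	 * sysctl_tcp_mem[0] = 24576, sysctl_tcp_mem[1] = 32768:
	 * pressure begins at half of memory, matching the comment above.
	 */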
@@ -2727,4 +2787,3 @@ EXPORT_SYMBOL(tcp_splice_read);
 EXPORT_SYMBOL(tcp_sendpage);
 EXPORT_SYMBOL(tcp_setsockopt);
 EXPORT_SYMBOL(tcp_shutdown);
-EXPORT_SYMBOL(tcp_statistics);