aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorNeil Horman <nhorman@tuxdriver.com>2007-08-15 19:07:44 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2007-10-10 19:48:09 -0400
commit4d93df0abd50b9c9e2d4561439a1a1d21ec5e68f (patch)
tree47e5bde0c37ed7ce68032ffa9110f252533fc710 /net
parent13c99b248f06e0b71d925f162d8e3b0084886a21 (diff)
[SCTP]: Rewrite of sctp buffer management code
This patch introduces autotuning to the sctp buffer management code, similar to TCP. The buffer space can be grown if the advertised receive window still has room. This might happen if small message sizes are used, which is common in telecom environments. New tunables are introduced that provide limits to buffer growth, and memory pressure is entered if too much buffer space is used. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/sctp/endpointola.c1
-rw-r--r--net/sctp/protocol.c32
-rw-r--r--net/sctp/sm_statefuns.c74
-rw-r--r--net/sctp/socket.c69
-rw-r--r--net/sctp/sysctl.c33
-rw-r--r--net/sctp/ulpevent.c18
-rw-r--r--net/sctp/ulpqueue.c1
7 files changed, 160 insertions, 68 deletions
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 8f485a0d14bd..22371185efb6 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -102,6 +102,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
102 102
103 /* Use SCTP specific send buffer space queues. */ 103 /* Use SCTP specific send buffer space queues. */
104 ep->sndbuf_policy = sctp_sndbuf_policy; 104 ep->sndbuf_policy = sctp_sndbuf_policy;
105
105 sk->sk_write_space = sctp_write_space; 106 sk->sk_write_space = sctp_write_space;
106 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 107 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
107 108
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 3d036cdfae41..957c118a6068 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -51,6 +51,7 @@
51#include <linux/netdevice.h> 51#include <linux/netdevice.h>
52#include <linux/inetdevice.h> 52#include <linux/inetdevice.h>
53#include <linux/seq_file.h> 53#include <linux/seq_file.h>
54#include <linux/bootmem.h>
54#include <net/protocol.h> 55#include <net/protocol.h>
55#include <net/ip.h> 56#include <net/ip.h>
56#include <net/ipv6.h> 57#include <net/ipv6.h>
@@ -82,6 +83,10 @@ static struct sctp_af *sctp_af_v6_specific;
82struct kmem_cache *sctp_chunk_cachep __read_mostly; 83struct kmem_cache *sctp_chunk_cachep __read_mostly;
83struct kmem_cache *sctp_bucket_cachep __read_mostly; 84struct kmem_cache *sctp_bucket_cachep __read_mostly;
84 85
86extern int sysctl_sctp_mem[3];
87extern int sysctl_sctp_rmem[3];
88extern int sysctl_sctp_wmem[3];
89
85/* Return the address of the control sock. */ 90/* Return the address of the control sock. */
86struct sock *sctp_get_ctl_sock(void) 91struct sock *sctp_get_ctl_sock(void)
87{ 92{
@@ -987,6 +992,8 @@ SCTP_STATIC __init int sctp_init(void)
987 int i; 992 int i;
988 int status = -EINVAL; 993 int status = -EINVAL;
989 unsigned long goal; 994 unsigned long goal;
995 unsigned long limit;
996 int max_share;
990 int order; 997 int order;
991 998
992 /* SCTP_DEBUG sanity check. */ 999 /* SCTP_DEBUG sanity check. */
@@ -1077,6 +1084,31 @@ SCTP_STATIC __init int sctp_init(void)
1077 /* Initialize handle used for association ids. */ 1084 /* Initialize handle used for association ids. */
1078 idr_init(&sctp_assocs_id); 1085 idr_init(&sctp_assocs_id);
1079 1086
1087 /* Set the pressure threshold to be a fraction of global memory that
1088 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
1089 * memory, with a floor of 128 pages.
1090 * Note this initializes the data in sctpv6_prot too.
1091 * Unabashedly stolen from tcp_init
1092 */
1093 limit = min(num_physpages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
1094 limit = (limit * (num_physpages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
1095 limit = max(limit, 128UL);
1096 sysctl_sctp_mem[0] = limit / 4 * 3;
1097 sysctl_sctp_mem[1] = limit;
1098 sysctl_sctp_mem[2] = sysctl_sctp_mem[0] * 2;
1099
1100 /* Set per-socket limits to no more than 1/128 the pressure threshold*/
1101 limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7);
1102 max_share = min(4UL*1024*1024, limit);
1103
1104 sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */
1105 sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
1106 sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
1107
1108 sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM;
1109 sysctl_sctp_wmem[1] = 16*1024;
1110 sysctl_sctp_wmem[2] = max(64*1024, max_share);
1111
1080 /* Size and allocate the association hash table. 1112 /* Size and allocate the association hash table.
1081 * The methodology is similar to that of the tcp hash tables. 1113 * The methodology is similar to that of the tcp hash tables.
1082 */ 1114 */
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index a583d67cab63..ec0328b1cdb1 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5428,10 +5428,8 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5428 sctp_verb_t deliver; 5428 sctp_verb_t deliver;
5429 int tmp; 5429 int tmp;
5430 __u32 tsn; 5430 __u32 tsn;
5431 int account_value;
5432 struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; 5431 struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
5433 struct sock *sk = asoc->base.sk; 5432 struct sock *sk = asoc->base.sk;
5434 int rcvbuf_over = 0;
5435 5433
5436 data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data; 5434 data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
5437 skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); 5435 skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
@@ -5441,48 +5439,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5441 5439
5442 /* ASSERT: Now skb->data is really the user data. */ 5440 /* ASSERT: Now skb->data is really the user data. */
5443 5441
5444 /*
5445 * If we are established, and we have used up our receive buffer
5446 * memory, think about droping the frame.
5447 * Note that we have an opportunity to improve performance here.
5448 * If we accept one chunk from an skbuff, we have to keep all the
5449 * memory of that skbuff around until the chunk is read into user
5450 * space. Therefore, once we accept 1 chunk we may as well accept all
5451 * remaining chunks in the skbuff. The data_accepted flag helps us do
5452 * that.
5453 */
5454 if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) {
5455 /*
5456 * If the receive buffer policy is 1, then each
5457 * association can allocate up to sk_rcvbuf bytes
5458 * otherwise, all the associations in aggregate
5459 * may allocate up to sk_rcvbuf bytes
5460 */
5461 if (asoc->ep->rcvbuf_policy)
5462 account_value = atomic_read(&asoc->rmem_alloc);
5463 else
5464 account_value = atomic_read(&sk->sk_rmem_alloc);
5465 if (account_value > sk->sk_rcvbuf) {
5466 /*
5467 * We need to make forward progress, even when we are
5468 * under memory pressure, so we always allow the
5469 * next tsn after the ctsn ack point to be accepted.
5470 * This lets us avoid deadlocks in which we have to
5471 * drop frames that would otherwise let us drain the
5472 * receive queue.
5473 */
5474 if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn)
5475 return SCTP_IERROR_IGNORE_TSN;
5476
5477 /*
5478 * We're going to accept the frame but we should renege
5479 * to make space for it. This will send us down that
5480 * path later in this function.
5481 */
5482 rcvbuf_over = 1;
5483 }
5484 }
5485
5486 /* Process ECN based congestion. 5442 /* Process ECN based congestion.
5487 * 5443 *
5488 * Since the chunk structure is reused for all chunks within 5444 * Since the chunk structure is reused for all chunks within
@@ -5542,18 +5498,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5542 * seems a bit troublesome in that frag_point varies based on 5498 * seems a bit troublesome in that frag_point varies based on
5543 * PMTU. In cases, such as loopback, this might be a rather 5499 * PMTU. In cases, such as loopback, this might be a rather
5544 * large spill over. 5500 * large spill over.
5545 * NOTE: If we have a full receive buffer here, we only renege if 5501 */
5546 * our receiver can still make progress without the tsn being 5502 if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
5547 * received. We do this because in the event that the associations 5503 (datalen > asoc->rwnd + asoc->frag_point))) {
5548 * receive queue is empty we are filling a leading gap, and since
5549 * reneging moves the gap to the end of the tsn stream, we are likely
5550 * to stall again very shortly. Avoiding the renege when we fill a
5551 * leading gap is a good heuristic for avoiding such steady state
5552 * stalls.
5553 */
5554 if (!asoc->rwnd || asoc->rwnd_over ||
5555 (datalen > asoc->rwnd + asoc->frag_point) ||
5556 (rcvbuf_over && (!skb_queue_len(&sk->sk_receive_queue)))) {
5557 5504
5558 /* If this is the next TSN, consider reneging to make 5505 /* If this is the next TSN, consider reneging to make
5559 * room. Note: Playing nice with a confused sender. A 5506 * room. Note: Playing nice with a confused sender. A
@@ -5574,6 +5521,21 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5574 } 5521 }
5575 5522
5576 /* 5523 /*
5524 * Also try to renege to limit our memory usage in the event that
5525 * we are under memory pressure
5526 * If we can't renege, don't worry about it, the sk_stream_rmem_schedule
5527 * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our
5528 * memory usage too much
5529 */
5530 if (*sk->sk_prot_creator->memory_pressure) {
5531 if (sctp_tsnmap_has_gap(map) &&
5532 (sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
5533 SCTP_DEBUG_PRINTK("Under Pressure! Reneging for tsn:%u\n", tsn);
5534 deliver = SCTP_CMD_RENEGE;
5535 }
5536 }
5537
5538 /*
5577 * Section 3.3.10.9 No User Data (9) 5539 * Section 3.3.10.9 No User Data (9)
5578 * 5540 *
5579 * Cause of error 5541 * Cause of error
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 772fbfb4bfda..b9952425c79a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -107,23 +107,42 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
107 struct sctp_association *, sctp_socket_type_t); 107 struct sctp_association *, sctp_socket_type_t);
108static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; 108static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
109 109
110extern struct kmem_cache *sctp_bucket_cachep;
111extern int sysctl_sctp_mem[3];
112extern int sysctl_sctp_rmem[3];
113extern int sysctl_sctp_wmem[3];
114
115int sctp_memory_pressure;
116atomic_t sctp_memory_allocated;
117atomic_t sctp_sockets_allocated;
118
119static void sctp_enter_memory_pressure(void)
120{
121 sctp_memory_pressure = 1;
122}
123
124
110/* Get the sndbuf space available at the time on the association. */ 125/* Get the sndbuf space available at the time on the association. */
111static inline int sctp_wspace(struct sctp_association *asoc) 126static inline int sctp_wspace(struct sctp_association *asoc)
112{ 127{
113 struct sock *sk = asoc->base.sk; 128 int amt;
114 int amt = 0;
115 129
116 if (asoc->ep->sndbuf_policy) { 130 if (asoc->ep->sndbuf_policy)
117 /* make sure that no association uses more than sk_sndbuf */ 131 amt = asoc->sndbuf_used;
118 amt = sk->sk_sndbuf - asoc->sndbuf_used; 132 else
133 amt = atomic_read(&asoc->base.sk->sk_wmem_alloc);
134
135 if (amt >= asoc->base.sk->sk_sndbuf) {
136 if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK)
137 amt = 0;
138 else {
139 amt = sk_stream_wspace(asoc->base.sk);
140 if (amt < 0)
141 amt = 0;
142 }
119 } else { 143 } else {
120 /* do socket level accounting */ 144 amt = asoc->base.sk->sk_sndbuf - amt;
121 amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
122 } 145 }
123
124 if (amt < 0)
125 amt = 0;
126
127 return amt; 146 return amt;
128} 147}
129 148
@@ -155,6 +174,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
155 sizeof(struct sctp_chunk); 174 sizeof(struct sctp_chunk);
156 175
157 atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); 176 atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
177 sk_charge_skb(sk, chunk->skb);
158} 178}
159 179
160/* Verify that this is a valid address. */ 180/* Verify that this is a valid address. */
@@ -3293,6 +3313,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3293 sp->hmac = NULL; 3313 sp->hmac = NULL;
3294 3314
3295 SCTP_DBG_OBJCNT_INC(sock); 3315 SCTP_DBG_OBJCNT_INC(sock);
3316 atomic_inc(&sctp_sockets_allocated);
3296 return 0; 3317 return 0;
3297} 3318}
3298 3319
@@ -3306,7 +3327,7 @@ SCTP_STATIC int sctp_destroy_sock(struct sock *sk)
3306 /* Release our hold on the endpoint. */ 3327 /* Release our hold on the endpoint. */
3307 ep = sctp_sk(sk)->ep; 3328 ep = sctp_sk(sk)->ep;
3308 sctp_endpoint_free(ep); 3329 sctp_endpoint_free(ep);
3309 3330 atomic_dec(&sctp_sockets_allocated);
3310 return 0; 3331 return 0;
3311} 3332}
3312 3333
@@ -5720,6 +5741,12 @@ static void sctp_wfree(struct sk_buff *skb)
5720 5741
5721 atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); 5742 atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
5722 5743
5744 /*
5745 * This undoes what is done via sk_charge_skb
5746 */
5747 sk->sk_wmem_queued -= skb->truesize;
5748 sk->sk_forward_alloc += skb->truesize;
5749
5723 sock_wfree(skb); 5750 sock_wfree(skb);
5724 __sctp_write_space(asoc); 5751 __sctp_write_space(asoc);
5725 5752
@@ -5737,6 +5764,11 @@ void sctp_sock_rfree(struct sk_buff *skb)
5737 struct sctp_ulpevent *event = sctp_skb2event(skb); 5764 struct sctp_ulpevent *event = sctp_skb2event(skb);
5738 5765
5739 atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); 5766 atomic_sub(event->rmem_len, &sk->sk_rmem_alloc);
5767
5768 /*
5769 * Mimic the behavior of sk_stream_rfree
5770 */
5771 sk->sk_forward_alloc += event->rmem_len;
5740} 5772}
5741 5773
5742 5774
@@ -6126,6 +6158,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
6126 sctp_release_sock(newsk); 6158 sctp_release_sock(newsk);
6127} 6159}
6128 6160
6161
6129/* This proto struct describes the ULP interface for SCTP. */ 6162/* This proto struct describes the ULP interface for SCTP. */
6130struct proto sctp_prot = { 6163struct proto sctp_prot = {
6131 .name = "SCTP", 6164 .name = "SCTP",
@@ -6148,6 +6181,12 @@ struct proto sctp_prot = {
6148 .unhash = sctp_unhash, 6181 .unhash = sctp_unhash,
6149 .get_port = sctp_get_port, 6182 .get_port = sctp_get_port,
6150 .obj_size = sizeof(struct sctp_sock), 6183 .obj_size = sizeof(struct sctp_sock),
6184 .sysctl_mem = sysctl_sctp_mem,
6185 .sysctl_rmem = sysctl_sctp_rmem,
6186 .sysctl_wmem = sysctl_sctp_wmem,
6187 .memory_pressure = &sctp_memory_pressure,
6188 .enter_memory_pressure = sctp_enter_memory_pressure,
6189 .memory_allocated = &sctp_memory_allocated,
6151}; 6190};
6152 6191
6153#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 6192#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -6172,5 +6211,11 @@ struct proto sctpv6_prot = {
6172 .unhash = sctp_unhash, 6211 .unhash = sctp_unhash,
6173 .get_port = sctp_get_port, 6212 .get_port = sctp_get_port,
6174 .obj_size = sizeof(struct sctp6_sock), 6213 .obj_size = sizeof(struct sctp6_sock),
6214 .sysctl_mem = sysctl_sctp_mem,
6215 .sysctl_rmem = sysctl_sctp_rmem,
6216 .sysctl_wmem = sysctl_sctp_wmem,
6217 .memory_pressure = &sctp_memory_pressure,
6218 .enter_memory_pressure = sctp_enter_memory_pressure,
6219 .memory_allocated = &sctp_memory_allocated,
6175}; 6220};
6176#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ 6221#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index e2c679baf912..ba75ef4669e3 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -52,6 +52,15 @@ static int int_max = INT_MAX;
52static long sack_timer_min = 1; 52static long sack_timer_min = 1;
53static long sack_timer_max = 500; 53static long sack_timer_max = 500;
54 54
55int sysctl_sctp_mem[3];
56int sysctl_sctp_rmem[3];
57int sysctl_sctp_wmem[3];
58
59/*
60 * per assoc memory limitation for sends
61 */
62int sysctl_sctp_wmem[3];
63
55static ctl_table sctp_table[] = { 64static ctl_table sctp_table[] = {
56 { 65 {
57 .ctl_name = NET_SCTP_RTO_INITIAL, 66 .ctl_name = NET_SCTP_RTO_INITIAL,
@@ -226,6 +235,30 @@ static ctl_table sctp_table[] = {
226 .extra1 = &sack_timer_min, 235 .extra1 = &sack_timer_min,
227 .extra2 = &sack_timer_max, 236 .extra2 = &sack_timer_max,
228 }, 237 },
238 {
239 .ctl_name = CTL_UNNUMBERED,
240 .procname = "sctp_mem",
241 .data = &sysctl_sctp_mem,
242 .maxlen = sizeof(sysctl_sctp_mem),
243 .mode = 0644,
244 .proc_handler = &proc_dointvec,
245 },
246 {
247 .ctl_name = CTL_UNNUMBERED,
248 .procname = "sctp_rmem",
249 .data = &sysctl_sctp_rmem,
250 .maxlen = sizeof(sysctl_sctp_rmem),
251 .mode = 0644,
252 .proc_handler = &proc_dointvec,
253 },
254 {
255 .ctl_name = CTL_UNNUMBERED,
256 .procname = "sctp_wmem",
257 .data = &sysctl_sctp_wmem,
258 .maxlen = sizeof(sysctl_sctp_wmem),
259 .mode = 0644,
260 .proc_handler = &proc_dointvec,
261 },
229 { .ctl_name = 0 } 262 { .ctl_name = 0 }
230}; 263};
231 264
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index bfecb353ab3d..5dc094b9732d 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -685,6 +685,24 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
685 struct sctp_ulpevent *event = NULL; 685 struct sctp_ulpevent *event = NULL;
686 struct sk_buff *skb; 686 struct sk_buff *skb;
687 size_t padding, len; 687 size_t padding, len;
688 int rx_count;
689
690 /*
691 * check to see if we need to make space for this
692 * new skb, expand the rcvbuffer if needed, or drop
693 * the frame
694 */
695 if (asoc->ep->rcvbuf_policy)
696 rx_count = atomic_read(&asoc->rmem_alloc);
697 else
698 rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
699
700 if (rx_count >= asoc->base.sk->sk_rcvbuf) {
701
702 if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
703 (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb)))
704 goto fail;
705 }
688 706
689 /* Clone the original skb, sharing the data. */ 707 /* Clone the original skb, sharing the data. */
690 skb = skb_clone(chunk->skb, gfp); 708 skb = skb_clone(chunk->skb, gfp);
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index fa0ba2a5564e..b9370956b187 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1027,6 +1027,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
1027 sctp_ulpq_partial_delivery(ulpq, chunk, gfp); 1027 sctp_ulpq_partial_delivery(ulpq, chunk, gfp);
1028 } 1028 }
1029 1029
1030 sk_stream_mem_reclaim(asoc->base.sk);
1030 return; 1031 return;
1031} 1032}
1032 1033