aboutsummaryrefslogtreecommitdiffstats
path: root/net/sctp
diff options
context:
space:
mode:
Diffstat (limited to 'net/sctp')
-rw-r--r--net/sctp/endpointola.c1
-rw-r--r--net/sctp/protocol.c32
-rw-r--r--net/sctp/sm_statefuns.c74
-rw-r--r--net/sctp/socket.c69
-rw-r--r--net/sctp/sysctl.c33
-rw-r--r--net/sctp/ulpevent.c18
-rw-r--r--net/sctp/ulpqueue.c1
7 files changed, 160 insertions, 68 deletions
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 8f485a0d14b..22371185efb 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -102,6 +102,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
102 102
103 /* Use SCTP specific send buffer space queues. */ 103 /* Use SCTP specific send buffer space queues. */
104 ep->sndbuf_policy = sctp_sndbuf_policy; 104 ep->sndbuf_policy = sctp_sndbuf_policy;
105
105 sk->sk_write_space = sctp_write_space; 106 sk->sk_write_space = sctp_write_space;
106 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 107 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
107 108
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 3d036cdfae4..957c118a606 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -51,6 +51,7 @@
51#include <linux/netdevice.h> 51#include <linux/netdevice.h>
52#include <linux/inetdevice.h> 52#include <linux/inetdevice.h>
53#include <linux/seq_file.h> 53#include <linux/seq_file.h>
54#include <linux/bootmem.h>
54#include <net/protocol.h> 55#include <net/protocol.h>
55#include <net/ip.h> 56#include <net/ip.h>
56#include <net/ipv6.h> 57#include <net/ipv6.h>
@@ -82,6 +83,10 @@ static struct sctp_af *sctp_af_v6_specific;
82struct kmem_cache *sctp_chunk_cachep __read_mostly; 83struct kmem_cache *sctp_chunk_cachep __read_mostly;
83struct kmem_cache *sctp_bucket_cachep __read_mostly; 84struct kmem_cache *sctp_bucket_cachep __read_mostly;
84 85
86extern int sysctl_sctp_mem[3];
87extern int sysctl_sctp_rmem[3];
88extern int sysctl_sctp_wmem[3];
89
85/* Return the address of the control sock. */ 90/* Return the address of the control sock. */
86struct sock *sctp_get_ctl_sock(void) 91struct sock *sctp_get_ctl_sock(void)
87{ 92{
@@ -987,6 +992,8 @@ SCTP_STATIC __init int sctp_init(void)
987 int i; 992 int i;
988 int status = -EINVAL; 993 int status = -EINVAL;
989 unsigned long goal; 994 unsigned long goal;
995 unsigned long limit;
996 int max_share;
990 int order; 997 int order;
991 998
992 /* SCTP_DEBUG sanity check. */ 999 /* SCTP_DEBUG sanity check. */
@@ -1077,6 +1084,31 @@ SCTP_STATIC __init int sctp_init(void)
1077 /* Initialize handle used for association ids. */ 1084 /* Initialize handle used for association ids. */
1078 idr_init(&sctp_assocs_id); 1085 idr_init(&sctp_assocs_id);
1079 1086
1087 /* Set the pressure threshold to be a fraction of global memory that
1088 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
1089 * memory, with a floor of 128 pages.
1090 * Note this initalizes the data in sctpv6_prot too
1091 * Unabashedly stolen from tcp_init
1092 */
1093 limit = min(num_physpages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
1094 limit = (limit * (num_physpages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
1095 limit = max(limit, 128UL);
1096 sysctl_sctp_mem[0] = limit / 4 * 3;
1097 sysctl_sctp_mem[1] = limit;
1098 sysctl_sctp_mem[2] = sysctl_sctp_mem[0] * 2;
1099
1100 /* Set per-socket limits to no more than 1/128 the pressure threshold*/
1101 limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7);
1102 max_share = min(4UL*1024*1024, limit);
1103
1104 sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */
1105 sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
1106 sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
1107
1108 sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM;
1109 sysctl_sctp_wmem[1] = 16*1024;
1110 sysctl_sctp_wmem[2] = max(64*1024, max_share);
1111
1080 /* Size and allocate the association hash table. 1112 /* Size and allocate the association hash table.
1081 * The methodology is similar to that of the tcp hash tables. 1113 * The methodology is similar to that of the tcp hash tables.
1082 */ 1114 */
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index a583d67cab6..ec0328b1cdb 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5428,10 +5428,8 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5428 sctp_verb_t deliver; 5428 sctp_verb_t deliver;
5429 int tmp; 5429 int tmp;
5430 __u32 tsn; 5430 __u32 tsn;
5431 int account_value;
5432 struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; 5431 struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
5433 struct sock *sk = asoc->base.sk; 5432 struct sock *sk = asoc->base.sk;
5434 int rcvbuf_over = 0;
5435 5433
5436 data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data; 5434 data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
5437 skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); 5435 skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
@@ -5441,48 +5439,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5441 5439
5442 /* ASSERT: Now skb->data is really the user data. */ 5440 /* ASSERT: Now skb->data is really the user data. */
5443 5441
5444 /*
5445 * If we are established, and we have used up our receive buffer
5446 * memory, think about droping the frame.
5447 * Note that we have an opportunity to improve performance here.
5448 * If we accept one chunk from an skbuff, we have to keep all the
5449 * memory of that skbuff around until the chunk is read into user
5450 * space. Therefore, once we accept 1 chunk we may as well accept all
5451 * remaining chunks in the skbuff. The data_accepted flag helps us do
5452 * that.
5453 */
5454 if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) {
5455 /*
5456 * If the receive buffer policy is 1, then each
5457 * association can allocate up to sk_rcvbuf bytes
5458 * otherwise, all the associations in aggregate
5459 * may allocate up to sk_rcvbuf bytes
5460 */
5461 if (asoc->ep->rcvbuf_policy)
5462 account_value = atomic_read(&asoc->rmem_alloc);
5463 else
5464 account_value = atomic_read(&sk->sk_rmem_alloc);
5465 if (account_value > sk->sk_rcvbuf) {
5466 /*
5467 * We need to make forward progress, even when we are
5468 * under memory pressure, so we always allow the
5469 * next tsn after the ctsn ack point to be accepted.
5470 * This lets us avoid deadlocks in which we have to
5471 * drop frames that would otherwise let us drain the
5472 * receive queue.
5473 */
5474 if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn)
5475 return SCTP_IERROR_IGNORE_TSN;
5476
5477 /*
5478 * We're going to accept the frame but we should renege
5479 * to make space for it. This will send us down that
5480 * path later in this function.
5481 */
5482 rcvbuf_over = 1;
5483 }
5484 }
5485
5486 /* Process ECN based congestion. 5442 /* Process ECN based congestion.
5487 * 5443 *
5488 * Since the chunk structure is reused for all chunks within 5444 * Since the chunk structure is reused for all chunks within
@@ -5542,18 +5498,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5542 * seems a bit troublesome in that frag_point varies based on 5498 * seems a bit troublesome in that frag_point varies based on
5543 * PMTU. In cases, such as loopback, this might be a rather 5499 * PMTU. In cases, such as loopback, this might be a rather
5544 * large spill over. 5500 * large spill over.
5545 * NOTE: If we have a full receive buffer here, we only renege if 5501 */
5546 * our receiver can still make progress without the tsn being 5502 if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
5547 * received. We do this because in the event that the associations 5503 (datalen > asoc->rwnd + asoc->frag_point))) {
5548 * receive queue is empty we are filling a leading gap, and since
5549 * reneging moves the gap to the end of the tsn stream, we are likely
5550 * to stall again very shortly. Avoiding the renege when we fill a
5551 * leading gap is a good heuristic for avoiding such steady state
5552 * stalls.
5553 */
5554 if (!asoc->rwnd || asoc->rwnd_over ||
5555 (datalen > asoc->rwnd + asoc->frag_point) ||
5556 (rcvbuf_over && (!skb_queue_len(&sk->sk_receive_queue)))) {
5557 5504
5558 /* If this is the next TSN, consider reneging to make 5505 /* If this is the next TSN, consider reneging to make
5559 * room. Note: Playing nice with a confused sender. A 5506 * room. Note: Playing nice with a confused sender. A
@@ -5574,6 +5521,21 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5574 } 5521 }
5575 5522
5576 /* 5523 /*
5524 * Also try to renege to limit our memory usage in the event that
5525 * we are under memory pressure
5526 * If we can't renege, don't worry about it, the sk_stream_rmem_schedule
5527 * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our
5528 * memory usage too much
5529 */
5530 if (*sk->sk_prot_creator->memory_pressure) {
5531 if (sctp_tsnmap_has_gap(map) &&
5532 (sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
5533 SCTP_DEBUG_PRINTK("Under Pressure! Reneging for tsn:%u\n", tsn);
5534 deliver = SCTP_CMD_RENEGE;
5535 }
5536 }
5537
5538 /*
5577 * Section 3.3.10.9 No User Data (9) 5539 * Section 3.3.10.9 No User Data (9)
5578 * 5540 *
5579 * Cause of error 5541 * Cause of error
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 772fbfb4bfd..b9952425c79 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -107,23 +107,42 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
107 struct sctp_association *, sctp_socket_type_t); 107 struct sctp_association *, sctp_socket_type_t);
108static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; 108static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
109 109
110extern struct kmem_cache *sctp_bucket_cachep;
111extern int sysctl_sctp_mem[3];
112extern int sysctl_sctp_rmem[3];
113extern int sysctl_sctp_wmem[3];
114
115int sctp_memory_pressure;
116atomic_t sctp_memory_allocated;
117atomic_t sctp_sockets_allocated;
118
119static void sctp_enter_memory_pressure(void)
120{
121 sctp_memory_pressure = 1;
122}
123
124
110/* Get the sndbuf space available at the time on the association. */ 125/* Get the sndbuf space available at the time on the association. */
111static inline int sctp_wspace(struct sctp_association *asoc) 126static inline int sctp_wspace(struct sctp_association *asoc)
112{ 127{
113 struct sock *sk = asoc->base.sk; 128 int amt;
114 int amt = 0;
115 129
116 if (asoc->ep->sndbuf_policy) { 130 if (asoc->ep->sndbuf_policy)
117 /* make sure that no association uses more than sk_sndbuf */ 131 amt = asoc->sndbuf_used;
118 amt = sk->sk_sndbuf - asoc->sndbuf_used; 132 else
133 amt = atomic_read(&asoc->base.sk->sk_wmem_alloc);
134
135 if (amt >= asoc->base.sk->sk_sndbuf) {
136 if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK)
137 amt = 0;
138 else {
139 amt = sk_stream_wspace(asoc->base.sk);
140 if (amt < 0)
141 amt = 0;
142 }
119 } else { 143 } else {
120 /* do socket level accounting */ 144 amt = asoc->base.sk->sk_sndbuf - amt;
121 amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
122 } 145 }
123
124 if (amt < 0)
125 amt = 0;
126
127 return amt; 146 return amt;
128} 147}
129 148
@@ -155,6 +174,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
155 sizeof(struct sctp_chunk); 174 sizeof(struct sctp_chunk);
156 175
157 atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); 176 atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
177 sk_charge_skb(sk, chunk->skb);
158} 178}
159 179
160/* Verify that this is a valid address. */ 180/* Verify that this is a valid address. */
@@ -3293,6 +3313,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3293 sp->hmac = NULL; 3313 sp->hmac = NULL;
3294 3314
3295 SCTP_DBG_OBJCNT_INC(sock); 3315 SCTP_DBG_OBJCNT_INC(sock);
3316 atomic_inc(&sctp_sockets_allocated);
3296 return 0; 3317 return 0;
3297} 3318}
3298 3319
@@ -3306,7 +3327,7 @@ SCTP_STATIC int sctp_destroy_sock(struct sock *sk)
3306 /* Release our hold on the endpoint. */ 3327 /* Release our hold on the endpoint. */
3307 ep = sctp_sk(sk)->ep; 3328 ep = sctp_sk(sk)->ep;
3308 sctp_endpoint_free(ep); 3329 sctp_endpoint_free(ep);
3309 3330 atomic_dec(&sctp_sockets_allocated);
3310 return 0; 3331 return 0;
3311} 3332}
3312 3333
@@ -5720,6 +5741,12 @@ static void sctp_wfree(struct sk_buff *skb)
5720 5741
5721 atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); 5742 atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
5722 5743
5744 /*
5745 * This undoes what is done via sk_charge_skb
5746 */
5747 sk->sk_wmem_queued -= skb->truesize;
5748 sk->sk_forward_alloc += skb->truesize;
5749
5723 sock_wfree(skb); 5750 sock_wfree(skb);
5724 __sctp_write_space(asoc); 5751 __sctp_write_space(asoc);
5725 5752
@@ -5737,6 +5764,11 @@ void sctp_sock_rfree(struct sk_buff *skb)
5737 struct sctp_ulpevent *event = sctp_skb2event(skb); 5764 struct sctp_ulpevent *event = sctp_skb2event(skb);
5738 5765
5739 atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); 5766 atomic_sub(event->rmem_len, &sk->sk_rmem_alloc);
5767
5768 /*
5769 * Mimic the behavior of sk_stream_rfree
5770 */
5771 sk->sk_forward_alloc += event->rmem_len;
5740} 5772}
5741 5773
5742 5774
@@ -6126,6 +6158,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
6126 sctp_release_sock(newsk); 6158 sctp_release_sock(newsk);
6127} 6159}
6128 6160
6161
6129/* This proto struct describes the ULP interface for SCTP. */ 6162/* This proto struct describes the ULP interface for SCTP. */
6130struct proto sctp_prot = { 6163struct proto sctp_prot = {
6131 .name = "SCTP", 6164 .name = "SCTP",
@@ -6148,6 +6181,12 @@ struct proto sctp_prot = {
6148 .unhash = sctp_unhash, 6181 .unhash = sctp_unhash,
6149 .get_port = sctp_get_port, 6182 .get_port = sctp_get_port,
6150 .obj_size = sizeof(struct sctp_sock), 6183 .obj_size = sizeof(struct sctp_sock),
6184 .sysctl_mem = sysctl_sctp_mem,
6185 .sysctl_rmem = sysctl_sctp_rmem,
6186 .sysctl_wmem = sysctl_sctp_wmem,
6187 .memory_pressure = &sctp_memory_pressure,
6188 .enter_memory_pressure = sctp_enter_memory_pressure,
6189 .memory_allocated = &sctp_memory_allocated,
6151}; 6190};
6152 6191
6153#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 6192#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -6172,5 +6211,11 @@ struct proto sctpv6_prot = {
6172 .unhash = sctp_unhash, 6211 .unhash = sctp_unhash,
6173 .get_port = sctp_get_port, 6212 .get_port = sctp_get_port,
6174 .obj_size = sizeof(struct sctp6_sock), 6213 .obj_size = sizeof(struct sctp6_sock),
6214 .sysctl_mem = sysctl_sctp_mem,
6215 .sysctl_rmem = sysctl_sctp_rmem,
6216 .sysctl_wmem = sysctl_sctp_wmem,
6217 .memory_pressure = &sctp_memory_pressure,
6218 .enter_memory_pressure = sctp_enter_memory_pressure,
6219 .memory_allocated = &sctp_memory_allocated,
6175}; 6220};
6176#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ 6221#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index e2c679baf91..ba75ef4669e 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -52,6 +52,15 @@ static int int_max = INT_MAX;
52static long sack_timer_min = 1; 52static long sack_timer_min = 1;
53static long sack_timer_max = 500; 53static long sack_timer_max = 500;
54 54
55int sysctl_sctp_mem[3];
56int sysctl_sctp_rmem[3];
57int sysctl_sctp_wmem[3];
58
59/*
60 * per assoc memory limitationf for sends
61 */
62int sysctl_sctp_wmem[3];
63
55static ctl_table sctp_table[] = { 64static ctl_table sctp_table[] = {
56 { 65 {
57 .ctl_name = NET_SCTP_RTO_INITIAL, 66 .ctl_name = NET_SCTP_RTO_INITIAL,
@@ -226,6 +235,30 @@ static ctl_table sctp_table[] = {
226 .extra1 = &sack_timer_min, 235 .extra1 = &sack_timer_min,
227 .extra2 = &sack_timer_max, 236 .extra2 = &sack_timer_max,
228 }, 237 },
238 {
239 .ctl_name = CTL_UNNUMBERED,
240 .procname = "sctp_mem",
241 .data = &sysctl_sctp_mem,
242 .maxlen = sizeof(sysctl_sctp_mem),
243 .mode = 0644,
244 .proc_handler = &proc_dointvec,
245 },
246 {
247 .ctl_name = CTL_UNNUMBERED,
248 .procname = "sctp_rmem",
249 .data = &sysctl_sctp_rmem,
250 .maxlen = sizeof(sysctl_sctp_rmem),
251 .mode = 0644,
252 .proc_handler = &proc_dointvec,
253 },
254 {
255 .ctl_name = CTL_UNNUMBERED,
256 .procname = "sctp_wmem",
257 .data = &sysctl_sctp_wmem,
258 .maxlen = sizeof(sysctl_sctp_wmem),
259 .mode = 0644,
260 .proc_handler = &proc_dointvec,
261 },
229 { .ctl_name = 0 } 262 { .ctl_name = 0 }
230}; 263};
231 264
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index bfecb353ab3..5dc094b9732 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -685,6 +685,24 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
685 struct sctp_ulpevent *event = NULL; 685 struct sctp_ulpevent *event = NULL;
686 struct sk_buff *skb; 686 struct sk_buff *skb;
687 size_t padding, len; 687 size_t padding, len;
688 int rx_count;
689
690 /*
691 * check to see if we need to make space for this
692 * new skb, expand the rcvbuffer if needed, or drop
693 * the frame
694 */
695 if (asoc->ep->rcvbuf_policy)
696 rx_count = atomic_read(&asoc->rmem_alloc);
697 else
698 rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
699
700 if (rx_count >= asoc->base.sk->sk_rcvbuf) {
701
702 if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
703 (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb)))
704 goto fail;
705 }
688 706
689 /* Clone the original skb, sharing the data. */ 707 /* Clone the original skb, sharing the data. */
690 skb = skb_clone(chunk->skb, gfp); 708 skb = skb_clone(chunk->skb, gfp);
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index fa0ba2a5564..b9370956b18 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1027,6 +1027,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
1027 sctp_ulpq_partial_delivery(ulpq, chunk, gfp); 1027 sctp_ulpq_partial_delivery(ulpq, chunk, gfp);
1028 } 1028 }
1029 1029
1030 sk_stream_mem_reclaim(asoc->base.sk);
1030 return; 1031 return;
1031} 1032}
1032 1033