summaryrefslogtreecommitdiffstats
path: root/net/sctp
diff options
context:
space:
mode:
authorMarcelo Ricardo Leitner <marcelo.leitner@gmail.com>2017-10-03 18:20:16 -0400
committerDavid S. Miller <davem@davemloft.net>2017-10-03 19:27:29 -0400
commit637784ade221a3c8a7ecd0f583eddd95d6276b9a (patch)
treef2ae2b8f52c227395fdce0d014318c0899e60776 /net/sctp
parent0ccdf3c7fdeda511b10def19505178a9d2d3fccd (diff)
sctp: introduce priority based stream scheduler
This patch introduces RFC Draft ndata section 3.4 Priority Based Scheduler (SCTP_SS_PRIO). It works by having a struct sctp_stream_priority for each priority configured. This struct is then enlisted on a queue ordered per priority if, and only if, there is a stream with data queued, so that dequeueing is very straightforward: either finish current datamsg or simply dequeue from the highest priority queued, which is the next stream pointed, and that's it. If there are multiple streams assigned with the same priority and with data queued, it will do round robin amongst them while respecting datamsgs boundaries (when not using idata chunks), to be reasonably fair. We intentionally don't maintain a list of priorities nor a list of all streams with the same priority to save memory. The first would mean at least 2 other pointers per priority (which, for 1000 priorities, that can mean 16kB) and the second would also mean 2 other pointers but per stream. As SCTP supports up to 65535 streams on a given asoc, that's 1MB. This impacts when giving a priority to some stream, as we have to find out if the new priority is already being used and if we can free the old one, and also when tearing down. The new fields in struct sctp_stream_out_ext and sctp_stream are added under a union because that memory is to be shared with other schedulers. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sctp')
-rw-r--r--net/sctp/Makefile2
-rw-r--r--net/sctp/stream_sched.c3
-rw-r--r--net/sctp/stream_sched_prio.c347
3 files changed, 351 insertions, 1 deletions
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 0f6e6d1d69fd..647c9cfd4e95 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -12,7 +12,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
12 inqueue.o outqueue.o ulpqueue.o \ 12 inqueue.o outqueue.o ulpqueue.o \
13 tsnmap.o bind_addr.o socket.o primitive.o \ 13 tsnmap.o bind_addr.o socket.o primitive.o \
14 output.o input.o debug.o stream.o auth.o \ 14 output.o input.o debug.o stream.o auth.o \
15 offload.o stream_sched.o 15 offload.o stream_sched.o stream_sched_prio.o
16 16
17sctp_probe-y := probe.o 17sctp_probe-y := probe.o
18 18
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 40a9a9de2b98..115ddb765169 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -121,8 +121,11 @@ static struct sctp_sched_ops sctp_sched_fcfs = {
121 121
122/* API to other parts of the stack */ 122/* API to other parts of the stack */
123 123
124extern struct sctp_sched_ops sctp_sched_prio;
125
124struct sctp_sched_ops *sctp_sched_ops[] = { 126struct sctp_sched_ops *sctp_sched_ops[] = {
125 &sctp_sched_fcfs, 127 &sctp_sched_fcfs,
128 &sctp_sched_prio,
126}; 129};
127 130
128int sctp_sched_set_sched(struct sctp_association *asoc, 131int sctp_sched_set_sched(struct sctp_association *asoc,
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
new file mode 100644
index 000000000000..384dbf3c8760
--- /dev/null
+++ b/net/sctp/stream_sched_prio.c
@@ -0,0 +1,347 @@
1/* SCTP kernel implementation
2 * (C) Copyright Red Hat Inc. 2017
3 *
4 * This file is part of the SCTP kernel implementation
5 *
6 * These functions manipulate sctp stream queue/scheduling.
7 *
8 * This SCTP implementation is free software;
9 * you can redistribute it and/or modify it under the terms of
10 * the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * This SCTP implementation is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * ************************
17 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 * See the GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with GNU CC; see the file COPYING. If not, see
22 * <http://www.gnu.org/licenses/>.
23 *
24 * Please send any bug reports or fixes you make to the
25 * email addresched(es):
26 * lksctp developers <linux-sctp@vger.kernel.org>
27 *
28 * Written or modified by:
29 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
30 */
31
32#include <linux/list.h>
33#include <net/sctp/sctp.h>
34#include <net/sctp/sm.h>
35#include <net/sctp/stream_sched.h>
36
37/* Priority handling
38 * RFC DRAFT ndata section 3.4
39 */
40
41static void sctp_sched_prio_unsched_all(struct sctp_stream *stream);
42
43static struct sctp_stream_priorities *sctp_sched_prio_new_head(
44 struct sctp_stream *stream, int prio, gfp_t gfp)
45{
46 struct sctp_stream_priorities *p;
47
48 p = kmalloc(sizeof(*p), gfp);
49 if (!p)
50 return NULL;
51
52 INIT_LIST_HEAD(&p->prio_sched);
53 INIT_LIST_HEAD(&p->active);
54 p->next = NULL;
55 p->prio = prio;
56
57 return p;
58}
59
60static struct sctp_stream_priorities *sctp_sched_prio_get_head(
61 struct sctp_stream *stream, int prio, gfp_t gfp)
62{
63 struct sctp_stream_priorities *p;
64 int i;
65
66 /* Look into scheduled priorities first, as they are sorted and
67 * we can find it fast IF it's scheduled.
68 */
69 list_for_each_entry(p, &stream->prio_list, prio_sched) {
70 if (p->prio == prio)
71 return p;
72 if (p->prio > prio)
73 break;
74 }
75
76 /* No luck. So we search on all streams now. */
77 for (i = 0; i < stream->outcnt; i++) {
78 if (!stream->out[i].ext)
79 continue;
80
81 p = stream->out[i].ext->prio_head;
82 if (!p)
83 /* Means all other streams won't be initialized
84 * as well.
85 */
86 break;
87 if (p->prio == prio)
88 return p;
89 }
90
91 /* If not even there, allocate a new one. */
92 return sctp_sched_prio_new_head(stream, prio, gfp);
93}
94
95static void sctp_sched_prio_next_stream(struct sctp_stream_priorities *p)
96{
97 struct list_head *pos;
98
99 pos = p->next->prio_list.next;
100 if (pos == &p->active)
101 pos = pos->next;
102 p->next = list_entry(pos, struct sctp_stream_out_ext, prio_list);
103}
104
105static bool sctp_sched_prio_unsched(struct sctp_stream_out_ext *soute)
106{
107 bool scheduled = false;
108
109 if (!list_empty(&soute->prio_list)) {
110 struct sctp_stream_priorities *prio_head = soute->prio_head;
111
112 /* Scheduled */
113 scheduled = true;
114
115 if (prio_head->next == soute)
116 /* Try to move to the next stream */
117 sctp_sched_prio_next_stream(prio_head);
118
119 list_del_init(&soute->prio_list);
120
121 /* Also unsched the priority if this was the last stream */
122 if (list_empty(&prio_head->active)) {
123 list_del_init(&prio_head->prio_sched);
124 /* If there is no stream left, clear next */
125 prio_head->next = NULL;
126 }
127 }
128
129 return scheduled;
130}
131
132static void sctp_sched_prio_sched(struct sctp_stream *stream,
133 struct sctp_stream_out_ext *soute)
134{
135 struct sctp_stream_priorities *prio, *prio_head;
136
137 prio_head = soute->prio_head;
138
139 /* Nothing to do if already scheduled */
140 if (!list_empty(&soute->prio_list))
141 return;
142
143 /* Schedule the stream. If there is a next, we schedule the new
144 * one before it, so it's the last in round robin order.
145 * If there isn't, we also have to schedule the priority.
146 */
147 if (prio_head->next) {
148 list_add(&soute->prio_list, prio_head->next->prio_list.prev);
149 return;
150 }
151
152 list_add(&soute->prio_list, &prio_head->active);
153 prio_head->next = soute;
154
155 list_for_each_entry(prio, &stream->prio_list, prio_sched) {
156 if (prio->prio > prio_head->prio) {
157 list_add(&prio_head->prio_sched, prio->prio_sched.prev);
158 return;
159 }
160 }
161
162 list_add_tail(&prio_head->prio_sched, &stream->prio_list);
163}
164
165static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
166 __u16 prio, gfp_t gfp)
167{
168 struct sctp_stream_out *sout = &stream->out[sid];
169 struct sctp_stream_out_ext *soute = sout->ext;
170 struct sctp_stream_priorities *prio_head, *old;
171 bool reschedule = false;
172 int i;
173
174 prio_head = sctp_sched_prio_get_head(stream, prio, gfp);
175 if (!prio_head)
176 return -ENOMEM;
177
178 reschedule = sctp_sched_prio_unsched(soute);
179 old = soute->prio_head;
180 soute->prio_head = prio_head;
181 if (reschedule)
182 sctp_sched_prio_sched(stream, soute);
183
184 if (!old)
185 /* Happens when we set the priority for the first time */
186 return 0;
187
188 for (i = 0; i < stream->outcnt; i++) {
189 soute = stream->out[i].ext;
190 if (soute && soute->prio_head == old)
191 /* It's still in use, nothing else to do here. */
192 return 0;
193 }
194
195 /* No hits, we are good to free it. */
196 kfree(old);
197
198 return 0;
199}
200
201static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid,
202 __u16 *value)
203{
204 *value = stream->out[sid].ext->prio_head->prio;
205 return 0;
206}
207
208static int sctp_sched_prio_init(struct sctp_stream *stream)
209{
210 INIT_LIST_HEAD(&stream->prio_list);
211
212 return 0;
213}
214
215static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
216 gfp_t gfp)
217{
218 INIT_LIST_HEAD(&stream->out[sid].ext->prio_list);
219 return sctp_sched_prio_set(stream, sid, 0, gfp);
220}
221
222static void sctp_sched_prio_free(struct sctp_stream *stream)
223{
224 struct sctp_stream_priorities *prio, *n;
225 LIST_HEAD(list);
226 int i;
227
228 /* As we don't keep a list of priorities, to avoid multiple
229 * frees we have to do it in 3 steps:
230 * 1. unsched everyone, so the lists are free to use in 2.
231 * 2. build the list of the priorities
232 * 3. free the list
233 */
234 sctp_sched_prio_unsched_all(stream);
235 for (i = 0; i < stream->outcnt; i++) {
236 if (!stream->out[i].ext)
237 continue;
238 prio = stream->out[i].ext->prio_head;
239 if (prio && list_empty(&prio->prio_sched))
240 list_add(&prio->prio_sched, &list);
241 }
242 list_for_each_entry_safe(prio, n, &list, prio_sched) {
243 list_del_init(&prio->prio_sched);
244 kfree(prio);
245 }
246}
247
248static void sctp_sched_prio_enqueue(struct sctp_outq *q,
249 struct sctp_datamsg *msg)
250{
251 struct sctp_stream *stream;
252 struct sctp_chunk *ch;
253 __u16 sid;
254
255 ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
256 sid = sctp_chunk_stream_no(ch);
257 stream = &q->asoc->stream;
258 sctp_sched_prio_sched(stream, stream->out[sid].ext);
259}
260
261static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q)
262{
263 struct sctp_stream *stream = &q->asoc->stream;
264 struct sctp_stream_priorities *prio;
265 struct sctp_stream_out_ext *soute;
266 struct sctp_chunk *ch = NULL;
267
268 /* Bail out quickly if queue is empty */
269 if (list_empty(&q->out_chunk_list))
270 goto out;
271
272 /* Find which chunk is next. It's easy, it's either the current
273 * one or the first chunk on the next active stream.
274 */
275 if (stream->out_curr) {
276 soute = stream->out_curr->ext;
277 } else {
278 prio = list_entry(stream->prio_list.next,
279 struct sctp_stream_priorities, prio_sched);
280 soute = prio->next;
281 }
282 ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
283 sctp_sched_dequeue_common(q, ch);
284
285out:
286 return ch;
287}
288
289static void sctp_sched_prio_dequeue_done(struct sctp_outq *q,
290 struct sctp_chunk *ch)
291{
292 struct sctp_stream_priorities *prio;
293 struct sctp_stream_out_ext *soute;
294 __u16 sid;
295
296 /* Last chunk on that msg, move to the next stream on
297 * this priority.
298 */
299 sid = sctp_chunk_stream_no(ch);
300 soute = q->asoc->stream.out[sid].ext;
301 prio = soute->prio_head;
302
303 sctp_sched_prio_next_stream(prio);
304
305 if (list_empty(&soute->outq))
306 sctp_sched_prio_unsched(soute);
307}
308
309static void sctp_sched_prio_sched_all(struct sctp_stream *stream)
310{
311 struct sctp_association *asoc;
312 struct sctp_stream_out *sout;
313 struct sctp_chunk *ch;
314
315 asoc = container_of(stream, struct sctp_association, stream);
316 list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
317 __u16 sid;
318
319 sid = sctp_chunk_stream_no(ch);
320 sout = &stream->out[sid];
321 if (sout->ext)
322 sctp_sched_prio_sched(stream, sout->ext);
323 }
324}
325
326static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
327{
328 struct sctp_stream_priorities *p, *tmp;
329 struct sctp_stream_out_ext *soute, *souttmp;
330
331 list_for_each_entry_safe(p, tmp, &stream->prio_list, prio_sched)
332 list_for_each_entry_safe(soute, souttmp, &p->active, prio_list)
333 sctp_sched_prio_unsched(soute);
334}
335
336struct sctp_sched_ops sctp_sched_prio = {
337 .set = sctp_sched_prio_set,
338 .get = sctp_sched_prio_get,
339 .init = sctp_sched_prio_init,
340 .init_sid = sctp_sched_prio_init_sid,
341 .free = sctp_sched_prio_free,
342 .enqueue = sctp_sched_prio_enqueue,
343 .dequeue = sctp_sched_prio_dequeue,
344 .dequeue_done = sctp_sched_prio_dequeue_done,
345 .sched_all = sctp_sched_prio_sched_all,
346 .unsched_all = sctp_sched_prio_unsched_all,
347};