 include/uapi/linux/pkt_sched.h |  13 +
 net/sched/sch_taprio.c         | 605 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
 2 files changed, 412 insertions(+), 206 deletions(-)
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 7ee74c3474bf..8b2f993cbb77 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -1148,6 +1148,16 @@ enum {
 
 #define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1)
 
+/* The format for the admin sched (dump only):
+ * [TCA_TAPRIO_SCHED_ADMIN_SCHED]
+ *   [TCA_TAPRIO_ATTR_SCHED_BASE_TIME]
+ *   [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]
+ *     [TCA_TAPRIO_ATTR_SCHED_ENTRY]
+ *       [TCA_TAPRIO_ATTR_SCHED_ENTRY_CMD]
+ *       [TCA_TAPRIO_ATTR_SCHED_ENTRY_GATES]
+ *       [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL]
+ */
+
 enum {
 	TCA_TAPRIO_ATTR_UNSPEC,
 	TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
@@ -1156,6 +1166,9 @@ enum {
 	TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */
 	TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */
 	TCA_TAPRIO_PAD,
+	TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */
+	TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */
+	TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */
 	__TCA_TAPRIO_ATTR_MAX,
 };
 
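
The comment added above documents how the admin schedule is nested inside a dump. As a rough illustration (not part of this patch), a userspace listener could walk that nesting with libmnl; this is a minimal sketch, assuming 'admin' is the TCA_TAPRIO_ATTR_ADMIN_SCHED attribute taken out of a taprio dump message, and it only prints the base time:

	#include <libmnl/libmnl.h>
	#include <linux/pkt_sched.h>
	#include <stdio.h>

	/* Called for each attribute nested inside TCA_TAPRIO_ATTR_ADMIN_SCHED. */
	static int admin_attr_cb(const struct nlattr *attr, void *data)
	{
		switch (mnl_attr_get_type(attr)) {
		case TCA_TAPRIO_ATTR_SCHED_BASE_TIME:
			printf("admin base-time: %lld\n",
			       (long long)mnl_attr_get_u64(attr));
			break;
		case TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST:
			/* each child here is a TCA_TAPRIO_ATTR_SCHED_ENTRY
			 * nest, parsed the same way one level down */
			break;
		}
		return MNL_CB_OK;
	}

	static void walk_admin_sched(const struct nlattr *admin)
	{
		mnl_attr_parse_nested(admin, admin_attr_cb, NULL);
	}
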
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 09563c245473..539677120b9f 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -16,6 +16,7 @@
 #include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
+#include <linux/rcupdate.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
@@ -41,25 +42,88 @@ struct sched_entry {
 	u8 command;
 };
 
+struct sched_gate_list {
+	struct rcu_head rcu;
+	struct list_head entries;
+	size_t num_entries;
+	ktime_t cycle_close_time;
+	s64 cycle_time;
+	s64 cycle_time_extension;
+	s64 base_time;
+};
+
 struct taprio_sched {
 	struct Qdisc **qdiscs;
 	struct Qdisc *root;
-	s64 base_time;
 	int clockid;
 	atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
 				    * speeds it's sub-nanoseconds per byte
 				    */
-	size_t num_entries;
 
 	/* Protects the update side of the RCU protected current_entry */
 	spinlock_t current_entry_lock;
 	struct sched_entry __rcu *current_entry;
-	struct list_head entries;
+	struct sched_gate_list __rcu *oper_sched;
+	struct sched_gate_list __rcu *admin_sched;
 	ktime_t (*get_time)(void);
 	struct hrtimer advance_timer;
 	struct list_head taprio_list;
 };
 
+static ktime_t sched_base_time(const struct sched_gate_list *sched)
+{
+	if (!sched)
+		return KTIME_MAX;
+
+	return ns_to_ktime(sched->base_time);
+}
+
+static void taprio_free_sched_cb(struct rcu_head *head)
+{
+	struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
+	struct sched_entry *entry, *n;
+
+	if (!sched)
+		return;
+
+	list_for_each_entry_safe(entry, n, &sched->entries, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+
+	kfree(sched);
+}
+
+static void switch_schedules(struct taprio_sched *q,
+			     struct sched_gate_list **admin,
+			     struct sched_gate_list **oper)
+{
+	rcu_assign_pointer(q->oper_sched, *admin);
+	rcu_assign_pointer(q->admin_sched, NULL);
+
+	if (*oper)
+		call_rcu(&(*oper)->rcu, taprio_free_sched_cb);
+
+	*oper = *admin;
+	*admin = NULL;
+}
+
+static ktime_t get_cycle_time(struct sched_gate_list *sched)
+{
+	struct sched_entry *entry;
+	ktime_t cycle = 0;
+
+	if (sched->cycle_time != 0)
+		return sched->cycle_time;
+
+	list_for_each_entry(entry, &sched->entries, list)
+		cycle = ktime_add_ns(cycle, entry->interval);
+
+	sched->cycle_time = cycle;
+
+	return cycle;
+}
+
 static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			  struct sk_buff **to_free)
 {
@@ -136,8 +200,8 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
+	struct sk_buff *skb = NULL;
 	struct sched_entry *entry;
-	struct sk_buff *skb;
 	u32 gate_mask;
 	int i;
 
@@ -154,10 +218,9 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
154 * "AdminGateSates" 218 * "AdminGateSates"
155 */ 219 */
156 gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; 220 gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
157 rcu_read_unlock();
158 221
159 if (!gate_mask) 222 if (!gate_mask)
160 return NULL; 223 goto done;
161 224
162 for (i = 0; i < dev->num_tx_queues; i++) { 225 for (i = 0; i < dev->num_tx_queues; i++) {
163 struct Qdisc *child = q->qdiscs[i]; 226 struct Qdisc *child = q->qdiscs[i];
@@ -197,22 +260,72 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
 
 		skb = child->ops->dequeue(child);
 		if (unlikely(!skb))
-			return NULL;
+			goto done;
 
 		qdisc_bstats_update(sch, skb);
 		qdisc_qstats_backlog_dec(sch, skb);
 		sch->q.qlen--;
 
-		return skb;
+		goto done;
 	}
 
-	return NULL;
+done:
+	rcu_read_unlock();
+
+	return skb;
+}
+
+static bool should_restart_cycle(const struct sched_gate_list *oper,
+				 const struct sched_entry *entry)
+{
+	if (list_is_last(&entry->list, &oper->entries))
+		return true;
+
+	if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0)
+		return true;
+
+	return false;
+}
+
+static bool should_change_schedules(const struct sched_gate_list *admin,
+				    const struct sched_gate_list *oper,
+				    ktime_t close_time)
+{
+	ktime_t next_base_time, extension_time;
+
+	if (!admin)
+		return false;
+
+	next_base_time = sched_base_time(admin);
+
+	/* This is the simple case, the close_time would fall after
+	 * the next schedule base_time.
+	 */
+	if (ktime_compare(next_base_time, close_time) <= 0)
+		return true;
+
+	/* This is the cycle_time_extension case, if the close_time
+	 * plus the amount that can be extended would fall after the
+	 * next schedule base_time, we can extend the current schedule
+	 * for that amount.
+	 */
+	extension_time = ktime_add_ns(close_time, oper->cycle_time_extension);
+
+	/* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
+	 * how precisely the extension should be made. So after
+	 * conformance testing, this logic may change.
+	 */
+	if (ktime_compare(next_base_time, extension_time) <= 0)
+		return true;
+
+	return false;
 }
 
 static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 {
 	struct taprio_sched *q = container_of(timer, struct taprio_sched,
 					      advance_timer);
+	struct sched_gate_list *oper, *admin;
 	struct sched_entry *entry, *next;
 	struct Qdisc *sch = q->root;
 	ktime_t close_time;
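
To make the two comparisons in should_change_schedules() above concrete, here is a self-contained sketch of the same decision using plain nanosecond values instead of ktime_t (the numbers are illustrative, not from the patch):

	#include <stdbool.h>
	#include <stdio.h>

	/* Mirrors should_change_schedules(): switch if the entry's close
	 * time, possibly stretched by cycle_time_extension, reaches the
	 * admin schedule's base_time.
	 */
	static bool change(long long admin_base, long long close_time,
			   long long extension)
	{
		if (admin_base <= close_time)              /* simple case */
			return true;
		if (admin_base <= close_time + extension)  /* extension case */
			return true;
		return false;
	}

	int main(void)
	{
		/* admin base-time at t=1000us; current entry closes at t=950us */
		printf("%d\n", change(1000000, 950000, 0));      /* 0: keep oper */
		printf("%d\n", change(1000000, 950000, 100000)); /* 1: extend and switch */
		return 0;
	}
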
@@ -220,25 +333,46 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 	spin_lock(&q->current_entry_lock);
 	entry = rcu_dereference_protected(q->current_entry,
 					  lockdep_is_held(&q->current_entry_lock));
+	oper = rcu_dereference_protected(q->oper_sched,
+					 lockdep_is_held(&q->current_entry_lock));
+	admin = rcu_dereference_protected(q->admin_sched,
+					  lockdep_is_held(&q->current_entry_lock));
 
-	/* This is the case that it's the first time that the schedule
-	 * runs, so it only happens once per schedule. The first entry
-	 * is pre-calculated during the schedule initialization.
+	if (!oper)
+		switch_schedules(q, &admin, &oper);
+
+	/* This can happen in two cases: 1. this is the very first run
+	 * of this function (i.e. we weren't running any schedule
+	 * previously); 2. The previous schedule just ended. The first
+	 * entry of all schedules are pre-calculated during the
+	 * schedule initialization.
 	 */
-	if (unlikely(!entry)) {
-		next = list_first_entry(&q->entries, struct sched_entry,
+	if (unlikely(!entry || entry->close_time == oper->base_time)) {
+		next = list_first_entry(&oper->entries, struct sched_entry,
 					list);
 		close_time = next->close_time;
 		goto first_run;
 	}
 
-	if (list_is_last(&entry->list, &q->entries))
-		next = list_first_entry(&q->entries, struct sched_entry,
+	if (should_restart_cycle(oper, entry)) {
+		next = list_first_entry(&oper->entries, struct sched_entry,
 					list);
-	else
+		oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time,
+						      oper->cycle_time);
+	} else {
 		next = list_next_entry(entry, list);
+	}
 
 	close_time = ktime_add_ns(entry->close_time, next->interval);
+	close_time = min_t(ktime_t, close_time, oper->cycle_close_time);
+
+	if (should_change_schedules(admin, oper, close_time)) {
+		/* Set things so the next time this runs, the new
+		 * schedule runs.
+		 */
+		close_time = sched_base_time(admin);
+		switch_schedules(q, &admin, &oper);
+	}
 
 	next->close_time = close_time;
 	taprio_set_budget(q, next);
@@ -271,10 +405,12 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
 	[TCA_TAPRIO_ATTR_PRIOMAP]	       = {
 		.len = sizeof(struct tc_mqprio_qopt)
 	},
 	[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]           = { .type = NLA_NESTED },
 	[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]            = { .type = NLA_S64 },
 	[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]         = { .type = NLA_NESTED },
 	[TCA_TAPRIO_ATTR_SCHED_CLOCKID]              = { .type = NLA_S32 },
+	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           = { .type = NLA_S64 },
+	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
 };
 
 static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
@@ -322,71 +458,8 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
 	return fill_sched_entry(tb, entry, extack);
 }
 
-/* Returns the number of entries in case of success */
-static int parse_sched_single_entry(struct nlattr *n,
-				    struct taprio_sched *q,
-				    struct netlink_ext_ack *extack)
-{
-	struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
-	struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { };
-	struct sched_entry *entry;
-	bool found = false;
-	u32 index;
-	int err;
-
-	err = nla_parse_nested_deprecated(tb_list, TCA_TAPRIO_SCHED_MAX, n,
-					  entry_list_policy, NULL);
-	if (err < 0) {
-		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
-		return -EINVAL;
-	}
-
-	if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) {
-		NL_SET_ERR_MSG(extack, "Single-entry must include an entry");
-		return -EINVAL;
-	}
-
-	err = nla_parse_nested_deprecated(tb_entry,
-					  TCA_TAPRIO_SCHED_ENTRY_MAX,
-					  tb_list[TCA_TAPRIO_SCHED_ENTRY],
-					  entry_policy, NULL);
-	if (err < 0) {
-		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
-		return -EINVAL;
-	}
-
-	if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) {
-		NL_SET_ERR_MSG(extack, "Entry must specify an index\n");
-		return -EINVAL;
-	}
-
-	index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]);
-	if (index >= q->num_entries) {
-		NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule");
-		return -EINVAL;
-	}
-
-	list_for_each_entry(entry, &q->entries, list) {
-		if (entry->index == index) {
-			found = true;
-			break;
-		}
-	}
-
-	if (!found) {
-		NL_SET_ERR_MSG(extack, "Could not find entry");
-		return -ENOENT;
-	}
-
-	err = fill_sched_entry(tb_entry, entry, extack);
-	if (err < 0)
-		return err;
-
-	return q->num_entries;
-}
-
 static int parse_sched_list(struct nlattr *list,
-			    struct taprio_sched *q,
+			    struct sched_gate_list *sched,
 			    struct netlink_ext_ack *extack)
 {
 	struct nlattr *n;
@@ -416,64 +489,42 @@ static int parse_sched_list(struct nlattr *list,
 		return err;
 	}
 
-		list_add_tail(&entry->list, &q->entries);
+		list_add_tail(&entry->list, &sched->entries);
 		i++;
 	}
 
-	q->num_entries = i;
+	sched->num_entries = i;
 
 	return i;
 }
 
-/* Returns the number of entries in case of success */
-static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q,
-			    struct netlink_ext_ack *extack)
+static int parse_taprio_schedule(struct nlattr **tb,
+				 struct sched_gate_list *new,
+				 struct netlink_ext_ack *extack)
 {
 	int err = 0;
-	int clockid;
-
-	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] &&
-	    tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
-		return -EINVAL;
-
-	if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0)
-		return -EINVAL;
 
-	if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID])
-		return -EINVAL;
+	if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
+		NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
+		return -ENOTSUPP;
+	}
 
 	if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
-		q->base_time = nla_get_s64(
-			tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
+		new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
 
-	if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
-		clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
-
-		/* We only support static clockids and we don't allow
-		 * for it to be modified after the first init.
-		 */
-		if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid))
-			return -EINVAL;
+	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
+		new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);
 
-		q->clockid = clockid;
-	}
+	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
+		new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
 
 	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
 		err = parse_sched_list(
-			tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack);
-	else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
-		err = parse_sched_single_entry(
-			tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack);
-
-	/* parse_sched_* return the number of entries in the schedule,
-	 * a schedule with zero entries is an error.
-	 */
-	if (err == 0) {
-		NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry");
-		return -EINVAL;
-	}
+			tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack);
+	if (err < 0)
+		return err;
 
-	return err;
+	return 0;
 }
 
 static int taprio_parse_mqprio_opt(struct net_device *dev,
@@ -482,11 +533,17 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
 {
 	int i, j;
 
-	if (!qopt) {
+	if (!qopt && !dev->num_tc) {
 		NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
 		return -EINVAL;
 	}
 
+	/* If num_tc is already set, it means that the user already
+	 * configured the mqprio part
+	 */
+	if (dev->num_tc)
+		return 0;
+
 	/* Verify num_tc is not out of max range */
 	if (qopt->num_tc > TC_MAX_QUEUE) {
 		NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
@@ -532,14 +589,15 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
 	return 0;
 }
 
-static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start)
+static int taprio_get_start_time(struct Qdisc *sch,
+				 struct sched_gate_list *sched,
+				 ktime_t *start)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
-	struct sched_entry *entry;
 	ktime_t now, base, cycle;
 	s64 n;
 
-	base = ns_to_ktime(q->base_time);
+	base = sched_base_time(sched);
 	now = q->get_time();
 
 	if (ktime_after(base, now)) {
@@ -547,11 +605,7 @@ static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start)
 		return 0;
 	}
 
-	/* Calculate the cycle_time, by summing all the intervals.
-	 */
-	cycle = 0;
-	list_for_each_entry(entry, &q->entries, list)
-		cycle = ktime_add_ns(cycle, entry->interval);
+	cycle = get_cycle_time(sched);
 
 	/* The qdisc is expected to have at least one sched_entry. Moreover,
 	 * any entry must have 'interval' > 0. Thus if the cycle time is zero,
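
A quick worked example of the cycle arithmetic used above, with illustrative values. get_cycle_time() falls back to summing the entry intervals when no explicit cycle-time was given; the alignment of the start to the next cycle boundary is inferred from the unchanged code following this hunk and is stated here as an assumption:

	#include <stdio.h>

	int main(void)
	{
		/* three gate entries: 300us + 300us + 400us */
		long long cycle = 300000 + 300000 + 400000; /* 1ms */
		long long base = 0, now = 2500000;          /* base in the past */

		/* presumed alignment: start at the first cycle boundary
		 * strictly after 'now' */
		long long n = (now - base) / cycle;
		long long start = base + (n + 1) * cycle;

		printf("cycle=%lld start=%lld\n", cycle, start); /* 1000000 3000000 */
		return 0;
	}
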
@@ -569,22 +623,40 @@ static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start)
 	return 0;
 }
 
-static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
+static void setup_first_close_time(struct taprio_sched *q,
+				   struct sched_gate_list *sched, ktime_t base)
 {
-	struct taprio_sched *q = qdisc_priv(sch);
 	struct sched_entry *first;
-	unsigned long flags;
+	ktime_t cycle;
 
-	spin_lock_irqsave(&q->current_entry_lock, flags);
+	first = list_first_entry(&sched->entries,
+				 struct sched_entry, list);
 
-	first = list_first_entry(&q->entries, struct sched_entry,
-				 list);
+	cycle = get_cycle_time(sched);
 
-	first->close_time = ktime_add_ns(start, first->interval);
+	/* FIXME: find a better place to do this */
+	sched->cycle_close_time = ktime_add_ns(base, cycle);
+
+	first->close_time = ktime_add_ns(base, first->interval);
 	taprio_set_budget(q, first);
 	rcu_assign_pointer(q->current_entry, NULL);
+}
 
-	spin_unlock_irqrestore(&q->current_entry_lock, flags);
+static void taprio_start_sched(struct Qdisc *sch,
+			       ktime_t start, struct sched_gate_list *new)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	ktime_t expires;
+
+	expires = hrtimer_get_expires(&q->advance_timer);
+	if (expires == 0)
+		expires = KTIME_MAX;
+
+	/* If the new schedule starts before the next expiration, we
+	 * reprogram it to the earliest one, so we change the admin
+	 * schedule to the operational one at the right time.
+	 */
+	start = min_t(ktime_t, start, expires);
 
 	hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
 }
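
The min_t() in taprio_start_sched() above is what guarantees the timer fires early enough for a pending schedule change. A tiny sketch with illustrative numbers:

	#include <stdio.h>

	static long long min_ll(long long a, long long b)
	{
		return a < b ? a : b;
	}

	int main(void)
	{
		long long expires = 5000000; /* oper schedule's timer fires at t=5ms */
		long long start   = 3000000; /* admin schedule's base-time is t=3ms */

		/* the timer must be moved to 3ms, or the admin schedule
		 * would take over late */
		printf("next fire: %lld\n", min_ll(start, expires));
		return 0;
	}
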
@@ -639,10 +711,12 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 			 struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
+	struct sched_gate_list *oper, *admin, *new_admin;
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
 	struct tc_mqprio_qopt *mqprio = NULL;
-	int i, err, size;
+	int i, err, clockid;
+	unsigned long flags;
 	ktime_t start;
 
 	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
@@ -657,48 +731,64 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	if (err < 0)
 		return err;
 
-	/* A schedule with less than one entry is an error */
-	size = parse_taprio_opt(tb, q, extack);
-	if (size < 0)
-		return size;
+	new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
+	if (!new_admin) {
+		NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&new_admin->entries);
 
-	hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
-	q->advance_timer.function = advance_sched;
+	rcu_read_lock();
+	oper = rcu_dereference(q->oper_sched);
+	admin = rcu_dereference(q->admin_sched);
+	rcu_read_unlock();
 
-	switch (q->clockid) {
-	case CLOCK_REALTIME:
-		q->get_time = ktime_get_real;
-		break;
-	case CLOCK_MONOTONIC:
-		q->get_time = ktime_get;
-		break;
-	case CLOCK_BOOTTIME:
-		q->get_time = ktime_get_boottime;
-		break;
-	case CLOCK_TAI:
-		q->get_time = ktime_get_clocktai;
-		break;
-	default:
-		return -ENOTSUPP;
+	if (mqprio && (oper || admin)) {
+		NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
+		err = -ENOTSUPP;
+		goto free_sched;
 	}
 
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *dev_queue;
-		struct Qdisc *qdisc;
+	err = parse_taprio_schedule(tb, new_admin, extack);
+	if (err < 0)
+		goto free_sched;
 
-		dev_queue = netdev_get_tx_queue(dev, i);
-		qdisc = qdisc_create_dflt(dev_queue,
-					  &pfifo_qdisc_ops,
-					  TC_H_MAKE(TC_H_MAJ(sch->handle),
-						    TC_H_MIN(i + 1)),
-					  extack);
-		if (!qdisc)
-			return -ENOMEM;
+	if (new_admin->num_entries == 0) {
+		NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
+		err = -EINVAL;
+		goto free_sched;
+	}
 
-		if (i < dev->real_num_tx_queues)
-			qdisc_hash_add(qdisc, false);
+	if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+		clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
 
-		q->qdiscs[i] = qdisc;
+		/* We only support static clockids and we don't allow
+		 * for it to be modified after the first init.
+		 */
+		if (clockid < 0 ||
+		    (q->clockid != -1 && q->clockid != clockid)) {
+			NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported");
+			err = -ENOTSUPP;
+			goto free_sched;
+		}
+
+		q->clockid = clockid;
+	}
+
+	if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
+		err = -EINVAL;
+		goto free_sched;
+	}
+
+	taprio_set_picos_per_byte(dev, q);
+
+	/* Protects against enqueue()/dequeue() */
+	spin_lock_bh(qdisc_lock(sch));
+
+	if (!hrtimer_active(&q->advance_timer)) {
+		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
+		q->advance_timer.function = advance_sched;
 	}
 
 	if (mqprio) {
@@ -714,24 +804,60 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 					       mqprio->prio_tc_map[i]);
 	}
 
-	taprio_set_picos_per_byte(dev, q);
+	switch (q->clockid) {
+	case CLOCK_REALTIME:
+		q->get_time = ktime_get_real;
+		break;
+	case CLOCK_MONOTONIC:
+		q->get_time = ktime_get;
+		break;
+	case CLOCK_BOOTTIME:
+		q->get_time = ktime_get_boottime;
+		break;
+	case CLOCK_TAI:
+		q->get_time = ktime_get_clocktai;
+		break;
+	default:
+		NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+		err = -EINVAL;
+		goto unlock;
+	}
 
-	err = taprio_get_start_time(sch, &start);
+	err = taprio_get_start_time(sch, new_admin, &start);
 	if (err < 0) {
 		NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
-		return err;
+		goto unlock;
 	}
 
-	taprio_start_sched(sch, start);
+	setup_first_close_time(q, new_admin, start);
 
-	return 0;
+	/* Protects against advance_sched() */
+	spin_lock_irqsave(&q->current_entry_lock, flags);
+
+	taprio_start_sched(sch, start, new_admin);
+
+	rcu_assign_pointer(q->admin_sched, new_admin);
+	if (admin)
+		call_rcu(&admin->rcu, taprio_free_sched_cb);
+	new_admin = NULL;
+
+	spin_unlock_irqrestore(&q->current_entry_lock, flags);
+
+	err = 0;
+
+unlock:
+	spin_unlock_bh(qdisc_lock(sch));
+
+free_sched:
+	kfree(new_admin);
+
+	return err;
 }
 
 static void taprio_destroy(struct Qdisc *sch)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
-	struct sched_entry *entry, *n;
 	unsigned int i;
 
 	spin_lock(&taprio_list_lock);
@@ -750,10 +876,11 @@ static void taprio_destroy(struct Qdisc *sch)
 
 	netdev_set_num_tc(dev, 0);
 
-	list_for_each_entry_safe(entry, n, &q->entries, list) {
-		list_del(&entry->list);
-		kfree(entry);
-	}
+	if (q->oper_sched)
+		call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
+
+	if (q->admin_sched)
+		call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb);
 }
 
 static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
@@ -761,12 +888,12 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
+	int i;
 
-	INIT_LIST_HEAD(&q->entries);
 	spin_lock_init(&q->current_entry_lock);
 
-	/* We may overwrite the configuration later */
 	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
+	q->advance_timer.function = advance_sched;
 
 	q->root = sch;
 
@@ -796,6 +923,25 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
 	list_add(&q->taprio_list, &taprio_list);
 	spin_unlock(&taprio_list_lock);
 
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *dev_queue;
+		struct Qdisc *qdisc;
+
+		dev_queue = netdev_get_tx_queue(dev, i);
+		qdisc = qdisc_create_dflt(dev_queue,
+					  &pfifo_qdisc_ops,
+					  TC_H_MAKE(TC_H_MAJ(sch->handle),
+						    TC_H_MIN(i + 1)),
+					  extack);
+		if (!qdisc)
+			return -ENOMEM;
+
+		if (i < dev->real_num_tx_queues)
+			qdisc_hash_add(qdisc, false);
+
+		q->qdiscs[i] = qdisc;
+	}
+
 	return taprio_change(sch, opt, extack);
 }
 
@@ -867,15 +1013,55 @@ nla_put_failure:
 	return -1;
 }
 
+static int dump_schedule(struct sk_buff *msg,
+			 const struct sched_gate_list *root)
+{
+	struct nlattr *entry_list;
+	struct sched_entry *entry;
+
+	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
+			root->base_time, TCA_TAPRIO_PAD))
+		return -1;
+
+	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
+			root->cycle_time, TCA_TAPRIO_PAD))
+		return -1;
+
+	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
+			root->cycle_time_extension, TCA_TAPRIO_PAD))
+		return -1;
+
+	entry_list = nla_nest_start_noflag(msg,
+					   TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
+	if (!entry_list)
+		goto error_nest;
+
+	list_for_each_entry(entry, &root->entries, list) {
+		if (dump_entry(msg, entry) < 0)
+			goto error_nest;
+	}
+
+	nla_nest_end(msg, entry_list);
+	return 0;
+
+error_nest:
+	nla_nest_cancel(msg, entry_list);
+	return -1;
+}
+
 static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
+	struct sched_gate_list *oper, *admin;
 	struct tc_mqprio_qopt opt = { 0 };
-	struct nlattr *nest, *entry_list;
-	struct sched_entry *entry;
+	struct nlattr *nest, *sched_nest;
 	unsigned int i;
 
+	rcu_read_lock();
+	oper = rcu_dereference(q->oper_sched);
+	admin = rcu_dereference(q->admin_sched);
+
 	opt.num_tc = netdev_get_num_tc(dev);
 	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
 
@@ -886,35 +1072,41 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (!nest)
-		return -ENOSPC;
+		goto start_error;
 
 	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
 		goto options_error;
 
-	if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
-			q->base_time, TCA_TAPRIO_PAD))
-		goto options_error;
-
 	if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
 		goto options_error;
 
-	entry_list = nla_nest_start_noflag(skb,
-					   TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
-	if (!entry_list)
+	if (oper && dump_schedule(skb, oper))
 		goto options_error;
 
-	list_for_each_entry(entry, &q->entries, list) {
-		if (dump_entry(skb, entry) < 0)
-			goto options_error;
-	}
+	if (!admin)
+		goto done;
+
+	sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
+
+	if (dump_schedule(skb, admin))
+		goto admin_error;
 
-	nla_nest_end(skb, entry_list);
+	nla_nest_end(skb, sched_nest);
+
+done:
+	rcu_read_unlock();
 
 	return nla_nest_end(skb, nest);
 
+admin_error:
+	nla_nest_cancel(skb, sched_nest);
+
 options_error:
 	nla_nest_cancel(skb, nest);
-	return -1;
+
+start_error:
+	rcu_read_unlock();
+	return -ENOSPC;
 }
 
 static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
@@ -1001,6 +1193,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
 	.id		= "taprio",
 	.priv_size	= sizeof(struct taprio_sched),
 	.init		= taprio_init,
+	.change		= taprio_change,
 	.destroy	= taprio_destroy,
 	.peek		= taprio_peek,
 	.dequeue	= taprio_dequeue,
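
With the .change operation wired up and the two new attributes parsed, a schedule can be replaced while the qdisc is running: the new schedule becomes the admin schedule and takes over when its base-time is reached. A sketch of the intended usage with iproute2's tc (interface name, handle and times are illustrative, and this assumes a tc build that already understands cycle-time and cycle-time-extension):

	# initial (operational) schedule
	tc qdisc replace dev eth0 parent root handle 100 taprio \
		num_tc 3 \
		map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \
		queues 1@0 1@1 2@2 \
		base-time 1528743495910289987 \
		sched-entry S 01 300000 \
		sched-entry S 02 300000 \
		sched-entry S 04 400000 \
		clockid CLOCK_TAI

	# admin schedule: installed now, takes over at its base-time
	tc qdisc change dev eth0 parent root handle 100 taprio \
		base-time 1528743595910289987 \
		sched-entry S 02 500000 \
		sched-entry S 0f 500000 \
		cycle-time 1000000 \
		cycle-time-extension 100000

Note that the change command carries no mqprio arguments and no clockid: per the checks added in taprio_change(), changing the traffic mapping or the clockid of a running schedule is rejected with -ENOTSUPP.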