diff options
author | David S. Miller <davem@davemloft.net> | 2008-07-19 01:50:15 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-07-19 01:50:15 -0400 |
commit | 3072367300aa8c779e3a14ee8e89de079e90f3ad (patch) | |
tree | 7f74c5b8fdb300532fbbc83ba00d6d1d17af020e | |
parent | 72b25a913ed9b1ab49c7022adaf3f271a65ea219 (diff) |
pkt_sched: Manage qdisc list inside of root qdisc.
Idea is from Patrick McHardy.
Instead of managing the list of qdiscs on the device level, manage it
in the root qdisc of a netdev_queue. This solves all kinds of
visibility issues during qdisc destruction.
The way to iterate over all qdiscs of a netdev_queue is to visit
the netdev_queue->qdisc, and then traverse its list.
The only special case is to ignore builtin qdiscs at the root when
dumping or doing a qdisc_lookup(). That was not needed previously
because builtin qdiscs were not added to the device's qdisc_list.
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/netdevice.h | 2 | ||||
-rw-r--r-- | net/core/dev.c | 2 | ||||
-rw-r--r-- | net/sched/sch_api.c | 175 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 10 |
4 files changed, 133 insertions, 56 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c5a68850114..812bcd8b4363 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -636,8 +636,6 @@ struct net_device | |||
636 | unsigned int real_num_tx_queues; | 636 | unsigned int real_num_tx_queues; |
637 | 637 | ||
638 | unsigned long tx_queue_len; /* Max frames per queue allowed */ | 638 | unsigned long tx_queue_len; /* Max frames per queue allowed */ |
639 | spinlock_t qdisc_list_lock; | ||
640 | struct list_head qdisc_list; | ||
641 | 639 | ||
642 | /* | 640 | /* |
643 | * One part is mostly used on xmit path (device) | 641 | * One part is mostly used on xmit path (device) |
diff --git a/net/core/dev.c b/net/core/dev.c index e54acde839da..065b9817e209 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -3888,8 +3888,6 @@ int register_netdevice(struct net_device *dev) | |||
3888 | net = dev_net(dev); | 3888 | net = dev_net(dev); |
3889 | 3889 | ||
3890 | spin_lock_init(&dev->addr_list_lock); | 3890 | spin_lock_init(&dev->addr_list_lock); |
3891 | spin_lock_init(&dev->qdisc_list_lock); | ||
3892 | INIT_LIST_HEAD(&dev->qdisc_list); | ||
3893 | netdev_init_queue_locks(dev); | 3891 | netdev_init_queue_locks(dev); |
3894 | 3892 | ||
3895 | dev->iflink = -1; | 3893 | dev->iflink = -1; |
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b3ef8307204e..fb43731c9860 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c | |||
@@ -185,11 +185,20 @@ EXPORT_SYMBOL(unregister_qdisc); | |||
185 | 185 | ||
186 | struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) | 186 | struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) |
187 | { | 187 | { |
188 | struct Qdisc *q; | 188 | unsigned int i; |
189 | |||
190 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
191 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); | ||
192 | struct Qdisc *q, *txq_root = txq->qdisc; | ||
189 | 193 | ||
190 | list_for_each_entry(q, &dev->qdisc_list, list) { | 194 | if (!(txq_root->flags & TCQ_F_BUILTIN) && |
191 | if (q->handle == handle) | 195 | txq_root->handle == handle) |
192 | return q; | 196 | return txq_root; |
197 | |||
198 | list_for_each_entry(q, &txq_root->list, list) { | ||
199 | if (q->handle == handle) | ||
200 | return q; | ||
201 | } | ||
193 | } | 202 | } |
194 | return NULL; | 203 | return NULL; |
195 | } | 204 | } |
@@ -676,9 +685,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, | |||
676 | goto err_out3; | 685 | goto err_out3; |
677 | } | 686 | } |
678 | } | 687 | } |
679 | spin_lock_bh(&dev->qdisc_list_lock); | 688 | if (parent) |
680 | list_add_tail(&sch->list, &dev->qdisc_list); | 689 | list_add_tail(&sch->list, &dev_queue->qdisc->list); |
681 | spin_unlock_bh(&dev->qdisc_list_lock); | ||
682 | 690 | ||
683 | return sch; | 691 | return sch; |
684 | } | 692 | } |
@@ -1037,13 +1045,57 @@ err_out: | |||
1037 | return -EINVAL; | 1045 | return -EINVAL; |
1038 | } | 1046 | } |
1039 | 1047 | ||
1048 | static bool tc_qdisc_dump_ignore(struct Qdisc *q) | ||
1049 | { | ||
1050 | return (q->flags & TCQ_F_BUILTIN) ? true : false; | ||
1051 | } | ||
1052 | |||
1053 | static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, | ||
1054 | struct netlink_callback *cb, | ||
1055 | int *q_idx_p, int s_q_idx) | ||
1056 | { | ||
1057 | int ret = 0, q_idx = *q_idx_p; | ||
1058 | struct Qdisc *q; | ||
1059 | |||
1060 | if (!root) | ||
1061 | return 0; | ||
1062 | |||
1063 | q = root; | ||
1064 | if (q_idx < s_q_idx) { | ||
1065 | q_idx++; | ||
1066 | } else { | ||
1067 | if (!tc_qdisc_dump_ignore(q) && | ||
1068 | tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, | ||
1069 | cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) | ||
1070 | goto done; | ||
1071 | q_idx++; | ||
1072 | } | ||
1073 | list_for_each_entry(q, &root->list, list) { | ||
1074 | if (q_idx < s_q_idx) { | ||
1075 | q_idx++; | ||
1076 | continue; | ||
1077 | } | ||
1078 | if (!tc_qdisc_dump_ignore(q) && | ||
1079 | tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, | ||
1080 | cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) | ||
1081 | goto done; | ||
1082 | q_idx++; | ||
1083 | } | ||
1084 | |||
1085 | out: | ||
1086 | *q_idx_p = q_idx; | ||
1087 | return ret; | ||
1088 | done: | ||
1089 | ret = -1; | ||
1090 | goto out; | ||
1091 | } | ||
1092 | |||
1040 | static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) | 1093 | static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) |
1041 | { | 1094 | { |
1042 | struct net *net = sock_net(skb->sk); | 1095 | struct net *net = sock_net(skb->sk); |
1043 | int idx, q_idx; | 1096 | int idx, q_idx; |
1044 | int s_idx, s_q_idx; | 1097 | int s_idx, s_q_idx; |
1045 | struct net_device *dev; | 1098 | struct net_device *dev; |
1046 | struct Qdisc *q; | ||
1047 | 1099 | ||
1048 | if (net != &init_net) | 1100 | if (net != &init_net) |
1049 | return 0; | 1101 | return 0; |
@@ -1053,21 +1105,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) | |||
1053 | read_lock(&dev_base_lock); | 1105 | read_lock(&dev_base_lock); |
1054 | idx = 0; | 1106 | idx = 0; |
1055 | for_each_netdev(&init_net, dev) { | 1107 | for_each_netdev(&init_net, dev) { |
1108 | struct netdev_queue *dev_queue; | ||
1109 | |||
1056 | if (idx < s_idx) | 1110 | if (idx < s_idx) |
1057 | goto cont; | 1111 | goto cont; |
1058 | if (idx > s_idx) | 1112 | if (idx > s_idx) |
1059 | s_q_idx = 0; | 1113 | s_q_idx = 0; |
1060 | q_idx = 0; | 1114 | q_idx = 0; |
1061 | list_for_each_entry(q, &dev->qdisc_list, list) { | 1115 | |
1062 | if (q_idx < s_q_idx) { | 1116 | dev_queue = netdev_get_tx_queue(dev, 0); |
1063 | q_idx++; | 1117 | if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) |
1064 | continue; | 1118 | goto done; |
1065 | } | 1119 | |
1066 | if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, | 1120 | dev_queue = &dev->rx_queue; |
1067 | cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) | 1121 | if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) |
1068 | goto done; | 1122 | goto done; |
1069 | q_idx++; | 1123 | |
1070 | } | ||
1071 | cont: | 1124 | cont: |
1072 | idx++; | 1125 | idx++; |
1073 | } | 1126 | } |
@@ -1285,15 +1338,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk | |||
1285 | a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); | 1338 | a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); |
1286 | } | 1339 | } |
1287 | 1340 | ||
1341 | static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, | ||
1342 | struct tcmsg *tcm, struct netlink_callback *cb, | ||
1343 | int *t_p, int s_t) | ||
1344 | { | ||
1345 | struct qdisc_dump_args arg; | ||
1346 | |||
1347 | if (tc_qdisc_dump_ignore(q) || | ||
1348 | *t_p < s_t || !q->ops->cl_ops || | ||
1349 | (tcm->tcm_parent && | ||
1350 | TC_H_MAJ(tcm->tcm_parent) != q->handle)) { | ||
1351 | (*t_p)++; | ||
1352 | return 0; | ||
1353 | } | ||
1354 | if (*t_p > s_t) | ||
1355 | memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); | ||
1356 | arg.w.fn = qdisc_class_dump; | ||
1357 | arg.skb = skb; | ||
1358 | arg.cb = cb; | ||
1359 | arg.w.stop = 0; | ||
1360 | arg.w.skip = cb->args[1]; | ||
1361 | arg.w.count = 0; | ||
1362 | q->ops->cl_ops->walk(q, &arg.w); | ||
1363 | cb->args[1] = arg.w.count; | ||
1364 | if (arg.w.stop) | ||
1365 | return -1; | ||
1366 | (*t_p)++; | ||
1367 | return 0; | ||
1368 | } | ||
1369 | |||
1370 | static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, | ||
1371 | struct tcmsg *tcm, struct netlink_callback *cb, | ||
1372 | int *t_p, int s_t) | ||
1373 | { | ||
1374 | struct Qdisc *q; | ||
1375 | |||
1376 | if (!root) | ||
1377 | return 0; | ||
1378 | |||
1379 | if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) | ||
1380 | return -1; | ||
1381 | |||
1382 | list_for_each_entry(q, &root->list, list) { | ||
1383 | if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) | ||
1384 | return -1; | ||
1385 | } | ||
1386 | |||
1387 | return 0; | ||
1388 | } | ||
1389 | |||
1288 | static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) | 1390 | static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) |
1289 | { | 1391 | { |
1392 | struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); | ||
1290 | struct net *net = sock_net(skb->sk); | 1393 | struct net *net = sock_net(skb->sk); |
1291 | int t; | 1394 | struct netdev_queue *dev_queue; |
1292 | int s_t; | ||
1293 | struct net_device *dev; | 1395 | struct net_device *dev; |
1294 | struct Qdisc *q; | 1396 | int t, s_t; |
1295 | struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); | ||
1296 | struct qdisc_dump_args arg; | ||
1297 | 1397 | ||
1298 | if (net != &init_net) | 1398 | if (net != &init_net) |
1299 | return 0; | 1399 | return 0; |
@@ -1306,28 +1406,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) | |||
1306 | s_t = cb->args[0]; | 1406 | s_t = cb->args[0]; |
1307 | t = 0; | 1407 | t = 0; |
1308 | 1408 | ||
1309 | list_for_each_entry(q, &dev->qdisc_list, list) { | 1409 | dev_queue = netdev_get_tx_queue(dev, 0); |
1310 | if (t < s_t || !q->ops->cl_ops || | 1410 | if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) |
1311 | (tcm->tcm_parent && | 1411 | goto done; |
1312 | TC_H_MAJ(tcm->tcm_parent) != q->handle)) { | 1412 | |
1313 | t++; | 1413 | dev_queue = &dev->rx_queue; |
1314 | continue; | 1414 | if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) |
1315 | } | 1415 | goto done; |
1316 | if (t > s_t) | ||
1317 | memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); | ||
1318 | arg.w.fn = qdisc_class_dump; | ||
1319 | arg.skb = skb; | ||
1320 | arg.cb = cb; | ||
1321 | arg.w.stop = 0; | ||
1322 | arg.w.skip = cb->args[1]; | ||
1323 | arg.w.count = 0; | ||
1324 | q->ops->cl_ops->walk(q, &arg.w); | ||
1325 | cb->args[1] = arg.w.count; | ||
1326 | if (arg.w.stop) | ||
1327 | break; | ||
1328 | t++; | ||
1329 | } | ||
1330 | 1416 | ||
1417 | done: | ||
1331 | cb->args[0] = t; | 1418 | cb->args[0] = t; |
1332 | 1419 | ||
1333 | dev_put(dev); | 1420 | dev_put(dev); |
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e244c462e6bd..14cc443d0490 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -480,15 +480,12 @@ static void __qdisc_destroy(struct rcu_head *head) | |||
480 | 480 | ||
481 | void qdisc_destroy(struct Qdisc *qdisc) | 481 | void qdisc_destroy(struct Qdisc *qdisc) |
482 | { | 482 | { |
483 | struct net_device *dev = qdisc_dev(qdisc); | ||
484 | |||
485 | if (qdisc->flags & TCQ_F_BUILTIN || | 483 | if (qdisc->flags & TCQ_F_BUILTIN || |
486 | !atomic_dec_and_test(&qdisc->refcnt)) | 484 | !atomic_dec_and_test(&qdisc->refcnt)) |
487 | return; | 485 | return; |
488 | 486 | ||
489 | spin_lock_bh(&dev->qdisc_list_lock); | 487 | if (qdisc->parent) |
490 | list_del(&qdisc->list); | 488 | list_del(&qdisc->list); |
491 | spin_unlock_bh(&dev->qdisc_list_lock); | ||
492 | 489 | ||
493 | call_rcu(&qdisc->q_rcu, __qdisc_destroy); | 490 | call_rcu(&qdisc->q_rcu, __qdisc_destroy); |
494 | } | 491 | } |
@@ -520,9 +517,6 @@ static void attach_one_default_qdisc(struct net_device *dev, | |||
520 | printk(KERN_INFO "%s: activation failed\n", dev->name); | 517 | printk(KERN_INFO "%s: activation failed\n", dev->name); |
521 | return; | 518 | return; |
522 | } | 519 | } |
523 | spin_lock_bh(&dev->qdisc_list_lock); | ||
524 | list_add_tail(&qdisc->list, &dev->qdisc_list); | ||
525 | spin_unlock_bh(&dev->qdisc_list_lock); | ||
526 | } else { | 520 | } else { |
527 | qdisc = &noqueue_qdisc; | 521 | qdisc = &noqueue_qdisc; |
528 | } | 522 | } |