aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alexander Duyck <alexander.h.duyck@intel.com> 2018-07-09 12:19:38 -0400
committer Jeff Kirsher <jeffrey.t.kirsher@intel.com> 2018-07-09 15:11:23 -0400
commit ffcfe25bb50f27395e15fa999f1a7eb769f55360 (patch)
tree deab1921aae18f9e6c558f23d57fb76ac7c8b5d0
parent d7be97756f8a4874ac17003de5843c742dd84153 (diff)
net: Add support for subordinate device traffic classes
This patch is meant to provide the basic tools needed to allow us to create subordinate device traffic classes. The general idea here is to allow subdividing the queues of a device into queue groups accessible through an upper device such as a macvlan. The intent is to enforce that an upper device has to be a single queue device, ideally with IFF_NO_QUEUE set. With that being the case we can pretty much guarantee that the tc_to_txq mappings and XPS maps for the upper device are unused. As such we could reuse those in order to support subdividing the lower device and distributing those queues between the subordinate devices. In order to distinguish between a device with a regular set of traffic classes and a device carrying subordinate traffic classes I changed num_tc from a u8 to an s16 value and use the negative values to represent the subordinate pool values. So starting at -1 and running to -32767 (netdev_set_sb_channel() accepts channels 1 - 32767) we can encode those as pool values, and the existing values of 0 to 15 can be maintained. Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
-rw-r--r-- include/linux/netdevice.h 16
-rw-r--r-- net/core/dev.c 89
-rw-r--r-- net/core/net-sysfs.c 21
3 files changed, 124 insertions, 2 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b683971e500d..b1ff77276bc4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -575,6 +575,9 @@ struct netdev_queue {
575 * (/sys/class/net/DEV/Q/trans_timeout) 575 * (/sys/class/net/DEV/Q/trans_timeout)
576 */ 576 */
577 unsigned long trans_timeout; 577 unsigned long trans_timeout;
578
579 /* Subordinate device that the queue has been assigned to */
580 struct net_device *sb_dev;
578/* 581/*
579 * write-mostly part 582 * write-mostly part
580 */ 583 */
@@ -1991,7 +1994,7 @@ struct net_device {
1991#ifdef CONFIG_DCB 1994#ifdef CONFIG_DCB
1992 const struct dcbnl_rtnl_ops *dcbnl_ops; 1995 const struct dcbnl_rtnl_ops *dcbnl_ops;
1993#endif 1996#endif
1994 u8 num_tc; 1997 s16 num_tc;
1995 struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; 1998 struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
1996 u8 prio_tc_map[TC_BITMASK + 1]; 1999 u8 prio_tc_map[TC_BITMASK + 1];
1997 2000
@@ -2045,6 +2048,17 @@ int netdev_get_num_tc(struct net_device *dev)
2045 return dev->num_tc; 2048 return dev->num_tc;
2046} 2049}
2047 2050
2051void netdev_unbind_sb_channel(struct net_device *dev,
2052 struct net_device *sb_dev);
2053int netdev_bind_sb_channel_queue(struct net_device *dev,
2054 struct net_device *sb_dev,
2055 u8 tc, u16 count, u16 offset);
2056int netdev_set_sb_channel(struct net_device *dev, u16 channel);
2057static inline int netdev_get_sb_channel(struct net_device *dev)
2058{
2059 return max_t(int, -dev->num_tc, 0);
2060}
2061
2048static inline 2062static inline
2049struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, 2063struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
2050 unsigned int index) 2064 unsigned int index)
diff --git a/net/core/dev.c b/net/core/dev.c
index 89825c1eccdc..cc1d6bba017a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2067,11 +2067,13 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
2067 struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; 2067 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
2068 int i; 2068 int i;
2069 2069
2070 /* walk through the TCs and see if it falls into any of them */
2070 for (i = 0; i < TC_MAX_QUEUE; i++, tc++) { 2071 for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
2071 if ((txq - tc->offset) < tc->count) 2072 if ((txq - tc->offset) < tc->count)
2072 return i; 2073 return i;
2073 } 2074 }
2074 2075
2076 /* didn't find it, just return -1 to indicate no match */
2075 return -1; 2077 return -1;
2076 } 2078 }
2077 2079
@@ -2260,7 +2262,14 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
2260 unsigned int nr_ids; 2262 unsigned int nr_ids;
2261 2263
2262 if (dev->num_tc) { 2264 if (dev->num_tc) {
2265 /* Do not allow XPS on subordinate device directly */
2263 num_tc = dev->num_tc; 2266 num_tc = dev->num_tc;
2267 if (num_tc < 0)
2268 return -EINVAL;
2269
2270 /* If queue belongs to subordinate dev use its map */
2271 dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
2272
2264 tc = netdev_txq_to_tc(dev, index); 2273 tc = netdev_txq_to_tc(dev, index);
2265 if (tc < 0) 2274 if (tc < 0)
2266 return -EINVAL; 2275 return -EINVAL;
@@ -2448,11 +2457,25 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
2448EXPORT_SYMBOL(netif_set_xps_queue); 2457EXPORT_SYMBOL(netif_set_xps_queue);
2449 2458
2450#endif 2459#endif
2460static void netdev_unbind_all_sb_channels(struct net_device *dev)
2461{
2462 struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
2463
2464 /* Unbind any subordinate channels */
2465 while (txq-- != &dev->_tx[0]) {
2466 if (txq->sb_dev)
2467 netdev_unbind_sb_channel(dev, txq->sb_dev);
2468 }
2469}
2470
2451void netdev_reset_tc(struct net_device *dev) 2471void netdev_reset_tc(struct net_device *dev)
2452{ 2472{
2453#ifdef CONFIG_XPS 2473#ifdef CONFIG_XPS
2454 netif_reset_xps_queues_gt(dev, 0); 2474 netif_reset_xps_queues_gt(dev, 0);
2455#endif 2475#endif
2476 netdev_unbind_all_sb_channels(dev);
2477
2478 /* Reset TC configuration of device */
2456 dev->num_tc = 0; 2479 dev->num_tc = 0;
2457 memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); 2480 memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
2458 memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); 2481 memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
@@ -2481,11 +2504,77 @@ int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
2481#ifdef CONFIG_XPS 2504#ifdef CONFIG_XPS
2482 netif_reset_xps_queues_gt(dev, 0); 2505 netif_reset_xps_queues_gt(dev, 0);
2483#endif 2506#endif
2507 netdev_unbind_all_sb_channels(dev);
2508
2484 dev->num_tc = num_tc; 2509 dev->num_tc = num_tc;
2485 return 0; 2510 return 0;
2486} 2511}
2487EXPORT_SYMBOL(netdev_set_num_tc); 2512EXPORT_SYMBOL(netdev_set_num_tc);
2488 2513
2514void netdev_unbind_sb_channel(struct net_device *dev,
2515 struct net_device *sb_dev)
2516{
2517 struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
2518
2519#ifdef CONFIG_XPS
2520 netif_reset_xps_queues_gt(sb_dev, 0);
2521#endif
2522 memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
2523 memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
2524
2525 while (txq-- != &dev->_tx[0]) {
2526 if (txq->sb_dev == sb_dev)
2527 txq->sb_dev = NULL;
2528 }
2529}
2530EXPORT_SYMBOL(netdev_unbind_sb_channel);
2531
2532int netdev_bind_sb_channel_queue(struct net_device *dev,
2533 struct net_device *sb_dev,
2534 u8 tc, u16 count, u16 offset)
2535{
2536 /* Make certain the sb_dev and dev are already configured */
2537 if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
2538 return -EINVAL;
2539
2540 /* We cannot hand out queues we don't have */
2541 if ((offset + count) > dev->real_num_tx_queues)
2542 return -EINVAL;
2543
2544 /* Record the mapping */
2545 sb_dev->tc_to_txq[tc].count = count;
2546 sb_dev->tc_to_txq[tc].offset = offset;
2547
2548 /* Provide a way for Tx queue to find the tc_to_txq map or
2549 * XPS map for itself.
2550 */
2551 while (count--)
2552 netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
2553
2554 return 0;
2555}
2556EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
2557
2558int netdev_set_sb_channel(struct net_device *dev, u16 channel)
2559{
2560 /* Do not use a multiqueue device to represent a subordinate channel */
2561 if (netif_is_multiqueue(dev))
2562 return -ENODEV;
2563
2564 /* We allow channels 1 - 32767 to be used for subordinate channels.
2565 * Channel 0 is meant to be "native" mode and used only to represent
2566 * the main root device. We allow writing 0 to reset the device back
2567 * to normal mode after being used as a subordinate channel.
2568 */
2569 if (channel > S16_MAX)
2570 return -EINVAL;
2571
2572 dev->num_tc = -channel;
2573
2574 return 0;
2575}
2576EXPORT_SYMBOL(netdev_set_sb_channel);
2577
2489/* 2578/*
2490 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 2579 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
2491 * greater than real_num_tx_queues stale skbs on the qdisc must be flushed. 2580 * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index dce3ae0fbca2..ffa1d18f2c2c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1054,11 +1054,23 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
1054 return -ENOENT; 1054 return -ENOENT;
1055 1055
1056 index = get_netdev_queue_index(queue); 1056 index = get_netdev_queue_index(queue);
1057
1058 /* If queue belongs to subordinate dev use its TC mapping */
1059 dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
1060
1057 tc = netdev_txq_to_tc(dev, index); 1061 tc = netdev_txq_to_tc(dev, index);
1058 if (tc < 0) 1062 if (tc < 0)
1059 return -EINVAL; 1063 return -EINVAL;
1060 1064
1061 return sprintf(buf, "%u\n", tc); 1065 /* We can report the traffic class one of two ways:
1066 * Subordinate device traffic classes are reported with the traffic
1067 * class first, and then the subordinate class so for example TC0 on
1068 * subordinate device 2 will be reported as "0-2". If the queue
1069 * belongs to the root device it will be reported with just the
1070 * traffic class, so just "0" for TC 0 for example.
1071 */
1072 return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) :
1073 sprintf(buf, "%u\n", tc);
1062} 1074}
1063 1075
1064#ifdef CONFIG_XPS 1076#ifdef CONFIG_XPS
@@ -1225,7 +1237,14 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
1225 index = get_netdev_queue_index(queue); 1237 index = get_netdev_queue_index(queue);
1226 1238
1227 if (dev->num_tc) { 1239 if (dev->num_tc) {
1240 /* Do not allow XPS on subordinate device directly */
1228 num_tc = dev->num_tc; 1241 num_tc = dev->num_tc;
1242 if (num_tc < 0)
1243 return -EINVAL;
1244
1245 /* If queue belongs to subordinate dev use its map */
1246 dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
1247
1229 tc = netdev_txq_to_tc(dev, index); 1248 tc = netdev_txq_to_tc(dev, index);
1230 if (tc < 0) 1249 if (tc < 0)
1231 return -EINVAL; 1250 return -EINVAL;