path: root/net/core/dev.c
author    John Fastabend <john.r.fastabend@intel.com>  2011-01-17 03:06:04 -0500
committer David S. Miller <davem@davemloft.net>        2011-01-20 02:31:10 -0500
commit    4f57c087de9b46182545676d2c594120a20f2e58 (patch)
tree      bb2ed64efcafbf4d8fe2f625b432b554d05fdc47 /net/core/dev.c
parent    e7ed828f10bd89a28f821ae7f20e691704d61923 (diff)
net: implement mechanism for HW based QOS
This patch provides a mechanism for lower layer devices to steer traffic using skb->priority to tx queues. This allows hardware-based QOS schemes to use the default qdisc without incurring the penalties related to global state and the qdisc lock, while still delivering skbs to the correct tx ring and avoiding the head-of-line blocking that results from shuffling in the LLD. Finally, all the goodness from txq caching and xps/rps can still be leveraged.

Many drivers and much hardware exist with the ability to implement QOS schemes in the hardware, but currently these drivers tend to rely on firmware to reroute specific traffic, a driver-specific select_queue, or the queue_mapping action in the qdisc. With a driver-specific select_queue, drivers need to be updated for each and every traffic type, and we lose the goodness of much of the upstream work. Firmware solutions are inherently inflexible. And finally, if admins are expected to build a qdisc and filter rules to steer traffic, this requires knowledge of how the hardware is currently configured: the number of tx queues and the queue offsets may change depending on resources. This approach also incurs all the overhead of a qdisc with filters.

With the mechanism in this patch, users can set the skb priority using expected methods, i.e. setsockopt(), or the stack can set the priority directly. The skb will then be steered to the correct tx queues, aligned with the hardware QOS traffic classes. In the normal case, with a single traffic class and all queues in this class, everything works as is until the LLD enables multiple tcs.

To steer the skb we use the lower 4 bits of the priority, allowing the hardware to configure up to 15 distinct classes of traffic. This is expected to be sufficient for most applications; at any rate, it is more than the 802.1Q spec designates and is equal to the number of prio bands currently implemented in the default qdisc.

This, in conjunction with a userspace application such as lldpad, can be used to implement the 802.1Q transmission selection algorithms, one of which is the enhanced transmission selection (ETS) algorithm currently used for DCB.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
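For context on the setsockopt() path mentioned above, here is a minimal userspace sketch (not part of this patch; the socket type and priority value are illustrative) showing how an application tags its traffic so the stack fills in skb->priority:

	/*
	 * Minimal sketch: tagging a socket's traffic with a priority so
	 * the stack can steer it to a hardware traffic class.
	 * SO_PRIORITY is the standard Linux knob behind skb->priority;
	 * the value 5 is purely illustrative.
	 */
	#include <stdio.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main(void)
	{
		int prio = 5;	/* lower 4 bits select the tc via prio_tc_map */
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (fd < 0) {
			perror("socket");
			return 1;
		}
		/* Every skb sent on this socket now carries skb->priority = 5 */
		if (setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)) < 0) {
			perror("setsockopt(SO_PRIORITY)");
			close(fd);
			return 1;
		}
		/* ... sendto()/sendmsg() as usual ... */
		close(fd);
		return 0;
	}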
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--	net/core/dev.c	55
1 file changed, 54 insertions(+), 1 deletion(-)
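A note on the queue selection arithmetic the diff below introduces in __skb_tx_hash(): ((u64)hash * qcount) >> 32 scales a 32-bit hash uniformly onto [0, qcount), and qoffset then shifts the result into the queue range owned by the traffic class. A standalone sketch of that mapping (values illustrative, not part of the patch):

	/* Demonstrates the multiply-shift hash-to-queue mapping. */
	#include <stdint.h>
	#include <stdio.h>

	static uint16_t pick_queue(uint32_t hash, uint16_t qoffset, uint16_t qcount)
	{
		/* Scale hash into [0, qcount), then shift into the tc's range */
		return (uint16_t)(((uint64_t)hash * qcount) >> 32) + qoffset;
	}

	int main(void)
	{
		/* Suppose tc1 owns queues 4..7: offset 4, count 4 */
		printf("%u\n", (unsigned)pick_queue(0x00000000u, 4, 4)); /* -> 4 */
		printf("%u\n", (unsigned)pick_queue(0x80000000u, 4, 4)); /* -> 6 */
		printf("%u\n", (unsigned)pick_queue(0xffffffffu, 4, 4)); /* -> 7 */
		return 0;
	}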
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b85d4ae981f..8b1d886ed23b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1593,6 +1593,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this verify the tc mapping remains valid and if
+ * not NULL the mapping. With no priorities mapping to this
+ * offset/count pair it will no longer be used. In the worst case TC0
+ * is invalid nothing can be done so disable priority mappings. It is
+ * expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+	int i;
+	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+	/* If TC0 is invalidated disable TC mapping */
+	if (tc->offset + tc->count > txq) {
+		pr_warning("Number of in use tx queues changed "
+			   "invalidating tc mappings. Priority "
+			   "traffic classification disabled!\n");
+		dev->num_tc = 0;
+		return;
+	}
+
+	/* Invalidated prio to tc mappings set to TC0 */
+	for (i = 1; i < TC_BITMASK + 1; i++) {
+		int q = netdev_get_prio_tc_map(dev, i);
+
+		tc = &dev->tc_to_txq[q];
+		if (tc->offset + tc->count > txq) {
+			pr_warning("Number of in use tx queues "
+				   "changed. Priority %i to tc "
+				   "mapping %i is no longer valid "
+				   "setting map to 0\n",
+				   i, q);
+			netdev_set_prio_tc_map(dev, i, 0);
+		}
+	}
+}
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1612,6 +1654,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (rc)
 			return rc;
 
+		if (dev->num_tc)
+			netif_setup_tc(dev, txq);
+
 		if (txq < dev->real_num_tx_queues)
 			qdisc_reset_all_tx_gt(dev, txq);
 	}
@@ -2161,6 +2206,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 			  unsigned int num_tx_queues)
 {
 	u32 hash;
+	u16 qoffset = 0;
+	u16 qcount = num_tx_queues;
 
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
@@ -2169,13 +2216,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		return hash;
 	}
 
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		qoffset = dev->tc_to_txq[tc].offset;
+		qcount = dev->tc_to_txq[tc].count;
+	}
+
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
 		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);
 
-	return (u16) (((u64) hash * num_tx_queues) >> 32);
+	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
 }
 EXPORT_SYMBOL(__skb_tx_hash);
 
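For completeness, a hedged sketch of the driver side. The dev.c changes above only consume the tc_to_txq[] and prio-to-tc state; the helpers assumed below (netdev_set_num_tc(), netdev_set_tc_queue(), netdev_set_prio_tc_map()) are introduced alongside this change in include/linux/netdevice.h, which is outside this diffstat. The 2-class layout and priority split are purely illustrative:

	#include <linux/netdevice.h>

	/*
	 * Hypothetical driver setup: publish two traffic classes over an
	 * 8-queue device so the stack's __skb_tx_hash() steers by
	 * skb->priority. Queue counts and the priority split are
	 * illustrative, not taken from any real driver.
	 */
	static int example_setup_tc(struct net_device *dev)
	{
		int prio, err;

		err = netdev_set_num_tc(dev, 2);	/* two traffic classes */
		if (err)
			return err;

		/* tc0 owns queues 0-3, tc1 owns queues 4-7 */
		netdev_set_tc_queue(dev, 0, 4, 0);
		netdev_set_tc_queue(dev, 1, 4, 4);

		/* priorities 0-3 -> tc0, priorities 4-7 -> tc1 */
		for (prio = 0; prio <= 7; prio++)
			netdev_set_prio_tc_map(dev, prio, prio < 4 ? 0 : 1);

		return 0;
	}

If the device later shrinks its queue count, netif_set_real_num_tx_queues() calls the netif_setup_tc() added above, which falls back to TC0 for any priority whose offset/count pair no longer fits.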