path: root/include/linux/netdevice.h
author	John Fastabend <john.r.fastabend@intel.com>	2011-01-17 03:06:04 -0500
committer	David S. Miller <davem@davemloft.net>	2011-01-20 02:31:10 -0500
commit	4f57c087de9b46182545676d2c594120a20f2e58 (patch)
tree	bb2ed64efcafbf4d8fe2f625b432b554d05fdc47 /include/linux/netdevice.h
parent	e7ed828f10bd89a28f821ae7f20e691704d61923 (diff)
net: implement mechanism for HW based QOS
This patch provides a mechanism for lower layer devices to steer traffic to tx queues using skb->priority. This allows hardware based QOS schemes to use the default qdisc without incurring the penalties related to global state and the qdisc lock, while reliably receiving skbs on the correct tx ring to avoid the head of line blocking that results from shuffling in the LLD. Finally, all the goodness from txq caching and xps/rps can still be leveraged.

Many drivers and devices exist with the ability to implement QOS schemes in the hardware, but currently these drivers tend to rely on firmware to reroute specific traffic, a driver specific select_queue, or the queue_mapping action in the qdisc.

By using select_queue for this, drivers need to be updated for each and every traffic type, and we lose the goodness of much of the upstream work. Firmware solutions are inherently inflexible. And finally, if admins are expected to build a qdisc and filter rules to steer traffic, this requires knowledge of how the hardware is currently configured: the number of tx queues and the queue offsets may change depending on resources. This approach also incurs all the overhead of a qdisc with filters.

With the mechanism in this patch, users can set skb priority using the expected methods, i.e. setsockopt(), or the stack can set the priority directly. The skb is then steered to the correct tx queues, aligned with the hardware QOS traffic classes. In the normal case, with a single traffic class and all queues in this class, everything works as is until the LLD enables multiple tcs.

To steer the skb we use the lower 4 bits of the priority, allowing the hardware to configure up to 15 distinct classes of traffic. This is expected to be sufficient for most applications; at any rate, it is more than the 802.1Q spec designates and is equal to the number of prio bands currently implemented in the default qdisc.

This, in conjunction with a userspace application such as lldpad, can be used to implement 802.1Q transmission selection algorithms, one of which is the enhanced transmission selection algorithm currently being used for DCB.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
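To make the driver-facing flow concrete, here is a minimal sketch of how an LLD might carve two traffic classes out of eight tx queues with the helpers this patch adds. The function name and the 2-class/8-queue split are hypothetical; only netdev_set_num_tc(), netdev_set_tc_queue(), netdev_set_prio_tc_map() and netdev_reset_tc() come from the patch:

#include <linux/netdevice.h>

/* Hypothetical setup: two traffic classes over eight tx queues,
 * queues 0-3 carrying tc 0 and queues 4-7 carrying tc 1.
 */
static int example_setup_two_tcs(struct net_device *dev)
{
	u8 prio;
	int err;

	/* Declare two traffic classes. */
	err = netdev_set_num_tc(dev, 2);
	if (err)
		return err;

	/* tc 0: count 4, offset 0; tc 1: count 4, offset 4. */
	err = netdev_set_tc_queue(dev, 0, 4, 0);
	if (!err)
		err = netdev_set_tc_queue(dev, 1, 4, 4);
	if (err)
		goto out_reset;

	/* Steer skb priorities 0-3 to tc 0 and 4-7 to tc 1. */
	for (prio = 0; prio < 8; prio++) {
		err = netdev_set_prio_tc_map(dev, prio, prio < 4 ? 0 : 1);
		if (err)
			goto out_reset;
	}
	return 0;

out_reset:
	netdev_reset_tc(dev);
	return err;
}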
Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r--  include/linux/netdevice.h | 68
1 file changed, 68 insertions(+), 0 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 68a4627b74f5..371fa8839d51 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -646,6 +646,14 @@ struct xps_dev_maps {
     (nr_cpu_ids * sizeof(struct xps_map *)))
 #endif /* CONFIG_XPS */
 
+#define TC_MAX_QUEUE	16
+#define TC_BITMASK	15
+/* HW offloaded queuing disciplines txq count and offset maps */
+struct netdev_tc_txq {
+	u16 count;
+	u16 offset;
+};
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
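Each netdev_tc_txq entry describes one contiguous block of tx queues, count wide and starting at offset. For illustration only (this table is hypothetical, not part of the patch), the two-class layout sketched above would populate the map as:

/* Hypothetical map: tc 0 -> queues 0-3, tc 1 -> queues 4-7;
 * the remaining TC_MAX_QUEUE entries stay zeroed (unused).
 */
static const struct netdev_tc_txq example_tc_to_txq[TC_MAX_QUEUE] = {
	[0] = { .count = 4, .offset = 0 },
	[1] = { .count = 4, .offset = 4 },
};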
@@ -756,6 +764,11 @@ struct xps_dev_maps {
 * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
 *			   struct nlattr *port[]);
 * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
+ * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
+ *	Called to setup 'tc' number of traffic classes in the net device. This
+ *	is always called from the stack with the rtnl lock held and netif tx
+ *	queues stopped. This allows the netdevice to perform queue management
+ *	safely.
 */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
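As a rough illustration of the contract documented above, a driver's hook might look like the following sketch. The example_* name and the even queue split are hypothetical; netdev_set_num_tc(), netdev_set_tc_queue() and netdev_reset_tc() are the helpers added further down in this patch:

#include <linux/netdevice.h>

/* Hypothetical ndo_setup_tc: split the device's real tx queues evenly
 * across the requested classes. Per the comment above, the stack calls
 * this with rtnl held and netif tx queues stopped.
 */
static int example_ndo_setup_tc(struct net_device *dev, u8 tc)
{
	u16 count, offset = 0;
	int err;
	u8 i;

	if (!tc) {
		netdev_reset_tc(dev);
		return 0;
	}

	if (tc > dev->real_num_tx_queues)
		return -EINVAL;

	err = netdev_set_num_tc(dev, tc);
	if (err)
		return err;

	count = dev->real_num_tx_queues / tc;
	for (i = 0; i < tc; i++, offset += count) {
		err = netdev_set_tc_queue(dev, i, count, offset);
		if (err) {
			netdev_reset_tc(dev);
			return err;
		}
	}
	return 0;
}

A driver would then wire this up through its ops table, e.g. .ndo_setup_tc = example_ndo_setup_tc in its struct net_device_ops.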
@@ -814,6 +827,7 @@ struct net_device_ops {
 						   struct nlattr *port[]);
 	int			(*ndo_get_vf_port)(struct net_device *dev,
 						   int vf, struct sk_buff *skb);
+	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
@@ -1146,6 +1160,9 @@ struct net_device {
 	/* Data Center Bridging netlink ops */
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
+	u8 num_tc;
+	struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
+	u8 prio_tc_map[TC_BITMASK + 1];
 
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	/* max exchange id for FCoE LRO by ddp */
@@ -1165,6 +1182,57 @@ struct net_device {
 #define NETDEV_ALIGN		32
 
 static inline
+int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
+{
+	return dev->prio_tc_map[prio & TC_BITMASK];
+}
+
+static inline
+int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
+{
+	if (tc >= dev->num_tc)
+		return -EINVAL;
+
+	dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK;
+	return 0;
+}
+
+static inline
+void netdev_reset_tc(struct net_device *dev)
+{
+	dev->num_tc = 0;
+	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+
+static inline
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+	if (tc >= dev->num_tc)
+		return -EINVAL;
+
+	dev->tc_to_txq[tc].count = count;
+	dev->tc_to_txq[tc].offset = offset;
+	return 0;
+}
+
+static inline
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+	if (num_tc > TC_MAX_QUEUE)
+		return -EINVAL;
+
+	dev->num_tc = num_tc;
+	return 0;
+}
+
+static inline
+int netdev_get_num_tc(struct net_device *dev)
+{
+	return dev->num_tc;
+}
+
+static inline
 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
 					 unsigned int index)
 {
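Taken together, the helpers above reduce the stack-side resolution from skb->priority to a tx queue to two table lookups. The following is a simplified sketch of that selection logic (the example_* name and the caller-supplied flow hash are hypothetical; this is not the exact code the stack uses):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Sketch: resolve skb->priority to a tx queue index through the
 * traffic class maps. A real implementation would compute the flow
 * hash itself; here the caller supplies it.
 */
static u16 example_priority_to_txq(const struct net_device *dev,
				   const struct sk_buff *skb, u32 hash)
{
	int tc;

	/* Single (default) traffic class: hash across all real queues. */
	if (!dev->num_tc)
		return hash % dev->real_num_tx_queues;

	/* prio_tc_map picks the class... */
	tc = netdev_get_prio_tc_map(dev, skb->priority);

	/* ...defensively fall back if the class has no queues assigned. */
	if (!dev->tc_to_txq[tc].count)
		return hash % dev->real_num_tx_queues;

	/* ...and tc_to_txq bounds the queue range for that class. */
	return dev->tc_to_txq[tc].offset +
	       (hash % dev->tc_to_txq[tc].count);
}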