aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Dumazet <dada1@cosmosbay.com> 2009-05-17 23:55:16 -0400
committer: David S. Miller <davem@davemloft.net> 2009-05-17 23:55:16 -0400
commit: 9d21493b4beb8f918ba248032fefa393074a5e2b (patch)
tree: 653590f3e325da5c4c1fc7d2c00bc196a3167f9d
parent: 0a305720ee597aad41af61e6b6844321d3e24251 (diff)
net: tx scalability works : trans_start
The struct net_device trans_start field is a hot spot on SMP and high performance devices, particularly multi-queue ones, because every transmitter dirties it. Its main uses are the tx watchdog and bonding alive checks. But as most devices don't use NETIF_F_LLTX, we have to lock a netdev_queue before calling their ndo_start_xmit(). So it makes sense to move trans_start from net_device to netdev_queue. Its update will occur on an already present (and in exclusive state) cache line, for free. We can do this transition smoothly. An old driver continues to update dev->trans_start, while an updated one updates txq->trans_start. Further patches could also put tx_bytes/tx_packets counters in netdev_queue to avoid dirtying dev->stats (vlan device comes to mind). Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/net/bonding/bond_main.c 8
-rw-r--r-- include/linux/netdevice.h 11
-rw-r--r-- net/sched/sch_generic.c 40
3 files changed, 46 insertions, 13 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 815191dd03c3..30b9ea6d62b0 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2795,7 +2795,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
2795 */ 2795 */
2796 bond_for_each_slave(bond, slave, i) { 2796 bond_for_each_slave(bond, slave, i) {
2797 if (slave->link != BOND_LINK_UP) { 2797 if (slave->link != BOND_LINK_UP) {
2798 if (time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks) && 2798 if (time_before_eq(jiffies, dev_trans_start(slave->dev) + delta_in_ticks) &&
2799 time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) { 2799 time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) {
2800 2800
2801 slave->link = BOND_LINK_UP; 2801 slave->link = BOND_LINK_UP;
@@ -2827,7 +2827,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
2827 * when the source ip is 0, so don't take the link down 2827 * when the source ip is 0, so don't take the link down
2828 * if we don't know our ip yet 2828 * if we don't know our ip yet
2829 */ 2829 */
2830 if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) || 2830 if (time_after_eq(jiffies, dev_trans_start(slave->dev) + 2*delta_in_ticks) ||
2831 (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) { 2831 (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) {
2832 2832
2833 slave->link = BOND_LINK_DOWN; 2833 slave->link = BOND_LINK_DOWN;
@@ -2938,7 +2938,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2938 * the bond has an IP address) 2938 * the bond has an IP address)
2939 */ 2939 */
2940 if ((slave->state == BOND_STATE_ACTIVE) && 2940 if ((slave->state == BOND_STATE_ACTIVE) &&
2941 (time_after_eq(jiffies, slave->dev->trans_start + 2941 (time_after_eq(jiffies, dev_trans_start(slave->dev) +
2942 2 * delta_in_ticks) || 2942 2 * delta_in_ticks) ||
2943 (time_after_eq(jiffies, slave_last_rx(bond, slave) 2943 (time_after_eq(jiffies, slave_last_rx(bond, slave)
2944 + 2 * delta_in_ticks)))) { 2944 + 2 * delta_in_ticks)))) {
@@ -2982,7 +2982,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
2982 write_lock_bh(&bond->curr_slave_lock); 2982 write_lock_bh(&bond->curr_slave_lock);
2983 2983
2984 if (!bond->curr_active_slave && 2984 if (!bond->curr_active_slave &&
2985 time_before_eq(jiffies, slave->dev->trans_start + 2985 time_before_eq(jiffies, dev_trans_start(slave->dev) +
2986 delta_in_ticks)) { 2986 delta_in_ticks)) {
2987 slave->link = BOND_LINK_UP; 2987 slave->link = BOND_LINK_UP;
2988 bond_change_active_slave(bond, slave); 2988 bond_change_active_slave(bond, slave);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2af89b662cad..cd547d04a8ce 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -470,6 +470,10 @@ struct netdev_queue {
470 */ 470 */
471 spinlock_t _xmit_lock ____cacheline_aligned_in_smp; 471 spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
472 int xmit_lock_owner; 472 int xmit_lock_owner;
473 /*
474 * please use this field instead of dev->trans_start
475 */
476 unsigned long trans_start;
473} ____cacheline_aligned_in_smp; 477} ____cacheline_aligned_in_smp;
474 478
475 479
@@ -819,6 +823,11 @@ struct net_device
819 * One part is mostly used on xmit path (device) 823 * One part is mostly used on xmit path (device)
820 */ 824 */
821 /* These may be needed for future network-power-down code. */ 825 /* These may be needed for future network-power-down code. */
826
827 /*
828 * trans_start here is expensive for high speed devices on SMP,
829 * please use netdev_queue->trans_start instead.
830 */
822 unsigned long trans_start; /* Time (in jiffies) of last Tx */ 831 unsigned long trans_start; /* Time (in jiffies) of last Tx */
823 832
824 int watchdog_timeo; /* used by dev_watchdog() */ 833 int watchdog_timeo; /* used by dev_watchdog() */
@@ -1541,6 +1550,8 @@ static inline int netif_carrier_ok(const struct net_device *dev)
1541 return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); 1550 return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
1542} 1551}
1543 1552
1553extern unsigned long dev_trans_start(struct net_device *dev);
1554
1544extern void __netdev_watchdog_up(struct net_device *dev); 1555extern void __netdev_watchdog_up(struct net_device *dev);
1545 1556
1546extern void netif_carrier_on(struct net_device *dev); 1557extern void netif_carrier_on(struct net_device *dev);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5f5efe4e6072..27d03816ec3e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -196,6 +196,21 @@ void __qdisc_run(struct Qdisc *q)
196 clear_bit(__QDISC_STATE_RUNNING, &q->state); 196 clear_bit(__QDISC_STATE_RUNNING, &q->state);
197} 197}
198 198
199unsigned long dev_trans_start(struct net_device *dev)
200{
201 unsigned long val, res = dev->trans_start;
202 unsigned int i;
203
204 for (i = 0; i < dev->num_tx_queues; i++) {
205 val = netdev_get_tx_queue(dev, i)->trans_start;
206 if (val && time_after(val, res))
207 res = val;
208 }
209 dev->trans_start = res;
210 return res;
211}
212EXPORT_SYMBOL(dev_trans_start);
213
199static void dev_watchdog(unsigned long arg) 214static void dev_watchdog(unsigned long arg)
200{ 215{
201 struct net_device *dev = (struct net_device *)arg; 216 struct net_device *dev = (struct net_device *)arg;
@@ -205,25 +220,30 @@ static void dev_watchdog(unsigned long arg)
205 if (netif_device_present(dev) && 220 if (netif_device_present(dev) &&
206 netif_running(dev) && 221 netif_running(dev) &&
207 netif_carrier_ok(dev)) { 222 netif_carrier_ok(dev)) {
208 int some_queue_stopped = 0; 223 int some_queue_timedout = 0;
209 unsigned int i; 224 unsigned int i;
225 unsigned long trans_start;
210 226
211 for (i = 0; i < dev->num_tx_queues; i++) { 227 for (i = 0; i < dev->num_tx_queues; i++) {
212 struct netdev_queue *txq; 228 struct netdev_queue *txq;
213 229
214 txq = netdev_get_tx_queue(dev, i); 230 txq = netdev_get_tx_queue(dev, i);
215 if (netif_tx_queue_stopped(txq)) { 231 /*
216 some_queue_stopped = 1; 232 * old device drivers set dev->trans_start
233 */
234 trans_start = txq->trans_start ? : dev->trans_start;
235 if (netif_tx_queue_stopped(txq) &&
236 time_after(jiffies, (trans_start +
237 dev->watchdog_timeo))) {
238 some_queue_timedout = 1;
217 break; 239 break;
218 } 240 }
219 } 241 }
220 242
221 if (some_queue_stopped && 243 if (some_queue_timedout) {
222 time_after(jiffies, (dev->trans_start +
223 dev->watchdog_timeo))) {
224 char drivername[64]; 244 char drivername[64];
225 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", 245 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
226 dev->name, netdev_drivername(dev, drivername, 64)); 246 dev->name, netdev_drivername(dev, drivername, 64), i);
227 dev->netdev_ops->ndo_tx_timeout(dev); 247 dev->netdev_ops->ndo_tx_timeout(dev);
228 } 248 }
229 if (!mod_timer(&dev->watchdog_timer, 249 if (!mod_timer(&dev->watchdog_timer,
@@ -602,8 +622,10 @@ static void transition_one_qdisc(struct net_device *dev,
602 clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state); 622 clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
603 623
604 rcu_assign_pointer(dev_queue->qdisc, new_qdisc); 624 rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
605 if (need_watchdog_p && new_qdisc != &noqueue_qdisc) 625 if (need_watchdog_p && new_qdisc != &noqueue_qdisc) {
626 dev_queue->trans_start = 0;
606 *need_watchdog_p = 1; 627 *need_watchdog_p = 1;
628 }
607} 629}
608 630
609void dev_activate(struct net_device *dev) 631void dev_activate(struct net_device *dev)