aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/netdevice.h
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2010-10-11 06:22:12 -0400
committer David S. Miller <davem@davemloft.net> 2010-10-12 15:35:25 -0400
commit 29b4433d991c88d86ca48a4c1cc33c671475be4b (patch)
tree 2ad21b86aab8193c4533820c40cd31af97a7377f /include/linux/netdevice.h
parent f0b9f4725180ea58c8da78b3de0b4e0ad180fc2c (diff)
net: percpu net_device refcount
We tried very hard to remove all possible dev_hold()/dev_put() pairs in the network stack, using RCU conversions.

There is still an unavoidable device refcount change for every dst we create/destroy, and this can slow down some workloads (routers or some app servers, mmap af_packet).

We can switch to a percpu refcount implementation, now that the dynamic per_cpu infrastructure is mature. On a 64-CPU machine, this consumes 256 bytes per device.

On x86, dev_hold(dev) code:

before:
        lock incl 0x280(%ebx)
after:
        movl 0x260(%ebx),%eax
        incl fs:(%eax)

Stress bench:

(Sending 160.000.000 UDP frames, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_TRIE)

Before:
        real 1m1.662s
        user 0m14.373s
        sys  12m55.960s

After:
        real 0m51.179s
        user 0m15.329s
        sys  10m15.942s

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r-- include/linux/netdevice.h 7
1 files changed, 4 insertions, 3 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4160db3721ba..14fbb04c459d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1026,7 +1026,7 @@ struct net_device {
1026 struct timer_list watchdog_timer; 1026 struct timer_list watchdog_timer;
1027 1027
1028 /* Number of references to this device */ 1028 /* Number of references to this device */
1029 atomic_t refcnt ____cacheline_aligned_in_smp; 1029 int __percpu *pcpu_refcnt;
1030 1030
1031 /* delayed register/unregister */ 1031 /* delayed register/unregister */
1032 struct list_head todo_list; 1032 struct list_head todo_list;
@@ -1330,6 +1330,7 @@ static inline void unregister_netdevice(struct net_device *dev)
1330 unregister_netdevice_queue(dev, NULL); 1330 unregister_netdevice_queue(dev, NULL);
1331} 1331}
1332 1332
1333extern int netdev_refcnt_read(const struct net_device *dev);
1333extern void free_netdev(struct net_device *dev); 1334extern void free_netdev(struct net_device *dev);
1334extern void synchronize_net(void); 1335extern void synchronize_net(void);
1335extern int register_netdevice_notifier(struct notifier_block *nb); 1336extern int register_netdevice_notifier(struct notifier_block *nb);
@@ -1798,7 +1799,7 @@ extern void netdev_run_todo(void);
1798 */ 1799 */
1799static inline void dev_put(struct net_device *dev) 1800static inline void dev_put(struct net_device *dev)
1800{ 1801{
1801 atomic_dec(&dev->refcnt); 1802 irqsafe_cpu_dec(*dev->pcpu_refcnt);
1802} 1803}
1803 1804
1804/** 1805/**
@@ -1809,7 +1810,7 @@ static inline void dev_put(struct net_device *dev)
1809 */ 1810 */
1810static inline void dev_hold(struct net_device *dev) 1811static inline void dev_hold(struct net_device *dev)
1811{ 1812{
1812 atomic_inc(&dev->refcnt); 1813 irqsafe_cpu_inc(*dev->pcpu_refcnt);
1813} 1814}
1814 1815
1815/* Carrier loss detection, dial on demand. The functions netif_carrier_on 1816/* Carrier loss detection, dial on demand. The functions netif_carrier_on