path: root/net/core/dev.c
author		Stephen Hemminger <shemminger@linux-foundation.org>	2007-10-03 19:41:36 -0400
committer	David S. Miller <davem@sunset.davemloft.net>	2007-10-10 19:47:45 -0400
commit		bea3348eef27e6044b6161fd04c3152215f96411
tree		f0990b263e5ce42505d290a4c346fe990bcd4c33	/net/core/dev.c
parent		dde4e47e8fe333a5649a3fa0e7db1fa7c08d6158
[NET]: Make NAPI polling independent of struct net_device objects.
Several devices have multiple independent RX queues per net device, and some have a single interrupt doorbell for several queues.

In either case, it's easier to support layouts like that if the structure representing the poll is independent from the net device itself.

The signature of the ->poll() call back goes from:

	int foo_poll(struct net_device *dev, int *budget)

to

	int foo_poll(struct napi_struct *napi, int budget)

The caller is returned the number of RX packets processed (or the number of "NAPI credits" consumed if you want to get abstract). The callee no longer messes around bumping dev->quota, *budget, etc. because that is all handled in the caller upon return.

The napi_struct is to be embedded in the device driver private data structures.

Furthermore, it is the driver's responsibility to disable all NAPI instances in its ->stop() device close handler. Since the napi_struct is privatized into the driver's private data structures, only the driver knows how to get at all of the napi_struct instances it may have per-device.

With lots of help and suggestions from Rusty Russell, Roland Dreier, Michael Chan, Jeff Garzik, and Jamal Hadi Salim.

Bug fixes from Thomas Graf, Roland Dreier, Peter Zijlstra, Joseph Fannin, Scott Wood, Hans J. Koch, and Michael Chan.

[ Ported to current tree and all drivers converted. Integrated Stephen's follow-on kerneldoc additions, and restored poll_list handling to the old style to fix mutual exclusion issues. -DaveM ]

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
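[ Editor's illustration, not part of the patch: the sketch below shows roughly what a converted driver looks like under the new model. The "foo" driver and every foo_* hardware helper are hypothetical; only the napi_struct embedding, the new ->poll() signature, napi_schedule(), napi_complete() and napi_disable() reflect the interface this patch introduces, and per-driver initialization details are elided. ]

	#include <linux/interrupt.h>
	#include <linux/netdevice.h>

	struct foo_priv {
		struct net_device *dev;
		struct napi_struct napi;	/* embedded in driver private data */
	};

	int foo_rx_pending(struct foo_priv *priv);	/* hypothetical hw helpers */
	void foo_receive_one(struct foo_priv *priv);
	void foo_rx_irq_enable(struct foo_priv *priv);
	void foo_rx_irq_disable(struct foo_priv *priv);

	/* New-style poll: consume up to 'budget' packets, return how many were done. */
	static int foo_poll(struct napi_struct *napi, int budget)
	{
		struct foo_priv *priv = container_of(napi, struct foo_priv, napi);
		int work_done = 0;

		while (work_done < budget && foo_rx_pending(priv)) {
			foo_receive_one(priv);
			work_done++;
		}

		if (work_done < budget) {
			/* Ring drained: leave polled mode, re-enable RX irqs.
			 * If the whole budget was used we must not touch the
			 * NAPI state; net_rx_action() still owns it and will
			 * requeue us.
			 */
			napi_complete(napi);
			foo_rx_irq_enable(priv);
		}
		return work_done;
	}

	static irqreturn_t foo_interrupt(int irq, void *dev_id)
	{
		struct foo_priv *priv = dev_id;

		foo_rx_irq_disable(priv);
		napi_schedule(&priv->napi);	/* hand the queue to softirq polling */
		return IRQ_HANDLED;
	}

	static int foo_stop(struct net_device *dev)
	{
		struct foo_priv *priv = netdev_priv(dev);

		/* per the changelog: the driver quiesces its own NAPI instances */
		napi_disable(&priv->napi);
		/* ... stop hardware ... */
		return 0;
	}

[ napi.poll and napi.weight are set up once at driver init time, in the same style as the per-CPU backlog setup in net_dev_init() at the end of this diff. ]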
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--	net/core/dev.c | 180
1 file changed, 106 insertions(+), 74 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index a76021c71207..29cf00c5d865 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -220,7 +220,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
  * Device drivers call our routines to queue packets here. We empty the
  * queue in the local softnet handler.
  */
-DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
+
+DEFINE_PER_CPU(struct softnet_data, softnet_data);
 
 #ifdef CONFIG_SYSFS
 extern int netdev_sysfs_init(void);
@@ -1018,16 +1019,12 @@ int dev_close(struct net_device *dev)
 	clear_bit(__LINK_STATE_START, &dev->state);
 
 	/* Synchronize to scheduled poll. We cannot touch poll list,
-	 * it can be even on different cpu. So just clear netif_running(),
-	 * and wait when poll really will happen. Actually, the best place
-	 * for this is inside dev->stop() after device stopped its irq
-	 * engine, but this requires more changes in devices. */
-
+	 * it can be even on different cpu. So just clear netif_running().
+	 *
+	 * dev->stop() will invoke napi_disable() on all of it's
+	 * napi_struct instances on this device.
+	 */
 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
-	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
-		/* No hurry. */
-		msleep(1);
-	}
 
 	/*
 	 * Call the device specific close. This cannot fail.
@@ -1233,21 +1230,21 @@ void __netif_schedule(struct net_device *dev)
 }
 EXPORT_SYMBOL(__netif_schedule);
 
-void __netif_rx_schedule(struct net_device *dev)
+void dev_kfree_skb_irq(struct sk_buff *skb)
 {
-	unsigned long flags;
+	if (atomic_dec_and_test(&skb->users)) {
+		struct softnet_data *sd;
+		unsigned long flags;
 
-	local_irq_save(flags);
-	dev_hold(dev);
-	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
-	if (dev->quota < 0)
-		dev->quota += dev->weight;
-	else
-		dev->quota = dev->weight;
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-	local_irq_restore(flags);
+		local_irq_save(flags);
+		sd = &__get_cpu_var(softnet_data);
+		skb->next = sd->completion_queue;
+		sd->completion_queue = skb;
+		raise_softirq_irqoff(NET_TX_SOFTIRQ);
+		local_irq_restore(flags);
+	}
 }
-EXPORT_SYMBOL(__netif_rx_schedule);
+EXPORT_SYMBOL(dev_kfree_skb_irq);
 
 void dev_kfree_skb_any(struct sk_buff *skb)
 {
@@ -1259,7 +1256,12 @@ void dev_kfree_skb_any(struct sk_buff *skb)
 EXPORT_SYMBOL(dev_kfree_skb_any);
 
 
-/* Hot-plugging. */
+/**
+ * netif_device_detach - mark device as removed
+ * @dev: network device
+ *
+ * Mark device as removed from system and therefore no longer available.
+ */
 void netif_device_detach(struct net_device *dev)
 {
 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1269,6 +1271,12 @@ void netif_device_detach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_detach);
 
+/**
+ * netif_device_attach - mark device as attached
+ * @dev: network device
+ *
+ * Mark device as attached from system and restart if needed.
+ */
 void netif_device_attach(struct net_device *dev)
 {
 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1730,7 +1738,7 @@ enqueue:
 			return NET_RX_SUCCESS;
 		}
 
-		netif_rx_schedule(&queue->backlog_dev);
+		napi_schedule(&queue->backlog);
 		goto enqueue;
 	}
 
@@ -1771,6 +1779,7 @@ static inline struct net_device *skb_bond(struct sk_buff *skb)
 	return dev;
 }
 
+
 static void net_tx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -1927,7 +1936,7 @@ int netif_receive_skb(struct sk_buff *skb)
 	__be16 type;
 
 	/* if we've gotten here through NAPI, check netpoll */
-	if (skb->dev->poll && netpoll_rx(skb))
+	if (netpoll_receive_skb(skb))
 		return NET_RX_DROP;
 
 	if (!skb->tstamp.tv64)
@@ -2017,22 +2026,25 @@ out:
 	return ret;
 }
 
-static int process_backlog(struct net_device *backlog_dev, int *budget)
+static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
-	int quota = min(backlog_dev->quota, *budget);
 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
 	unsigned long start_time = jiffies;
 
-	backlog_dev->weight = weight_p;
-	for (;;) {
+	napi->weight = weight_p;
+	do {
 		struct sk_buff *skb;
 		struct net_device *dev;
 
 		local_irq_disable();
 		skb = __skb_dequeue(&queue->input_pkt_queue);
-		if (!skb)
-			goto job_done;
+		if (!skb) {
+			__napi_complete(napi);
+			local_irq_enable();
+			break;
+		}
+
 		local_irq_enable();
 
 		dev = skb->dev;
@@ -2040,67 +2052,86 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
 		netif_receive_skb(skb);
 
 		dev_put(dev);
+	} while (++work < quota && jiffies == start_time);
 
-		work++;
-
-		if (work >= quota || jiffies - start_time > 1)
-			break;
-
-	}
-
-	backlog_dev->quota -= work;
-	*budget -= work;
-	return -1;
-
-job_done:
-	backlog_dev->quota -= work;
-	*budget -= work;
+	return work;
+}
 
-	list_del(&backlog_dev->poll_list);
-	smp_mb__before_clear_bit();
-	netif_poll_enable(backlog_dev);
+/**
+ * __napi_schedule - schedule for receive
+ * @napi: entry to schedule
+ *
+ * The entry's receive function will be scheduled to run
+ */
+void fastcall __napi_schedule(struct napi_struct *n)
+{
+	unsigned long flags;
 
-	local_irq_enable();
-	return 0;
+	local_irq_save(flags);
+	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	local_irq_restore(flags);
 }
+EXPORT_SYMBOL(__napi_schedule);
+
 
 static void net_rx_action(struct softirq_action *h)
 {
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
 	unsigned long start_time = jiffies;
 	int budget = netdev_budget;
 	void *have;
 
 	local_irq_disable();
 
-	while (!list_empty(&queue->poll_list)) {
-		struct net_device *dev;
+	while (!list_empty(list)) {
+		struct napi_struct *n;
+		int work, weight;
 
-		if (budget <= 0 || jiffies - start_time > 1)
+		/* If softirq window is exhuasted then punt.
+		 *
+		 * Note that this is a slight policy change from the
+		 * previous NAPI code, which would allow up to 2
+		 * jiffies to pass before breaking out. The test
+		 * used to be "jiffies - start_time > 1".
+		 */
+		if (unlikely(budget <= 0 || jiffies != start_time))
 			goto softnet_break;
 
 		local_irq_enable();
 
-		dev = list_entry(queue->poll_list.next,
-				 struct net_device, poll_list);
-		have = netpoll_poll_lock(dev);
+		/* Even though interrupts have been re-enabled, this
+		 * access is safe because interrupts can only add new
+		 * entries to the tail of this list, and only ->poll()
+		 * calls can remove this head entry from the list.
+		 */
+		n = list_entry(list->next, struct napi_struct, poll_list);
 
-		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
-			netpoll_poll_unlock(have);
-			local_irq_disable();
-			list_move_tail(&dev->poll_list, &queue->poll_list);
-			if (dev->quota < 0)
-				dev->quota += dev->weight;
-			else
-				dev->quota = dev->weight;
-		} else {
-			netpoll_poll_unlock(have);
-			dev_put(dev);
-			local_irq_disable();
-		}
+		have = netpoll_poll_lock(n);
+
+		weight = n->weight;
+
+		work = n->poll(n, weight);
+
+		WARN_ON_ONCE(work > weight);
+
+		budget -= work;
+
+		local_irq_disable();
+
+		/* Drivers must not modify the NAPI state if they
+		 * consume the entire weight. In such cases this code
+		 * still "owns" the NAPI instance and therefore can
+		 * move the instance around on the list at-will.
+		 */
+		if (unlikely(work == weight))
+			list_move_tail(&n->poll_list, list);
+
+		netpoll_poll_unlock(have);
 	}
 out:
 	local_irq_enable();
+
 #ifdef CONFIG_NET_DMA
 	/*
 	 * There may not be any more sk_buffs coming right now, so push
@@ -3704,6 +3736,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev->egress_subqueue_count = queue_count;
 
 	dev->get_stats = internal_stats;
+	netpoll_netdev_init(dev);
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
@@ -4076,10 +4109,9 @@ static int __init net_dev_init(void)
 		skb_queue_head_init(&queue->input_pkt_queue);
 		queue->completion_queue = NULL;
 		INIT_LIST_HEAD(&queue->poll_list);
-		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
-		queue->backlog_dev.weight = weight_p;
-		queue->backlog_dev.poll = process_backlog;
-		atomic_set(&queue->backlog_dev.refcnt, 1);
+
+		queue->backlog.poll = process_backlog;
+		queue->backlog.weight = weight_p;
 	}
 
 	netdev_dma_register();
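
[ Editor's addition, not part of the commit: the changelog's motivating case of one interrupt doorbell serving several RX queues now maps onto several independent napi_struct instances, each scheduled separately and then polled and budgeted on its own by net_rx_action() above. All bar_* names below are hypothetical. ]

	#include <linux/interrupt.h>
	#include <linux/netdevice.h>

	#define BAR_NR_RX_QUEUES	4		/* hypothetical */

	struct bar_rx_queue {
		struct napi_struct napi;		/* one instance per RX queue */
		/* ... per-queue ring state ... */
	};

	struct bar_priv {
		struct bar_rx_queue rxq[BAR_NR_RX_QUEUES];
	};

	unsigned long bar_read_doorbell(struct bar_priv *priv);	/* hypothetical */

	static irqreturn_t bar_interrupt(int irq, void *dev_id)
	{
		struct bar_priv *priv = dev_id;
		unsigned long pending = bar_read_doorbell(priv);
		int i;

		/* Each queue with work gets its own entry on the per-CPU poll
		 * list; net_rx_action() then services and budgets each
		 * ->poll() independently, which is what decoupling the
		 * napi_struct from struct net_device buys us.
		 */
		for (i = 0; i < BAR_NR_RX_QUEUES; i++)
			if (pending & (1UL << i))
				napi_schedule(&priv->rxq[i].napi);

		return IRQ_HANDLED;
	}

[ Each queue's ->poll() would look like foo_poll() in the sketch near the top of this page, calling napi_complete() only when it does less work than its weight. ]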