aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c180
-rw-r--r--net/core/net-sysfs.c15
-rw-r--r--net/core/netpoll.c39
-rw-r--r--net/core/rtnetlink.c6
4 files changed, 131 insertions, 109 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index a76021c71207..29cf00c5d865 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -220,7 +220,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
220 * Device drivers call our routines to queue packets here. We empty the 220 * Device drivers call our routines to queue packets here. We empty the
221 * queue in the local softnet handler. 221 * queue in the local softnet handler.
222 */ 222 */
223DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; 223
224DEFINE_PER_CPU(struct softnet_data, softnet_data);
224 225
225#ifdef CONFIG_SYSFS 226#ifdef CONFIG_SYSFS
226extern int netdev_sysfs_init(void); 227extern int netdev_sysfs_init(void);
@@ -1018,16 +1019,12 @@ int dev_close(struct net_device *dev)
1018 clear_bit(__LINK_STATE_START, &dev->state); 1019 clear_bit(__LINK_STATE_START, &dev->state);
1019 1020
1020 /* Synchronize to scheduled poll. We cannot touch poll list, 1021 /* Synchronize to scheduled poll. We cannot touch poll list,
1021 * it can be even on different cpu. So just clear netif_running(), 1022 * it can be even on different cpu. So just clear netif_running().
1022 * and wait when poll really will happen. Actually, the best place 1023 *
1023 * for this is inside dev->stop() after device stopped its irq 1024 * dev->stop() will invoke napi_disable() on all of its
1024 * engine, but this requires more changes in devices. */ 1025 * napi_struct instances on this device.
1025 1026 */
1026 smp_mb__after_clear_bit(); /* Commit netif_running(). */ 1027 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1027 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
1028 /* No hurry. */
1029 msleep(1);
1030 }
1031 1028
1032 /* 1029 /*
1033 * Call the device specific close. This cannot fail. 1030 * Call the device specific close. This cannot fail.
@@ -1233,21 +1230,21 @@ void __netif_schedule(struct net_device *dev)
1233} 1230}
1234EXPORT_SYMBOL(__netif_schedule); 1231EXPORT_SYMBOL(__netif_schedule);
1235 1232
1236void __netif_rx_schedule(struct net_device *dev) 1233void dev_kfree_skb_irq(struct sk_buff *skb)
1237{ 1234{
1238 unsigned long flags; 1235 if (atomic_dec_and_test(&skb->users)) {
1236 struct softnet_data *sd;
1237 unsigned long flags;
1239 1238
1240 local_irq_save(flags); 1239 local_irq_save(flags);
1241 dev_hold(dev); 1240 sd = &__get_cpu_var(softnet_data);
1242 list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); 1241 skb->next = sd->completion_queue;
1243 if (dev->quota < 0) 1242 sd->completion_queue = skb;
1244 dev->quota += dev->weight; 1243 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1245 else 1244 local_irq_restore(flags);
1246 dev->quota = dev->weight; 1245 }
1247 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1248 local_irq_restore(flags);
1249} 1246}
1250EXPORT_SYMBOL(__netif_rx_schedule); 1247EXPORT_SYMBOL(dev_kfree_skb_irq);
1251 1248
1252void dev_kfree_skb_any(struct sk_buff *skb) 1249void dev_kfree_skb_any(struct sk_buff *skb)
1253{ 1250{
@@ -1259,7 +1256,12 @@ void dev_kfree_skb_any(struct sk_buff *skb)
1259EXPORT_SYMBOL(dev_kfree_skb_any); 1256EXPORT_SYMBOL(dev_kfree_skb_any);
1260 1257
1261 1258
1262/* Hot-plugging. */ 1259/**
1260 * netif_device_detach - mark device as removed
1261 * @dev: network device
1262 *
1263 * Mark device as removed from system and therefore no longer available.
1264 */
1263void netif_device_detach(struct net_device *dev) 1265void netif_device_detach(struct net_device *dev)
1264{ 1266{
1265 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && 1267 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1269,6 +1271,12 @@ void netif_device_detach(struct net_device *dev)
1269} 1271}
1270EXPORT_SYMBOL(netif_device_detach); 1272EXPORT_SYMBOL(netif_device_detach);
1271 1273
1274/**
1275 * netif_device_attach - mark device as attached
1276 * @dev: network device
1277 *
1278 * Mark device as attached to the system and restart if needed.
1279 */
1272void netif_device_attach(struct net_device *dev) 1280void netif_device_attach(struct net_device *dev)
1273{ 1281{
1274 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && 1282 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1730,7 +1738,7 @@ enqueue:
1730 return NET_RX_SUCCESS; 1738 return NET_RX_SUCCESS;
1731 } 1739 }
1732 1740
1733 netif_rx_schedule(&queue->backlog_dev); 1741 napi_schedule(&queue->backlog);
1734 goto enqueue; 1742 goto enqueue;
1735 } 1743 }
1736 1744
@@ -1771,6 +1779,7 @@ static inline struct net_device *skb_bond(struct sk_buff *skb)
1771 return dev; 1779 return dev;
1772} 1780}
1773 1781
1782
1774static void net_tx_action(struct softirq_action *h) 1783static void net_tx_action(struct softirq_action *h)
1775{ 1784{
1776 struct softnet_data *sd = &__get_cpu_var(softnet_data); 1785 struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -1927,7 +1936,7 @@ int netif_receive_skb(struct sk_buff *skb)
1927 __be16 type; 1936 __be16 type;
1928 1937
1929 /* if we've gotten here through NAPI, check netpoll */ 1938 /* if we've gotten here through NAPI, check netpoll */
1930 if (skb->dev->poll && netpoll_rx(skb)) 1939 if (netpoll_receive_skb(skb))
1931 return NET_RX_DROP; 1940 return NET_RX_DROP;
1932 1941
1933 if (!skb->tstamp.tv64) 1942 if (!skb->tstamp.tv64)
@@ -2017,22 +2026,25 @@ out:
2017 return ret; 2026 return ret;
2018} 2027}
2019 2028
2020static int process_backlog(struct net_device *backlog_dev, int *budget) 2029static int process_backlog(struct napi_struct *napi, int quota)
2021{ 2030{
2022 int work = 0; 2031 int work = 0;
2023 int quota = min(backlog_dev->quota, *budget);
2024 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2032 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2025 unsigned long start_time = jiffies; 2033 unsigned long start_time = jiffies;
2026 2034
2027 backlog_dev->weight = weight_p; 2035 napi->weight = weight_p;
2028 for (;;) { 2036 do {
2029 struct sk_buff *skb; 2037 struct sk_buff *skb;
2030 struct net_device *dev; 2038 struct net_device *dev;
2031 2039
2032 local_irq_disable(); 2040 local_irq_disable();
2033 skb = __skb_dequeue(&queue->input_pkt_queue); 2041 skb = __skb_dequeue(&queue->input_pkt_queue);
2034 if (!skb) 2042 if (!skb) {
2035 goto job_done; 2043 __napi_complete(napi);
2044 local_irq_enable();
2045 break;
2046 }
2047
2036 local_irq_enable(); 2048 local_irq_enable();
2037 2049
2038 dev = skb->dev; 2050 dev = skb->dev;
@@ -2040,67 +2052,86 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
2040 netif_receive_skb(skb); 2052 netif_receive_skb(skb);
2041 2053
2042 dev_put(dev); 2054 dev_put(dev);
2055 } while (++work < quota && jiffies == start_time);
2043 2056
2044 work++; 2057 return work;
2045 2058}
2046 if (work >= quota || jiffies - start_time > 1)
2047 break;
2048
2049 }
2050
2051 backlog_dev->quota -= work;
2052 *budget -= work;
2053 return -1;
2054
2055job_done:
2056 backlog_dev->quota -= work;
2057 *budget -= work;
2058 2059
2059 list_del(&backlog_dev->poll_list); 2060/**
2060 smp_mb__before_clear_bit(); 2061 * __napi_schedule - schedule for receive
2061 netif_poll_enable(backlog_dev); 2062 * @n: entry to schedule
2063 *
2064 * The entry's receive function will be scheduled to run
2065 */
2066void fastcall __napi_schedule(struct napi_struct *n)
2067{
2068 unsigned long flags;
2062 2069
2063 local_irq_enable(); 2070 local_irq_save(flags);
2064 return 0; 2071 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2072 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2073 local_irq_restore(flags);
2065} 2074}
2075EXPORT_SYMBOL(__napi_schedule);
2076
2066 2077
2067static void net_rx_action(struct softirq_action *h) 2078static void net_rx_action(struct softirq_action *h)
2068{ 2079{
2069 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2080 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2070 unsigned long start_time = jiffies; 2081 unsigned long start_time = jiffies;
2071 int budget = netdev_budget; 2082 int budget = netdev_budget;
2072 void *have; 2083 void *have;
2073 2084
2074 local_irq_disable(); 2085 local_irq_disable();
2075 2086
2076 while (!list_empty(&queue->poll_list)) { 2087 while (!list_empty(list)) {
2077 struct net_device *dev; 2088 struct napi_struct *n;
2089 int work, weight;
2078 2090
2079 if (budget <= 0 || jiffies - start_time > 1) 2091 /* If softirq window is exhausted then punt.
2092 *
2093 * Note that this is a slight policy change from the
2094 * previous NAPI code, which would allow up to 2
2095 * jiffies to pass before breaking out. The test
2096 * used to be "jiffies - start_time > 1".
2097 */
2098 if (unlikely(budget <= 0 || jiffies != start_time))
2080 goto softnet_break; 2099 goto softnet_break;
2081 2100
2082 local_irq_enable(); 2101 local_irq_enable();
2083 2102
2084 dev = list_entry(queue->poll_list.next, 2103 /* Even though interrupts have been re-enabled, this
2085 struct net_device, poll_list); 2104 * access is safe because interrupts can only add new
2086 have = netpoll_poll_lock(dev); 2105 * entries to the tail of this list, and only ->poll()
2106 * calls can remove this head entry from the list.
2107 */
2108 n = list_entry(list->next, struct napi_struct, poll_list);
2087 2109
2088 if (dev->quota <= 0 || dev->poll(dev, &budget)) { 2110 have = netpoll_poll_lock(n);
2089 netpoll_poll_unlock(have); 2111
2090 local_irq_disable(); 2112 weight = n->weight;
2091 list_move_tail(&dev->poll_list, &queue->poll_list); 2113
2092 if (dev->quota < 0) 2114 work = n->poll(n, weight);
2093 dev->quota += dev->weight; 2115
2094 else 2116 WARN_ON_ONCE(work > weight);
2095 dev->quota = dev->weight; 2117
2096 } else { 2118 budget -= work;
2097 netpoll_poll_unlock(have); 2119
2098 dev_put(dev); 2120 local_irq_disable();
2099 local_irq_disable(); 2121
2100 } 2122 /* Drivers must not modify the NAPI state if they
2123 * consume the entire weight. In such cases this code
2124 * still "owns" the NAPI instance and therefore can
2125 * move the instance around on the list at-will.
2126 */
2127 if (unlikely(work == weight))
2128 list_move_tail(&n->poll_list, list);
2129
2130 netpoll_poll_unlock(have);
2101 } 2131 }
2102out: 2132out:
2103 local_irq_enable(); 2133 local_irq_enable();
2134
2104#ifdef CONFIG_NET_DMA 2135#ifdef CONFIG_NET_DMA
2105 /* 2136 /*
2106 * There may not be any more sk_buffs coming right now, so push 2137 * There may not be any more sk_buffs coming right now, so push
@@ -2115,6 +2146,7 @@ out:
2115 } 2146 }
2116 } 2147 }
2117#endif 2148#endif
2149
2118 return; 2150 return;
2119 2151
2120softnet_break: 2152softnet_break:
@@ -3704,6 +3736,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
3704 dev->egress_subqueue_count = queue_count; 3736 dev->egress_subqueue_count = queue_count;
3705 3737
3706 dev->get_stats = internal_stats; 3738 dev->get_stats = internal_stats;
3739 netpoll_netdev_init(dev);
3707 setup(dev); 3740 setup(dev);
3708 strcpy(dev->name, name); 3741 strcpy(dev->name, name);
3709 return dev; 3742 return dev;
@@ -4076,10 +4109,9 @@ static int __init net_dev_init(void)
4076 skb_queue_head_init(&queue->input_pkt_queue); 4109 skb_queue_head_init(&queue->input_pkt_queue);
4077 queue->completion_queue = NULL; 4110 queue->completion_queue = NULL;
4078 INIT_LIST_HEAD(&queue->poll_list); 4111 INIT_LIST_HEAD(&queue->poll_list);
4079 set_bit(__LINK_STATE_START, &queue->backlog_dev.state); 4112
4080 queue->backlog_dev.weight = weight_p; 4113 queue->backlog.poll = process_backlog;
4081 queue->backlog_dev.poll = process_backlog; 4114 queue->backlog.weight = weight_p;
4082 atomic_set(&queue->backlog_dev.refcnt, 1);
4083 } 4115 }
4084 4116
4085 netdev_dma_register(); 4117 netdev_dma_register();
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5c19b0646d7a..79159db6acb9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -216,20 +216,6 @@ static ssize_t store_tx_queue_len(struct device *dev,
216 return netdev_store(dev, attr, buf, len, change_tx_queue_len); 216 return netdev_store(dev, attr, buf, len, change_tx_queue_len);
217} 217}
218 218
219NETDEVICE_SHOW(weight, fmt_dec);
220
221static int change_weight(struct net_device *net, unsigned long new_weight)
222{
223 net->weight = new_weight;
224 return 0;
225}
226
227static ssize_t store_weight(struct device *dev, struct device_attribute *attr,
228 const char *buf, size_t len)
229{
230 return netdev_store(dev, attr, buf, len, change_weight);
231}
232
233static struct device_attribute net_class_attributes[] = { 219static struct device_attribute net_class_attributes[] = {
234 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), 220 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
235 __ATTR(iflink, S_IRUGO, show_iflink, NULL), 221 __ATTR(iflink, S_IRUGO, show_iflink, NULL),
@@ -246,7 +232,6 @@ static struct device_attribute net_class_attributes[] = {
246 __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), 232 __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
247 __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, 233 __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
248 store_tx_queue_len), 234 store_tx_queue_len),
249 __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight),
250 {} 235 {}
251}; 236};
252 237
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index de1b26aa5720..abe6e3a4cc44 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -119,19 +119,22 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
119static void poll_napi(struct netpoll *np) 119static void poll_napi(struct netpoll *np)
120{ 120{
121 struct netpoll_info *npinfo = np->dev->npinfo; 121 struct netpoll_info *npinfo = np->dev->npinfo;
122 struct napi_struct *napi;
122 int budget = 16; 123 int budget = 16;
123 124
124 if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && 125 list_for_each_entry(napi, &np->dev->napi_list, dev_list) {
125 npinfo->poll_owner != smp_processor_id() && 126 if (test_bit(NAPI_STATE_SCHED, &napi->state) &&
126 spin_trylock(&npinfo->poll_lock)) { 127 napi->poll_owner != smp_processor_id() &&
127 npinfo->rx_flags |= NETPOLL_RX_DROP; 128 spin_trylock(&napi->poll_lock)) {
128 atomic_inc(&trapped); 129 npinfo->rx_flags |= NETPOLL_RX_DROP;
130 atomic_inc(&trapped);
129 131
130 np->dev->poll(np->dev, &budget); 132 napi->poll(napi, budget);
131 133
132 atomic_dec(&trapped); 134 atomic_dec(&trapped);
133 npinfo->rx_flags &= ~NETPOLL_RX_DROP; 135 npinfo->rx_flags &= ~NETPOLL_RX_DROP;
134 spin_unlock(&npinfo->poll_lock); 136 spin_unlock(&napi->poll_lock);
137 }
135 } 138 }
136} 139}
137 140
@@ -157,7 +160,7 @@ void netpoll_poll(struct netpoll *np)
157 160
158 /* Process pending work on NIC */ 161 /* Process pending work on NIC */
159 np->dev->poll_controller(np->dev); 162 np->dev->poll_controller(np->dev);
160 if (np->dev->poll) 163 if (!list_empty(&np->dev->napi_list))
161 poll_napi(np); 164 poll_napi(np);
162 165
163 service_arp_queue(np->dev->npinfo); 166 service_arp_queue(np->dev->npinfo);
@@ -233,6 +236,17 @@ repeat:
233 return skb; 236 return skb;
234} 237}
235 238
239static int netpoll_owner_active(struct net_device *dev)
240{
241 struct napi_struct *napi;
242
243 list_for_each_entry(napi, &dev->napi_list, dev_list) {
244 if (napi->poll_owner == smp_processor_id())
245 return 1;
246 }
247 return 0;
248}
249
236static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) 250static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
237{ 251{
238 int status = NETDEV_TX_BUSY; 252 int status = NETDEV_TX_BUSY;
@@ -246,8 +260,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
246 } 260 }
247 261
248 /* don't get messages out of order, and no recursion */ 262 /* don't get messages out of order, and no recursion */
249 if (skb_queue_len(&npinfo->txq) == 0 && 263 if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
250 npinfo->poll_owner != smp_processor_id()) {
251 unsigned long flags; 264 unsigned long flags;
252 265
253 local_irq_save(flags); 266 local_irq_save(flags);
@@ -652,8 +665,6 @@ int netpoll_setup(struct netpoll *np)
652 665
653 npinfo->rx_flags = 0; 666 npinfo->rx_flags = 0;
654 npinfo->rx_np = NULL; 667 npinfo->rx_np = NULL;
655 spin_lock_init(&npinfo->poll_lock);
656 npinfo->poll_owner = -1;
657 668
658 spin_lock_init(&npinfo->rx_lock); 669 spin_lock_init(&npinfo->rx_lock);
659 skb_queue_head_init(&npinfo->arp_tx); 670 skb_queue_head_init(&npinfo->arp_tx);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4756d5857abf..2b0b6fac6cef 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -634,7 +634,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
634 634
635 NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); 635 NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
636 NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); 636 NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
637 NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
638 NLA_PUT_U8(skb, IFLA_OPERSTATE, 637 NLA_PUT_U8(skb, IFLA_OPERSTATE,
639 netif_running(dev) ? dev->operstate : IF_OPER_DOWN); 638 netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
640 NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); 639 NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
@@ -834,9 +833,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
834 if (tb[IFLA_TXQLEN]) 833 if (tb[IFLA_TXQLEN])
835 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); 834 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
836 835
837 if (tb[IFLA_WEIGHT])
838 dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
839
840 if (tb[IFLA_OPERSTATE]) 836 if (tb[IFLA_OPERSTATE])
841 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); 837 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
842 838
@@ -1074,8 +1070,6 @@ replay:
1074 nla_len(tb[IFLA_BROADCAST])); 1070 nla_len(tb[IFLA_BROADCAST]));
1075 if (tb[IFLA_TXQLEN]) 1071 if (tb[IFLA_TXQLEN])
1076 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); 1072 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
1077 if (tb[IFLA_WEIGHT])
1078 dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
1079 if (tb[IFLA_OPERSTATE]) 1073 if (tb[IFLA_OPERSTATE])
1080 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); 1074 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
1081 if (tb[IFLA_LINKMODE]) 1075 if (tb[IFLA_LINKMODE])