author    David Vrabel <david.vrabel@citrix.com>  2014-10-22 06:17:06 -0400
committer David S. Miller <davem@davemloft.net>   2014-10-26 22:21:32 -0400
commit    1f3c2eba1e2d866ef99bb9b10ade4096e3d7607c
tree      6f79f2819816bc679e6155e32141f1a407a5f070
parent    9286ae01ac7669e1b0a56151a3132804ed82c3d4
xen-netfront: always keep the Rx ring full of requests
A full Rx ring only requires 1 MiB of memory.  This is not enough
memory that it is useful to dynamically scale the number of Rx requests
in the ring based on traffic rates, because:

  a) Even the full 1 MiB is a tiny fraction of the memory of a typical
     modern Linux VM (for example, the AWS micro instance still has
     1 GiB of memory).

  b) Netfront would have used up to 1 MiB already even with moderate
     data rates (there was no adjustment of target based on memory
     pressure).

  c) Small VMs are typically going to have one VCPU and hence only one
     queue.

Keeping the ring full of Rx requests handles bursty traffic better than
trying to converge on an optimal number of requests to keep filled.

On a 4 core host, an iperf -P 64 -t 60 run from dom0 to a 4 VCPU guest
improved from 5.1 Gbit/s to 5.6 Gbit/s.  Gains with more bursty traffic
are expected to be higher.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
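[Editorial note on the "1 MiB" figure: with 4 KiB pages, __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) works out to 256 ring entries, and each outstanding Rx request grants one page to the backend. A minimal stand-alone sketch of that arithmetic follows; the constants are restated assumptions (x86 page size, 256-entry ring), not pulled from the driver headers.]

#include <stdio.h>

int main(void)
{
	/* Assumed values: PAGE_SIZE on x86, and the 256-entry ring that
	 * __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) yields for 4 KiB pages. */
	const unsigned long page_size = 4096;
	const unsigned long rx_ring_entries = 256;	/* NET_RX_RING_SIZE */

	/* One granted page per Rx request: 256 * 4 KiB = 1024 KiB = 1 MiB. */
	printf("a full Rx ring backs %lu KiB of guest memory\n",
	       rx_ring_entries * page_size / 1024);
	return 0;
}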
Diffstat (limited to 'drivers/net/xen-netfront.c')
-rw-r--r--  drivers/net/xen-netfront.c | 253
1 file changed, 62 insertions(+), 191 deletions(-)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index cca871346a0f..88a70f5ed594 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -77,7 +77,9 @@ struct netfront_cb {
 
 #define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
 #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
-#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
+
+/* Minimum number of Rx slots (includes slot for GSO metadata). */
+#define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
 
 /* Queue name is interface name with "-qNNN" appended */
 #define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
@@ -137,13 +139,6 @@ struct netfront_queue {
 	struct xen_netif_rx_front_ring rx;
 	int rx_ring_ref;
 
-	/* Receive-ring batched refills. */
-#define RX_MIN_TARGET 8
-#define RX_DFL_MIN_TARGET 64
-#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
-	unsigned rx_min_target, rx_max_target, rx_target;
-	struct sk_buff_head rx_batch;
-
 	struct timer_list rx_refill_timer;
 
 	struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
@@ -251,7 +246,7 @@ static void rx_refill_timeout(unsigned long data)
 static int netfront_tx_slot_available(struct netfront_queue *queue)
 {
 	return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
-		(TX_MAX_TARGET - MAX_SKB_FRAGS - 2);
+		(NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2);
 }
 
 static void xennet_maybe_wake_tx(struct netfront_queue *queue)
@@ -265,77 +260,55 @@ static void xennet_maybe_wake_tx(struct netfront_queue *queue)
 		netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
 }
 
-static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
+
+static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
 {
-	unsigned short id;
 	struct sk_buff *skb;
 	struct page *page;
-	int i, batch_target, notify;
-	RING_IDX req_prod = queue->rx.req_prod_pvt;
-	grant_ref_t ref;
-	unsigned long pfn;
-	void *vaddr;
-	struct xen_netif_rx_request *req;
 
-	if (unlikely(!netif_carrier_ok(queue->info->netdev)))
-		return;
+	skb = __netdev_alloc_skb(queue->info->netdev,
+				 RX_COPY_THRESHOLD + NET_IP_ALIGN,
+				 GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
 
-	/*
-	 * Allocate skbuffs greedily, even though we batch updates to the
-	 * receive ring. This creates a less bursty demand on the memory
-	 * allocator, so should reduce the chance of failed allocation requests
-	 * both for ourself and for other kernel subsystems.
-	 */
-	batch_target = queue->rx_target - (req_prod - queue->rx.rsp_cons);
-	for (i = skb_queue_len(&queue->rx_batch); i < batch_target; i++) {
-		skb = __netdev_alloc_skb(queue->info->netdev,
-					 RX_COPY_THRESHOLD + NET_IP_ALIGN,
-					 GFP_ATOMIC | __GFP_NOWARN);
-		if (unlikely(!skb))
-			goto no_skb;
-
-		/* Align ip header to a 16 bytes boundary */
-		skb_reserve(skb, NET_IP_ALIGN);
-
-		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
-		if (!page) {
-			kfree_skb(skb);
-no_skb:
-			/* Could not allocate any skbuffs. Try again later. */
-			mod_timer(&queue->rx_refill_timer,
-				  jiffies + (HZ/10));
-
-			/* Any skbuffs queued for refill? Force them out. */
-			if (i != 0)
-				goto refill;
-			break;
-		}
-
-		skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
-		__skb_queue_tail(&queue->rx_batch, skb);
+	page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+	if (!page) {
+		kfree_skb(skb);
+		return NULL;
 	}
+	skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
+
+	/* Align ip header to a 16 bytes boundary */
+	skb_reserve(skb, NET_IP_ALIGN);
+	skb->dev = queue->info->netdev;
+
+	return skb;
+}
+
 
-	/* Is the batch large enough to be worthwhile? */
-	if (i < (queue->rx_target/2)) {
-		if (req_prod > queue->rx.sring->req_prod)
-			goto push;
+static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
+{
+	RING_IDX req_prod = queue->rx.req_prod_pvt;
+	int notify;
+
+	if (unlikely(!netif_carrier_ok(queue->info->netdev)))
 		return;
-	}
 
-	/* Adjust our fill target if we risked running out of buffers. */
-	if (((req_prod - queue->rx.sring->rsp_prod) < (queue->rx_target / 4)) &&
-	    ((queue->rx_target *= 2) > queue->rx_max_target))
-		queue->rx_target = queue->rx_max_target;
+	for (req_prod = queue->rx.req_prod_pvt;
+	     req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE;
+	     req_prod++) {
+		struct sk_buff *skb;
+		unsigned short id;
+		grant_ref_t ref;
+		unsigned long pfn;
+		struct xen_netif_rx_request *req;
 
- refill:
-	for (i = 0; ; i++) {
-		skb = __skb_dequeue(&queue->rx_batch);
-		if (skb == NULL)
+		skb = xennet_alloc_one_rx_buffer(queue);
+		if (!skb)
 			break;
 
-		skb->dev = queue->info->netdev;
-
-		id = xennet_rxidx(req_prod + i);
+		id = xennet_rxidx(req_prod);
 
 		BUG_ON(queue->rx_skbs[id]);
 		queue->rx_skbs[id] = skb;
@@ -345,9 +318,8 @@ no_skb:
 		queue->grant_rx_ref[id] = ref;
 
 		pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
-		vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0]));
 
-		req = RING_GET_REQUEST(&queue->rx, req_prod + i);
+		req = RING_GET_REQUEST(&queue->rx, req_prod);
 		gnttab_grant_foreign_access_ref(ref,
 						queue->info->xbdev->otherend_id,
 						pfn_to_mfn(pfn),
@@ -357,11 +329,16 @@ no_skb:
 		req->gref = ref;
 	}
 
+	queue->rx.req_prod_pvt = req_prod;
+
+	/* Not enough requests? Try again later. */
+	if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN) {
+		mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
+		return;
+	}
+
 	wmb(); /* barrier so backend seens requests */
 
-	/* Above is a suitable barrier to ensure backend will see requests. */
-	queue->rx.req_prod_pvt = req_prod + i;
- push:
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
 	if (notify)
 		notify_remote_via_irq(queue->rx_irq);
@@ -1070,13 +1047,6 @@ err:
 
 	work_done -= handle_incoming_queue(queue, &rxq);
 
-	/* If we get a callback with very few responses, reduce fill target. */
-	/* NB. Note exponential increase, linear decrease. */
-	if (((queue->rx.req_prod_pvt - queue->rx.sring->rsp_prod) >
-	     ((3*queue->rx_target) / 4)) &&
-	    (--queue->rx_target < queue->rx_min_target))
-		queue->rx_target = queue->rx_min_target;
-
 	xennet_alloc_rx_buffers(queue);
 
 	if (work_done < budget) {
@@ -1643,11 +1613,6 @@ static int xennet_init_queue(struct netfront_queue *queue)
 	spin_lock_init(&queue->tx_lock);
 	spin_lock_init(&queue->rx_lock);
 
-	skb_queue_head_init(&queue->rx_batch);
-	queue->rx_target     = RX_DFL_MIN_TARGET;
-	queue->rx_min_target = RX_DFL_MIN_TARGET;
-	queue->rx_max_target = RX_MAX_TARGET;
-
 	init_timer(&queue->rx_refill_timer);
 	queue->rx_refill_timer.data = (unsigned long)queue;
 	queue->rx_refill_timer.function = rx_refill_timeout;
@@ -1670,7 +1635,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
 	}
 
 	/* A grant for every tx ring slot */
-	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
+	if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
 					  &queue->gref_tx_head) < 0) {
 		pr_alert("can't alloc tx grant refs\n");
 		err = -ENOMEM;
@@ -1678,7 +1643,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
 	}
 
 	/* A grant for every rx ring slot */
-	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
+	if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
 					  &queue->gref_rx_head) < 0) {
 		pr_alert("can't alloc rx grant refs\n");
 		err = -ENOMEM;
@@ -2146,83 +2111,18 @@ static const struct ethtool_ops xennet_ethtool_ops =
 };
 
 #ifdef CONFIG_SYSFS
-static ssize_t show_rxbuf_min(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct net_device *netdev = to_net_dev(dev);
-	struct netfront_info *info = netdev_priv(netdev);
-	unsigned int num_queues = netdev->real_num_tx_queues;
-
-	if (num_queues)
-		return sprintf(buf, "%u\n", info->queues[0].rx_min_target);
-	else
-		return sprintf(buf, "%u\n", RX_MIN_TARGET);
-}
-
-static ssize_t store_rxbuf_min(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf, size_t len)
+static ssize_t show_rxbuf(struct device *dev,
+			  struct device_attribute *attr, char *buf)
 {
-	struct net_device *netdev = to_net_dev(dev);
-	struct netfront_info *np = netdev_priv(netdev);
-	unsigned int num_queues = netdev->real_num_tx_queues;
-	char *endp;
-	unsigned long target;
-	unsigned int i;
-	struct netfront_queue *queue;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	target = simple_strtoul(buf, &endp, 0);
-	if (endp == buf)
-		return -EBADMSG;
-
-	if (target < RX_MIN_TARGET)
-		target = RX_MIN_TARGET;
-	if (target > RX_MAX_TARGET)
-		target = RX_MAX_TARGET;
-
-	for (i = 0; i < num_queues; ++i) {
-		queue = &np->queues[i];
-		spin_lock_bh(&queue->rx_lock);
-		if (target > queue->rx_max_target)
-			queue->rx_max_target = target;
-		queue->rx_min_target = target;
-		if (target > queue->rx_target)
-			queue->rx_target = target;
-
-		xennet_alloc_rx_buffers(queue);
-
-		spin_unlock_bh(&queue->rx_lock);
-	}
-	return len;
-}
-
-static ssize_t show_rxbuf_max(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct net_device *netdev = to_net_dev(dev);
-	struct netfront_info *info = netdev_priv(netdev);
-	unsigned int num_queues = netdev->real_num_tx_queues;
-
-	if (num_queues)
-		return sprintf(buf, "%u\n", info->queues[0].rx_max_target);
-	else
-		return sprintf(buf, "%u\n", RX_MAX_TARGET);
+	return sprintf(buf, "%lu\n", NET_RX_RING_SIZE);
 }
 
-static ssize_t store_rxbuf_max(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf, size_t len)
+static ssize_t store_rxbuf(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t len)
 {
-	struct net_device *netdev = to_net_dev(dev);
-	struct netfront_info *np = netdev_priv(netdev);
-	unsigned int num_queues = netdev->real_num_tx_queues;
 	char *endp;
 	unsigned long target;
-	unsigned int i = 0;
-	struct netfront_queue *queue = NULL;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -2231,44 +2131,15 @@ static ssize_t store_rxbuf_max(struct device *dev,
 	if (endp == buf)
 		return -EBADMSG;
 
-	if (target < RX_MIN_TARGET)
-		target = RX_MIN_TARGET;
-	if (target > RX_MAX_TARGET)
-		target = RX_MAX_TARGET;
-
-	for (i = 0; i < num_queues; ++i) {
-		queue = &np->queues[i];
-		spin_lock_bh(&queue->rx_lock);
-		if (target < queue->rx_min_target)
-			queue->rx_min_target = target;
-		queue->rx_max_target = target;
-		if (target < queue->rx_target)
-			queue->rx_target = target;
-
-		xennet_alloc_rx_buffers(queue);
+	/* rxbuf_min and rxbuf_max are no longer configurable. */
 
-		spin_unlock_bh(&queue->rx_lock);
-	}
 	return len;
 }
 
-static ssize_t show_rxbuf_cur(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct net_device *netdev = to_net_dev(dev);
-	struct netfront_info *info = netdev_priv(netdev);
-	unsigned int num_queues = netdev->real_num_tx_queues;
-
-	if (num_queues)
-		return sprintf(buf, "%u\n", info->queues[0].rx_target);
-	else
-		return sprintf(buf, "0\n");
-}
-
 static struct device_attribute xennet_attrs[] = {
-	__ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
-	__ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
-	__ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
+	__ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf),
+	__ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf),
+	__ATTR(rxbuf_cur, S_IRUGO, show_rxbuf, NULL),
 };
 
 static int xennet_sysfs_addif(struct net_device *netdev)