diff options
author | KY Srinivasan <kys@microsoft.com> | 2014-04-30 13:14:31 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-04-30 13:48:46 -0400 |
commit | c25aaf814a63f9d9c4e45416f13d70ef0aa0be2e (patch) | |
tree | be6bb2c8784382684365e47b202f02bd5a95babe | |
parent | cc80ee13609dc5926ad563d1a793991c80675e65 (diff) |
hyperv: Enable sendbuf mechanism on the send path
We send packets using a copy-free mechanism (this is the Guest to Host transport
via VMBUS). While this is obviously optimal for large packets,
it may not be optimal for small packets. Hyper-V host supports
a second mechanism for sending packets that is "copy based". We implement that
mechanism in this patch.
In this version of the patch I have addressed a comment from David Miller.
With this patch (and all of the other offload and VRSS patches), we are now able
to almost saturate a 10G interface between Linux VMs on Hyper-V
on different hosts - close to 9 Gbps as measured via iperf.
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/hyperv/hyperv_net.h | 14 | ||||
-rw-r--r-- | drivers/net/hyperv/netvsc.c | 226 | ||||
-rw-r--r-- | drivers/net/hyperv/netvsc_drv.c | 3 |
3 files changed, 234 insertions, 9 deletions
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d1f7826aa75f..4b7df5a5c966 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h | |||
@@ -140,6 +140,8 @@ struct hv_netvsc_packet { | |||
140 | void *send_completion_ctx; | 140 | void *send_completion_ctx; |
141 | void (*send_completion)(void *context); | 141 | void (*send_completion)(void *context); |
142 | 142 | ||
143 | u32 send_buf_index; | ||
144 | |||
143 | /* This points to the memory after page_buf */ | 145 | /* This points to the memory after page_buf */ |
144 | struct rndis_message *rndis_msg; | 146 | struct rndis_message *rndis_msg; |
145 | 147 | ||
@@ -582,6 +584,9 @@ struct nvsp_message { | |||
582 | 584 | ||
583 | #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ | 585 | #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ |
584 | #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ | 586 | #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ |
587 | #define NETVSC_SEND_BUFFER_SIZE (1024 * 1024) /* 1MB */ | ||
588 | #define NETVSC_INVALID_INDEX -1 | ||
589 | |||
585 | 590 | ||
586 | #define NETVSC_RECEIVE_BUFFER_ID 0xcafe | 591 | #define NETVSC_RECEIVE_BUFFER_ID 0xcafe |
587 | 592 | ||
@@ -607,6 +612,15 @@ struct netvsc_device { | |||
607 | u32 recv_section_cnt; | 612 | u32 recv_section_cnt; |
608 | struct nvsp_1_receive_buffer_section *recv_section; | 613 | struct nvsp_1_receive_buffer_section *recv_section; |
609 | 614 | ||
615 | /* Send buffer allocated by us */ | ||
616 | void *send_buf; | ||
617 | u32 send_buf_size; | ||
618 | u32 send_buf_gpadl_handle; | ||
619 | u32 send_section_cnt; | ||
620 | u32 send_section_size; | ||
621 | unsigned long *send_section_map; | ||
622 | int map_words; | ||
623 | |||
610 | /* Used for NetVSP initialization protocol */ | 624 | /* Used for NetVSP initialization protocol */ |
611 | struct completion channel_init_wait; | 625 | struct completion channel_init_wait; |
612 | struct nvsp_message channel_init_pkt; | 626 | struct nvsp_message channel_init_pkt; |
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index bbee44635035..c041f63a6d30 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/netdevice.h> | 29 | #include <linux/netdevice.h> |
30 | #include <linux/if_ether.h> | 30 | #include <linux/if_ether.h> |
31 | #include <asm/sync_bitops.h> | ||
31 | 32 | ||
32 | #include "hyperv_net.h" | 33 | #include "hyperv_net.h" |
33 | 34 | ||
@@ -80,7 +81,7 @@ get_in_err: | |||
80 | } | 81 | } |
81 | 82 | ||
82 | 83 | ||
83 | static int netvsc_destroy_recv_buf(struct netvsc_device *net_device) | 84 | static int netvsc_destroy_buf(struct netvsc_device *net_device) |
84 | { | 85 | { |
85 | struct nvsp_message *revoke_packet; | 86 | struct nvsp_message *revoke_packet; |
86 | int ret = 0; | 87 | int ret = 0; |
@@ -146,10 +147,62 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device) | |||
146 | net_device->recv_section = NULL; | 147 | net_device->recv_section = NULL; |
147 | } | 148 | } |
148 | 149 | ||
150 | /* Deal with the send buffer we may have setup. | ||
151 | * If we got a send section size, it means we received a | ||
152 | * SendSendBufferComplete msg (ie sent | ||
153 | * NvspMessage1TypeSendSendBuffer msg) therefore, we need | ||
154 | * to send a revoke msg here | ||
155 | */ | ||
156 | if (net_device->send_section_size) { | ||
157 | /* Send the revoke send buffer message */ | ||
158 | revoke_packet = &net_device->revoke_packet; | ||
159 | memset(revoke_packet, 0, sizeof(struct nvsp_message)); | ||
160 | |||
161 | revoke_packet->hdr.msg_type = | ||
162 | NVSP_MSG1_TYPE_REVOKE_SEND_BUF; | ||
163 | revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0; | ||
164 | |||
165 | ret = vmbus_sendpacket(net_device->dev->channel, | ||
166 | revoke_packet, | ||
167 | sizeof(struct nvsp_message), | ||
168 | (unsigned long)revoke_packet, | ||
169 | VM_PKT_DATA_INBAND, 0); | ||
170 | /* If we failed here, we might as well return and | ||
171 | * have a leak rather than continue and a bugchk | ||
172 | */ | ||
173 | if (ret != 0) { | ||
174 | netdev_err(ndev, "unable to send " | ||
175 | "revoke send buffer to netvsp\n"); | ||
176 | return ret; | ||
177 | } | ||
178 | } | ||
179 | /* Teardown the gpadl on the vsp end */ | ||
180 | if (net_device->send_buf_gpadl_handle) { | ||
181 | ret = vmbus_teardown_gpadl(net_device->dev->channel, | ||
182 | net_device->send_buf_gpadl_handle); | ||
183 | |||
184 | /* If we failed here, we might as well return and have a leak | ||
185 | * rather than continue and a bugchk | ||
186 | */ | ||
187 | if (ret != 0) { | ||
188 | netdev_err(ndev, | ||
189 | "unable to teardown send buffer's gpadl\n"); | ||
190 | return ret; | ||
191 | } | ||
192 | net_device->recv_buf_gpadl_handle = 0; | ||
193 | } | ||
194 | if (net_device->send_buf) { | ||
195 | /* Free up the send buffer */ | ||
196 | free_pages((unsigned long)net_device->send_buf, | ||
197 | get_order(net_device->send_buf_size)); | ||
198 | net_device->send_buf = NULL; | ||
199 | } | ||
200 | kfree(net_device->send_section_map); | ||
201 | |||
149 | return ret; | 202 | return ret; |
150 | } | 203 | } |
151 | 204 | ||
152 | static int netvsc_init_recv_buf(struct hv_device *device) | 205 | static int netvsc_init_buf(struct hv_device *device) |
153 | { | 206 | { |
154 | int ret = 0; | 207 | int ret = 0; |
155 | int t; | 208 | int t; |
@@ -248,10 +301,90 @@ static int netvsc_init_recv_buf(struct hv_device *device) | |||
248 | goto cleanup; | 301 | goto cleanup; |
249 | } | 302 | } |
250 | 303 | ||
304 | /* Now setup the send buffer. | ||
305 | */ | ||
306 | net_device->send_buf = | ||
307 | (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, | ||
308 | get_order(net_device->send_buf_size)); | ||
309 | if (!net_device->send_buf) { | ||
310 | netdev_err(ndev, "unable to allocate send " | ||
311 | "buffer of size %d\n", net_device->send_buf_size); | ||
312 | ret = -ENOMEM; | ||
313 | goto cleanup; | ||
314 | } | ||
315 | |||
316 | /* Establish the gpadl handle for this buffer on this | ||
317 | * channel. Note: This call uses the vmbus connection rather | ||
318 | * than the channel to establish the gpadl handle. | ||
319 | */ | ||
320 | ret = vmbus_establish_gpadl(device->channel, net_device->send_buf, | ||
321 | net_device->send_buf_size, | ||
322 | &net_device->send_buf_gpadl_handle); | ||
323 | if (ret != 0) { | ||
324 | netdev_err(ndev, | ||
325 | "unable to establish send buffer's gpadl\n"); | ||
326 | goto cleanup; | ||
327 | } | ||
328 | |||
329 | /* Notify the NetVsp of the gpadl handle */ | ||
330 | init_packet = &net_device->channel_init_pkt; | ||
331 | memset(init_packet, 0, sizeof(struct nvsp_message)); | ||
332 | init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF; | ||
333 | init_packet->msg.v1_msg.send_recv_buf.gpadl_handle = | ||
334 | net_device->send_buf_gpadl_handle; | ||
335 | init_packet->msg.v1_msg.send_recv_buf.id = 0; | ||
336 | |||
337 | /* Send the gpadl notification request */ | ||
338 | ret = vmbus_sendpacket(device->channel, init_packet, | ||
339 | sizeof(struct nvsp_message), | ||
340 | (unsigned long)init_packet, | ||
341 | VM_PKT_DATA_INBAND, | ||
342 | VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); | ||
343 | if (ret != 0) { | ||
344 | netdev_err(ndev, | ||
345 | "unable to send send buffer's gpadl to netvsp\n"); | ||
346 | goto cleanup; | ||
347 | } | ||
348 | |||
349 | t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); | ||
350 | BUG_ON(t == 0); | ||
351 | |||
352 | /* Check the response */ | ||
353 | if (init_packet->msg.v1_msg. | ||
354 | send_send_buf_complete.status != NVSP_STAT_SUCCESS) { | ||
355 | netdev_err(ndev, "Unable to complete send buffer " | ||
356 | "initialization with NetVsp - status %d\n", | ||
357 | init_packet->msg.v1_msg. | ||
358 | send_recv_buf_complete.status); | ||
359 | ret = -EINVAL; | ||
360 | goto cleanup; | ||
361 | } | ||
362 | |||
363 | /* Parse the response */ | ||
364 | net_device->send_section_size = init_packet->msg. | ||
365 | v1_msg.send_send_buf_complete.section_size; | ||
366 | |||
367 | /* Section count is simply the size divided by the section size. | ||
368 | */ | ||
369 | net_device->send_section_cnt = | ||
370 | net_device->send_buf_size/net_device->send_section_size; | ||
371 | |||
372 | dev_info(&device->device, "Send section size: %d, Section count:%d\n", | ||
373 | net_device->send_section_size, net_device->send_section_cnt); | ||
374 | |||
375 | /* Setup state for managing the send buffer. */ | ||
376 | net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, | ||
377 | BITS_PER_LONG); | ||
378 | |||
379 | net_device->send_section_map = | ||
380 | kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); | ||
381 | if (net_device->send_section_map == NULL) | ||
382 | goto cleanup; | ||
383 | |||
251 | goto exit; | 384 | goto exit; |
252 | 385 | ||
253 | cleanup: | 386 | cleanup: |
254 | netvsc_destroy_recv_buf(net_device); | 387 | netvsc_destroy_buf(net_device); |
255 | 388 | ||
256 | exit: | 389 | exit: |
257 | return ret; | 390 | return ret; |
@@ -369,8 +502,9 @@ static int netvsc_connect_vsp(struct hv_device *device) | |||
369 | net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; | 502 | net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; |
370 | else | 503 | else |
371 | net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; | 504 | net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; |
505 | net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE; | ||
372 | 506 | ||
373 | ret = netvsc_init_recv_buf(device); | 507 | ret = netvsc_init_buf(device); |
374 | 508 | ||
375 | cleanup: | 509 | cleanup: |
376 | return ret; | 510 | return ret; |
@@ -378,7 +512,7 @@ cleanup: | |||
378 | 512 | ||
379 | static void netvsc_disconnect_vsp(struct netvsc_device *net_device) | 513 | static void netvsc_disconnect_vsp(struct netvsc_device *net_device) |
380 | { | 514 | { |
381 | netvsc_destroy_recv_buf(net_device); | 515 | netvsc_destroy_buf(net_device); |
382 | } | 516 | } |
383 | 517 | ||
384 | /* | 518 | /* |
@@ -440,6 +574,12 @@ static inline u32 hv_ringbuf_avail_percent( | |||
440 | return avail_write * 100 / ring_info->ring_datasize; | 574 | return avail_write * 100 / ring_info->ring_datasize; |
441 | } | 575 | } |
442 | 576 | ||
577 | static inline void netvsc_free_send_slot(struct netvsc_device *net_device, | ||
578 | u32 index) | ||
579 | { | ||
580 | sync_change_bit(index, net_device->send_section_map); | ||
581 | } | ||
582 | |||
443 | static void netvsc_send_completion(struct netvsc_device *net_device, | 583 | static void netvsc_send_completion(struct netvsc_device *net_device, |
444 | struct hv_device *device, | 584 | struct hv_device *device, |
445 | struct vmpacket_descriptor *packet) | 585 | struct vmpacket_descriptor *packet) |
@@ -447,6 +587,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device, | |||
447 | struct nvsp_message *nvsp_packet; | 587 | struct nvsp_message *nvsp_packet; |
448 | struct hv_netvsc_packet *nvsc_packet; | 588 | struct hv_netvsc_packet *nvsc_packet; |
449 | struct net_device *ndev; | 589 | struct net_device *ndev; |
590 | u32 send_index; | ||
450 | 591 | ||
451 | ndev = net_device->ndev; | 592 | ndev = net_device->ndev; |
452 | 593 | ||
@@ -477,6 +618,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device, | |||
477 | 618 | ||
478 | /* Notify the layer above us */ | 619 | /* Notify the layer above us */ |
479 | if (nvsc_packet) { | 620 | if (nvsc_packet) { |
621 | send_index = nvsc_packet->send_buf_index; | ||
622 | if (send_index != NETVSC_INVALID_INDEX) | ||
623 | netvsc_free_send_slot(net_device, send_index); | ||
480 | q_idx = nvsc_packet->q_idx; | 624 | q_idx = nvsc_packet->q_idx; |
481 | channel = nvsc_packet->channel; | 625 | channel = nvsc_packet->channel; |
482 | nvsc_packet->send_completion(nvsc_packet-> | 626 | nvsc_packet->send_completion(nvsc_packet-> |
@@ -504,6 +648,52 @@ static void netvsc_send_completion(struct netvsc_device *net_device, | |||
504 | 648 | ||
505 | } | 649 | } |
506 | 650 | ||
651 | static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) | ||
652 | { | ||
653 | unsigned long index; | ||
654 | u32 max_words = net_device->map_words; | ||
655 | unsigned long *map_addr = (unsigned long *)net_device->send_section_map; | ||
656 | u32 section_cnt = net_device->send_section_cnt; | ||
657 | int ret_val = NETVSC_INVALID_INDEX; | ||
658 | int i; | ||
659 | int prev_val; | ||
660 | |||
661 | for (i = 0; i < max_words; i++) { | ||
662 | if (!~(map_addr[i])) | ||
663 | continue; | ||
664 | index = ffz(map_addr[i]); | ||
665 | prev_val = sync_test_and_set_bit(index, &map_addr[i]); | ||
666 | if (prev_val) | ||
667 | continue; | ||
668 | if ((index + (i * BITS_PER_LONG)) >= section_cnt) | ||
669 | break; | ||
670 | ret_val = (index + (i * BITS_PER_LONG)); | ||
671 | break; | ||
672 | } | ||
673 | return ret_val; | ||
674 | } | ||
675 | |||
676 | u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, | ||
677 | unsigned int section_index, | ||
678 | struct hv_netvsc_packet *packet) | ||
679 | { | ||
680 | char *start = net_device->send_buf; | ||
681 | char *dest = (start + (section_index * net_device->send_section_size)); | ||
682 | int i; | ||
683 | u32 msg_size = 0; | ||
684 | |||
685 | for (i = 0; i < packet->page_buf_cnt; i++) { | ||
686 | char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT); | ||
687 | u32 offset = packet->page_buf[i].offset; | ||
688 | u32 len = packet->page_buf[i].len; | ||
689 | |||
690 | memcpy(dest, (src + offset), len); | ||
691 | msg_size += len; | ||
692 | dest += len; | ||
693 | } | ||
694 | return msg_size; | ||
695 | } | ||
696 | |||
507 | int netvsc_send(struct hv_device *device, | 697 | int netvsc_send(struct hv_device *device, |
508 | struct hv_netvsc_packet *packet) | 698 | struct hv_netvsc_packet *packet) |
509 | { | 699 | { |
@@ -513,6 +703,10 @@ int netvsc_send(struct hv_device *device, | |||
513 | struct net_device *ndev; | 703 | struct net_device *ndev; |
514 | struct vmbus_channel *out_channel = NULL; | 704 | struct vmbus_channel *out_channel = NULL; |
515 | u64 req_id; | 705 | u64 req_id; |
706 | unsigned int section_index = NETVSC_INVALID_INDEX; | ||
707 | u32 msg_size = 0; | ||
708 | struct sk_buff *skb; | ||
709 | |||
516 | 710 | ||
517 | net_device = get_outbound_net_device(device); | 711 | net_device = get_outbound_net_device(device); |
518 | if (!net_device) | 712 | if (!net_device) |
@@ -528,10 +722,26 @@ int netvsc_send(struct hv_device *device, | |||
528 | sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1; | 722 | sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1; |
529 | } | 723 | } |
530 | 724 | ||
531 | /* Not using send buffer section */ | 725 | /* Attempt to send via sendbuf */ |
726 | if (packet->total_data_buflen < net_device->send_section_size) { | ||
727 | section_index = netvsc_get_next_send_section(net_device); | ||
728 | if (section_index != NETVSC_INVALID_INDEX) { | ||
729 | msg_size = netvsc_copy_to_send_buf(net_device, | ||
730 | section_index, | ||
731 | packet); | ||
732 | skb = (struct sk_buff *) | ||
733 | (unsigned long)packet->send_completion_tid; | ||
734 | if (skb) | ||
735 | dev_kfree_skb_any(skb); | ||
736 | packet->page_buf_cnt = 0; | ||
737 | } | ||
738 | } | ||
739 | packet->send_buf_index = section_index; | ||
740 | |||
741 | |||
532 | sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index = | 742 | sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index = |
533 | 0xFFFFFFFF; | 743 | section_index; |
534 | sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; | 744 | sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size; |
535 | 745 | ||
536 | if (packet->send_completion) | 746 | if (packet->send_completion) |
537 | req_id = (ulong)packet; | 747 | req_id = (ulong)packet; |
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c76b66515e92..939e3af60ec4 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c | |||
@@ -236,10 +236,11 @@ static void netvsc_xmit_completion(void *context) | |||
236 | struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; | 236 | struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; |
237 | struct sk_buff *skb = (struct sk_buff *) | 237 | struct sk_buff *skb = (struct sk_buff *) |
238 | (unsigned long)packet->send_completion_tid; | 238 | (unsigned long)packet->send_completion_tid; |
239 | u32 index = packet->send_buf_index; | ||
239 | 240 | ||
240 | kfree(packet); | 241 | kfree(packet); |
241 | 242 | ||
242 | if (skb) | 243 | if (skb && (index == NETVSC_INVALID_INDEX)) |
243 | dev_kfree_skb_any(skb); | 244 | dev_kfree_skb_any(skb); |
244 | } | 245 | } |
245 | 246 | ||