author	K. Y. Srinivasan <kys@microsoft.com>	2012-11-14 04:09:02 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2012-11-15 18:42:09 -0500
commit	9aa8b50b2b3d3a70728438a15a0fdd03a6794a84 (patch)
tree	8300bea35ccd9fb2f815e53df022b6bbbbc85767 /drivers/hv/hv_balloon.c
parent	997071bcb34005f42e0fe5bc7930e895b070f251 (diff)
Drivers: hv: Add Hyper-V balloon driver
Add the basic balloon driver. Windows hosts dynamically manage the guest memory allocation via a combination of memory hot add and ballooning. Memory hot add is used to grow the guest memory up to the maximum memory that can be allocated to the guest. Ballooning is used to both shrink and expand the allocation up to the maximum. Supporting hot add needs additional support from the host; we will support hot add when that support is available. For now, by setting the VM startup memory to the VM maximum memory, we can use ballooning alone to dynamically manage memory allocation amongst competing guests on a given host.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/hv/hv_balloon.c')
-rw-r--r-- drivers/hv/hv_balloon.c | 1041
1 file changed, 1041 insertions, 0 deletions
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
new file mode 100644
index 000000000000..bbc497373aaf
--- /dev/null
+++ b/drivers/hv/hv_balloon.c
@@ -0,0 +1,1041 @@
1/*
2 * Copyright (c) 2012, Microsoft Corporation.
3 *
4 * Author:
5 * K. Y. Srinivasan <kys@microsoft.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published
9 * by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/kernel.h>
22#include <linux/mman.h>
23#include <linux/delay.h>
24#include <linux/init.h>
25#include <linux/module.h>
26#include <linux/slab.h>
27#include <linux/kthread.h>
28#include <linux/completion.h>
29#include <linux/memory_hotplug.h>
30#include <linux/memory.h>
31#include <linux/notifier.h>
32#include <linux/mman.h>
33#include <linux/percpu_counter.h>
34
35#include <linux/hyperv.h>
36
37/*
38 * We begin with definitions supporting the Dynamic Memory protocol
39 * with the host.
40 *
41 * Begin protocol definitions.
42 */
43
44
45
46/*
47 * Protocol versions. The low word is the minor version; the high word is
48 * the major version.
49 *
50 * History:
51 * Initial version 1.0
52 * Changed to 0.1 on 2009/03/25
53 * Changed to 0.2 on 2009/05/14
54 * Changed to 0.3 on 2009/12/03
55 * Changed to 1.0 on 2011/04/05
56 */
57
58#define DYNMEM_MAKE_VERSION(Major, Minor) ((__u32)(((Major) << 16) | (Minor)))
59#define DYNMEM_MAJOR_VERSION(Version) ((__u32)(Version) >> 16)
60#define DYNMEM_MINOR_VERSION(Version) ((__u32)(Version) & 0xff)
61
62enum {
63 DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
64 DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
65
66 DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1,
67 DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2,
68
69 DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN8
70};
71
72
73
74/*
75 * Message Types
76 */
77
78enum dm_message_type {
79 /*
80 * Version 0.3
81 */
82 DM_ERROR = 0,
83 DM_VERSION_REQUEST = 1,
84 DM_VERSION_RESPONSE = 2,
85 DM_CAPABILITIES_REPORT = 3,
86 DM_CAPABILITIES_RESPONSE = 4,
87 DM_STATUS_REPORT = 5,
88 DM_BALLOON_REQUEST = 6,
89 DM_BALLOON_RESPONSE = 7,
90 DM_UNBALLOON_REQUEST = 8,
91 DM_UNBALLOON_RESPONSE = 9,
92 DM_MEM_HOT_ADD_REQUEST = 10,
93 DM_MEM_HOT_ADD_RESPONSE = 11,
94 DM_VERSION_03_MAX = 11,
95 /*
96 * Version 1.0.
97 */
98 DM_INFO_MESSAGE = 12,
99 DM_VERSION_1_MAX = 12
100};
101
102
103/*
104 * Structures defining the dynamic memory management
105 * protocol.
106 */
107
108union dm_version {
109 struct {
110 __u16 minor_version;
111 __u16 major_version;
112 };
113 __u32 version;
114} __packed;
115
116
117union dm_caps {
118 struct {
119 __u64 balloon:1;
120 __u64 hot_add:1;
121 __u64 reservedz:62;
122 } cap_bits;
123 __u64 caps;
124} __packed;
125
126union dm_mem_page_range {
127 struct {
128 /*
129 * The PFN number of the first page in the range.
130 * 40 bits is the architectural limit of a PFN
131 * number for AMD64.
132 */
133 __u64 start_page:40;
134 /*
135 * The number of pages in the range.
136 */
137 __u64 page_cnt:24;
138 } finfo;
139 __u64 page_range;
140} __packed;
141
142
143
144/*
145 * The header for all dynamic memory messages:
146 *
147 * type: Type of the message.
148 * size: Size of the message in bytes, including the header.
149 * trans_id: The guest is responsible for manufacturing this ID.
150 */
151
152struct dm_header {
153 __u16 type;
154 __u16 size;
155 __u32 trans_id;
156} __packed;
157
158/*
159 * A generic message format for dynamic memory.
160 * Specific message formats are defined later in the file.
161 */
162
163struct dm_message {
164 struct dm_header hdr;
165 __u8 data[]; /* enclosed message */
166} __packed;
167
168
169/*
170 * Specific message types supporting the dynamic memory protocol.
171 */
172
173/*
174 * Version negotiation message. Sent from the guest to the host.
175 * The guest is free to try different versions until the host
176 * accepts the version.
177 *
178 * dm_version: The protocol version requested.
179 * is_last_attempt: If TRUE, this is the last version the guest will request.
180 * reservedz: Reserved field, set to zero.
181 */
182
183struct dm_version_request {
184 struct dm_header hdr;
185 union dm_version version;
186 __u32 is_last_attempt:1;
187 __u32 reservedz:31;
188} __packed;
189
190/*
191 * Version response message; sent from the host to the guest and indicates
192 * whether the host has accepted the version sent by the guest.
193 *
194 * is_accepted: If TRUE, host has accepted the version and the guest
195 * should proceed to the next stage of the protocol. FALSE indicates that
196 * guest should re-try with a different version.
197 *
198 * reservedz: Reserved field, set to zero.
199 */
200
201struct dm_version_response {
202 struct dm_header hdr;
203 __u64 is_accepted:1;
204 __u64 reservedz:63;
205} __packed;
206
207/*
208 * Message reporting capabilities. This is sent from the guest to the
209 * host.
210 */
211
212struct dm_capabilities {
213 struct dm_header hdr;
214 union dm_caps caps;
215 __u64 min_page_cnt;
216 __u64 max_page_number;
217} __packed;
218
219/*
220 * Response to the capabilities message. This is sent from the host to the
221 * guest. This message notifies if the host has accepted the guest's
222 * capabilities. If the host has not accepted, the guest must shut down
223 * the service.
224 *
225 * is_accepted: Indicates if the host has accepted guest's capabilities.
226 * reservedz: Must be 0.
227 */
228
229struct dm_capabilities_resp_msg {
230 struct dm_header hdr;
231 __u64 is_accepted:1;
232 __u64 reservedz:63;
233} __packed;
234
235/*
236 * This message is used to report memory pressure from the guest.
237 * This message is not part of any transaction and there is no
238 * response to this message.
239 *
240 * num_avail: Available memory in pages.
241 * num_committed: Committed memory in pages.
242 * page_file_size: The accumulated size of all page files
243 * in the system in pages.
244 * zero_free: The number of zero and free pages.
245 * page_file_writes: The writes to the page file in pages.
246 * io_diff: An indicator of file cache efficiency or page file activity,
247 * calculated as File Cache Page Fault Count - Page Read Count.
248 * This value is in pages.
249 *
250 * Some of these metrics are Windows specific and fortunately
251 * the algorithm on the host side that computes the guest memory
252 * pressure only uses the num_committed value.
253 */
254
255struct dm_status {
256 struct dm_header hdr;
257 __u64 num_avail;
258 __u64 num_committed;
259 __u64 page_file_size;
260 __u64 zero_free;
261 __u32 page_file_writes;
262 __u32 io_diff;
263} __packed;
264
265
266/*
267 * Message to ask the guest to allocate memory - balloon up message.
268 * This message is sent from the host to the guest. The guest may not be
269 * able to allocate as much memory as requested.
270 *
271 * num_pages: number of pages to allocate.
272 */
273
274struct dm_balloon {
275 struct dm_header hdr;
276 __u32 num_pages;
277 __u32 reservedz;
278} __packed;
279
280
281/*
282 * Balloon response message; this message is sent from the guest
283 * to the host in response to the balloon message.
284 *
285 * reservedz: Reserved; must be set to zero.
286 * more_pages: If FALSE, this is the last message of the transaction.
287 * if TRUE, there will be at least one more message from the guest.
288 *
289 * range_count: The number of ranges in the range array.
290 *
291 * range_array: An array of page ranges returned to the host.
292 *
293 */
294
295struct dm_balloon_response {
296 struct dm_header hdr;
297 __u32 reservedz;
298 __u32 more_pages:1;
299 __u32 range_count:31;
300 union dm_mem_page_range range_array[];
301} __packed;
302
303/*
304 * Un-balloon message; this message is sent from the host
305 * to the guest to give guest more memory.
306 *
307 * more_pages: If FALSE, this is the last message of the transaction.
308 * if TRUE, there will be at least one more message from the host.
309 *
310 * reservedz: Reserved; must be set to zero.
311 *
312 * range_count: The number of ranges in the range array.
313 *
314 * range_array: An array of page ranges returned to the host.
315 *
316 */
317
318struct dm_unballoon_request {
319 struct dm_header hdr;
320 __u32 more_pages:1;
321 __u32 reservedz:31;
322 __u32 range_count;
323 union dm_mem_page_range range_array[];
324} __packed;
325
326/*
327 * Un-balloon response message; this message is sent from the guest
328 * to the host in response to an unballoon request.
329 *
330 */
331
332struct dm_unballoon_response {
333 struct dm_header hdr;
334} __packed;
335
336
337/*
338 * Hot add request message. Message sent from the host to the guest.
339 *
340 * mem_range: Memory range to hot add.
341 *
342 * On Linux we currently don't support this since we cannot hot add
343 * memory at arbitrary granularity.
344 */
345
346struct dm_hot_add {
347 struct dm_header hdr;
348 union dm_mem_page_range range;
349} __packed;
350
351/*
352 * Hot add response message.
353 * This message is sent by the guest to report the status of a hot add request.
354 * If page_count is less than the requested page count, then the host should
355 * assume all further hot add requests will fail, since this indicates that
356 * the guest has hit an upper physical memory barrier.
357 *
358 * Hot adds may also fail due to low resources; in this case, the guest must
359 * not complete this message until the hot add can succeed, and the host must
360 * not send a new hot add request until the response is sent.
361 * If the VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS
362 * times, it fails the request.
363 *
364 *
365 * page_count: number of pages that were successfully hot added.
366 *
367 * result: Result of the operation; 1: success, 0: failure.
368 *
369 */
370
371struct dm_hot_add_response {
372 struct dm_header hdr;
373 __u32 page_count;
374 __u32 result;
375} __packed;
376
377/*
378 * Types of information sent from host to the guest.
379 */
380
381enum dm_info_type {
382 INFO_TYPE_MAX_PAGE_CNT = 0,
383 MAX_INFO_TYPE
384};
385
386
387/*
388 * Header for the information message.
389 */
390
391struct dm_info_header {
392 enum dm_info_type type;
393 __u32 data_size;
394} __packed;
395
396/*
397 * This message is sent from the host to the guest to pass
398 * some relevant information (win8 addition).
399 *
400 * reserved: not used.
401 * info_size: size of the information blob.
402 * info: information blob.
403 */
404
405struct dm_info_msg {
406 struct dm_info_header header;
407 __u32 reserved;
408 __u32 info_size;
409 __u8 info[];
410};
411
412/*
413 * End protocol definitions.
414 */
415
416static bool hot_add;
417static bool do_hot_add;
418
419module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
420MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");
421
422static atomic_t trans_id = ATOMIC_INIT(0);
423
424static int dm_ring_size = (5 * PAGE_SIZE);
425
426/*
427 * Driver specific state.
428 */
429
430enum hv_dm_state {
431 DM_INITIALIZING = 0,
432 DM_INITIALIZED,
433 DM_BALLOON_UP,
434 DM_BALLOON_DOWN,
435 DM_HOT_ADD,
436 DM_INIT_ERROR
437};
438
439
440static __u8 recv_buffer[PAGE_SIZE];
441static __u8 *send_buffer;
442#define PAGES_IN_2M 512
443
444struct hv_dynmem_device {
445 struct hv_device *dev;
446 enum hv_dm_state state;
447 struct completion host_event;
448 struct completion config_event;
449
450 /*
451 * Number of pages we have currently ballooned out.
452 */
453 unsigned int num_pages_ballooned;
454
455 /*
456 * This thread handles balloon and hot-add
457 * requests from the host, as well as notifying
458 * the host about memory pressure in
459 * the guest.
460 */
461 struct task_struct *thread;
462
463 /*
464 * We start with the highest version we can support
465 * and downgrade based on the host; we save here the
466 * next version to try.
467 */
468 __u32 next_version;
469};
470
471static struct hv_dynmem_device dm_device;
472
473static void hot_add_req(struct hv_dynmem_device *dm, struct dm_hot_add *msg)
474{
475
476 struct dm_hot_add_response resp;
477
478 if (do_hot_add) {
479
480 pr_info("Memory hot add not supported\n");
481
482 /*
483 * Currently we do not support hot add.
484 * Just fail the request.
485 */
486 }
487
488 memset(&resp, 0, sizeof(struct dm_hot_add_response));
489 resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE;
490 resp.hdr.size = sizeof(struct dm_hot_add_response);
491 resp.hdr.trans_id = atomic_inc_return(&trans_id);
492
493 resp.page_count = 0;
494 resp.result = 0;
495
496 dm->state = DM_INITIALIZED;
497 vmbus_sendpacket(dm->dev->channel, &resp,
498 sizeof(struct dm_hot_add_response),
499 (unsigned long)NULL,
500 VM_PKT_DATA_INBAND, 0);
501
502}
503
504static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
505{
506 switch (msg->header.type) {
507 case INFO_TYPE_MAX_PAGE_CNT:
508 pr_info("Received INFO_TYPE_MAX_PAGE_CNT\n");
509 pr_info("Data Size is %d\n", msg->header.data_size);
510 break;
511 default:
512 pr_info("Received Unknown type: %d\n", msg->header.type);
513 }
514}
515
516/*
517 * Post our status as it relates to memory pressure to the
518 * host. The host expects the guests to post this status
519 * periodically at 1 second intervals.
520 *
521 * The metrics specified in this protocol are very Windows
522 * specific and so we cook up numbers here to convey our memory
523 * pressure.
524 */
525
526static void post_status(struct hv_dynmem_device *dm)
527{
528 struct dm_status status;
529
530
531 memset(&status, 0, sizeof(struct dm_status));
532 status.hdr.type = DM_STATUS_REPORT;
533 status.hdr.size = sizeof(struct dm_status);
534 status.hdr.trans_id = atomic_inc_return(&trans_id);
535
536
537 status.num_committed = vm_memory_committed();
538
539 vmbus_sendpacket(dm->dev->channel, &status,
540 sizeof(struct dm_status),
541 (unsigned long)NULL,
542 VM_PKT_DATA_INBAND, 0);
543
544}
545
546
547
548void free_balloon_pages(struct hv_dynmem_device *dm,
549 union dm_mem_page_range *range_array)
550{
551 int num_pages = range_array->finfo.page_cnt;
552 __u64 start_frame = range_array->finfo.start_page;
553 struct page *pg;
554 int i;
555
556 for (i = 0; i < num_pages; i++) {
557 pg = pfn_to_page(i + start_frame);
558 __free_page(pg);
559 dm->num_pages_ballooned--;
560 }
561}
562
563
564
565static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
566 struct dm_balloon_response *bl_resp, int alloc_unit,
567 bool *alloc_error)
568{
569 int i = 0;
570 struct page *pg;
571
572 if (num_pages < alloc_unit)
573 return 0;
574
575 for (i = 0; (i * alloc_unit) < num_pages; i++) {
576 if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) >
577 PAGE_SIZE)
578 return i * alloc_unit;
579
580 /*
581 * We execute this code in a thread context. Furthermore,
582 * we don't want the kernel to try too hard.
583 */
584 pg = alloc_pages(GFP_HIGHUSER | __GFP_NORETRY |
585 __GFP_NOMEMALLOC | __GFP_NOWARN,
586 get_order(alloc_unit << PAGE_SHIFT));
587
588 if (!pg) {
589 *alloc_error = true;
590 return i * alloc_unit;
591 }
592
593
594 dm->num_pages_ballooned += alloc_unit;
595
596 bl_resp->range_count++;
597 bl_resp->range_array[i].finfo.start_page =
598 page_to_pfn(pg);
599 bl_resp->range_array[i].finfo.page_cnt = alloc_unit;
600 bl_resp->hdr.size += sizeof(union dm_mem_page_range);
601
602 }
603
604 return num_pages;
605}
606
607
608
609static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
610{
611 int num_pages = req->num_pages;
612 int num_ballooned = 0;
613 struct dm_balloon_response *bl_resp;
614 int alloc_unit;
615 int ret;
616 bool alloc_error = false;
617 bool done = false;
618 int i;
619
620
621 /*
622 * Currently, we only support 4k allocations.
623 */
624 alloc_unit = 1;
625
626 while (!done) {
627 bl_resp = (struct dm_balloon_response *)send_buffer;
628 memset(send_buffer, 0, PAGE_SIZE);
629 bl_resp->hdr.type = DM_BALLOON_RESPONSE;
630 bl_resp->hdr.trans_id = atomic_inc_return(&trans_id);
631 bl_resp->hdr.size = sizeof(struct dm_balloon_response);
632 bl_resp->more_pages = 1;
633
634
635 num_pages -= num_ballooned;
636 num_ballooned = alloc_balloon_pages(dm, num_pages,
637 bl_resp, alloc_unit,
638 &alloc_error);
639
640 if ((alloc_error) || (num_ballooned == num_pages)) {
641 bl_resp->more_pages = 0;
642 done = true;
643 dm->state = DM_INITIALIZED;
644 }
645
646 /*
647 * We are pushing a lot of data through the channel;
648 * deal with transient failures caused by the
649 * lack of space in the ring buffer.
650 */
651
652 do {
653 ret = vmbus_sendpacket(dm_device.dev->channel,
654 bl_resp,
655 bl_resp->hdr.size,
656 (unsigned long)NULL,
657 VM_PKT_DATA_INBAND, 0);
658
659 if (ret == -EAGAIN)
660 msleep(20);
661
662 } while (ret == -EAGAIN);
663
664 if (ret) {
665 /*
666 * Free up the memory we allocated.
667 */
668 pr_info("Balloon response failed\n");
669
670 for (i = 0; i < bl_resp->range_count; i++)
671 free_balloon_pages(dm,
672 &bl_resp->range_array[i]);
673
674 done = true;
675 }
676 }
677
678}
679
680static void balloon_down(struct hv_dynmem_device *dm,
681 struct dm_unballoon_request *req)
682{
683 union dm_mem_page_range *range_array = req->range_array;
684 int range_count = req->range_count;
685 struct dm_unballoon_response resp;
686 int i;
687
688 for (i = 0; i < range_count; i++)
689 free_balloon_pages(dm, &range_array[i]);
690
691 if (req->more_pages == 1)
692 return;
693
694 memset(&resp, 0, sizeof(struct dm_unballoon_response));
695 resp.hdr.type = DM_UNBALLOON_RESPONSE;
696 resp.hdr.trans_id = atomic_inc_return(&trans_id);
697 resp.hdr.size = sizeof(struct dm_unballoon_response);
698
699 vmbus_sendpacket(dm_device.dev->channel, &resp,
700 sizeof(struct dm_unballoon_response),
701 (unsigned long)NULL,
702 VM_PKT_DATA_INBAND, 0);
703
704 dm->state = DM_INITIALIZED;
705}
706
707static void balloon_onchannelcallback(void *context);
708
709static int dm_thread_func(void *dm_dev)
710{
711 struct hv_dynmem_device *dm = dm_dev;
712 int t;
713 unsigned long scan_start;
714
715 while (!kthread_should_stop()) {
716 t = wait_for_completion_timeout(&dm_device.config_event, 1*HZ);
717 /*
718 * The host expects us to post information on the memory
719 * pressure every second.
720 */
721
722 if (t == 0)
723 post_status(dm);
724
725 scan_start = jiffies;
726 switch (dm->state) {
727 case DM_BALLOON_UP:
728 balloon_up(dm, (struct dm_balloon *)recv_buffer);
729 break;
730
731 case DM_HOT_ADD:
732 hot_add_req(dm, (struct dm_hot_add *)recv_buffer);
733 break;
734 default:
735 break;
736 }
737
738 if (!time_in_range(jiffies, scan_start, scan_start + HZ))
739 post_status(dm);
740
741 }
742
743 return 0;
744}
745
746
747static void version_resp(struct hv_dynmem_device *dm,
748 struct dm_version_response *vresp)
749{
750 struct dm_version_request version_req;
751 int ret;
752
753 if (vresp->is_accepted) {
754 /*
755 * We are done; wake up the
756 * context waiting for version
757 * negotiation.
758 */
759 complete(&dm->host_event);
760 return;
761 }
762 /*
763 * If there are more versions to try, continue
764 * with negotiations; if not,
765 * shut down the service since we are not able
766 * to negotiate a suitable version number
767 * with the host.
768 */
769 if (dm->next_version == 0)
770 goto version_error;
771
772 dm->next_version = 0;
773 memset(&version_req, 0, sizeof(struct dm_version_request));
774 version_req.hdr.type = DM_VERSION_REQUEST;
775 version_req.hdr.size = sizeof(struct dm_version_request);
776 version_req.hdr.trans_id = atomic_inc_return(&trans_id);
777 version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN7;
778 version_req.is_last_attempt = 1;
779
780 ret = vmbus_sendpacket(dm->dev->channel, &version_req,
781 sizeof(struct dm_version_request),
782 (unsigned long)NULL,
783 VM_PKT_DATA_INBAND, 0);
784
785 if (ret)
786 goto version_error;
787
788 return;
789
790version_error:
791 dm->state = DM_INIT_ERROR;
792 complete(&dm->host_event);
793}
794
795static void cap_resp(struct hv_dynmem_device *dm,
796 struct dm_capabilities_resp_msg *cap_resp)
797{
798 if (!cap_resp->is_accepted) {
799 pr_info("Capabilities not accepted by host\n");
800 dm->state = DM_INIT_ERROR;
801 }
802 complete(&dm->host_event);
803}
804
805static void balloon_onchannelcallback(void *context)
806{
807 struct hv_device *dev = context;
808 u32 recvlen;
809 u64 requestid;
810 struct dm_message *dm_msg;
811 struct dm_header *dm_hdr;
812 struct hv_dynmem_device *dm = hv_get_drvdata(dev);
813
814 memset(recv_buffer, 0, sizeof(recv_buffer));
815 vmbus_recvpacket(dev->channel, recv_buffer,
816 PAGE_SIZE, &recvlen, &requestid);
817
818 if (recvlen > 0) {
819 dm_msg = (struct dm_message *)recv_buffer;
820 dm_hdr = &dm_msg->hdr;
821
822 switch (dm_hdr->type) {
823 case DM_VERSION_RESPONSE:
824 version_resp(dm,
825 (struct dm_version_response *)dm_msg);
826 break;
827
828 case DM_CAPABILITIES_RESPONSE:
829 cap_resp(dm,
830 (struct dm_capabilities_resp_msg *)dm_msg);
831 break;
832
833 case DM_BALLOON_REQUEST:
834 dm->state = DM_BALLOON_UP;
835 complete(&dm->config_event);
836 break;
837
838 case DM_UNBALLOON_REQUEST:
839 dm->state = DM_BALLOON_DOWN;
840 balloon_down(dm,
841 (struct dm_unballoon_request *)recv_buffer);
842 break;
843
844 case DM_MEM_HOT_ADD_REQUEST:
845 dm->state = DM_HOT_ADD;
846 complete(&dm->config_event);
847 break;
848
849 case DM_INFO_MESSAGE:
850 process_info(dm, (struct dm_info_msg *)dm_msg);
851 break;
852
853 default:
854 pr_err("Unhandled message: type: %d\n", dm_hdr->type);
855
856 }
857 }
858
859}
860
861static int balloon_probe(struct hv_device *dev,
862 const struct hv_vmbus_device_id *dev_id)
863{
864 int ret, t;
865 struct dm_version_request version_req;
866 struct dm_capabilities cap_msg;
867
868 do_hot_add = hot_add;
869
870 /*
871 * First allocate a send buffer.
872 */
873
874 send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
875 if (!send_buffer)
876 return -ENOMEM;
877
878 ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
879 balloon_onchannelcallback, dev);
880
881 if (ret)
882 return ret;
883
884 dm_device.dev = dev;
885 dm_device.state = DM_INITIALIZING;
886 dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
887 init_completion(&dm_device.host_event);
888 init_completion(&dm_device.config_event);
889
890 dm_device.thread =
891 kthread_run(dm_thread_func, &dm_device, "hv_balloon");
892 if (IS_ERR(dm_device.thread)) {
893 ret = PTR_ERR(dm_device.thread);
894 goto probe_error0;
895 }
896
897 hv_set_drvdata(dev, &dm_device);
898 /*
899 * Initiate the handshake with the host and negotiate
900 * a version that the host can support. We start with the
901 * highest version number and go down if the host cannot
902 * support it.
903 */
904 memset(&version_req, 0, sizeof(struct dm_version_request));
905 version_req.hdr.type = DM_VERSION_REQUEST;
906 version_req.hdr.size = sizeof(struct dm_version_request);
907 version_req.hdr.trans_id = atomic_inc_return(&trans_id);
908 version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN8;
909 version_req.is_last_attempt = 0;
910
911 ret = vmbus_sendpacket(dev->channel, &version_req,
912 sizeof(struct dm_version_request),
913 (unsigned long)NULL,
914 VM_PKT_DATA_INBAND,
915 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
916 if (ret)
917 goto probe_error1;
918
919 t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
920 if (t == 0) {
921 ret = -ETIMEDOUT;
922 goto probe_error1;
923 }
924
925 /*
926 * If we could not negotiate a compatible version with the host
927 * fail the probe function.
928 */
929 if (dm_device.state == DM_INIT_ERROR) {
930 ret = -ETIMEDOUT;
931 goto probe_error1;
932 }
933 /*
934 * Now submit our capabilities to the host.
935 */
936 memset(&cap_msg, 0, sizeof(struct dm_capabilities));
937 cap_msg.hdr.type = DM_CAPABILITIES_REPORT;
938 cap_msg.hdr.size = sizeof(struct dm_capabilities);
939 cap_msg.hdr.trans_id = atomic_inc_return(&trans_id);
940
941 cap_msg.caps.cap_bits.balloon = 1;
942 /*
943 * While we currently don't support hot-add,
944 * we still advertise this capability since the
945 * host requires that guests participating in the
946 * dynamic memory protocol support hot add.
947 */
948 cap_msg.caps.cap_bits.hot_add = 1;
949
950 /*
951 * Currently the host does not use these
952 * values and we set them to what is done in the
953 * Windows driver.
954 */
955 cap_msg.min_page_cnt = 0;
956 cap_msg.max_page_number = -1;
957
958 ret = vmbus_sendpacket(dev->channel, &cap_msg,
959 sizeof(struct dm_capabilities),
960 (unsigned long)NULL,
961 VM_PKT_DATA_INBAND,
962 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
963 if (ret)
964 goto probe_error1;
965
966 t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
967 if (t == 0) {
968 ret = -ETIMEDOUT;
969 goto probe_error1;
970 }
971
972 /*
973 * If the host does not like our capabilities,
974 * fail the probe function.
975 */
976 if (dm_device.state == DM_INIT_ERROR) {
977 ret = -ETIMEDOUT;
978 goto probe_error1;
979 }
980
981 dm_device.state = DM_INITIALIZED;
982
983 return 0;
984
985probe_error1:
986 kthread_stop(dm_device.thread);
987
988probe_error0:
989 vmbus_close(dev->channel);
990 return ret;
991}
992
993static int balloon_remove(struct hv_device *dev)
994{
995 struct hv_dynmem_device *dm = hv_get_drvdata(dev);
996
997 if (dm->num_pages_ballooned != 0)
998 pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
999
1000 vmbus_close(dev->channel);
1001 kthread_stop(dm->thread);
1002
1003 return 0;
1004}
1005
1006static const struct hv_vmbus_device_id id_table[] = {
1007 /* Dynamic Memory Class ID */
1008 /* 525074DC-8985-46e2-8057-A307DC18A502 */
1009 { VMBUS_DEVICE(0xdc, 0x74, 0x50, 0X52, 0x85, 0x89, 0xe2, 0x46,
1010 0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02)
1011 },
1012 { },
1013};
1014
1015MODULE_DEVICE_TABLE(vmbus, id_table);
1016
1017static struct hv_driver balloon_drv = {
1018 .name = "hv_balloon",
1019 .id_table = id_table,
1020 .probe = balloon_probe,
1021 .remove = balloon_remove,
1022};
1023
1024static int __init init_balloon_drv(void)
1025{
1026
1027 return vmbus_driver_register(&balloon_drv);
1028}
1029
1030static void exit_balloon_drv(void)
1031{
1032
1033 vmbus_driver_unregister(&balloon_drv);
1034}
1035
1036module_init(init_balloon_drv);
1037module_exit(exit_balloon_drv);
1038
1039MODULE_DESCRIPTION("Hyper-V Balloon");
1040MODULE_VERSION(HV_DRV_VERSION);
1041MODULE_LICENSE("GPL");