Diffstat (limited to 'drivers/hv/hv_balloon.c')
-rw-r--r--  drivers/hv/hv_balloon.c | 544 ++++++++++++++++++++++++++++++------
1 file changed, 488 insertions(+), 56 deletions(-)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 37873213e24f..4c605c70ebf9 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -117,7 +117,14 @@ union dm_caps {
 	struct {
 		__u64 balloon:1;
 		__u64 hot_add:1;
-		__u64 reservedz:62;
+		/*
+		 * To support guests that may have alignment
+		 * limitations on hot-add, the guest can specify
+		 * its alignment requirements; a value of n
+		 * represents an alignment of 2^n in mega bytes.
+		 */
+		__u64 hot_add_alignment:4;
+		__u64 reservedz:58;
 	} cap_bits;
 	__u64 caps;
 } __packed;
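
The new hot_add_alignment field encodes the guest's hot-add alignment as a power of two in megabytes (a value of n means 2^n MB). As a standalone illustration of that encoding (not part of the patch; PAGE_SIZE and the 128 MB choice come from later hunks), the following computes the field value for a 128 MB granularity:

    #include <stdio.h>

    /* Encode an alignment of 'mb' megabytes (a power of two) as the
     * hot_add_alignment value n, where the alignment is 2^n MB. */
    static unsigned int encode_hot_add_alignment(unsigned long mb)
    {
        unsigned int n = 0;

        while ((1UL << n) < mb)
            n++;
        return n;
    }

    int main(void)
    {
        /* 128 MB -> 7, matching hot_add_alignment = 7 set in balloon_probe(). */
        printf("%u\n", encode_hot_add_alignment(128));
        return 0;
    }
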
@@ -412,13 +419,45 @@ struct dm_info_msg {
  * End protocol definitions.
  */
 
-static bool hot_add;
+/*
+ * State to manage hot adding memory into the guest.
+ * The range start_pfn : end_pfn specifies the range
+ * that the host has asked us to hot add. The range
+ * start_pfn : ha_end_pfn specifies the range that we have
+ * currently hot added. We hot add in multiples of 128M
+ * chunks; it is possible that we may not be able to bring
+ * online all the pages in the region. The range
+ * covered_start_pfn : covered_end_pfn defines the pages that can
+ * be brough online.
+ */
+
+struct hv_hotadd_state {
+	struct list_head list;
+	unsigned long start_pfn;
+	unsigned long covered_start_pfn;
+	unsigned long covered_end_pfn;
+	unsigned long ha_end_pfn;
+	unsigned long end_pfn;
+};
+
+struct balloon_state {
+	__u32 num_pages;
+	struct work_struct wrk;
+};
+
+struct hot_add_wrk {
+	union dm_mem_page_range ha_page_range;
+	union dm_mem_page_range ha_region_range;
+	struct work_struct wrk;
+};
+
+static bool hot_add = true;
 static bool do_hot_add;
 /*
  * Delay reporting memory pressure by
  * the specified number of seconds.
  */
-static uint pressure_report_delay = 30;
+static uint pressure_report_delay = 45;
 
 module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
 MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");
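
The comment above defines three pfn ranges per hot-add region. A small standalone sketch (illustrative only; the values and the mirror struct are made up, not the driver's types) of how those ranges relate for a region the host asked to hot add:

    #include <stdio.h>

    /* Mirrors the pfn fields of hv_hotadd_state, for illustration only. */
    struct ha_ranges {
        unsigned long start_pfn;         /* start of the host-requested range  */
        unsigned long end_pfn;           /* end of the host-requested range    */
        unsigned long ha_end_pfn;        /* end of what has been hot-added     */
        unsigned long covered_start_pfn; /* start of pages that can be onlined */
        unsigned long covered_end_pfn;   /* end of that onlineable window      */
    };

    int main(void)
    {
        /* Hypothetical region: host asked for 4 chunks, 2 hot-added so far,
         * 1.5 chunks' worth of pages currently backed and onlineable. */
        struct ha_ranges r = {
            .start_pfn = 0x100000, .end_pfn = 0x120000,
            .ha_end_pfn = 0x110000,
            .covered_start_pfn = 0x100000, .covered_end_pfn = 0x10c000,
        };

        printf("requested: %lx-%lx, hot-added: %lx-%lx, onlineable: %lx-%lx\n",
               r.start_pfn, r.end_pfn, r.start_pfn, r.ha_end_pfn,
               r.covered_start_pfn, r.covered_end_pfn);
        return 0;
    }
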
@@ -446,6 +485,7 @@ enum hv_dm_state {
 static __u8 recv_buffer[PAGE_SIZE];
 static __u8 *send_buffer;
 #define PAGES_IN_2M	512
+#define HA_CHUNK (32 * 1024)
 
 struct hv_dynmem_device {
 	struct hv_device *dev;
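
HA_CHUNK is expressed in pages: with 4 KiB pages, 32 * 1024 pfns is 128 MiB, which lines up with the 128M chunk size mentioned in the comments and the alignment advertised later in the patch. A standalone check of that arithmetic (the PAGE_SHIFT of 12 is an assumption for the example):

    #include <stdio.h>

    #define EXAMPLE_PAGE_SHIFT 12          /* 4 KiB pages assumed */
    #define HA_CHUNK (32 * 1024)           /* pages per hot-add chunk, as above */

    int main(void)
    {
        unsigned long bytes = (unsigned long)HA_CHUNK << EXAMPLE_PAGE_SHIFT;

        /* Prints "134217728 bytes = 128 MiB". */
        printf("%lu bytes = %lu MiB\n", bytes, bytes >> 20);
        return 0;
    }
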
@@ -459,7 +499,28 @@ struct hv_dynmem_device {
 	unsigned int num_pages_ballooned;
 
 	/*
-	 * This thread handles both balloon/hot-add
+	 * State to manage the ballooning (up) operation.
+	 */
+	struct balloon_state balloon_wrk;
+
+	/*
+	 * State to execute the "hot-add" operation.
+	 */
+	struct hot_add_wrk ha_wrk;
+
+	/*
+	 * This state tracks if the host has specified a hot-add
+	 * region.
+	 */
+	bool host_specified_ha_region;
+
+	/*
+	 * State to synchronize hot-add.
+	 */
+	struct completion ol_waitevent;
+	bool ha_waiting;
+	/*
+	 * This thread handles hot-add
 	 * requests from the host as well as notifying
 	 * the host with regards to memory pressure in
 	 * the guest.
@@ -467,6 +528,11 @@ struct hv_dynmem_device {
 	struct task_struct *thread;
 
 	/*
+	 * A list of hot-add regions.
+	 */
+	struct list_head ha_region_list;
+
+	/*
 	 * We start with the highest version we can support
 	 * and downgrade based on the host; we save here the
 	 * next version to try.
@@ -476,35 +542,358 @@ struct hv_dynmem_device {
 
 static struct hv_dynmem_device dm_device;
 
-static void hot_add_req(struct hv_dynmem_device *dm, struct dm_hot_add *msg)
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
 {
+	int i;
 
-	struct dm_hot_add_response resp;
+	for (i = 0; i < size; i++) {
+		struct page *pg;
+		pg = pfn_to_page(start_pfn + i);
+		__online_page_set_limits(pg);
+		__online_page_increment_counters(pg);
+		__online_page_free(pg);
+	}
+}
 
-	if (do_hot_add) {
+static void hv_mem_hot_add(unsigned long start, unsigned long size,
+				unsigned long pfn_count,
+				struct hv_hotadd_state *has)
+{
+	int ret = 0;
+	int i, nid, t;
+	unsigned long start_pfn;
+	unsigned long processed_pfn;
+	unsigned long total_pfn = pfn_count;
+
+	for (i = 0; i < (size/HA_CHUNK); i++) {
+		start_pfn = start + (i * HA_CHUNK);
+		has->ha_end_pfn += HA_CHUNK;
+
+		if (total_pfn > HA_CHUNK) {
+			processed_pfn = HA_CHUNK;
+			total_pfn -= HA_CHUNK;
+		} else {
+			processed_pfn = total_pfn;
+			total_pfn = 0;
+		}
+
+		has->covered_end_pfn += processed_pfn;
+
+		init_completion(&dm_device.ol_waitevent);
+		dm_device.ha_waiting = true;
+
+		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
+		ret = add_memory(nid, PFN_PHYS((start_pfn)),
+				(HA_CHUNK << PAGE_SHIFT));
 
-		pr_info("Memory hot add not supported\n");
+		if (ret) {
+			pr_info("hot_add memory failed error is %d\n", ret);
+			if (ret == -EEXIST) {
+				/*
+				 * This error indicates that the error
+				 * is not a transient failure. This is the
+				 * case where the guest's physical address map
+				 * precludes hot adding memory. Stop all further
+				 * memory hot-add.
+				 */
+				do_hot_add = false;
+			}
+			has->ha_end_pfn -= HA_CHUNK;
+			has->covered_end_pfn -= processed_pfn;
+			break;
+		}
 
 		/*
-		 * Currently we do not support hot add.
-		 * Just fail the request.
+		 * Wait for the memory block to be onlined.
 		 */
+		t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
+		if (t == 0) {
+			pr_info("hot_add memory timedout\n");
+			has->ha_end_pfn -= HA_CHUNK;
+			has->covered_end_pfn -= processed_pfn;
+			break;
+		}
+
+	}
+
+	return;
+}
+
+static void hv_online_page(struct page *pg)
+{
+	struct list_head *cur;
+	struct hv_hotadd_state *has;
+	unsigned long cur_start_pgp;
+	unsigned long cur_end_pgp;
+
+	if (dm_device.ha_waiting) {
+		dm_device.ha_waiting = false;
+		complete(&dm_device.ol_waitevent);
+	}
+
+	list_for_each(cur, &dm_device.ha_region_list) {
+		has = list_entry(cur, struct hv_hotadd_state, list);
+		cur_start_pgp = (unsigned long)
+				pfn_to_page(has->covered_start_pfn);
+		cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+
+		if (((unsigned long)pg >= cur_start_pgp) &&
+			((unsigned long)pg < cur_end_pgp)) {
+			/*
+			 * This frame is currently backed; online the
+			 * page.
+			 */
+			__online_page_set_limits(pg);
+			__online_page_increment_counters(pg);
+			__online_page_free(pg);
+			has->covered_start_pfn++;
+		}
+	}
+}
+
+static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
+{
+	struct list_head *cur;
+	struct hv_hotadd_state *has;
+	unsigned long residual, new_inc;
+
+	if (list_empty(&dm_device.ha_region_list))
+		return false;
+
+	list_for_each(cur, &dm_device.ha_region_list) {
+		has = list_entry(cur, struct hv_hotadd_state, list);
+
+		/*
+		 * If the pfn range we are dealing with is not in the current
+		 * "hot add block", move on.
+		 */
+		if ((start_pfn >= has->end_pfn))
+			continue;
+		/*
+		 * If the current hot add-request extends beyond
+		 * our current limit; extend it.
+		 */
+		if ((start_pfn + pfn_cnt) > has->end_pfn) {
+			residual = (start_pfn + pfn_cnt - has->end_pfn);
+			/*
+			 * Extend the region by multiples of HA_CHUNK.
+			 */
+			new_inc = (residual / HA_CHUNK) * HA_CHUNK;
+			if (residual % HA_CHUNK)
+				new_inc += HA_CHUNK;
+
+			has->end_pfn += new_inc;
+		}
+
+		/*
+		 * If the current start pfn is not where the covered_end
+		 * is, update it.
+		 */
+
+		if (has->covered_end_pfn != start_pfn) {
+			has->covered_end_pfn = start_pfn;
+			has->covered_start_pfn = start_pfn;
+		}
+		return true;
+
 	}
 
+	return false;
+}
+
+static unsigned long handle_pg_range(unsigned long pg_start,
+					unsigned long pg_count)
+{
+	unsigned long start_pfn = pg_start;
+	unsigned long pfn_cnt = pg_count;
+	unsigned long size;
+	struct list_head *cur;
+	struct hv_hotadd_state *has;
+	unsigned long pgs_ol = 0;
+	unsigned long old_covered_state;
+
+	if (list_empty(&dm_device.ha_region_list))
+		return 0;
+
+	list_for_each(cur, &dm_device.ha_region_list) {
+		has = list_entry(cur, struct hv_hotadd_state, list);
+
+		/*
+		 * If the pfn range we are dealing with is not in the current
+		 * "hot add block", move on.
+		 */
+		if ((start_pfn >= has->end_pfn))
+			continue;
+
+		old_covered_state = has->covered_end_pfn;
+
+		if (start_pfn < has->ha_end_pfn) {
+			/*
+			 * This is the case where we are backing pages
+			 * in an already hot added region. Bring
+			 * these pages online first.
+			 */
+			pgs_ol = has->ha_end_pfn - start_pfn;
+			if (pgs_ol > pfn_cnt)
+				pgs_ol = pfn_cnt;
+			hv_bring_pgs_online(start_pfn, pgs_ol);
+			has->covered_end_pfn += pgs_ol;
+			has->covered_start_pfn += pgs_ol;
+			pfn_cnt -= pgs_ol;
+		}
+
+		if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
+			/*
+			 * We have some residual hot add range
+			 * that needs to be hot added; hot add
+			 * it now. Hot add a multiple of
+			 * of HA_CHUNK that fully covers the pages
+			 * we have.
+			 */
+			size = (has->end_pfn - has->ha_end_pfn);
+			if (pfn_cnt <= size) {
+				size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK);
+				if (pfn_cnt % HA_CHUNK)
+					size += HA_CHUNK;
+			} else {
+				pfn_cnt = size;
+			}
+			hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
+		}
+		/*
+		 * If we managed to online any pages that were given to us,
+		 * we declare success.
+		 */
+		return has->covered_end_pfn - old_covered_state;
+
+	}
+
+	return 0;
+}
+
+static unsigned long process_hot_add(unsigned long pg_start,
+					unsigned long pfn_cnt,
+					unsigned long rg_start,
+					unsigned long rg_size)
+{
+	struct hv_hotadd_state *ha_region = NULL;
+
+	if (pfn_cnt == 0)
+		return 0;
+
+	if (!dm_device.host_specified_ha_region)
+		if (pfn_covered(pg_start, pfn_cnt))
+			goto do_pg_range;
+
+	/*
+	 * If the host has specified a hot-add range; deal with it first.
+	 */
+
+	if (rg_size != 0) {
+		ha_region = kzalloc(sizeof(struct hv_hotadd_state), GFP_KERNEL);
+		if (!ha_region)
+			return 0;
+
+		INIT_LIST_HEAD(&ha_region->list);
+
+		list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+		ha_region->start_pfn = rg_start;
+		ha_region->ha_end_pfn = rg_start;
+		ha_region->covered_start_pfn = pg_start;
+		ha_region->covered_end_pfn = pg_start;
+		ha_region->end_pfn = rg_start + rg_size;
+	}
+
+do_pg_range:
+	/*
+	 * Process the page range specified; bringing them
+	 * online if possible.
+	 */
+	return handle_pg_range(pg_start, pfn_cnt);
+}
+
+#endif
+
+static void hot_add_req(struct work_struct *dummy)
+{
+	struct dm_hot_add_response resp;
+#ifdef CONFIG_MEMORY_HOTPLUG
+	unsigned long pg_start, pfn_cnt;
+	unsigned long rg_start, rg_sz;
+#endif
+	struct hv_dynmem_device *dm = &dm_device;
+
 	memset(&resp, 0, sizeof(struct dm_hot_add_response));
 	resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE;
 	resp.hdr.size = sizeof(struct dm_hot_add_response);
 	resp.hdr.trans_id = atomic_inc_return(&trans_id);
 
-	resp.page_count = 0;
-	resp.result = 0;
+#ifdef CONFIG_MEMORY_HOTPLUG
+	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
+	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
+
+	rg_start = dm->ha_wrk.ha_region_range.finfo.start_page;
+	rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt;
+
+	if ((rg_start == 0) && (!dm->host_specified_ha_region)) {
+		unsigned long region_size;
+		unsigned long region_start;
+
+		/*
+		 * The host has not specified the hot-add region.
+		 * Based on the hot-add page range being specified,
+		 * compute a hot-add region that can cover the pages
+		 * that need to be hot-added while ensuring the alignment
+		 * and size requirements of Linux as it relates to hot-add.
+		 */
+		region_start = pg_start;
+		region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
+		if (pfn_cnt % HA_CHUNK)
+			region_size += HA_CHUNK;
+
+		region_start = (pg_start / HA_CHUNK) * HA_CHUNK;
+
+		rg_start = region_start;
+		rg_sz = region_size;
+	}
+
+	if (do_hot_add)
+		resp.page_count = process_hot_add(pg_start, pfn_cnt,
+						rg_start, rg_sz);
+#endif
+	/*
+	 * The result field of the response structure has the
+	 * following semantics:
+	 *
+	 * 1. If all or some pages hot-added: Guest should return success.
+	 *
+	 * 2. If no pages could be hot-added:
+	 *
+	 * If the guest returns success, then the host
+	 * will not attempt any further hot-add operations. This
+	 * signifies a permanent failure.
+	 *
+	 * If the guest returns failure, then this failure will be
+	 * treated as a transient failure and the host may retry the
+	 * hot-add operation after some delay.
+	 */
+	if (resp.page_count > 0)
+		resp.result = 1;
+	else if (!do_hot_add)
+		resp.result = 1;
+	else
+		resp.result = 0;
+
+	if (!do_hot_add || (resp.page_count == 0))
+		pr_info("Memory hot add failed\n");
 
 	dm->state = DM_INITIALIZED;
 	vmbus_sendpacket(dm->dev->channel, &resp,
 			sizeof(struct dm_hot_add_response),
 			(unsigned long)NULL,
 			VM_PKT_DATA_INBAND, 0);
-
 }
 
 static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
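
When the host does not name a hot-add region, hot_add_req() derives one: the page count is rounded up to a multiple of HA_CHUNK and the start pfn is rounded down to a HA_CHUNK boundary. A standalone worked example of that rounding (the sample request numbers are made up):

    #include <stdio.h>

    #define HA_CHUNK (32 * 1024)           /* pages, i.e. 128 MiB with 4 KiB pages */

    int main(void)
    {
        unsigned long pg_start = 1081500;  /* example page range from the host */
        unsigned long pfn_cnt  = 40000;

        unsigned long region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
        if (pfn_cnt % HA_CHUNK)
            region_size += HA_CHUNK;       /* round count up: 40000 -> 65536 */

        unsigned long region_start = (pg_start / HA_CHUNK) * HA_CHUNK;
                                           /* round start down: 1081500 -> 1081344 */

        printf("region: start_pfn=%lu, size=%lu pfns\n", region_start, region_size);
        return 0;
    }
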
@@ -523,7 +912,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
 	}
 }
 
-unsigned long compute_balloon_floor(void)
+static unsigned long compute_balloon_floor(void)
 {
 	unsigned long min_pages;
 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
@@ -644,6 +1033,14 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
 
 		dm->num_pages_ballooned += alloc_unit;
 
+		/*
+		 * If we allocatted 2M pages; split them so we
+		 * can free them in any order we get.
+		 */
+
+		if (alloc_unit != 1)
+			split_page(pg, get_order(alloc_unit << PAGE_SHIFT));
+
 		bl_resp->range_count++;
 		bl_resp->range_array[i].finfo.start_page =
 			page_to_pfn(pg);
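
split_page() above takes the order of the original allocation; for the 2M case introduced later in this patch (an alloc_unit of 512 pages), that order is 9. A standalone stand-in for the get_order() arithmetic (the PAGE_SHIFT of 12 is an assumption for the example):

    #include <stdio.h>

    #define EXAMPLE_PAGE_SHIFT 12          /* 4 KiB pages assumed */

    /* Minimal stand-in for the kernel's get_order(): smallest order such
     * that (1 << order) pages cover 'size' bytes. */
    static int order_for_size(unsigned long size)
    {
        int order = 0;
        unsigned long pages =
            (size + (1UL << EXAMPLE_PAGE_SHIFT) - 1) >> EXAMPLE_PAGE_SHIFT;

        while ((1UL << order) < pages)
            order++;
        return order;
    }

    int main(void)
    {
        unsigned long alloc_unit = 512;    /* pages per 2 MiB balloon allocation */

        /* Prints "order = 9": the order split_page() is asked to break
         * back into 512 individually freeable pages. */
        printf("order = %d\n", order_for_size(alloc_unit << EXAMPLE_PAGE_SHIFT));
        return 0;
    }
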
@@ -657,9 +1054,9 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
 
 
 
-static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
+static void balloon_up(struct work_struct *dummy)
 {
-	int num_pages = req->num_pages;
+	int num_pages = dm_device.balloon_wrk.num_pages;
 	int num_ballooned = 0;
 	struct dm_balloon_response *bl_resp;
 	int alloc_unit;
@@ -670,9 +1067,10 @@ static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
 
 
 	/*
-	 * Currently, we only support 4k allocations.
+	 * We will attempt 2M allocations. However, if we fail to
+	 * allocate 2M chunks, we will go back to 4k allocations.
 	 */
-	alloc_unit = 1;
+	alloc_unit = 512;
 
 	while (!done) {
 		bl_resp = (struct dm_balloon_response *)send_buffer;
@@ -684,14 +1082,19 @@ static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
 
 
 		num_pages -= num_ballooned;
-		num_ballooned = alloc_balloon_pages(dm, num_pages,
+		num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
 						bl_resp, alloc_unit,
 						&alloc_error);
 
+		if ((alloc_error) && (alloc_unit != 1)) {
+			alloc_unit = 1;
+			continue;
+		}
+
 		if ((alloc_error) || (num_ballooned == num_pages)) {
 			bl_resp->more_pages = 0;
 			done = true;
-			dm->state = DM_INITIALIZED;
+			dm_device.state = DM_INITIALIZED;
 		}
 
 		/*
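
The hunk above adds the fallback path: if a 2M allocation attempt fails, alloc_unit drops to 1 page and the loop retries rather than giving up. A standalone sketch of that policy, with alloc_chunk() standing in for the driver's allocator (hypothetical, not the driver's code):

    #include <stdbool.h>
    #include <stdio.h>

    /* Pretend only a few 2 MiB chunks are available, then fail. */
    static bool alloc_chunk(unsigned int pages)
    {
        static int big_left = 4;

        if (pages == 512)
            return big_left-- > 0;
        return true;                    /* single pages keep succeeding */
    }

    int main(void)
    {
        unsigned int alloc_unit = 512;  /* start with 2 MiB allocations */
        unsigned int num_pages = 4096, done = 0;

        while (done < num_pages) {
            if (!alloc_chunk(alloc_unit)) {
                if (alloc_unit != 1) {
                    alloc_unit = 1;     /* fall back to 4 KiB pages */
                    continue;
                }
                break;                  /* even single pages failed */
            }
            done += alloc_unit;
        }
        printf("ballooned %u of %u pages\n", done, num_pages);
        return 0;
    }
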
@@ -719,7 +1122,7 @@ static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
 			pr_info("Balloon response failed\n");
 
 			for (i = 0; i < bl_resp->range_count; i++)
-				free_balloon_pages(dm,
+				free_balloon_pages(&dm_device,
 						 &bl_resp->range_array[i]);
 
 			done = true;
@@ -761,7 +1164,6 @@ static int dm_thread_func(void *dm_dev)
 {
 	struct hv_dynmem_device *dm = dm_dev;
 	int t;
-	unsigned long scan_start;
 
 	while (!kthread_should_stop()) {
 		t = wait_for_completion_timeout(&dm_device.config_event, 1*HZ);
@@ -773,22 +1175,6 @@ static int dm_thread_func(void *dm_dev)
 		if (t == 0)
 			post_status(dm);
 
-		scan_start = jiffies;
-		switch (dm->state) {
-		case DM_BALLOON_UP:
-			balloon_up(dm, (struct dm_balloon *)recv_buffer);
-			break;
-
-		case DM_HOT_ADD:
-			hot_add_req(dm, (struct dm_hot_add *)recv_buffer);
-			break;
-		default:
-			break;
-		}
-
-		if (!time_in_range(jiffies, scan_start, scan_start + HZ))
-			post_status(dm);
-
 	}
 
 	return 0;
@@ -861,6 +1247,10 @@ static void balloon_onchannelcallback(void *context)
 	struct dm_message *dm_msg;
 	struct dm_header *dm_hdr;
 	struct hv_dynmem_device *dm = hv_get_drvdata(dev);
+	struct dm_balloon *bal_msg;
+	struct dm_hot_add *ha_msg;
+	union dm_mem_page_range *ha_pg_range;
+	union dm_mem_page_range *ha_region;
 
 	memset(recv_buffer, 0, sizeof(recv_buffer));
 	vmbus_recvpacket(dev->channel, recv_buffer,
@@ -882,8 +1272,12 @@ static void balloon_onchannelcallback(void *context)
 		break;
 
 	case DM_BALLOON_REQUEST:
+		if (dm->state == DM_BALLOON_UP)
+			pr_warn("Currently ballooning\n");
+		bal_msg = (struct dm_balloon *)recv_buffer;
 		dm->state = DM_BALLOON_UP;
-		complete(&dm->config_event);
+		dm_device.balloon_wrk.num_pages = bal_msg->num_pages;
+		schedule_work(&dm_device.balloon_wrk.wrk);
 		break;
 
 	case DM_UNBALLOON_REQUEST:
@@ -893,8 +1287,31 @@ static void balloon_onchannelcallback(void *context)
 		break;
 
 	case DM_MEM_HOT_ADD_REQUEST:
+		if (dm->state == DM_HOT_ADD)
+			pr_warn("Currently hot-adding\n");
 		dm->state = DM_HOT_ADD;
-		complete(&dm->config_event);
+		ha_msg = (struct dm_hot_add *)recv_buffer;
+		if (ha_msg->hdr.size == sizeof(struct dm_hot_add)) {
+			/*
+			 * This is a normal hot-add request specifying
+			 * hot-add memory.
+			 */
+			ha_pg_range = &ha_msg->range;
+			dm->ha_wrk.ha_page_range = *ha_pg_range;
+			dm->ha_wrk.ha_region_range.page_range = 0;
+		} else {
+			/*
+			 * Host is specifying that we first hot-add
+			 * a region and then partially populate this
+			 * region.
+			 */
+			dm->host_specified_ha_region = true;
+			ha_pg_range = &ha_msg->range;
+			ha_region = &ha_pg_range[1];
+			dm->ha_wrk.ha_page_range = *ha_pg_range;
+			dm->ha_wrk.ha_region_range = *ha_region;
+		}
+		schedule_work(&dm_device.ha_wrk.wrk);
 		break;
 
 	case DM_INFO_MESSAGE:
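
The callback above distinguishes the two hot-add message layouts purely by header size: a message of exactly sizeof(struct dm_hot_add) carries one page range, while a larger message carries a second dm_mem_page_range (the region) immediately after the first, which is why the driver reads it as ha_pg_range[1]. A simplified, illustrative sketch of that dispatch (the struct layouts here are invented stand-ins, not the protocol definitions from hv_balloon.c):

    #include <stdint.h>
    #include <stdio.h>

    struct fake_hdr      { uint16_t type; uint16_t size; uint32_t trans_id; };
    struct fake_pg_range { uint64_t page_range; };
    struct fake_hot_add  { struct fake_hdr hdr; struct fake_pg_range range; };

    static void dispatch(const struct fake_hot_add *msg)
    {
        if (msg->hdr.size == sizeof(struct fake_hot_add)) {
            puts("normal hot-add: one page range");
        } else {
            /* The region range sits right after the page range. */
            const struct fake_pg_range *region = &(&msg->range)[1];
            (void)region;
            puts("host-specified region: page range + region range");
        }
    }

    int main(void)
    {
        struct { struct fake_hot_add ha; struct fake_pg_range region; } big = {
            .ha.hdr.size = sizeof(struct fake_hot_add) + sizeof(struct fake_pg_range),
        };
        struct fake_hot_add small = { .hdr.size = sizeof(struct fake_hot_add) };

        dispatch(&small);
        dispatch(&big.ha);
        return 0;
    }
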
@@ -937,6 +1354,10 @@ static int balloon_probe(struct hv_device *dev,
 	dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
 	init_completion(&dm_device.host_event);
 	init_completion(&dm_device.config_event);
+	INIT_LIST_HEAD(&dm_device.ha_region_list);
+	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
+	INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
+	dm_device.host_specified_ha_region = false;
 
 	dm_device.thread =
 		 kthread_run(dm_thread_func, &dm_device, "hv_balloon");
@@ -945,6 +1366,10 @@ static int balloon_probe(struct hv_device *dev,
 		goto probe_error1;
 	}
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+	set_online_page_callback(&hv_online_page);
+#endif
+
 	hv_set_drvdata(dev, &dm_device);
 	/*
 	 * Initiate the hand shake with the host and negotiate
@@ -962,8 +1387,7 @@ static int balloon_probe(struct hv_device *dev,
 	ret = vmbus_sendpacket(dev->channel, &version_req,
 				sizeof(struct dm_version_request),
 				(unsigned long)NULL,
-				VM_PKT_DATA_INBAND,
-				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+				VM_PKT_DATA_INBAND, 0);
 	if (ret)
 		goto probe_error2;
 
@@ -990,13 +1414,13 @@ static int balloon_probe(struct hv_device *dev,
 	cap_msg.hdr.trans_id = atomic_inc_return(&trans_id);
 
 	cap_msg.caps.cap_bits.balloon = 1;
+	cap_msg.caps.cap_bits.hot_add = 1;
+
 	/*
-	 * While we currently don't support hot-add,
-	 * we still advertise this capability since the
-	 * host requires that guests partcipating in the
-	 * dynamic memory protocol support hot add.
+	 * Specify our alignment requirements as it relates
+	 * memory hot-add. Specify 128MB alignment.
 	 */
-	cap_msg.caps.cap_bits.hot_add = 1;
+	cap_msg.caps.cap_bits.hot_add_alignment = 7;
 
 	/*
 	 * Currently the host does not use these
@@ -1009,8 +1433,7 @@ static int balloon_probe(struct hv_device *dev,
 	ret = vmbus_sendpacket(dev->channel, &cap_msg,
 				sizeof(struct dm_capabilities),
 				(unsigned long)NULL,
-				VM_PKT_DATA_INBAND,
-				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+				VM_PKT_DATA_INBAND, 0);
 	if (ret)
 		goto probe_error2;
 
@@ -1034,6 +1457,9 @@ static int balloon_probe(struct hv_device *dev,
 	return 0;
 
 probe_error2:
+#ifdef CONFIG_MEMORY_HOTPLUG
+	restore_online_page_callback(&hv_online_page);
+#endif
 	kthread_stop(dm_device.thread);
 
 probe_error1:
@@ -1046,13 +1472,26 @@ probe_error0:
 static int balloon_remove(struct hv_device *dev)
 {
 	struct hv_dynmem_device *dm = hv_get_drvdata(dev);
+	struct list_head *cur, *tmp;
+	struct hv_hotadd_state *has;
 
 	if (dm->num_pages_ballooned != 0)
 		pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
 
+	cancel_work_sync(&dm->balloon_wrk.wrk);
+	cancel_work_sync(&dm->ha_wrk.wrk);
+
 	vmbus_close(dev->channel);
 	kthread_stop(dm->thread);
 	kfree(send_buffer);
+#ifdef CONFIG_MEMORY_HOTPLUG
+	restore_online_page_callback(&hv_online_page);
+#endif
+	list_for_each_safe(cur, tmp, &dm->ha_region_list) {
+		has = list_entry(cur, struct hv_hotadd_state, list);
+		list_del(&has->list);
+		kfree(has);
+	}
 
 	return 0;
 }
@@ -1079,14 +1518,7 @@ static int __init init_balloon_drv(void)
 	return vmbus_driver_register(&balloon_drv);
 }
 
-static void exit_balloon_drv(void)
-{
-
-	vmbus_driver_unregister(&balloon_drv);
-}
-
 module_init(init_balloon_drv);
-module_exit(exit_balloon_drv);
 
 MODULE_DESCRIPTION("Hyper-V Balloon");
 MODULE_VERSION(HV_DRV_VERSION);