Diffstat (limited to 'drivers/hv/hv_balloon.c')
 drivers/hv/hv_balloon.c | 544 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 488 insertions(+), 56 deletions(-)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 37873213e24f..4c605c70ebf9 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -117,7 +117,14 @@ union dm_caps {
         struct {
                 __u64 balloon:1;
                 __u64 hot_add:1;
-                __u64 reservedz:62;
+                /*
+                 * To support guests that may have alignment
+                 * limitations on hot-add, the guest can specify
+                 * its alignment requirements; a value of n
+                 * represents an alignment of 2^n in megabytes.
+                 */
+                __u64 hot_add_alignment:4;
+                __u64 reservedz:58;
         } cap_bits;
         __u64 caps;
 } __packed;
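Review note: the new hot_add_alignment field encodes a power-of-two alignment in megabytes, as the comment above describes. A minimal standalone sketch of the decoding; the helper name is hypothetical and not part of the driver:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical decoder: a 4-bit field value n means an alignment of 2^n MB. */
static uint64_t hot_add_alignment_bytes(unsigned int n)
{
        return (UINT64_C(1) << n) << 20;        /* 2^n MB expressed in bytes */
}

int main(void)
{
        /* The driver advertises n = 7 later in this patch: 2^7 MB = 128MB. */
        printf("%llu\n", (unsigned long long)hot_add_alignment_bytes(7));
        return 0;
}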
@@ -412,13 +419,45 @@ struct dm_info_msg {
  * End protocol definitions.
  */
 
-static bool hot_add;
+/*
+ * State to manage hot adding memory into the guest.
+ * The range start_pfn : end_pfn specifies the range
+ * that the host has asked us to hot add. The range
+ * start_pfn : ha_end_pfn specifies the range that we have
+ * currently hot added. We hot add in multiples of 128M
+ * chunks; it is possible that we may not be able to bring
+ * online all the pages in the region. The range
+ * covered_start_pfn : covered_end_pfn defines the pages that can
+ * be brought online.
+ */
+
+struct hv_hotadd_state {
+        struct list_head list;
+        unsigned long start_pfn;
+        unsigned long covered_start_pfn;
+        unsigned long covered_end_pfn;
+        unsigned long ha_end_pfn;
+        unsigned long end_pfn;
+};
+
+struct balloon_state {
+        __u32 num_pages;
+        struct work_struct wrk;
+};
+
+struct hot_add_wrk {
+        union dm_mem_page_range ha_page_range;
+        union dm_mem_page_range ha_region_range;
+        struct work_struct wrk;
+};
+
+static bool hot_add = true;
 static bool do_hot_add;
 /*
  * Delay reporting memory pressure by
  * the specified number of seconds.
  */
-static uint pressure_report_delay = 30;
+static uint pressure_report_delay = 45;
 
 module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
 MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");
@@ -446,6 +485,7 @@ enum hv_dm_state {
 static __u8 recv_buffer[PAGE_SIZE];
 static __u8 *send_buffer;
 #define PAGES_IN_2M 512
+#define HA_CHUNK (32 * 1024)
 
 struct hv_dynmem_device {
         struct hv_device *dev;
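Review note: HA_CHUNK is counted in pages, so with the usual 4KB page size one hot-add chunk is 32768 * 4KB = 128MB, matching the "multiples of 128M chunks" comment earlier in the patch. A quick check, assuming 4KB pages:

#include <stdio.h>

#define HA_CHUNK (32 * 1024)    /* pages, as defined in the patch */
#define PAGE_SZ 4096UL          /* assumed 4KB page size */

int main(void)
{
        printf("%lu MB per chunk\n", (HA_CHUNK * PAGE_SZ) >> 20);       /* prints 128 */
        return 0;
}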
@@ -459,7 +499,28 @@ struct hv_dynmem_device {
         unsigned int num_pages_ballooned;
 
         /*
-         * This thread handles both balloon/hot-add
+         * State to manage the ballooning (up) operation.
+         */
+        struct balloon_state balloon_wrk;
+
+        /*
+         * State to execute the "hot-add" operation.
+         */
+        struct hot_add_wrk ha_wrk;
+
+        /*
+         * This state tracks if the host has specified a hot-add
+         * region.
+         */
+        bool host_specified_ha_region;
+
+        /*
+         * State to synchronize hot-add.
+         */
+        struct completion ol_waitevent;
+        bool ha_waiting;
+        /*
+         * This thread handles hot-add
          * requests from the host as well as notifying
          * the host with regards to memory pressure in
          * the guest.
@@ -467,6 +528,11 @@ struct hv_dynmem_device {
         struct task_struct *thread;
 
         /*
+         * A list of hot-add regions.
+         */
+        struct list_head ha_region_list;
+
+        /*
          * We start with the highest version we can support
          * and downgrade based on the host; we save here the
          * next version to try.
@@ -476,35 +542,358 @@ struct hv_dynmem_device {
 
 static struct hv_dynmem_device dm_device;
 
-static void hot_add_req(struct hv_dynmem_device *dm, struct dm_hot_add *msg)
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
 {
+        int i;
 
-        struct dm_hot_add_response resp;
+        for (i = 0; i < size; i++) {
+                struct page *pg;
+                pg = pfn_to_page(start_pfn + i);
+                __online_page_set_limits(pg);
+                __online_page_increment_counters(pg);
+                __online_page_free(pg);
+        }
+}
 
-        if (do_hot_add) {
+static void hv_mem_hot_add(unsigned long start, unsigned long size,
+                                unsigned long pfn_count,
+                                struct hv_hotadd_state *has)
+{
+        int ret = 0;
+        int i, nid, t;
+        unsigned long start_pfn;
+        unsigned long processed_pfn;
+        unsigned long total_pfn = pfn_count;
+
+        for (i = 0; i < (size/HA_CHUNK); i++) {
+                start_pfn = start + (i * HA_CHUNK);
+                has->ha_end_pfn += HA_CHUNK;
+
+                if (total_pfn > HA_CHUNK) {
+                        processed_pfn = HA_CHUNK;
+                        total_pfn -= HA_CHUNK;
+                } else {
+                        processed_pfn = total_pfn;
+                        total_pfn = 0;
+                }
+
+                has->covered_end_pfn += processed_pfn;
+
+                init_completion(&dm_device.ol_waitevent);
+                dm_device.ha_waiting = true;
+
+                nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
+                ret = add_memory(nid, PFN_PHYS((start_pfn)),
+                                (HA_CHUNK << PAGE_SHIFT));
 
-                pr_info("Memory hot add not supported\n");
+                if (ret) {
+                        pr_info("hot_add memory failed error is %d\n", ret);
+                        if (ret == -EEXIST) {
+                                /*
+                                 * This error indicates that the failure
+                                 * is not a transient one. This is the
+                                 * case where the guest's physical address map
+                                 * precludes hot adding memory. Stop all further
+                                 * memory hot-add.
+                                 */
+                                do_hot_add = false;
+                        }
+                        has->ha_end_pfn -= HA_CHUNK;
+                        has->covered_end_pfn -= processed_pfn;
+                        break;
+                }
 
                 /*
-                 * Currently we do not support hot add.
-                 * Just fail the request.
+                 * Wait for the memory block to be onlined.
                  */
+                t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
+                if (t == 0) {
+                        pr_info("hot_add memory timed out\n");
+                        has->ha_end_pfn -= HA_CHUNK;
+                        has->covered_end_pfn -= processed_pfn;
+                        break;
+                }
+
+        }
+
+        return;
+}
+
+static void hv_online_page(struct page *pg)
+{
+        struct list_head *cur;
+        struct hv_hotadd_state *has;
+        unsigned long cur_start_pgp;
+        unsigned long cur_end_pgp;
+
+        if (dm_device.ha_waiting) {
+                dm_device.ha_waiting = false;
+                complete(&dm_device.ol_waitevent);
+        }
+
+        list_for_each(cur, &dm_device.ha_region_list) {
+                has = list_entry(cur, struct hv_hotadd_state, list);
+                cur_start_pgp = (unsigned long)
+                                pfn_to_page(has->covered_start_pfn);
+                cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+
+                if (((unsigned long)pg >= cur_start_pgp) &&
+                        ((unsigned long)pg < cur_end_pgp)) {
+                        /*
+                         * This frame is currently backed; online the
+                         * page.
+                         */
+                        __online_page_set_limits(pg);
+                        __online_page_increment_counters(pg);
+                        __online_page_free(pg);
+                        has->covered_start_pfn++;
+                }
+        }
+}
+
+static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
+{
+        struct list_head *cur;
+        struct hv_hotadd_state *has;
+        unsigned long residual, new_inc;
+
+        if (list_empty(&dm_device.ha_region_list))
+                return false;
+
+        list_for_each(cur, &dm_device.ha_region_list) {
+                has = list_entry(cur, struct hv_hotadd_state, list);
+
+                /*
+                 * If the pfn range we are dealing with is not in the current
+                 * "hot add block", move on.
+                 */
+                if ((start_pfn >= has->end_pfn))
+                        continue;
+                /*
+                 * If the current hot add-request extends beyond
+                 * our current limit; extend it.
+                 */
+                if ((start_pfn + pfn_cnt) > has->end_pfn) {
+                        residual = (start_pfn + pfn_cnt - has->end_pfn);
+                        /*
+                         * Extend the region by multiples of HA_CHUNK.
+                         */
+                        new_inc = (residual / HA_CHUNK) * HA_CHUNK;
+                        if (residual % HA_CHUNK)
+                                new_inc += HA_CHUNK;
+
+                        has->end_pfn += new_inc;
+                }
+
+                /*
+                 * If the current start pfn is not where the covered_end
+                 * is, update it.
+                 */
+
+                if (has->covered_end_pfn != start_pfn) {
+                        has->covered_end_pfn = start_pfn;
+                        has->covered_start_pfn = start_pfn;
+                }
+                return true;
+
         }
 
+        return false;
+}
+
+static unsigned long handle_pg_range(unsigned long pg_start,
+                                        unsigned long pg_count)
+{
+        unsigned long start_pfn = pg_start;
+        unsigned long pfn_cnt = pg_count;
+        unsigned long size;
+        struct list_head *cur;
+        struct hv_hotadd_state *has;
+        unsigned long pgs_ol = 0;
+        unsigned long old_covered_state;
+
+        if (list_empty(&dm_device.ha_region_list))
+                return 0;
+
+        list_for_each(cur, &dm_device.ha_region_list) {
+                has = list_entry(cur, struct hv_hotadd_state, list);
+
+                /*
+                 * If the pfn range we are dealing with is not in the current
+                 * "hot add block", move on.
+                 */
+                if ((start_pfn >= has->end_pfn))
+                        continue;
+
+                old_covered_state = has->covered_end_pfn;
+
+                if (start_pfn < has->ha_end_pfn) {
+                        /*
+                         * This is the case where we are backing pages
+                         * in an already hot added region. Bring
+                         * these pages online first.
+                         */
+                        pgs_ol = has->ha_end_pfn - start_pfn;
+                        if (pgs_ol > pfn_cnt)
+                                pgs_ol = pfn_cnt;
+                        hv_bring_pgs_online(start_pfn, pgs_ol);
+                        has->covered_end_pfn += pgs_ol;
+                        has->covered_start_pfn += pgs_ol;
+                        pfn_cnt -= pgs_ol;
+                }
+
+                if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
+                        /*
+                         * We have some residual hot add range
+                         * that needs to be hot added; hot add
+                         * it now. Hot add a multiple of
+                         * HA_CHUNK that fully covers the pages
+                         * we have.
+                         */
+                        size = (has->end_pfn - has->ha_end_pfn);
+                        if (pfn_cnt <= size) {
+                                size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK);
+                                if (pfn_cnt % HA_CHUNK)
+                                        size += HA_CHUNK;
+                        } else {
+                                pfn_cnt = size;
+                        }
+                        hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
+                }
+                /*
+                 * If we managed to online any pages that were given to us,
+                 * we declare success.
+                 */
+                return has->covered_end_pfn - old_covered_state;
+
+        }
+
+        return 0;
+}
+
+static unsigned long process_hot_add(unsigned long pg_start,
+                                        unsigned long pfn_cnt,
+                                        unsigned long rg_start,
+                                        unsigned long rg_size)
+{
+        struct hv_hotadd_state *ha_region = NULL;
+
+        if (pfn_cnt == 0)
+                return 0;
+
+        if (!dm_device.host_specified_ha_region)
+                if (pfn_covered(pg_start, pfn_cnt))
+                        goto do_pg_range;
+
+        /*
+         * If the host has specified a hot-add range; deal with it first.
+         */
+
+        if (rg_size != 0) {
+                ha_region = kzalloc(sizeof(struct hv_hotadd_state), GFP_KERNEL);
+                if (!ha_region)
+                        return 0;
+
+                INIT_LIST_HEAD(&ha_region->list);
+
+                list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+                ha_region->start_pfn = rg_start;
+                ha_region->ha_end_pfn = rg_start;
+                ha_region->covered_start_pfn = pg_start;
+                ha_region->covered_end_pfn = pg_start;
+                ha_region->end_pfn = rg_start + rg_size;
+        }
+
+do_pg_range:
+        /*
+         * Process the page range specified; bringing them
+         * online if possible.
+         */
+        return handle_pg_range(pg_start, pfn_cnt);
+}
+
+#endif
+
+static void hot_add_req(struct work_struct *dummy)
+{
+        struct dm_hot_add_response resp;
+#ifdef CONFIG_MEMORY_HOTPLUG
+        unsigned long pg_start, pfn_cnt;
+        unsigned long rg_start, rg_sz;
+#endif
+        struct hv_dynmem_device *dm = &dm_device;
+
         memset(&resp, 0, sizeof(struct dm_hot_add_response));
         resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE;
         resp.hdr.size = sizeof(struct dm_hot_add_response);
         resp.hdr.trans_id = atomic_inc_return(&trans_id);
 
-        resp.page_count = 0;
-        resp.result = 0;
+#ifdef CONFIG_MEMORY_HOTPLUG
+        pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
+        pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
+
+        rg_start = dm->ha_wrk.ha_region_range.finfo.start_page;
+        rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt;
+
+        if ((rg_start == 0) && (!dm->host_specified_ha_region)) {
+                unsigned long region_size;
+                unsigned long region_start;
+
+                /*
+                 * The host has not specified the hot-add region.
+                 * Based on the hot-add page range being specified,
+                 * compute a hot-add region that can cover the pages
+                 * that need to be hot-added while ensuring the alignment
+                 * and size requirements of Linux as it relates to hot-add.
+                 */
+                region_start = pg_start;
+                region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
+                if (pfn_cnt % HA_CHUNK)
+                        region_size += HA_CHUNK;
+
+                region_start = (pg_start / HA_CHUNK) * HA_CHUNK;
+
+                rg_start = region_start;
+                rg_sz = region_size;
+        }
+
+        if (do_hot_add)
+                resp.page_count = process_hot_add(pg_start, pfn_cnt,
+                                                rg_start, rg_sz);
+#endif
+        /*
+         * The result field of the response structure has the
+         * following semantics:
+         *
+         * 1. If all or some pages hot-added: Guest should return success.
+         *
+         * 2. If no pages could be hot-added:
+         *
+         * If the guest returns success, then the host
+         * will not attempt any further hot-add operations. This
+         * signifies a permanent failure.
+         *
+         * If the guest returns failure, then this failure will be
+         * treated as a transient failure and the host may retry the
+         * hot-add operation after some delay.
+         */
+        if (resp.page_count > 0)
+                resp.result = 1;
+        else if (!do_hot_add)
+                resp.result = 1;
+        else
+                resp.result = 0;
+
+        if (!do_hot_add || (resp.page_count == 0))
+                pr_info("Memory hot add failed\n");
 
         dm->state = DM_INITIALIZED;
         vmbus_sendpacket(dm->dev->channel, &resp,
                         sizeof(struct dm_hot_add_response),
                         (unsigned long)NULL,
                         VM_PKT_DATA_INBAND, 0);
-
 }
 
 static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
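Review note: when the host does not name a hot-add region, hot_add_req() synthesizes one by rounding the start page down and the page count up to HA_CHUNK boundaries. A standalone sketch of that arithmetic, with made-up example values:

#include <stdio.h>

#define HA_CHUNK (32 * 1024)

int main(void)
{
        /* Illustrative values; in the driver these come from the host. */
        unsigned long pg_start = 1048676, pfn_cnt = 40000;
        unsigned long rg_start = (pg_start / HA_CHUNK) * HA_CHUNK;
        unsigned long rg_sz = (pfn_cnt / HA_CHUNK) * HA_CHUNK;

        if (pfn_cnt % HA_CHUNK)
                rg_sz += HA_CHUNK;      /* round up to a whole chunk */

        /* Prints: region start=1048576 size=65536 (two 128MB chunks) */
        printf("region start=%lu size=%lu\n", rg_start, rg_sz);
        return 0;
}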
@@ -523,7 +912,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
         }
 }
 
-unsigned long compute_balloon_floor(void)
+static unsigned long compute_balloon_floor(void)
 {
         unsigned long min_pages;
 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
@@ -644,6 +1033,14 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
 
                 dm->num_pages_ballooned += alloc_unit;
 
+                /*
+                 * If we allocated 2M pages, split them so we
+                 * can free them in any order we get.
+                 */
+
+                if (alloc_unit != 1)
+                        split_page(pg, get_order(alloc_unit << PAGE_SHIFT));
+
                 bl_resp->range_count++;
                 bl_resp->range_array[i].finfo.start_page =
                         page_to_pfn(pg);
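Review note: split_page() is what lets a higher-order allocation be returned to the host one 4KB page at a time. A sketch of the pattern in kernel context; the GFP flags here are an assumption for illustration, not quoted from this patch:

/* 512 pages * 4KB = 2MB, i.e. an order-9 allocation with 4KB pages. */
struct page *pg = alloc_pages(GFP_HIGHUSER | __GFP_NOWARN,
                              get_order(512 << PAGE_SHIFT));
if (pg)
        /* Turn the order-9 block into 512 independently freeable pages. */
        split_page(pg, get_order(512 << PAGE_SHIFT));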
@@ -657,9 +1054,9 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
 
 
 
-static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
+static void balloon_up(struct work_struct *dummy)
 {
-        int num_pages = req->num_pages;
+        int num_pages = dm_device.balloon_wrk.num_pages;
         int num_ballooned = 0;
         struct dm_balloon_response *bl_resp;
         int alloc_unit;
@@ -670,9 +1067,10 @@ static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
 
 
         /*
-         * Currently, we only support 4k allocations.
+         * We will attempt 2M allocations. However, if we fail to
+         * allocate 2M chunks, we will go back to 4k allocations.
          */
-        alloc_unit = 1;
+        alloc_unit = 512;
 
         while (!done) {
                 bl_resp = (struct dm_balloon_response *)send_buffer;
@@ -684,14 +1082,19 @@ static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
 
 
                 num_pages -= num_ballooned;
-                num_ballooned = alloc_balloon_pages(dm, num_pages,
+                num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
                                                 bl_resp, alloc_unit,
                                                 &alloc_error);
 
+                if ((alloc_error) && (alloc_unit != 1)) {
+                        alloc_unit = 1;
+                        continue;
+                }
+
                 if ((alloc_error) || (num_ballooned == num_pages)) {
                         bl_resp->more_pages = 0;
                         done = true;
-                        dm->state = DM_INITIALIZED;
+                        dm_device.state = DM_INITIALIZED;
                 }
 
                 /*
@@ -719,7 +1122,7 @@ static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
                         pr_info("Balloon response failed\n");
 
                         for (i = 0; i < bl_resp->range_count; i++)
-                                free_balloon_pages(dm,
+                                free_balloon_pages(&dm_device,
                                                 &bl_resp->range_array[i]);
 
                         done = true;
@@ -761,7 +1164,6 @@ static int dm_thread_func(void *dm_dev)
 {
         struct hv_dynmem_device *dm = dm_dev;
         int t;
-        unsigned long scan_start;
 
         while (!kthread_should_stop()) {
                 t = wait_for_completion_timeout(&dm_device.config_event, 1*HZ);
@@ -773,22 +1175,6 @@ static int dm_thread_func(void *dm_dev)
                 if (t == 0)
                         post_status(dm);
 
-                scan_start = jiffies;
-                switch (dm->state) {
-                case DM_BALLOON_UP:
-                        balloon_up(dm, (struct dm_balloon *)recv_buffer);
-                        break;
-
-                case DM_HOT_ADD:
-                        hot_add_req(dm, (struct dm_hot_add *)recv_buffer);
-                        break;
-                default:
-                        break;
-                }
-
-                if (!time_in_range(jiffies, scan_start, scan_start + HZ))
-                        post_status(dm);
-
         }
 
         return 0;
@@ -861,6 +1247,10 @@ static void balloon_onchannelcallback(void *context)
         struct dm_message *dm_msg;
         struct dm_header *dm_hdr;
         struct hv_dynmem_device *dm = hv_get_drvdata(dev);
+        struct dm_balloon *bal_msg;
+        struct dm_hot_add *ha_msg;
+        union dm_mem_page_range *ha_pg_range;
+        union dm_mem_page_range *ha_region;
 
         memset(recv_buffer, 0, sizeof(recv_buffer));
         vmbus_recvpacket(dev->channel, recv_buffer,
@@ -882,8 +1272,12 @@ static void balloon_onchannelcallback(void *context)
                 break;
 
         case DM_BALLOON_REQUEST:
+                if (dm->state == DM_BALLOON_UP)
+                        pr_warn("Currently ballooning\n");
+                bal_msg = (struct dm_balloon *)recv_buffer;
                 dm->state = DM_BALLOON_UP;
-                complete(&dm->config_event);
+                dm_device.balloon_wrk.num_pages = bal_msg->num_pages;
+                schedule_work(&dm_device.balloon_wrk.wrk);
                 break;
 
         case DM_UNBALLOON_REQUEST:
@@ -893,8 +1287,31 @@ static void balloon_onchannelcallback(void *context)
                 break;
 
         case DM_MEM_HOT_ADD_REQUEST:
+                if (dm->state == DM_HOT_ADD)
+                        pr_warn("Currently hot-adding\n");
                 dm->state = DM_HOT_ADD;
-                complete(&dm->config_event);
+                ha_msg = (struct dm_hot_add *)recv_buffer;
+                if (ha_msg->hdr.size == sizeof(struct dm_hot_add)) {
+                        /*
+                         * This is a normal hot-add request specifying
+                         * hot-add memory.
+                         */
+                        ha_pg_range = &ha_msg->range;
+                        dm->ha_wrk.ha_page_range = *ha_pg_range;
+                        dm->ha_wrk.ha_region_range.page_range = 0;
+                } else {
+                        /*
+                         * Host is specifying that we first hot-add
+                         * a region and then partially populate this
+                         * region.
+                         */
+                        dm->host_specified_ha_region = true;
+                        ha_pg_range = &ha_msg->range;
+                        ha_region = &ha_pg_range[1];
+                        dm->ha_wrk.ha_page_range = *ha_pg_range;
+                        dm->ha_wrk.ha_region_range = *ha_region;
+                }
+                schedule_work(&dm_device.ha_wrk.wrk);
                 break;
 
         case DM_INFO_MESSAGE:
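Review note: the hdr.size test distinguishes the two request layouts. In the larger variant, the fixed header is presumably followed by two page-range entries back to back, which is why the region is read as &ha_pg_range[1]. A hypothetical layout for illustration; the struct name is not from the protocol headers:

struct dm_hot_add_with_region {                 /* hypothetical name */
        struct dm_header hdr;
        union dm_mem_page_range range;          /* pages to populate */
        union dm_mem_page_range region;         /* enclosing hot-add region */
} __packed;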
@@ -937,6 +1354,10 @@ static int balloon_probe(struct hv_device *dev,
         dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
         init_completion(&dm_device.host_event);
         init_completion(&dm_device.config_event);
+        INIT_LIST_HEAD(&dm_device.ha_region_list);
+        INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
+        INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
+        dm_device.host_specified_ha_region = false;
 
         dm_device.thread =
                  kthread_run(dm_thread_func, &dm_device, "hv_balloon");
@@ -945,6 +1366,10 @@ static int balloon_probe(struct hv_device *dev,
                 goto probe_error1;
         }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+        set_online_page_callback(&hv_online_page);
+#endif
+
         hv_set_drvdata(dev, &dm_device);
         /*
          * Initiate the hand shake with the host and negotiate
@@ -962,8 +1387,7 @@ static int balloon_probe(struct hv_device *dev,
         ret = vmbus_sendpacket(dev->channel, &version_req,
                                 sizeof(struct dm_version_request),
                                 (unsigned long)NULL,
-                                VM_PKT_DATA_INBAND,
-                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+                                VM_PKT_DATA_INBAND, 0);
         if (ret)
                 goto probe_error2;
 
@@ -990,13 +1414,13 @@ static int balloon_probe(struct hv_device *dev,
         cap_msg.hdr.trans_id = atomic_inc_return(&trans_id);
 
         cap_msg.caps.cap_bits.balloon = 1;
+        cap_msg.caps.cap_bits.hot_add = 1;
+
         /*
-         * While we currently don't support hot-add,
-         * we still advertise this capability since the
-         * host requires that guests partcipating in the
-         * dynamic memory protocol support hot add.
+         * Specify our alignment requirements as it relates
+         * to memory hot-add. Specify 128MB alignment.
          */
-        cap_msg.caps.cap_bits.hot_add = 1;
+        cap_msg.caps.cap_bits.hot_add_alignment = 7;
 
         /*
          * Currently the host does not use these
@@ -1009,8 +1433,7 @@ static int balloon_probe(struct hv_device *dev,
         ret = vmbus_sendpacket(dev->channel, &cap_msg,
                                 sizeof(struct dm_capabilities),
                                 (unsigned long)NULL,
-                                VM_PKT_DATA_INBAND,
-                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+                                VM_PKT_DATA_INBAND, 0);
         if (ret)
                 goto probe_error2;
 
@@ -1034,6 +1457,9 @@ static int balloon_probe(struct hv_device *dev,
         return 0;
 
 probe_error2:
+#ifdef CONFIG_MEMORY_HOTPLUG
+        restore_online_page_callback(&hv_online_page);
+#endif
         kthread_stop(dm_device.thread);
 
 probe_error1:
@@ -1046,13 +1472,26 @@ probe_error0:
 static int balloon_remove(struct hv_device *dev)
 {
         struct hv_dynmem_device *dm = hv_get_drvdata(dev);
+        struct list_head *cur, *tmp;
+        struct hv_hotadd_state *has;
 
         if (dm->num_pages_ballooned != 0)
                 pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
 
+        cancel_work_sync(&dm->balloon_wrk.wrk);
+        cancel_work_sync(&dm->ha_wrk.wrk);
+
         vmbus_close(dev->channel);
         kthread_stop(dm->thread);
         kfree(send_buffer);
+#ifdef CONFIG_MEMORY_HOTPLUG
+        restore_online_page_callback(&hv_online_page);
+#endif
+        list_for_each_safe(cur, tmp, &dm->ha_region_list) {
+                has = list_entry(cur, struct hv_hotadd_state, list);
+                list_del(&has->list);
+                kfree(has);
+        }
 
         return 0;
 }
@@ -1079,14 +1518,7 @@ static int __init init_balloon_drv(void)
         return vmbus_driver_register(&balloon_drv);
 }
 
-static void exit_balloon_drv(void)
-{
-
-        vmbus_driver_unregister(&balloon_drv);
-}
-
 module_init(init_balloon_drv);
-module_exit(exit_balloon_drv);
 
 MODULE_DESCRIPTION("Hyper-V Balloon");
 MODULE_VERSION(HV_DRV_VERSION);