diff options
author | David Gibson <david@gibson.dropbear.id.au> | 2016-12-08 19:07:36 -0500 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2017-02-09 21:27:55 -0500 |
commit | dbcf929c0062b758fbb6312ccaf30716c0c0a608 (patch) | |
tree | 35c79f694f29e935bc7952357313862f8edc57ba | |
parent | 64b40ffbc83029f035571cad9727e34e69dbf6d0 (diff) |
powerpc/pseries: Add support for hash table resizing
This adds support for using two hypercalls to change the size of the
main hash page table while running as a PAPR guest. For now these
hypercalls are only available in experimental QEMU versions.
The interface has two parts: first, H_RESIZE_HPT_PREPARE is used to
allocate and prepare the new hash table. This may be slow, but can be
done asynchronously. Then, H_RESIZE_HPT_COMMIT is used to switch to the
new hash table. This requires that no CPUs be concurrently updating the
HPT, and so must be run under stop_machine().
This also adds a debugfs file which can be used to manually control
HPT resizing for testing purposes.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Paul Mackerras <paulus@samba.org>
[mpe: Rename the debugfs file to "hpt_order"]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- | arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 33 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/lpar.c | 109 |
3 files changed, 143 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 823015cff149..52d8d1e4b772 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h | |||
@@ -157,6 +157,7 @@ struct mmu_hash_ops { | |||
157 | unsigned long addr, | 157 | unsigned long addr, |
158 | unsigned char *hpte_slot_array, | 158 | unsigned char *hpte_slot_array, |
159 | int psize, int ssize, int local); | 159 | int psize, int ssize, int local); |
160 | int (*resize_hpt)(unsigned long shift); | ||
160 | /* | 161 | /* |
161 | * Special for kexec. | 162 | * Special for kexec. |
162 | * To be called in real mode with interrupts disabled. No locks are | 163 | * To be called in real mode with interrupts disabled. No locks are |
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 67e19a0821be..a3371d4e35b6 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c | |||
@@ -35,7 +35,9 @@ | |||
35 | #include <linux/memblock.h> | 35 | #include <linux/memblock.h> |
36 | #include <linux/context_tracking.h> | 36 | #include <linux/context_tracking.h> |
37 | #include <linux/libfdt.h> | 37 | #include <linux/libfdt.h> |
38 | #include <linux/debugfs.h> | ||
38 | 39 | ||
40 | #include <asm/debug.h> | ||
39 | #include <asm/processor.h> | 41 | #include <asm/processor.h> |
40 | #include <asm/pgtable.h> | 42 | #include <asm/pgtable.h> |
41 | #include <asm/mmu.h> | 43 | #include <asm/mmu.h> |
@@ -1795,3 +1797,34 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, | |||
1795 | /* Finally limit subsequent allocations */ | 1797 | /* Finally limit subsequent allocations */ |
1796 | memblock_set_current_limit(ppc64_rma_size); | 1798 | memblock_set_current_limit(ppc64_rma_size); |
1797 | } | 1799 | } |
1800 | |||
1801 | #ifdef CONFIG_DEBUG_FS | ||
1802 | |||
1803 | static int hpt_order_get(void *data, u64 *val) | ||
1804 | { | ||
1805 | *val = ppc64_pft_size; | ||
1806 | return 0; | ||
1807 | } | ||
1808 | |||
1809 | static int hpt_order_set(void *data, u64 val) | ||
1810 | { | ||
1811 | if (!mmu_hash_ops.resize_hpt) | ||
1812 | return -ENODEV; | ||
1813 | |||
1814 | return mmu_hash_ops.resize_hpt(val); | ||
1815 | } | ||
1816 | |||
1817 | DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n"); | ||
1818 | |||
1819 | static int __init hash64_debugfs(void) | ||
1820 | { | ||
1821 | if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root, | ||
1822 | NULL, &fops_hpt_order)) { | ||
1823 | pr_err("lpar: unable to create hpt_order debugsfs file\n"); | ||
1824 | } | ||
1825 | |||
1826 | return 0; | ||
1827 | } | ||
1828 | machine_device_initcall(pseries, hash64_debugfs); | ||
1829 | |||
1830 | #endif /* CONFIG_DEBUG_FS */ | ||
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 5dc1c3c6e716..c2e13a51f369 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c | |||
@@ -27,6 +27,8 @@ | |||
27 | #include <linux/console.h> | 27 | #include <linux/console.h> |
28 | #include <linux/export.h> | 28 | #include <linux/export.h> |
29 | #include <linux/jump_label.h> | 29 | #include <linux/jump_label.h> |
30 | #include <linux/delay.h> | ||
31 | #include <linux/stop_machine.h> | ||
30 | #include <asm/processor.h> | 32 | #include <asm/processor.h> |
31 | #include <asm/mmu.h> | 33 | #include <asm/mmu.h> |
32 | #include <asm/page.h> | 34 | #include <asm/page.h> |
@@ -609,6 +611,112 @@ static int __init disable_bulk_remove(char *str) | |||
609 | 611 | ||
610 | __setup("bulk_remove=", disable_bulk_remove); | 612 | __setup("bulk_remove=", disable_bulk_remove); |
611 | 613 | ||
614 | #define HPT_RESIZE_TIMEOUT 10000 /* ms */ | ||
615 | |||
616 | struct hpt_resize_state { | ||
617 | unsigned long shift; | ||
618 | int commit_rc; | ||
619 | }; | ||
620 | |||
621 | static int pseries_lpar_resize_hpt_commit(void *data) | ||
622 | { | ||
623 | struct hpt_resize_state *state = data; | ||
624 | |||
625 | state->commit_rc = plpar_resize_hpt_commit(0, state->shift); | ||
626 | if (state->commit_rc != H_SUCCESS) | ||
627 | return -EIO; | ||
628 | |||
629 | /* Hypervisor has transitioned the HTAB, update our globals */ | ||
630 | ppc64_pft_size = state->shift; | ||
631 | htab_size_bytes = 1UL << ppc64_pft_size; | ||
632 | htab_hash_mask = (htab_size_bytes >> 7) - 1; | ||
633 | |||
634 | return 0; | ||
635 | } | ||
636 | |||
637 | /* Must be called in user context */ | ||
638 | static int pseries_lpar_resize_hpt(unsigned long shift) | ||
639 | { | ||
640 | struct hpt_resize_state state = { | ||
641 | .shift = shift, | ||
642 | .commit_rc = H_FUNCTION, | ||
643 | }; | ||
644 | unsigned int delay, total_delay = 0; | ||
645 | int rc; | ||
646 | ktime_t t0, t1, t2; | ||
647 | |||
648 | might_sleep(); | ||
649 | |||
650 | if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE)) | ||
651 | return -ENODEV; | ||
652 | |||
653 | printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n", | ||
654 | shift); | ||
655 | |||
656 | t0 = ktime_get(); | ||
657 | |||
658 | rc = plpar_resize_hpt_prepare(0, shift); | ||
659 | while (H_IS_LONG_BUSY(rc)) { | ||
660 | delay = get_longbusy_msecs(rc); | ||
661 | total_delay += delay; | ||
662 | if (total_delay > HPT_RESIZE_TIMEOUT) { | ||
663 | /* prepare with shift==0 cancels an in-progress resize */ | ||
664 | rc = plpar_resize_hpt_prepare(0, 0); | ||
665 | if (rc != H_SUCCESS) | ||
666 | printk(KERN_WARNING | ||
667 | "lpar: Unexpected error %d cancelling timed out HPT resize\n", | ||
668 | rc); | ||
669 | return -ETIMEDOUT; | ||
670 | } | ||
671 | msleep(delay); | ||
672 | rc = plpar_resize_hpt_prepare(0, shift); | ||
673 | }; | ||
674 | |||
675 | switch (rc) { | ||
676 | case H_SUCCESS: | ||
677 | /* Continue on */ | ||
678 | break; | ||
679 | |||
680 | case H_PARAMETER: | ||
681 | return -EINVAL; | ||
682 | case H_RESOURCE: | ||
683 | return -EPERM; | ||
684 | default: | ||
685 | printk(KERN_WARNING | ||
686 | "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n", | ||
687 | rc); | ||
688 | return -EIO; | ||
689 | } | ||
690 | |||
691 | t1 = ktime_get(); | ||
692 | |||
693 | rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL); | ||
694 | |||
695 | t2 = ktime_get(); | ||
696 | |||
697 | if (rc != 0) { | ||
698 | switch (state.commit_rc) { | ||
699 | case H_PTEG_FULL: | ||
700 | printk(KERN_WARNING | ||
701 | "lpar: Hash collision while resizing HPT\n"); | ||
702 | return -ENOSPC; | ||
703 | |||
704 | default: | ||
705 | printk(KERN_WARNING | ||
706 | "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n", | ||
707 | state.commit_rc); | ||
708 | return -EIO; | ||
709 | }; | ||
710 | } | ||
711 | |||
712 | printk(KERN_INFO | ||
713 | "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n", | ||
714 | shift, (long long) ktime_ms_delta(t1, t0), | ||
715 | (long long) ktime_ms_delta(t2, t1)); | ||
716 | |||
717 | return 0; | ||
718 | } | ||
719 | |||
612 | void __init hpte_init_pseries(void) | 720 | void __init hpte_init_pseries(void) |
613 | { | 721 | { |
614 | mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; | 722 | mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; |
@@ -620,6 +728,7 @@ void __init hpte_init_pseries(void) | |||
620 | mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; | 728 | mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; |
621 | mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; | 729 | mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; |
622 | mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; | 730 | mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; |
731 | mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; | ||
623 | } | 732 | } |
624 | 733 | ||
625 | #ifdef CONFIG_PPC_SMLPAR | 734 | #ifdef CONFIG_PPC_SMLPAR |