Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 347
1 file changed, 223 insertions, 124 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 40e5857c4b1c..79ea3d9269db 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
@@ -98,6 +99,10 @@ static int target_smt_mode;
 module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
+static bool indep_threads_mode = true;
+module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
+
 #ifdef CONFIG_KVM_XICS
 static struct kernel_param_ops module_param_ops = {
 	.set = param_set_int,
@@ -115,6 +120,7 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
 
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
+static void kvmppc_setup_partition_table(struct kvm *kvm);
 
 static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
 						int *ip)
@@ -1734,9 +1740,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
  * MMU mode (radix or HPT), unfortunately, but since we only support
  * HPT guests on a HPT host so far, that isn't an impediment yet.
  */
-static int threads_per_vcore(void)
+static int threads_per_vcore(struct kvm *kvm)
 {
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (kvm->arch.threads_indep)
 		return 1;
 	return threads_per_subcore;
 }
@@ -1774,7 +1780,7 @@ static struct debugfs_timings_element {
 	{"cede",	offsetof(struct kvm_vcpu, arch.cede_time)},
 };
 
-#define N_TIMINGS	(sizeof(timings) / sizeof(timings[0]))
+#define N_TIMINGS	(ARRAY_SIZE(timings))
 
 struct debugfs_timings_state {
 	struct kvm_vcpu	*vcpu;
@@ -2228,11 +2234,10 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 	kvmppc_ipi_thread(cpu);
 }
 
-static void kvmppc_wait_for_nap(void)
+static void kvmppc_wait_for_nap(int n_threads)
 {
 	int cpu = smp_processor_id();
 	int i, loops;
-	int n_threads = threads_per_vcore();
 
 	if (n_threads <= 1)
 		return;
@@ -2319,7 +2324,7 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
 
 	vc->vcore_state = VCORE_PREEMPT;
 	vc->pcpu = smp_processor_id();
-	if (vc->num_threads < threads_per_vcore()) {
+	if (vc->num_threads < threads_per_vcore(vc->kvm)) {
 		spin_lock(&lp->lock);
 		list_add_tail(&vc->preempt_list, &lp->list);
 		spin_unlock(&lp->lock);
@@ -2357,7 +2362,7 @@ struct core_info {
 
 /*
  * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
- * respectively in 2-way micro-threading (split-core) mode.
+ * respectively in 2-way micro-threading (split-core) mode on POWER8.
  */
 static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
 
@@ -2373,7 +2378,14 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
 
 static bool subcore_config_ok(int n_subcores, int n_threads)
 {
-	/* Can only dynamically split if unsplit to begin with */
+	/*
+	 * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
+	 * split-core mode, with one thread per subcore.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		return n_subcores <= 4 && n_threads == 1;
+
+	/* On POWER8, can only dynamically split if unsplit to begin with */
 	if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
 		return false;
 	if (n_subcores > MAX_SUBCORES)
@@ -2404,6 +2416,11 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
 	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
 		return false;
 
+	/* POWER9 currently requires all threads to be in the same MMU mode */
+	if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+	    kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
+		return false;
+
 	if (n_threads < cip->max_subcore_threads)
 		n_threads = cip->max_subcore_threads;
 	if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
@@ -2632,6 +2649,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	int target_threads;
 	int controlled_threads;
 	int trap;
+	bool is_power8;
+	bool hpt_on_radix;
 
 	/*
 	 * Remove from the list any threads that have a signal pending
@@ -2654,15 +2673,19 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 * the number of threads per subcore, except on POWER9,
 	 * where it's 1 because the threads are (mostly) independent.
 	 */
-	controlled_threads = threads_per_vcore();
+	controlled_threads = threads_per_vcore(vc->kvm);
 
 	/*
 	 * Make sure we are running on primary threads, and that secondary
 	 * threads are offline. Also check if the number of threads in this
 	 * guest are greater than the current system threads per guest.
+	 * On POWER9, we need to be not in independent-threads mode if
+	 * this is a HPT guest on a radix host.
 	 */
-	if ((controlled_threads > 1) &&
-	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
+	hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
+	if (((controlled_threads > 1) &&
+	     ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
+	    (hpt_on_radix && vc->kvm->arch.threads_indep)) {
 		for_each_runnable_thread(i, vcpu, vc) {
 			vcpu->arch.ret = -EBUSY;
 			kvmppc_remove_runnable(vc, vcpu);
@@ -2699,14 +2722,13 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 * Hard-disable interrupts, and check resched flag and signals.
 	 * If we need to reschedule or deliver a signal, clean up
 	 * and return without going into the guest(s).
-	 * If the hpte_setup_done flag has been cleared, don't go into the
+	 * If the mmu_ready flag has been cleared, don't go into the
 	 * guest because that means a HPT resize operation is in progress.
 	 */
 	local_irq_disable();
 	hard_irq_disable();
 	if (lazy_irq_pending() || need_resched() ||
-	    recheck_signals(&core_info) ||
-	    (!kvm_is_radix(vc->kvm) && !vc->kvm->arch.hpte_setup_done)) {
+	    recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
 		local_irq_enable();
 		vc->vcore_state = VCORE_INACTIVE;
 		/* Unlock all except the primary vcore */
@@ -2728,32 +2750,51 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	cmd_bit = stat_bit = 0;
 	split = core_info.n_subcores;
 	sip = NULL;
-	if (split > 1) {
-		/* threads_per_subcore must be MAX_SMT_THREADS (8) here */
-		if (split == 2 && (dynamic_mt_modes & 2)) {
-			cmd_bit = HID0_POWER8_1TO2LPAR;
-			stat_bit = HID0_POWER8_2LPARMODE;
-		} else {
-			split = 4;
-			cmd_bit = HID0_POWER8_1TO4LPAR;
-			stat_bit = HID0_POWER8_4LPARMODE;
-		}
-		subcore_size = MAX_SMT_THREADS / split;
+	is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
+		&& !cpu_has_feature(CPU_FTR_ARCH_300);
+
+	if (split > 1 || hpt_on_radix) {
 		sip = &split_info;
 		memset(&split_info, 0, sizeof(split_info));
-		split_info.rpr = mfspr(SPRN_RPR);
-		split_info.pmmar = mfspr(SPRN_PMMAR);
-		split_info.ldbar = mfspr(SPRN_LDBAR);
-		split_info.subcore_size = subcore_size;
 		for (sub = 0; sub < core_info.n_subcores; ++sub)
 			split_info.vc[sub] = core_info.vc[sub];
+
+		if (is_power8) {
+			if (split == 2 && (dynamic_mt_modes & 2)) {
+				cmd_bit = HID0_POWER8_1TO2LPAR;
+				stat_bit = HID0_POWER8_2LPARMODE;
+			} else {
+				split = 4;
+				cmd_bit = HID0_POWER8_1TO4LPAR;
+				stat_bit = HID0_POWER8_4LPARMODE;
+			}
+			subcore_size = MAX_SMT_THREADS / split;
+			split_info.rpr = mfspr(SPRN_RPR);
+			split_info.pmmar = mfspr(SPRN_PMMAR);
+			split_info.ldbar = mfspr(SPRN_LDBAR);
+			split_info.subcore_size = subcore_size;
+		} else {
+			split_info.subcore_size = 1;
+			if (hpt_on_radix) {
+				/* Use the split_info for LPCR/LPIDR changes */
+				split_info.lpcr_req = vc->lpcr;
+				split_info.lpidr_req = vc->kvm->arch.lpid;
+				split_info.host_lpcr = vc->kvm->arch.host_lpcr;
+				split_info.do_set = 1;
+			}
+		}
+
 		/* order writes to split_info before kvm_split_mode pointer */
 		smp_wmb();
 	}
-	for (thr = 0; thr < controlled_threads; ++thr)
+
+	for (thr = 0; thr < controlled_threads; ++thr) {
+		paca[pcpu + thr].kvm_hstate.tid = thr;
+		paca[pcpu + thr].kvm_hstate.napping = 0;
 		paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+	}
 
-	/* Initiate micro-threading (split-core) if required */
+	/* Initiate micro-threading (split-core) on POWER8 if required */
 	if (cmd_bit) {
 		unsigned long hid0 = mfspr(SPRN_HID0);
 
@@ -2772,7 +2813,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	/* Start all the threads */
 	active = 0;
 	for (sub = 0; sub < core_info.n_subcores; ++sub) {
-		thr = subcore_thread_map[sub];
+		thr = is_power8 ? subcore_thread_map[sub] : sub;
 		thr0_done = false;
 		active |= 1 << thr;
 		pvc = core_info.vc[sub];
@@ -2799,18 +2840,20 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 * the vcore pointer in the PACA of the secondaries.
 	 */
 	smp_mb();
-	if (cmd_bit)
-		split_info.do_nap = 1;	/* ask secondaries to nap when done */
 
 	/*
 	 * When doing micro-threading, poke the inactive threads as well.
 	 * This gets them to the nap instruction after kvm_do_nap,
 	 * which reduces the time taken to unsplit later.
+	 * For POWER9 HPT guest on radix host, we need all the secondary
+	 * threads woken up so they can do the LPCR/LPIDR change.
 	 */
-	if (split > 1)
+	if (cmd_bit || hpt_on_radix) {
+		split_info.do_nap = 1;	/* ask secondaries to nap when done */
 		for (thr = 1; thr < threads_per_subcore; ++thr)
 			if (!(active & (1 << thr)))
 				kvmppc_ipi_thread(pcpu + thr);
+	}
 
 	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
@@ -2844,10 +2887,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	vc->vcore_state = VCORE_EXITING;
 
 	/* wait for secondary threads to finish writing their state to memory */
-	kvmppc_wait_for_nap();
+	kvmppc_wait_for_nap(controlled_threads);
 
 	/* Return to whole-core mode if we split the core earlier */
-	if (split > 1) {
+	if (cmd_bit) {
 		unsigned long hid0 = mfspr(SPRN_HID0);
 		unsigned long loops = 0;
 
@@ -2863,8 +2906,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 			cpu_relax();
 			++loops;
 		}
-		split_info.do_nap = 0;
+	} else if (hpt_on_radix) {
+		/* Wait for all threads to have seen final sync */
+		for (thr = 1; thr < controlled_threads; ++thr) {
+			while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) {
+				HMT_low();
+				barrier();
+			}
+			HMT_medium();
+		}
 	}
+	split_info.do_nap = 0;
 
 	kvmppc_set_host_core(pcpu);
 
@@ -3073,6 +3125,25 @@ out:
 	trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
 }
 
+static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
+{
+	int r = 0;
+	struct kvm *kvm = vcpu->kvm;
+
+	mutex_lock(&kvm->lock);
+	if (!kvm->arch.mmu_ready) {
+		if (!kvm_is_radix(kvm))
+			r = kvmppc_hv_setup_htab_rma(vcpu);
+		if (!r) {
+			if (cpu_has_feature(CPU_FTR_ARCH_300))
+				kvmppc_setup_partition_table(kvm);
+			kvm->arch.mmu_ready = 1;
+		}
+	}
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int n_ceded, i, r;
@@ -3129,15 +3200,15 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	       !signal_pending(current)) {
-		/* See if the HPT and VRMA are ready to go */
-		if (!kvm_is_radix(vcpu->kvm) &&
-		    !vcpu->kvm->arch.hpte_setup_done) {
+		/* See if the MMU is ready to go */
+		if (!vcpu->kvm->arch.mmu_ready) {
 			spin_unlock(&vc->lock);
-			r = kvmppc_hv_setup_htab_rma(vcpu);
+			r = kvmhv_setup_mmu(vcpu);
 			spin_lock(&vc->lock);
 			if (r) {
 				kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-				kvm_run->fail_entry.hardware_entry_failure_reason = 0;
+				kvm_run->fail_entry.
+					hardware_entry_failure_reason = 0;
 				vcpu->arch.ret = r;
 				break;
 			}
@@ -3219,6 +3290,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	unsigned long ebb_regs[3] = {};	/* shut up GCC */
 	unsigned long user_tar = 0;
 	unsigned int user_vrsave;
+	struct kvm *kvm;
 
 	if (!vcpu->arch.sane) {
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -3256,8 +3328,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		return -EINTR;
 	}
 
-	atomic_inc(&vcpu->kvm->arch.vcpus_running);
-	/* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */
+	kvm = vcpu->kvm;
+	atomic_inc(&kvm->arch.vcpus_running);
+	/* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
 	smp_mb();
 
 	flush_all_to_thread(current);
@@ -3285,10 +3358,10 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			trace_kvm_hcall_exit(vcpu, r);
 			kvmppc_core_prepare_to_enter(vcpu);
 		} else if (r == RESUME_PAGE_FAULT) {
-			srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+			srcu_idx = srcu_read_lock(&kvm->srcu);
 			r = kvmppc_book3s_hv_page_fault(run, vcpu,
 				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
-			srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+			srcu_read_unlock(&kvm->srcu, srcu_idx);
 		} else if (r == RESUME_PASSTHROUGH) {
 			if (WARN_ON(xive_enabled()))
 				r = H_SUCCESS;
@@ -3308,27 +3381,26 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	mtspr(SPRN_VRSAVE, user_vrsave);
 
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
-	atomic_dec(&vcpu->kvm->arch.vcpus_running);
+	atomic_dec(&kvm->arch.vcpus_running);
 	return r;
 }
 
 static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
-				     int linux_psize)
+				     int shift, int sllp)
 {
-	struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
-
-	if (!def->shift)
-		return;
-	(*sps)->page_shift = def->shift;
-	(*sps)->slb_enc = def->sllp;
-	(*sps)->enc[0].page_shift = def->shift;
-	(*sps)->enc[0].pte_enc = def->penc[linux_psize];
+	(*sps)->page_shift = shift;
+	(*sps)->slb_enc = sllp;
+	(*sps)->enc[0].page_shift = shift;
+	(*sps)->enc[0].pte_enc = kvmppc_pgsize_lp_encoding(shift, shift);
 	/*
-	 * Add 16MB MPSS support if host supports it
+	 * Add 16MB MPSS support (may get filtered out by userspace)
 	 */
-	if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
-		(*sps)->enc[1].page_shift = 24;
-		(*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
+	if (shift != 24) {
+		int penc = kvmppc_pgsize_lp_encoding(shift, 24);
+		if (penc != -1) {
+			(*sps)->enc[1].page_shift = 24;
+			(*sps)->enc[1].pte_enc = penc;
+		}
 	}
 	(*sps)++;
 }
@@ -3339,13 +3411,6 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
 	struct kvm_ppc_one_seg_page_size *sps;
 
 	/*
-	 * Since we don't yet support HPT guests on a radix host,
-	 * return an error if the host uses radix.
-	 */
-	if (radix_enabled())
-		return -EINVAL;
-
-	/*
 	 * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
 	 * POWER7 doesn't support keys for instruction accesses,
 	 * POWER8 and POWER9 do.
@@ -3353,16 +3418,15 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
 	info->data_keys = 32;
 	info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
 
-	info->flags = KVM_PPC_PAGE_SIZES_REAL;
-	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
-		info->flags |= KVM_PPC_1T_SEGMENTS;
-	info->slb_size = mmu_slb_size;
+	/* POWER7, 8 and 9 all have 1T segments and 32-entry SLB */
+	info->flags = KVM_PPC_PAGE_SIZES_REAL | KVM_PPC_1T_SEGMENTS;
+	info->slb_size = 32;
 
 	/* We only support these sizes for now, and no muti-size segments */
 	sps = &info->sps[0];
-	kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
-	kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
-	kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
+	kvmppc_add_seg_page_size(&sps, 12, 0);
+	kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
+	kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
 
 	return 0;
 }
@@ -3377,7 +3441,7 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int i, r;
 	unsigned long n;
-	unsigned long *buf;
+	unsigned long *buf, *p;
 	struct kvm_vcpu *vcpu;
 
 	mutex_lock(&kvm->slots_lock);
@@ -3393,8 +3457,8 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
 		goto out;
 
 	/*
-	 * Use second half of bitmap area because radix accumulates
-	 * bits in the first half.
+	 * Use second half of bitmap area because both HPT and radix
+	 * accumulate bits in the first half.
 	 */
 	n = kvm_dirty_bitmap_bytes(memslot);
 	buf = memslot->dirty_bitmap + n / sizeof(long);
@@ -3407,6 +3471,16 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
 	if (r)
 		goto out;
 
+	/*
+	 * We accumulate dirty bits in the first half of the
+	 * memslot's dirty_bitmap area, for when pages are paged
+	 * out or modified by the host directly. Pick up these
+	 * bits and add them to the map.
+	 */
+	p = memslot->dirty_bitmap;
+	for (i = 0; i < n / sizeof(long); ++i)
+		buf[i] |= xchg(&p[i], 0);
+
 	/* Harvest dirty bits from VPA and DTL updates */
 	/* Note: we never modify the SLB shadow buffer areas */
 	kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3438,15 +3512,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
 static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
 					 unsigned long npages)
 {
-	/*
-	 * For now, if radix_enabled() then we only support radix guests,
-	 * and in that case we don't need the rmap array.
-	 */
-	if (radix_enabled()) {
-		slot->arch.rmap = NULL;
-		return 0;
-	}
-
 	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
 	if (!slot->arch.rmap)
 		return -ENOMEM;
@@ -3467,8 +3532,6 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
 				const struct kvm_memory_slot *new)
 {
 	unsigned long npages = mem->memory_size >> PAGE_SHIFT;
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *memslot;
 
 	/*
 	 * If we are making a new memslot, it might make
@@ -3478,18 +3541,6 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
 	 */
 	if (npages)
 		atomic64_inc(&kvm->arch.mmio_update);
-
-	if (npages && old->npages && !kvm_is_radix(kvm)) {
-		/*
-		 * If modifying a memslot, reset all the rmap dirty bits.
-		 * If this is a new memslot, we don't need to do anything
-		 * since the rmap array starts out as all zeroes,
-		 * i.e. no pages are dirty.
-		 */
-		slots = kvm_memslots(kvm);
-		memslot = id_to_memslot(slots, mem->slot);
-		kvmppc_hv_get_dirty_log_hpt(kvm, memslot, NULL);
-	}
 }
 
 /*
@@ -3545,6 +3596,10 @@ static void kvmppc_setup_partition_table(struct kvm *kvm)
 	mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
 }
 
+/*
+ * Set up HPT (hashed page table) and RMA (real-mode area).
+ * Must be called with kvm->lock held.
+ */
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
@@ -3556,10 +3611,6 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	unsigned long psize, porder;
 	int srcu_idx;
 
-	mutex_lock(&kvm->lock);
-	if (kvm->arch.hpte_setup_done)
-		goto out;	/* another vcpu beat us to it */
-
 	/* Allocate hashed page table (if not done already) and reset it */
 	if (!kvm->arch.hpt.virt) {
 		int order = KVM_DEFAULT_HPT_ORDER;
@@ -3618,18 +3669,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		/* the -4 is to account for senc values starting at 0x10 */
 		lpcr = senc << (LPCR_VRMASD_SH - 4);
 		kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
-	} else {
-		kvmppc_setup_partition_table(kvm);
 	}
 
-	/* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */
+	/* Order updates to kvm->arch.lpcr etc. vs. mmu_ready */
 	smp_wmb();
-	kvm->arch.hpte_setup_done = 1;
 	err = 0;
  out_srcu:
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
  out:
-	mutex_unlock(&kvm->lock);
 	return err;
 
  up_out:
@@ -3637,6 +3684,34 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	goto out_srcu;
 }
 
+/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
+int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
+{
+	kvmppc_free_radix(kvm);
+	kvmppc_update_lpcr(kvm, LPCR_VPM1,
+			   LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+	kvmppc_rmap_reset(kvm);
+	kvm->arch.radix = 0;
+	kvm->arch.process_table = 0;
+	return 0;
+}
+
+/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
+int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
+{
+	int err;
+
+	err = kvmppc_init_vm_radix(kvm);
+	if (err)
+		return err;
+
+	kvmppc_free_hpt(&kvm->arch.hpt);
+	kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
+			   LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+	kvm->arch.radix = 1;
+	return 0;
+}
+
 #ifdef CONFIG_KVM_XICS
 /*
  * Allocate a per-core structure for managing state about which cores are
@@ -3780,10 +3855,11 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	}
 
 	/*
-	 * For now, if the host uses radix, the guest must be radix.
+	 * If the host uses radix, the guest starts out as radix.
 	 */
 	if (radix_enabled()) {
 		kvm->arch.radix = 1;
+		kvm->arch.mmu_ready = 1;
 		lpcr &= ~LPCR_VPM1;
 		lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
 		ret = kvmppc_init_vm_radix(kvm);
@@ -3803,7 +3879,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	 * Work out how many sets the TLB has, for the use of
 	 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
 	 */
-	if (kvm_is_radix(kvm))
+	if (radix_enabled())
 		kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX;	/* 128 */
 	else if (cpu_has_feature(CPU_FTR_ARCH_300))
 		kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH;	/* 256 */
@@ -3815,10 +3891,12 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	/*
 	 * Track that we now have a HV mode VM active. This blocks secondary
 	 * CPU threads from coming online.
-	 * On POWER9, we only need to do this for HPT guests on a radix
-	 * host, which is not yet supported.
+	 * On POWER9, we only need to do this if the "indep_threads_mode"
+	 * module parameter has been set to N.
 	 */
-	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		kvm->arch.threads_indep = indep_threads_mode;
+	if (!kvm->arch.threads_indep)
 		kvm_hv_vm_activated();
 
 	/*
@@ -3858,7 +3936,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 {
 	debugfs_remove_recursive(kvm->arch.debugfs_dir);
 
-	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+	if (!kvm->arch.threads_indep)
 		kvm_hv_vm_deactivated();
 
 	kvmppc_free_vcores(kvm);
@@ -4193,6 +4271,7 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
 {
 	unsigned long lpcr;
 	int radix;
+	int err;
 
 	/* If not on a POWER9, reject it */
 	if (!cpu_has_feature(CPU_FTR_ARCH_300))
@@ -4202,12 +4281,8 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
 	if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
 		return -EINVAL;
 
-	/* We can't change a guest to/from radix yet */
-	radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
-	if (radix != kvm_is_radix(kvm))
-		return -EINVAL;
-
 	/* GR (guest radix) bit in process_table field must match */
+	radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
 	if (!!(cfg->process_table & PATB_GR) != radix)
 		return -EINVAL;
 
@@ -4215,15 +4290,40 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
 	if ((cfg->process_table & PRTS_MASK) > 24)
 		return -EINVAL;
 
+	/* We can change a guest to/from radix now, if the host is radix */
+	if (radix && !radix_enabled())
+		return -EINVAL;
+
 	mutex_lock(&kvm->lock);
+	if (radix != kvm_is_radix(kvm)) {
+		if (kvm->arch.mmu_ready) {
+			kvm->arch.mmu_ready = 0;
+			/* order mmu_ready vs. vcpus_running */
+			smp_mb();
+			if (atomic_read(&kvm->arch.vcpus_running)) {
+				kvm->arch.mmu_ready = 1;
+				err = -EBUSY;
+				goto out_unlock;
+			}
+		}
+		if (radix)
+			err = kvmppc_switch_mmu_to_radix(kvm);
+		else
+			err = kvmppc_switch_mmu_to_hpt(kvm);
+		if (err)
+			goto out_unlock;
+	}
+
 	kvm->arch.process_table = cfg->process_table;
 	kvmppc_setup_partition_table(kvm);
 
 	lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
 	kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
-	mutex_unlock(&kvm->lock);
+	err = 0;
 
-	return 0;
+ out_unlock:
+	mutex_unlock(&kvm->lock);
+	return err;
 }
 
 static struct kvmppc_ops kvm_ops_hv = {
@@ -4365,4 +4465,3 @@ module_exit(kvmppc_book3s_exit_hv);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_MISCDEV(KVM_MINOR);
 MODULE_ALIAS("devname:kvm");
-