x86/hyper-v: Support extended CPU ranges for TLB flush hypercalls

Hyper-V hosts may support more than 64 vCPUs; in that case we need to use the
HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX/LIST_EX hypercalls.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jork Loeser <Jork.Loeser@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Simon Xiao <sixiao@microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: devel@linuxdriverproject.org
Link: http://lkml.kernel.org/r/20170802160921.21791-9-vkuznets@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Vitaly Kuznetsov <vkuznets@redhat.com> 2017-08-02 12:09:20 -0400
committer: Ingo Molnar <mingo@kernel.org> 2017-08-31 08:20:36 -0400
commit: 628f54cc6451d2706ba8a56763dbf93be02aaa80 (patch)
tree: b148e860cbabb844da9e6b9474aae2b47aafb455
parent: 3e83dfd5d8e374328078f527f1f7d189824896ab (diff)
2 files changed, 140 insertions, 3 deletions
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 9419a20b1d75..51b44be03f50 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -18,11 +18,25 @@ struct hv_flush_pcpu {
        u64 gva_list[];
 };
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_flush_pcpu_ex {
+        u64 address_space;
+        u64 flags;
+        struct {
+                u64 format;
+                u64 valid_bank_mask;
+                u64 bank_contents[];
+        } hv_vp_set;
+        u64 gva_list[];
+};
 /* Each gva in gva_list encodes up to 4096 pages to flush */
 #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
 static struct hv_flush_pcpu __percpu *pcpu_flush;
+static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
 /*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
@@ -53,6 +67,34 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
        return gva_n - offset;
 }
+/* Return the number of banks in the resulting vp_set */
+static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
+                                    const struct cpumask *cpus)
+{
+        int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+        /*
+         * Some banks may end up being empty but this is acceptable.
+         */
+        for_each_cpu(cpu, cpus) {
+                vcpu = hv_cpu_number_to_vp_number(cpu);
+                vcpu_bank = vcpu / 64;
+                vcpu_offset = vcpu % 64;
+                /* valid_bank_mask can represent up to 64 banks */
+                if (vcpu_bank >= 64)
+                        return 0;
+                __set_bit(vcpu_offset, (unsigned long *)
+                          &flush->hv_vp_set.bank_contents[vcpu_bank]);
+                if (vcpu_bank >= nr_bank)
+                        nr_bank = vcpu_bank + 1;
+        }
+        flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
+        return nr_bank;
+}
 static void hyperv_flush_tlb_others(const struct cpumask *cpus,
                                    const struct flush_tlb_info *info)
 {
@@ -122,17 +164,102 @@ do_native:
        native_flush_tlb_others(cpus, info);
 }
+static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
+                                       const struct flush_tlb_info *info)
+{
+        int nr_bank = 0, max_gvas, gva_n;
+        struct hv_flush_pcpu_ex *flush;
+        u64 status = U64_MAX;
+        unsigned long flags;
+        if (!pcpu_flush_ex || !hv_hypercall_pg)
+                goto do_native;
+        if (cpumask_empty(cpus))
+                return;
+        local_irq_save(flags);
+        flush = this_cpu_ptr(pcpu_flush_ex);
+        if (info->mm) {
+                flush->address_space = virt_to_phys(info->mm->pgd);
+                flush->flags = 0;
+        } else {
+                flush->address_space = 0;
+                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+        }
+        flush->hv_vp_set.valid_bank_mask = 0;
+        if (!cpumask_equal(cpus, cpu_present_mask)) {
+                flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
+                nr_bank = cpumask_to_vp_set(flush, cpus);
+        }
+        if (!nr_bank) {
+                flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
+                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+        }
+        /*
+         * We can flush not more than max_gvas with one hypercall. Flush the
+         * whole address space if we were asked to do more.
+         */
+        max_gvas =
+                (PAGE_SIZE - sizeof(*flush) - nr_bank *
+                 sizeof(flush->hv_vp_set.bank_contents[0])) /
+                sizeof(flush->gva_list[0]);
+        if (info->end == TLB_FLUSH_ALL) {
+                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+                status = hv_do_rep_hypercall(
+                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+                        0, nr_bank + 2, flush, NULL);
+        } else if (info->end &&
+                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
+                status = hv_do_rep_hypercall(
+                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+                        0, nr_bank + 2, flush, NULL);
+        } else {
+                gva_n = fill_gva_list(flush->gva_list, nr_bank,
+                                      info->start, info->end);
+                status = hv_do_rep_hypercall(
+                        HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
+                        gva_n, nr_bank + 2, flush, NULL);
+        }
+        local_irq_restore(flags);
+        if (!(status & HV_HYPERCALL_RESULT_MASK))
+                return;
+do_native:
+        native_flush_tlb_others(cpus, info);
+}
 void hyperv_setup_mmu_ops(void)
 {
-        if (ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED) {
+        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
+                return;
+        setup_clear_cpu_cap(X86_FEATURE_PCID);
+        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
                pr_info("Using hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
-                setup_clear_cpu_cap(X86_FEATURE_PCID);
+        } else {
+                pr_info("Using ext hypercall for remote TLB flush\n");
+                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
        }
 }
 void hyper_alloc_mmu(void)
 {
-        if (ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)
+        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
+                return;
+        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+        else
+                pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
 }
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index a6fdd3b82b4a..7032f4d8dff3 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -149,6 +149,9 @@
 */
 #define HV_X64_DEPRECATING_AEOI_RECOMMENDED     (1 << 9)
+/* Recommend using the newer ExProcessorMasks interface */
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED   (1 << 11)
 /*
 * HV_VP_SET available
 */
@@ -245,6 +248,8 @@
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE      0x0002
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST       0x0003
 #define HVCALL_NOTIFY_LONG_SPIN_WAIT            0x0008
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX  0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX   0x0014
 #define HVCALL_POST_MESSAGE                     0x005c
 #define HVCALL_SIGNAL_EVENT                     0x005d
@@ -266,6 +271,11 @@
 #define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY       BIT(2)
 #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT      BIT(3)
+enum HV_GENERIC_SET_FORMAT {
+        HV_GENERIC_SET_SPARCE_4K,
+        HV_GENERIC_SET_ALL,
+};
 /* hypercall status code */
 #define HV_STATUS_SUCCESS                       0
 #define HV_STATUS_INVALID_HYPERCALL_CODE        2
author	Vitaly Kuznetsov <vkuznets@redhat.com>	2017-08-02 12:09:20 -0400
committer	Ingo Molnar <mingo@kernel.org>	2017-08-31 08:20:36 -0400
commit	628f54cc6451d2706ba8a56763dbf93be02aaa80 (patch)
tree	b148e860cbabb844da9e6b9474aae2b47aafb455
parent	3e83dfd5d8e374328078f527f1f7d189824896ab (diff)

diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 9419a20b1d75..51b44be03f50 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c
@@ -18,11 +18,25 @@ struct hv_flush_pcpu {
18	u64 gva_list[];	18	u64 gva_list[];
19	};	19	};
20		20
		21	/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
		22	struct hv_flush_pcpu_ex {
		23	u64 address_space;
		24	u64 flags;
		25	struct {
		26	u64 format;
		27	u64 valid_bank_mask;
		28	u64 bank_contents[];
		29	} hv_vp_set;
		30	u64 gva_list[];
		31	};
		32
21	/* Each gva in gva_list encodes up to 4096 pages to flush */	33	/* Each gva in gva_list encodes up to 4096 pages to flush */
22	#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)	34	#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
23		35
24	static struct hv_flush_pcpu __percpu *pcpu_flush;	36	static struct hv_flush_pcpu __percpu *pcpu_flush;
25		37
		38	static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
		39
26	/*	40	/*
27	* Fills in gva_list starting from offset. Returns the number of items added.	41	* Fills in gva_list starting from offset. Returns the number of items added.
28	*/	42	*/
@@ -53,6 +67,34 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
53	return gva_n - offset;	67	return gva_n - offset;
54	}	68	}
55		69
		70	/* Return the number of banks in the resulting vp_set */
		71	static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
		72	const struct cpumask *cpus)
		73	{
		74	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
		75
		76	/*
		77	* Some banks may end up being empty but this is acceptable.
		78	*/
		79	for_each_cpu(cpu, cpus) {
		80	vcpu = hv_cpu_number_to_vp_number(cpu);
		81	vcpu_bank = vcpu / 64;
		82	vcpu_offset = vcpu % 64;
		83
		84	/* valid_bank_mask can represent up to 64 banks */
		85	if (vcpu_bank >= 64)
		86	return 0;
		87
		88	__set_bit(vcpu_offset, (unsigned long *)
		89	&flush->hv_vp_set.bank_contents[vcpu_bank]);
		90	if (vcpu_bank >= nr_bank)
		91	nr_bank = vcpu_bank + 1;
		92	}
		93	flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
		94
		95	return nr_bank;
		96	}
		97
56	static void hyperv_flush_tlb_others(const struct cpumask *cpus,	98	static void hyperv_flush_tlb_others(const struct cpumask *cpus,
57	const struct flush_tlb_info *info)	99	const struct flush_tlb_info *info)
58	{	100	{
@@ -122,17 +164,102 @@ do_native:
122	native_flush_tlb_others(cpus, info);	164	native_flush_tlb_others(cpus, info);
123	}	165	}
124		166
		167	static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
		168	const struct flush_tlb_info *info)
		169	{
		170	int nr_bank = 0, max_gvas, gva_n;
		171	struct hv_flush_pcpu_ex *flush;
		172	u64 status = U64_MAX;
		173	unsigned long flags;
		174
		175	if (!pcpu_flush_ex || !hv_hypercall_pg)
		176	goto do_native;
		177
		178	if (cpumask_empty(cpus))
		179	return;
		180
		181	local_irq_save(flags);
		182
		183	flush = this_cpu_ptr(pcpu_flush_ex);
		184
		185	if (info->mm) {
		186	flush->address_space = virt_to_phys(info->mm->pgd);
		187	flush->flags = 0;
		188	} else {
		189	flush->address_space = 0;
		190	flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
		191	}
		192
		193	flush->hv_vp_set.valid_bank_mask = 0;
		194
		195	if (!cpumask_equal(cpus, cpu_present_mask)) {
		196	flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
		197	nr_bank = cpumask_to_vp_set(flush, cpus);
		198	}
		199
		200	if (!nr_bank) {
		201	flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
		202	flush->flags |= HV_FLUSH_ALL_PROCESSORS;
		203	}
		204
		205	/*
		206	* We can flush not more than max_gvas with one hypercall. Flush the
		207	* whole address space if we were asked to do more.
		208	*/
		209	max_gvas =
		210	(PAGE_SIZE - sizeof(*flush) - nr_bank *
		211	sizeof(flush->hv_vp_set.bank_contents[0])) /
		212	sizeof(flush->gva_list[0]);
		213
		214	if (info->end == TLB_FLUSH_ALL) {
		215	flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		216	status = hv_do_rep_hypercall(
		217	HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
		218	0, nr_bank + 2, flush, NULL);
		219	} else if (info->end &&
		220	((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		221	status = hv_do_rep_hypercall(
		222	HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
		223	0, nr_bank + 2, flush, NULL);
		224	} else {
		225	gva_n = fill_gva_list(flush->gva_list, nr_bank,
		226	info->start, info->end);
		227	status = hv_do_rep_hypercall(
		228	HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
		229	gva_n, nr_bank + 2, flush, NULL);
		230	}
		231
		232	local_irq_restore(flags);
		233
		234	if (!(status & HV_HYPERCALL_RESULT_MASK))
		235	return;
		236	do_native:
		237	native_flush_tlb_others(cpus, info);
		238	}
		239
125	void hyperv_setup_mmu_ops(void)	240	void hyperv_setup_mmu_ops(void)
126	{	241	{
127	if (ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED) {	242	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		243	return;
		244
		245	setup_clear_cpu_cap(X86_FEATURE_PCID);
		246
		247	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
128	pr_info("Using hypercall for remote TLB flush\n");	248	pr_info("Using hypercall for remote TLB flush\n");
129	pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;	249	pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
130	setup_clear_cpu_cap(X86_FEATURE_PCID);	250	} else {
		251	pr_info("Using ext hypercall for remote TLB flush\n");
		252	pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
131	}	253	}
132	}	254	}
133		255
134	void hyper_alloc_mmu(void)	256	void hyper_alloc_mmu(void)
135	{	257	{
136	if (ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)	258	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		259	return;
		260
		261	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
137	pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);	262	pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
		263	else
		264	pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
138	}	265	}


diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index a6fdd3b82b4a..7032f4d8dff3 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -149,6 +149,9 @@
149	*/	149	*/
150	#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9)	150	#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9)
151		151
		152	/* Recommend using the newer ExProcessorMasks interface */
		153	#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
		154
152	/*	155	/*
153	* HV_VP_SET available	156	* HV_VP_SET available
154	*/	157	*/
@@ -245,6 +248,8 @@
245	#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002	248	#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
246	#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003	249	#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
247	#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008	250	#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
		251	#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
		252	#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
248	#define HVCALL_POST_MESSAGE 0x005c	253	#define HVCALL_POST_MESSAGE 0x005c
249	#define HVCALL_SIGNAL_EVENT 0x005d	254	#define HVCALL_SIGNAL_EVENT 0x005d
250		255
@@ -266,6 +271,11 @@
266	#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)	271	#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
267	#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)	272	#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
268		273
		274	enum HV_GENERIC_SET_FORMAT {
		275	HV_GENERIC_SET_SPARCE_4K,
		276	HV_GENERIC_SET_ALL,
		277	};
		278
269	/* hypercall status code */	279	/* hypercall status code */
270	#define HV_STATUS_SUCCESS 0	280	#define HV_STATUS_SUCCESS 0
271	#define HV_STATUS_INVALID_HYPERCALL_CODE 2	281	#define HV_STATUS_INVALID_HYPERCALL_CODE 2