x86/tlb: enable tlb flush range support for x86

Not every tlb_flush execution really needs to evacuate all TLB entries. In cases like munmap, just a few 'invlpg' instructions are better for whole-process performance, since they leave most of the TLB entries in place for later accesses. This patch also rewrites flush_tlb_range for 2 purposes: 1, split it out to get the flush_tlb_mm_range function. 2, clean up to reduce line breaking, thanks to Borislav's input. My micro benchmark 'mummap' http://lkml.org/lkml/2012/5/17/59 shows that random memory access on other CPUs gets a 0~50% speed up on a 2P * 4cores * HT NHM EP while doing 'munmap'. Thanks to Yongjie for testing this patch: ------------- I used Linux 3.4-RC6 w/ and w/o his patches as Xen dom0 and guest kernel. After running two benchmarks in a Xen HVM guest, I found his patches brought about 1%~3% performance gain in 'kernel build' and 'netperf' testing, though the performance gain was not very stable in 'kernel build' testing. Some detailed testing results are below. Testing Environment: Hardware: Romley-EP platform Xen version: latest upstream Linux kernel: 3.4-RC6 Guest vCPU number: 8 NIC: Intel 82599 (10GB bandwidth) In 'kernel build' testing in guest: Command line | performance gain make -j 4 | 3.81% make -j 8 | 0.37% make -j 16 | -0.52% In 'netperf' testing, we tested TCP_STREAM with the default socket size, using 16384 bytes as the large packet and 64 bytes as the small packet. I used several clients to add networking pressure, then the 'netperf' server automatically generated several threads to respond to them. I also used large-size packets and small-size packets in the testing. Packet size | Thread number | performance gain 16384 bytes | 4 | 0.02% 16384 bytes | 8 | 2.21% 16384 bytes | 16 | 2.04% 64 bytes | 4 | 1.07% 64 bytes | 8 | 3.31% 64 bytes | 16 | 0.71% Signed-off-by: Alex Shi <alex.shi@intel.com> Link: http://lkml.kernel.org/r/1340845344-27557-8-git-send-email-alex.shi@intel.com Tested-by: Ren, Yongjie <yongjie.ren@intel.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
author: Alex Shi <alex.shi@intel.com> 2012-06-27 21:02:22 -0400
committer: H. Peter Anvin <hpa@zytor.com> 2012-06-27 22:29:11 -0400
commit: 611ae8e3f5204f7480b3b405993b3352cfa16662 (patch)
tree: fc8d829c331eafccc0939f2ed10655f605bac8c7 /arch/x86/mm
parent: 597e1c3580b7cfd95bb0f3167e2b297bf8a5a3ae (diff)
1 files changed, 46 insertions, 66 deletions
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5911f61e300e..481737def84a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -301,23 +301,10 @@ void flush_tlb_current_task(void)
        preempt_enable();
 }
-void flush_tlb_mm(struct mm_struct *mm)
+/*
-{
+ * It can find out the THP large page, or
-        preempt_disable();
+ * HUGETLB page in tlb_flush when THP disabled
+ */
-        if (current->active_mm == mm) {
-                if (current->mm)
-                        local_flush_tlb();
-                else
-                        leave_mm(smp_processor_id());
-        }
-        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-                flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
-        preempt_enable();
-}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline unsigned long has_large_page(struct mm_struct *mm,
                                 unsigned long start, unsigned long end)
 {
@@ -339,68 +326,61 @@ static inline unsigned long has_large_page(struct mm_struct *mm,
        }
        return 0;
 }
-#else
-static inline unsigned long has_large_page(struct mm_struct *mm,
-                                 unsigned long start, unsigned long end)
-{
-        return 0;
-}
-#endif
-void flush_tlb_range(struct vm_area_struct *vma,
-                                   unsigned long start, unsigned long end)
-{
-        struct mm_struct *mm;
-        if (vma->vm_flags & VM_HUGETLB || tlb_flushall_shift == -1) {
+void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-flush_all:
+                                unsigned long end, unsigned long vmflag)
-                flush_tlb_mm(vma->vm_mm);
+{
-                return;
+        unsigned long addr;
-        }
+        unsigned act_entries, tlb_entries = 0;
        preempt_disable();
-        mm = vma->vm_mm;
+        if (current->active_mm != mm)
-        if (current->active_mm == mm) {
+                goto flush_all;
-                if (current->mm) {
-                        unsigned long addr, vmflag = vma->vm_flags;
-                        unsigned act_entries, tlb_entries = 0;
-                        if (vmflag & VM_EXEC)
+        if (!current->mm) {
-                                tlb_entries = tlb_lli_4k[ENTRIES];
+                leave_mm(smp_processor_id());
-                        else
+                goto flush_all;
-                                tlb_entries = tlb_lld_4k[ENTRIES];
+        }
-                        act_entries = tlb_entries > mm->total_vm ?
-                                        mm->total_vm : tlb_entries;
-                        if ((end - start) >> PAGE_SHIFT >
+        if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1
-                                        act_entries >> tlb_flushall_shift)
+                                        || vmflag == VM_HUGETLB) {
-                                local_flush_tlb();
+                local_flush_tlb();
-                        else {
+                goto flush_all;
-                                if (has_large_page(mm, start, end)) {
+        }
-                                        preempt_enable();
-                                        goto flush_all;
-                                }
-                                for (addr = start; addr < end;
-                                                addr += PAGE_SIZE)
-                                        __flush_tlb_single(addr);
-                                if (cpumask_any_but(mm_cpumask(mm),
+        /* In modern CPU, last level tlb used for both data/ins */
-                                        smp_processor_id()) < nr_cpu_ids)
+        if (vmflag & VM_EXEC)
-                                        flush_tlb_others(mm_cpumask(mm), mm,
+                tlb_entries = tlb_lli_4k[ENTRIES];
-                                                                start, end);
+        else
-                                preempt_enable();
+                tlb_entries = tlb_lld_4k[ENTRIES];
-                                return;
+        /* Assume all of TLB entries was occupied by this task */
-                        }
+        act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm;
-                } else {
-                        leave_mm(smp_processor_id());
+        /* tlb_flushall_shift is on balance point, details in commit log */
+        if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
+                local_flush_tlb();
+        else {
+                if (has_large_page(mm, start, end)) {
+                        local_flush_tlb();
+                        goto flush_all;
                }
+                /* flush range by one by one 'invlpg' */
+                for (addr = start; addr < end;  addr += PAGE_SIZE)
+                        __flush_tlb_single(addr);
+                if (cpumask_any_but(mm_cpumask(mm),
+                                smp_processor_id()) < nr_cpu_ids)
+                        flush_tlb_others(mm_cpumask(mm), mm, start, end);
+                preempt_enable();
+                return;
        }
+flush_all:
        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
        preempt_enable();
 }
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 {
        struct mm_struct *mm = vma->vm_mm;
author	Alex Shi <alex.shi@intel.com>	2012-06-27 21:02:22 -0400
committer	H. Peter Anvin <hpa@zytor.com>	2012-06-27 22:29:11 -0400
commit	611ae8e3f5204f7480b3b405993b3352cfa16662 (patch)
tree	fc8d829c331eafccc0939f2ed10655f605bac8c7 /arch/x86/mm
parent	597e1c3580b7cfd95bb0f3167e2b297bf8a5a3ae (diff)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5911f61e300e..481737def84a 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c
@@ -301,23 +301,10 @@ void flush_tlb_current_task(void)
301	preempt_enable();	301	preempt_enable();
302	}	302	}
303		303
304	void flush_tlb_mm(struct mm_struct *mm)	304	/*
305	{	305	* It can find out the THP large page, or
306	preempt_disable();	306	* HUGETLB page in tlb_flush when THP disabled
307		307	*/
308	if (current->active_mm == mm) {
309	if (current->mm)
310	local_flush_tlb();
311	else
312	leave_mm(smp_processor_id());
313	}
314	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
315	flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
316
317	preempt_enable();
318	}
319
320	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
321	static inline unsigned long has_large_page(struct mm_struct *mm,	308	static inline unsigned long has_large_page(struct mm_struct *mm,
322	unsigned long start, unsigned long end)	309	unsigned long start, unsigned long end)
323	{	310	{
@@ -339,68 +326,61 @@ static inline unsigned long has_large_page(struct mm_struct *mm,
339	}	326	}
340	return 0;	327	return 0;
341	}	328	}
342	#else
343	static inline unsigned long has_large_page(struct mm_struct *mm,
344	unsigned long start, unsigned long end)
345	{
346	return 0;
347	}
348	#endif
349	void flush_tlb_range(struct vm_area_struct *vma,
350	unsigned long start, unsigned long end)
351	{
352	struct mm_struct *mm;
353		329
354	if (vma->vm_flags & VM_HUGETLB \|\| tlb_flushall_shift == -1) {	330	void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
355	flush_all:	331	unsigned long end, unsigned long vmflag)
356	flush_tlb_mm(vma->vm_mm);	332	{
357	return;	333	unsigned long addr;
358	}	334	unsigned act_entries, tlb_entries = 0;
359		335
360	preempt_disable();	336	preempt_disable();
361	mm = vma->vm_mm;	337	if (current->active_mm != mm)
362	if (current->active_mm == mm) {	338	goto flush_all;
363	if (current->mm) {
364	unsigned long addr, vmflag = vma->vm_flags;
365	unsigned act_entries, tlb_entries = 0;
366		339
367	if (vmflag & VM_EXEC)	340	if (!current->mm) {
368	tlb_entries = tlb_lli_4k[ENTRIES];	341	leave_mm(smp_processor_id());
369	else	342	goto flush_all;
370	tlb_entries = tlb_lld_4k[ENTRIES];	343	}
371
372	act_entries = tlb_entries > mm->total_vm ?
373	mm->total_vm : tlb_entries;
374		344
375	if ((end - start) >> PAGE_SHIFT >	345	if (end == TLB_FLUSH_ALL \|\| tlb_flushall_shift == -1
376	act_entries >> tlb_flushall_shift)	346	\|\| vmflag == VM_HUGETLB) {
377	local_flush_tlb();	347	local_flush_tlb();
378	else {	348	goto flush_all;
379	if (has_large_page(mm, start, end)) {	349	}
380	preempt_enable();
381	goto flush_all;
382	}
383	for (addr = start; addr < end;
384	addr += PAGE_SIZE)
385	__flush_tlb_single(addr);
386		350
387	if (cpumask_any_but(mm_cpumask(mm),	351	/* In modern CPU, last level tlb used for both data/ins */
388	smp_processor_id()) < nr_cpu_ids)	352	if (vmflag & VM_EXEC)
389	flush_tlb_others(mm_cpumask(mm), mm,	353	tlb_entries = tlb_lli_4k[ENTRIES];
390	start, end);	354	else
391	preempt_enable();	355	tlb_entries = tlb_lld_4k[ENTRIES];
392	return;	356	/* Assume all of TLB entries was occupied by this task */
393	}	357	act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm;
394	} else {	358
395	leave_mm(smp_processor_id());	359	/* tlb_flushall_shift is on balance point, details in commit log */
		360	if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
		361	local_flush_tlb();
		362	else {
		363	if (has_large_page(mm, start, end)) {
		364	local_flush_tlb();
		365	goto flush_all;
396	}	366	}
		367	/* flush range by one by one 'invlpg' */
		368	for (addr = start; addr < end; addr += PAGE_SIZE)
		369	__flush_tlb_single(addr);
		370
		371	if (cpumask_any_but(mm_cpumask(mm),
		372	smp_processor_id()) < nr_cpu_ids)
		373	flush_tlb_others(mm_cpumask(mm), mm, start, end);
		374	preempt_enable();
		375	return;
397	}	376	}
		377
		378	flush_all:
398	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)	379	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
399	flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);	380	flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
400	preempt_enable();	381	preempt_enable();
401	}	382	}
402		383
403
404	void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)	384	void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
405	{	385	{
406	struct mm_struct *mm = vma->vm_mm;	386	struct mm_struct *mm = vma->vm_mm;