aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Shi <alex.shi@intel.com>2012-06-27 21:02:17 -0400
committerH. Peter Anvin <hpa@zytor.com>2012-06-27 22:29:07 -0400
commite7b52ffd45a6d834473f43b349e7d86593d763c7 (patch)
tree12a930bdf1c43608e932f422505bf228afaf9880
parente0ba94f14f747c2661c4d21f8c44e5b0b8cd8e48 (diff)
x86/flush_tlb: try flush_tlb_single one by one in flush_tlb_range
x86 has no flush_tlb_range support at the instruction level. Currently flush_tlb_range is just implemented by flushing the whole page table. That is not the best solution for all scenarios. In fact, if we just use 'invlpg' to flush a few lines from the TLB, we can get a performance gain from later accesses to the remaining TLB lines. But the 'invlpg' instruction costs a lot of time. Its execution time can compete with cr3 rewriting, and is even a bit higher on an SNB CPU. So, on a CPU with 512 4KB TLB entries, the balance point is at: (512 - X) * 100ns(assumed TLB refill cost) = X(TLB flush entries) * 100ns(assumed invlpg cost) Here, X is 256, that is, 1/2 of the 512 entries. But with the mysterious CPU pre-fetcher and page miss handler unit, the assumed TLB refill cost is far lower than 100ns for sequential access. And 2 HT siblings in one core make the memory access even faster if they are accessing the same memory. So, in this patch, I only do the change when the number of target entries is less than 1/16 of the whole set of active TLB entries. Actually, I have no data supporting the percentage '1/16', so any suggestions are welcome. As to hugetlb, I guess due to the smaller page table and fewer active TLB entries, I didn't see a benefit in my benchmark, so no optimization for now. My micro benchmark shows that in ideal scenarios, the performance improves by 70 percent in reading. And in the worst scenario, the reading/writing performance is similar to the unpatched 3.4-rc4 kernel.
Here is the reading data on my 2P * 4 cores * HT NHM EP machine, with THP 'always': multi-thread testing, the '-t' parameter is the thread number: with patch unpatched 3.4-rc4 ./mprotect -t 1 14ns 24ns ./mprotect -t 2 13ns 22ns ./mprotect -t 4 12ns 19ns ./mprotect -t 8 14ns 16ns ./mprotect -t 16 28ns 26ns ./mprotect -t 32 54ns 51ns ./mprotect -t 128 200ns 199ns Single process with sequential flushing and memory accessing: with patch unpatched 3.4-rc4 ./mprotect 7ns 11ns ./mprotect -p 4096 -l 8 -n 10240 21ns 21ns [ hpa: http://lkml.kernel.org/r/1B4B44D9196EFF41AE41FDA404FC0A100BFF94@SHSMSX101.ccr.corp.intel.com has additional performance numbers. ] Signed-off-by: Alex Shi <alex.shi@intel.com> Link: http://lkml.kernel.org/r/1340845344-27557-3-git-send-email-alex.shi@intel.com Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r--arch/x86/include/asm/paravirt.h5
-rw-r--r--arch/x86/include/asm/paravirt_types.h3
-rw-r--r--arch/x86/include/asm/tlbflush.h23
-rw-r--r--arch/x86/include/asm/uv/uv.h5
-rw-r--r--arch/x86/mm/tlb.c97
-rw-r--r--arch/x86/platform/uv/tlb_uv.c6
-rw-r--r--arch/x86/xen/mmu.c12
-rw-r--r--include/trace/events/xen.h12
8 files changed, 114 insertions, 49 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6cbbabf52707..7e2c2a635737 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -397,9 +397,10 @@ static inline void __flush_tlb_single(unsigned long addr)
397 397
398static inline void flush_tlb_others(const struct cpumask *cpumask, 398static inline void flush_tlb_others(const struct cpumask *cpumask,
399 struct mm_struct *mm, 399 struct mm_struct *mm,
400 unsigned long va) 400 unsigned long start,
401 unsigned long end)
401{ 402{
402 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); 403 PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end);
403} 404}
404 405
405static inline int paravirt_pgd_alloc(struct mm_struct *mm) 406static inline int paravirt_pgd_alloc(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8e8b9a4987ee..600a5fcac9cd 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -250,7 +250,8 @@ struct pv_mmu_ops {
250 void (*flush_tlb_single)(unsigned long addr); 250 void (*flush_tlb_single)(unsigned long addr);
251 void (*flush_tlb_others)(const struct cpumask *cpus, 251 void (*flush_tlb_others)(const struct cpumask *cpus,
252 struct mm_struct *mm, 252 struct mm_struct *mm,
253 unsigned long va); 253 unsigned long start,
254 unsigned long end);
254 255
255 /* Hooks for allocating and freeing a pagetable top-level */ 256 /* Hooks for allocating and freeing a pagetable top-level */
256 int (*pgd_alloc)(struct mm_struct *mm); 257 int (*pgd_alloc)(struct mm_struct *mm);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 36a1a2ab87d2..33608d96d68b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -73,14 +73,10 @@ static inline void __flush_tlb_one(unsigned long addr)
73 * - flush_tlb_page(vma, vmaddr) flushes one page 73 * - flush_tlb_page(vma, vmaddr) flushes one page
74 * - flush_tlb_range(vma, start, end) flushes a range of pages 74 * - flush_tlb_range(vma, start, end) flushes a range of pages
75 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 75 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
76 * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus 76 * - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus
77 * 77 *
78 * ..but the i386 has somewhat limited tlb flushing capabilities, 78 * ..but the i386 has somewhat limited tlb flushing capabilities,
79 * and page-granular flushes are available only on i486 and up. 79 * and page-granular flushes are available only on i486 and up.
80 *
81 * x86-64 can only flush individual pages or full VMs. For a range flush
82 * we always do the full VM. Might be worth trying if for a small
83 * range a few INVLPGs in a row are a win.
84 */ 80 */
85 81
86#ifndef CONFIG_SMP 82#ifndef CONFIG_SMP
@@ -111,7 +107,8 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
111 107
112static inline void native_flush_tlb_others(const struct cpumask *cpumask, 108static inline void native_flush_tlb_others(const struct cpumask *cpumask,
113 struct mm_struct *mm, 109 struct mm_struct *mm,
114 unsigned long va) 110 unsigned long start,
111 unsigned long end)
115{ 112{
116} 113}
117 114
@@ -129,17 +126,14 @@ extern void flush_tlb_all(void);
129extern void flush_tlb_current_task(void); 126extern void flush_tlb_current_task(void);
130extern void flush_tlb_mm(struct mm_struct *); 127extern void flush_tlb_mm(struct mm_struct *);
131extern void flush_tlb_page(struct vm_area_struct *, unsigned long); 128extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
129extern void flush_tlb_range(struct vm_area_struct *vma,
130 unsigned long start, unsigned long end);
132 131
133#define flush_tlb() flush_tlb_current_task() 132#define flush_tlb() flush_tlb_current_task()
134 133
135static inline void flush_tlb_range(struct vm_area_struct *vma,
136 unsigned long start, unsigned long end)
137{
138 flush_tlb_mm(vma->vm_mm);
139}
140
141void native_flush_tlb_others(const struct cpumask *cpumask, 134void native_flush_tlb_others(const struct cpumask *cpumask,
142 struct mm_struct *mm, unsigned long va); 135 struct mm_struct *mm,
136 unsigned long start, unsigned long end);
143 137
144#define TLBSTATE_OK 1 138#define TLBSTATE_OK 1
145#define TLBSTATE_LAZY 2 139#define TLBSTATE_LAZY 2
@@ -159,7 +153,8 @@ static inline void reset_lazy_tlbstate(void)
159#endif /* SMP */ 153#endif /* SMP */
160 154
161#ifndef CONFIG_PARAVIRT 155#ifndef CONFIG_PARAVIRT
162#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) 156#define flush_tlb_others(mask, mm, start, end) \
157 native_flush_tlb_others(mask, mm, start, end)
163#endif 158#endif
164 159
165static inline void flush_tlb_kernel_range(unsigned long start, 160static inline void flush_tlb_kernel_range(unsigned long start,
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 3bb9491b7659..b47c2a82ff15 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -15,7 +15,8 @@ extern void uv_nmi_init(void);
15extern void uv_system_init(void); 15extern void uv_system_init(void);
16extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, 16extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
17 struct mm_struct *mm, 17 struct mm_struct *mm,
18 unsigned long va, 18 unsigned long start,
19 unsigned end,
19 unsigned int cpu); 20 unsigned int cpu);
20 21
21#else /* X86_UV */ 22#else /* X86_UV */
@@ -26,7 +27,7 @@ static inline void uv_cpu_init(void) { }
26static inline void uv_system_init(void) { } 27static inline void uv_system_init(void) { }
27static inline const struct cpumask * 28static inline const struct cpumask *
28uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, 29uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
29 unsigned long va, unsigned int cpu) 30 unsigned long start, unsigned long end, unsigned int cpu)
30{ return cpumask; } 31{ return cpumask; }
31 32
32#endif /* X86_UV */ 33#endif /* X86_UV */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5e57e113b72c..3b91c981a27f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -41,7 +41,8 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
41union smp_flush_state { 41union smp_flush_state {
42 struct { 42 struct {
43 struct mm_struct *flush_mm; 43 struct mm_struct *flush_mm;
44 unsigned long flush_va; 44 unsigned long flush_start;
45 unsigned long flush_end;
45 raw_spinlock_t tlbstate_lock; 46 raw_spinlock_t tlbstate_lock;
46 DECLARE_BITMAP(flush_cpumask, NR_CPUS); 47 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
47 }; 48 };
@@ -156,10 +157,19 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
156 157
157 if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) { 158 if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) {
158 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { 159 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
159 if (f->flush_va == TLB_FLUSH_ALL) 160 if (f->flush_end == TLB_FLUSH_ALL
161 || !cpu_has_invlpg)
160 local_flush_tlb(); 162 local_flush_tlb();
161 else 163 else if (!f->flush_end)
162 __flush_tlb_one(f->flush_va); 164 __flush_tlb_single(f->flush_start);
165 else {
166 unsigned long addr;
167 addr = f->flush_start;
168 while (addr < f->flush_end) {
169 __flush_tlb_single(addr);
170 addr += PAGE_SIZE;
171 }
172 }
163 } else 173 } else
164 leave_mm(cpu); 174 leave_mm(cpu);
165 } 175 }
@@ -172,7 +182,8 @@ out:
172} 182}
173 183
174static void flush_tlb_others_ipi(const struct cpumask *cpumask, 184static void flush_tlb_others_ipi(const struct cpumask *cpumask,
175 struct mm_struct *mm, unsigned long va) 185 struct mm_struct *mm, unsigned long start,
186 unsigned long end)
176{ 187{
177 unsigned int sender; 188 unsigned int sender;
178 union smp_flush_state *f; 189 union smp_flush_state *f;
@@ -185,7 +196,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
185 raw_spin_lock(&f->tlbstate_lock); 196 raw_spin_lock(&f->tlbstate_lock);
186 197
187 f->flush_mm = mm; 198 f->flush_mm = mm;
188 f->flush_va = va; 199 f->flush_start = start;
200 f->flush_end = end;
189 if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) { 201 if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) {
190 /* 202 /*
191 * We have to send the IPI only to 203 * We have to send the IPI only to
@@ -199,24 +211,26 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
199 } 211 }
200 212
201 f->flush_mm = NULL; 213 f->flush_mm = NULL;
202 f->flush_va = 0; 214 f->flush_start = 0;
215 f->flush_end = 0;
203 if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) 216 if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
204 raw_spin_unlock(&f->tlbstate_lock); 217 raw_spin_unlock(&f->tlbstate_lock);
205} 218}
206 219
207void native_flush_tlb_others(const struct cpumask *cpumask, 220void native_flush_tlb_others(const struct cpumask *cpumask,
208 struct mm_struct *mm, unsigned long va) 221 struct mm_struct *mm, unsigned long start,
222 unsigned long end)
209{ 223{
210 if (is_uv_system()) { 224 if (is_uv_system()) {
211 unsigned int cpu; 225 unsigned int cpu;
212 226
213 cpu = smp_processor_id(); 227 cpu = smp_processor_id();
214 cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); 228 cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
215 if (cpumask) 229 if (cpumask)
216 flush_tlb_others_ipi(cpumask, mm, va); 230 flush_tlb_others_ipi(cpumask, mm, start, end);
217 return; 231 return;
218 } 232 }
219 flush_tlb_others_ipi(cpumask, mm, va); 233 flush_tlb_others_ipi(cpumask, mm, start, end);
220} 234}
221 235
222static void __cpuinit calculate_tlb_offset(void) 236static void __cpuinit calculate_tlb_offset(void)
@@ -282,7 +296,7 @@ void flush_tlb_current_task(void)
282 296
283 local_flush_tlb(); 297 local_flush_tlb();
284 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) 298 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
285 flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); 299 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
286 preempt_enable(); 300 preempt_enable();
287} 301}
288 302
@@ -297,12 +311,63 @@ void flush_tlb_mm(struct mm_struct *mm)
297 leave_mm(smp_processor_id()); 311 leave_mm(smp_processor_id());
298 } 312 }
299 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) 313 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
300 flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); 314 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
315
316 preempt_enable();
317}
318
319#define FLUSHALL_BAR 16
320
321void flush_tlb_range(struct vm_area_struct *vma,
322 unsigned long start, unsigned long end)
323{
324 struct mm_struct *mm;
325
326 if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB) {
327 flush_tlb_mm(vma->vm_mm);
328 return;
329 }
330
331 preempt_disable();
332 mm = vma->vm_mm;
333 if (current->active_mm == mm) {
334 if (current->mm) {
335 unsigned long addr, vmflag = vma->vm_flags;
336 unsigned act_entries, tlb_entries = 0;
337
338 if (vmflag & VM_EXEC)
339 tlb_entries = tlb_lli_4k[ENTRIES];
340 else
341 tlb_entries = tlb_lld_4k[ENTRIES];
342
343 act_entries = tlb_entries > mm->total_vm ?
344 mm->total_vm : tlb_entries;
301 345
346 if ((end - start)/PAGE_SIZE > act_entries/FLUSHALL_BAR)
347 local_flush_tlb();
348 else {
349 for (addr = start; addr < end;
350 addr += PAGE_SIZE)
351 __flush_tlb_single(addr);
352
353 if (cpumask_any_but(mm_cpumask(mm),
354 smp_processor_id()) < nr_cpu_ids)
355 flush_tlb_others(mm_cpumask(mm), mm,
356 start, end);
357 preempt_enable();
358 return;
359 }
360 } else {
361 leave_mm(smp_processor_id());
362 }
363 }
364 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
365 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
302 preempt_enable(); 366 preempt_enable();
303} 367}
304 368
305void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) 369
370void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
306{ 371{
307 struct mm_struct *mm = vma->vm_mm; 372 struct mm_struct *mm = vma->vm_mm;
308 373
@@ -310,13 +375,13 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
310 375
311 if (current->active_mm == mm) { 376 if (current->active_mm == mm) {
312 if (current->mm) 377 if (current->mm)
313 __flush_tlb_one(va); 378 __flush_tlb_one(start);
314 else 379 else
315 leave_mm(smp_processor_id()); 380 leave_mm(smp_processor_id());
316 } 381 }
317 382
318 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) 383 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
319 flush_tlb_others(mm_cpumask(mm), mm, va); 384 flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
320 385
321 preempt_enable(); 386 preempt_enable();
322} 387}
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 59880afa851f..f1bef8e1d633 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1068,8 +1068,8 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
1068 * done. The returned pointer is valid till preemption is re-enabled. 1068 * done. The returned pointer is valid till preemption is re-enabled.
1069 */ 1069 */
1070const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, 1070const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
1071 struct mm_struct *mm, unsigned long va, 1071 struct mm_struct *mm, unsigned long start,
1072 unsigned int cpu) 1072 unsigned end, unsigned int cpu)
1073{ 1073{
1074 int locals = 0; 1074 int locals = 0;
1075 int remotes = 0; 1075 int remotes = 0;
@@ -1112,7 +1112,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
1112 1112
1113 record_send_statistics(stat, locals, hubs, remotes, bau_desc); 1113 record_send_statistics(stat, locals, hubs, remotes, bau_desc);
1114 1114
1115 bau_desc->payload.address = va; 1115 bau_desc->payload.address = start;
1116 bau_desc->payload.sending_cpu = cpu; 1116 bau_desc->payload.sending_cpu = cpu;
1117 /* 1117 /*
1118 * uv_flush_send_and_wait returns 0 if all cpu's were messaged, 1118 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3a73785631ce..39ed56789f68 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1244,7 +1244,8 @@ static void xen_flush_tlb_single(unsigned long addr)
1244} 1244}
1245 1245
1246static void xen_flush_tlb_others(const struct cpumask *cpus, 1246static void xen_flush_tlb_others(const struct cpumask *cpus,
1247 struct mm_struct *mm, unsigned long va) 1247 struct mm_struct *mm, unsigned long start,
1248 unsigned long end)
1248{ 1249{
1249 struct { 1250 struct {
1250 struct mmuext_op op; 1251 struct mmuext_op op;
@@ -1256,7 +1257,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
1256 } *args; 1257 } *args;
1257 struct multicall_space mcs; 1258 struct multicall_space mcs;
1258 1259
1259 trace_xen_mmu_flush_tlb_others(cpus, mm, va); 1260 trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
1260 1261
1261 if (cpumask_empty(cpus)) 1262 if (cpumask_empty(cpus))
1262 return; /* nothing to do */ 1263 return; /* nothing to do */
@@ -1269,11 +1270,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
1269 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); 1270 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
1270 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); 1271 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
1271 1272
1272 if (va == TLB_FLUSH_ALL) { 1273 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
1273 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; 1274 if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
1274 } else {
1275 args->op.cmd = MMUEXT_INVLPG_MULTI; 1275 args->op.cmd = MMUEXT_INVLPG_MULTI;
1276 args->op.arg1.linear_addr = va; 1276 args->op.arg1.linear_addr = start;
1277 } 1277 }
1278 1278
1279 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); 1279 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h
index 92f1a796829e..15ba03bdd7c6 100644
--- a/include/trace/events/xen.h
+++ b/include/trace/events/xen.h
@@ -397,18 +397,20 @@ TRACE_EVENT(xen_mmu_flush_tlb_single,
397 397
398TRACE_EVENT(xen_mmu_flush_tlb_others, 398TRACE_EVENT(xen_mmu_flush_tlb_others,
399 TP_PROTO(const struct cpumask *cpus, struct mm_struct *mm, 399 TP_PROTO(const struct cpumask *cpus, struct mm_struct *mm,
400 unsigned long addr), 400 unsigned long addr, unsigned long end),
401 TP_ARGS(cpus, mm, addr), 401 TP_ARGS(cpus, mm, addr, end),
402 TP_STRUCT__entry( 402 TP_STRUCT__entry(
403 __field(unsigned, ncpus) 403 __field(unsigned, ncpus)
404 __field(struct mm_struct *, mm) 404 __field(struct mm_struct *, mm)
405 __field(unsigned long, addr) 405 __field(unsigned long, addr)
406 __field(unsigned long, end)
406 ), 407 ),
407 TP_fast_assign(__entry->ncpus = cpumask_weight(cpus); 408 TP_fast_assign(__entry->ncpus = cpumask_weight(cpus);
408 __entry->mm = mm; 409 __entry->mm = mm;
409 __entry->addr = addr), 410 __entry->addr = addr,
410 TP_printk("ncpus %d mm %p addr %lx", 411 __entry->end = end),
411 __entry->ncpus, __entry->mm, __entry->addr) 412 TP_printk("ncpus %d mm %p addr %lx, end %lx",
413 __entry->ncpus, __entry->mm, __entry->addr, __entry->end)
412 ); 414 );
413 415
414TRACE_EVENT(xen_mmu_write_cr3, 416TRACE_EVENT(xen_mmu_write_cr3,