4 files changed, 92 insertions, 76 deletions
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 3af6de36a48..5add0bcf87a 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -461,6 +461,7 @@ setup_arch (char **cmdline_p)
 #endif
        cpu_init();     /* initialize the bootstrap CPU */
+        mmu_context_init();     /* initialize context_id bitmap */
 #ifdef CONFIG_ACPI
        acpi_boot_init();
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index c79a9b96d02..41105d45442 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
 *              Modified RID allocation for SMP
 *          Goutham Rao <goutham.rao@intel.com>
 *              IPI based ptc implementation and A-step IPI implementation.
+ * Rohit Seth <rohit.seth@intel.com>
+ * Ken Chen <kenneth.w.chen@intel.com>
 */
 #include <linux/config.h>
 #include <linux/module.h>
@@ -16,78 +18,75 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/bootmem.h>
 #include <asm/delay.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
+#include <asm/dma.h>
 static struct {
        unsigned long mask;     /* mask of supported purge page-sizes */
-        unsigned long max_bits; /* log2() of largest supported purge page-size */
+        unsigned long max_bits; /* log2 of largest supported purge page-size */
 } purge;
 struct ia64_ctx ia64_ctx = {
        .lock =         SPIN_LOCK_UNLOCKED,
        .next =         1,
-        .limit =        (1 << 15) - 1,          /* start out with the safe (architected) limit */
        .max_ctx =      ~0U
 };
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
 /*
+ * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
+ * Called after cpu_init() has setup ia64_ctx.max_ctx based on
+ * maximum RID that is supported by boot CPU.
+ */
+void __init
+mmu_context_init (void)
+{
+        ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+        ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+}
+/*
 * Acquire the ia64_ctx.lock before calling this function!
 */
 void
 wrap_mmu_context (struct mm_struct *mm)
 {
-        unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
+        int i, cpu;
-        struct task_struct *tsk;
+        unsigned long flush_bit;
-        int i;
-        if (ia64_ctx.next > max_ctx)
+        for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
-                ia64_ctx.next = 300;    /* skip daemons */
+                flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
-        ia64_ctx.limit = max_ctx + 1;
+                ia64_ctx.bitmap[i] ^= flush_bit;
+        }
+ 
+        /* use offset at 300 to skip daemons */
+        ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+                                ia64_ctx.max_ctx, 300);
+        ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+                                ia64_ctx.max_ctx, ia64_ctx.next);
        /*
-         * Scan all the task's mm->context and set proper safe range
+         * can't call flush_tlb_all() here because of race condition
+         * with O(1) scheduler [EF]
         */
+        cpu = get_cpu(); /* prevent preemption/migration */
-        read_lock(&tasklist_lock);
+        for_each_online_cpu(i)
-  repeat:
+                if (i != cpu)
-        for_each_process(tsk) {
+                        per_cpu(ia64_need_tlb_flush, i) = 1;
-                if (!tsk->mm)
+        put_cpu();
-                        continue;
-                tsk_context = tsk->mm->context;
-                if (tsk_context == ia64_ctx.next) {
-                        if (++ia64_ctx.next >= ia64_ctx.limit) {
-                                /* empty range: reset the range limit and start over */
-                                if (ia64_ctx.next > max_ctx)
-                                        ia64_ctx.next = 300;
-                                ia64_ctx.limit = max_ctx + 1;
-                                goto repeat;
-                        }
-                }
-                if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
-                        ia64_ctx.limit = tsk_context;
-        }
-        read_unlock(&tasklist_lock);
-        /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
-        {
-                int cpu = get_cpu(); /* prevent preemption/migration */
-                for_each_online_cpu(i) {
-                        if (i != cpu)
-                                per_cpu(ia64_need_tlb_flush, i) = 1;
-                }
-                put_cpu();
-        }
        local_flush_tlb_all();
 }
 void
-ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits)
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
+                       unsigned long end, unsigned long nbits)
 {
        static DEFINE_SPINLOCK(ptcg_lock);
@@ -135,7 +134,8 @@ local_flush_tlb_all (void)
 }
 void
-flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
+flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+                 unsigned long end)
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long size = end - start;
@@ -149,7 +149,8 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
 #endif
        nbits = ia64_fls(size + 0xfff);
-        while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
+        while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
+                        (nbits < purge.max_bits))
                ++nbits;
        if (nbits > purge.max_bits)
                nbits = purge.max_bits;
@@ -191,5 +192,5 @@ ia64_tlb_init (void)
        local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
        local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
-        local_flush_tlb_all();          /* nuke left overs from bootstrapping... */
+        local_flush_tlb_all();  /* nuke left overs from bootstrapping... */
 }
diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
index 8d6e72f7b08..b5c65081a3a 100644
--- a/include/asm-ia64/mmu_context.h
+++ b/include/asm-ia64/mmu_context.h
@@ -7,12 +7,13 @@
 */
 /*
- * Routines to manage the allocation of task context numbers.  Task context numbers are
+ * Routines to manage the allocation of task context numbers.  Task context
- * used to reduce or eliminate the need to perform TLB flushes due to context switches.
+ * numbers are used to reduce or eliminate the need to perform TLB flushes
- * Context numbers are implemented using ia-64 region ids.  Since the IA-64 TLB does not
+ * due to context switches.  Context numbers are implemented using ia-64
- * consider the region number when performing a TLB lookup, we need to assign a unique
+ * region ids.  Since the IA-64 TLB does not consider the region number when
- * region id to each region in a process.  We use the least significant three bits in a
+ * performing a TLB lookup, we need to assign a unique region id to each
- * region id for this purpose.
+ * region in a process.  We use the least significant three bits in a region
+ * id for this purpose.
 */
 #define IA64_REGION_ID_KERNEL   0 /* the kernel's region id (tlb.c depends on this being 0) */
@@ -32,13 +33,17 @@
 struct ia64_ctx {
        spinlock_t lock;
        unsigned int next;      /* next context number to use */
-        unsigned int limit;     /* next >= limit => must call wrap_mmu_context() */
+        unsigned int limit;     /* available free range */
-        unsigned int max_ctx;   /* max. context value supported by all CPUs */
+        unsigned int max_ctx;   /* max. context value supported by all CPUs */
+                                /* call wrap_mmu_context when next >= max */
+        unsigned long *bitmap;  /* bitmap size is max_ctx+1 */
+        unsigned long *flushmap;/* pending rid to be flushed */
 };
 extern struct ia64_ctx ia64_ctx;
 DECLARE_PER_CPU(u8, ia64_need_tlb_flush);
+extern void mmu_context_init (void);
 extern void wrap_mmu_context (struct mm_struct *mm);
 static inline void
@@ -47,10 +52,10 @@ enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk)
 }
 /*
- * When the context counter wraps around all TLBs need to be flushed because an old
+ * When the context counter wraps around all TLBs need to be flushed because
- * context number might have been reused. This is signalled by the ia64_need_tlb_flush
+ * an old context number might have been reused. This is signalled by the
- * per-CPU variable, which is checked in the routine below. Called by activate_mm().
+ * ia64_need_tlb_flush per-CPU variable, which is checked in the routine
- * <efocht@ess.nec.de>
+ * below. Called by activate_mm(). <efocht@ess.nec.de>
 */
 static inline void
 delayed_tlb_flush (void)
@@ -60,11 +65,9 @@ delayed_tlb_flush (void)
        if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
                spin_lock_irqsave(&ia64_ctx.lock, flags);
-                {
+                if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
-                        if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
+                        local_flush_tlb_all();
-                                local_flush_tlb_all();
+                        __ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
-                                __ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
-                        }
                }
                spin_unlock_irqrestore(&ia64_ctx.lock, flags);
        }
@@ -76,20 +79,27 @@ get_mmu_context (struct mm_struct *mm)
        unsigned long flags;
        nv_mm_context_t context = mm->context;
-        if (unlikely(!context)) {
+        if (likely(context))
-                spin_lock_irqsave(&ia64_ctx.lock, flags);
+                goto out;
-                {
-                        /* re-check, now that we've got the lock: */
+        spin_lock_irqsave(&ia64_ctx.lock, flags);
-                        context = mm->context;
+        /* re-check, now that we've got the lock: */
-                        if (context == 0) {
+        context = mm->context;
-                                cpus_clear(mm->cpu_vm_mask);
+        if (context == 0) {
-                                if (ia64_ctx.next >= ia64_ctx.limit)
+                cpus_clear(mm->cpu_vm_mask);
-                                        wrap_mmu_context(mm);
+                if (ia64_ctx.next >= ia64_ctx.limit) {
-                                mm->context = context = ia64_ctx.next++;
+                        ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
-                        }
+                                        ia64_ctx.max_ctx, ia64_ctx.next);
+                        ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+                                        ia64_ctx.max_ctx, ia64_ctx.next);
+                        if (ia64_ctx.next >= ia64_ctx.max_ctx)
+                                wrap_mmu_context(mm);
                }
-                spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+                mm->context = context = ia64_ctx.next++;
+                __set_bit(context, ia64_ctx.bitmap);
        }
+        spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+out:
        /*
         * Ensure we're not starting to use "context" before any old
         * uses of it are gone from our TLB.
@@ -100,8 +110,8 @@ get_mmu_context (struct mm_struct *mm)
 }
 /*
- * Initialize context number to some sane value.  MM is guaranteed to be a brand-new
+ * Initialize context number to some sane value.  MM is guaranteed to be a
- * address-space, so no TLB flushing is needed, ever.
+ * brand-new address-space, so no TLB flushing is needed, ever.
 */
 static inline int
 init_new_context (struct task_struct *p, struct mm_struct *mm)
@@ -162,7 +172,10 @@ activate_context (struct mm_struct *mm)
                if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
                        cpu_set(smp_processor_id(), mm->cpu_vm_mask);
                reload_context(context);
-                /* in the unlikely event of a TLB-flush by another thread, redo the load: */
+                /*
+                 * in the unlikely event of a TLB-flush by another thread,
+                 * redo the load.
+                 */
        } while (unlikely(context != mm->context));
 }
@@ -175,8 +188,8 @@ static inline void
 activate_mm (struct mm_struct *prev, struct mm_struct *next)
 {
        /*
-         * We may get interrupts here, but that's OK because interrupt handlers cannot
+         * We may get interrupts here, but that's OK because interrupt
-         * touch user-space.
+         * handlers cannot touch user-space.
         */
        ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));
        activate_context(next);
diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h
index b65c6270272..a35b323bae4 100644
--- a/include/asm-ia64/tlbflush.h
+++ b/include/asm-ia64/tlbflush.h
@@ -51,6 +51,7 @@ flush_tlb_mm (struct mm_struct *mm)
        if (!mm)
                return;
+        set_bit(mm->context, ia64_ctx.flushmap);
        mm->context = 0;
        if (atomic_read(&mm->mm_users) == 0)

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 3af6de36a48..5add0bcf87a 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c
@@ -461,6 +461,7 @@ setup_arch (char **cmdline_p)
461	#endif	461	#endif
462		462
463	cpu_init(); /* initialize the bootstrap CPU */	463	cpu_init(); /* initialize the bootstrap CPU */
		464	mmu_context_init(); /* initialize context_id bitmap */
464		465
465	#ifdef CONFIG_ACPI	466	#ifdef CONFIG_ACPI
466	acpi_boot_init();	467	acpi_boot_init();


diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index c79a9b96d02..41105d45442 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
8	* Modified RID allocation for SMP	8	* Modified RID allocation for SMP
9	* Goutham Rao <goutham.rao@intel.com>	9	* Goutham Rao <goutham.rao@intel.com>
10	* IPI based ptc implementation and A-step IPI implementation.	10	* IPI based ptc implementation and A-step IPI implementation.
		11	* Rohit Seth <rohit.seth@intel.com>
		12	* Ken Chen <kenneth.w.chen@intel.com>
11	*/	13	*/
12	#include <linux/config.h>	14	#include <linux/config.h>
13	#include <linux/module.h>	15	#include <linux/module.h>
@@ -16,78 +18,75 @@
16	#include <linux/sched.h>	18	#include <linux/sched.h>
17	#include <linux/smp.h>	19	#include <linux/smp.h>
18	#include <linux/mm.h>	20	#include <linux/mm.h>
		21	#include <linux/bootmem.h>
19		22
20	#include <asm/delay.h>	23	#include <asm/delay.h>
21	#include <asm/mmu_context.h>	24	#include <asm/mmu_context.h>
22	#include <asm/pgalloc.h>	25	#include <asm/pgalloc.h>
23	#include <asm/pal.h>	26	#include <asm/pal.h>
24	#include <asm/tlbflush.h>	27	#include <asm/tlbflush.h>
		28	#include <asm/dma.h>
25		29
26	static struct {	30	static struct {
27	unsigned long mask; /* mask of supported purge page-sizes */	31	unsigned long mask; /* mask of supported purge page-sizes */
28	unsigned long max_bits; /* log2() of largest supported purge page-size */	32	unsigned long max_bits; /* log2 of largest supported purge page-size */
29	} purge;	33	} purge;
30		34
31	struct ia64_ctx ia64_ctx = {	35	struct ia64_ctx ia64_ctx = {
32	.lock = SPIN_LOCK_UNLOCKED,	36	.lock = SPIN_LOCK_UNLOCKED,
33	.next = 1,	37	.next = 1,
34	.limit = (1 << 15) - 1, /* start out with the safe (architected) limit */
35	.max_ctx = ~0U	38	.max_ctx = ~0U
36	};	39	};
37		40
38	DEFINE_PER_CPU(u8, ia64_need_tlb_flush);	41	DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
39		42
40	/*	43	/*
		44	* Initializes the ia64_ctx.bitmap array based on max_ctx+1.
		45	* Called after cpu_init() has setup ia64_ctx.max_ctx based on
		46	* maximum RID that is supported by boot CPU.
		47	*/
		48	void __init
		49	mmu_context_init (void)
		50	{
		51	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
		52	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
		53	}
		54
		55	/*
41	* Acquire the ia64_ctx.lock before calling this function!	56	* Acquire the ia64_ctx.lock before calling this function!
42	*/	57	*/
43	void	58	void
44	wrap_mmu_context (struct mm_struct *mm)	59	wrap_mmu_context (struct mm_struct *mm)
45	{	60	{
46	unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;	61	int i, cpu;
47	struct task_struct *tsk;	62	unsigned long flush_bit;
48	int i;
49		63
50	if (ia64_ctx.next > max_ctx)	64	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
51	ia64_ctx.next = 300; /* skip daemons */	65	flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
52	ia64_ctx.limit = max_ctx + 1;	66	ia64_ctx.bitmap[i] ^= flush_bit;
		67	}
		68
		69	/* use offset at 300 to skip daemons */
		70	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
		71	ia64_ctx.max_ctx, 300);
		72	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
		73	ia64_ctx.max_ctx, ia64_ctx.next);
53		74
54	/*	75	/*
55	* Scan all the task's mm->context and set proper safe range	76	* can't call flush_tlb_all() here because of race condition
		77	* with O(1) scheduler [EF]
56	*/	78	*/
57		79	cpu = get_cpu(); /* prevent preemption/migration */
58	read_lock(&tasklist_lock);	80	for_each_online_cpu(i)
59	repeat:	81	if (i != cpu)
60	for_each_process(tsk) {	82	per_cpu(ia64_need_tlb_flush, i) = 1;
61	if (!tsk->mm)	83	put_cpu();
62	continue;
63	tsk_context = tsk->mm->context;
64	if (tsk_context == ia64_ctx.next) {
65	if (++ia64_ctx.next >= ia64_ctx.limit) {
66	/* empty range: reset the range limit and start over */
67	if (ia64_ctx.next > max_ctx)
68	ia64_ctx.next = 300;
69	ia64_ctx.limit = max_ctx + 1;
70	goto repeat;
71	}
72	}
73	if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
74	ia64_ctx.limit = tsk_context;
75	}
76	read_unlock(&tasklist_lock);
77	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
78	{
79	int cpu = get_cpu(); /* prevent preemption/migration */
80	for_each_online_cpu(i) {
81	if (i != cpu)
82	per_cpu(ia64_need_tlb_flush, i) = 1;
83	}
84	put_cpu();
85	}
86	local_flush_tlb_all();	84	local_flush_tlb_all();
87	}	85	}
88		86
89	void	87	void
90	ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits)	88	ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
		89	unsigned long end, unsigned long nbits)
91	{	90	{
92	static DEFINE_SPINLOCK(ptcg_lock);	91	static DEFINE_SPINLOCK(ptcg_lock);
93		92
@@ -135,7 +134,8 @@ local_flush_tlb_all (void)
135	}	134	}
136		135
137	void	136	void
138	flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)	137	flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
		138	unsigned long end)
139	{	139	{
140	struct mm_struct *mm = vma->vm_mm;	140	struct mm_struct *mm = vma->vm_mm;
141	unsigned long size = end - start;	141	unsigned long size = end - start;
@@ -149,7 +149,8 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
149	#endif	149	#endif
150		150
151	nbits = ia64_fls(size + 0xfff);	151	nbits = ia64_fls(size + 0xfff);
152	while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))	152	while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
		153	(nbits < purge.max_bits))
153	++nbits;	154	++nbits;
154	if (nbits > purge.max_bits)	155	if (nbits > purge.max_bits)
155	nbits = purge.max_bits;	156	nbits = purge.max_bits;
@@ -191,5 +192,5 @@ ia64_tlb_init (void)
191	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];	192	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
192	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];	193	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
193		194
194	local_flush_tlb_all(); /* nuke left overs from bootstrapping... */	195	local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
195	}	196	}


diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index 8d6e72f7b08..b5c65081a3a 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h
@@ -7,12 +7,13 @@
7	*/	7	*/
8		8
9	/*	9	/*
10	* Routines to manage the allocation of task context numbers. Task context numbers are	10	* Routines to manage the allocation of task context numbers. Task context
11	* used to reduce or eliminate the need to perform TLB flushes due to context switches.	11	* numbers are used to reduce or eliminate the need to perform TLB flushes
12	* Context numbers are implemented using ia-64 region ids. Since the IA-64 TLB does not	12	* due to context switches. Context numbers are implemented using ia-64
13	* consider the region number when performing a TLB lookup, we need to assign a unique	13	* region ids. Since the IA-64 TLB does not consider the region number when
14	* region id to each region in a process. We use the least significant three bits in a	14	* performing a TLB lookup, we need to assign a unique region id to each
15	* region id for this purpose.	15	* region in a process. We use the least significant three bits in a region
		16	* id for this purpose.
16	*/	17	*/
17		18
18	#define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */	19	#define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */
@@ -32,13 +33,17 @@
32	struct ia64_ctx {	33	struct ia64_ctx {
33	spinlock_t lock;	34	spinlock_t lock;
34	unsigned int next; /* next context number to use */	35	unsigned int next; /* next context number to use */
35	unsigned int limit; /* next >= limit => must call wrap_mmu_context() */	36	unsigned int limit; /* available free range */
36	unsigned int max_ctx; /* max. context value supported by all CPUs */	37	unsigned int max_ctx; /* max. context value supported by all CPUs */
		38	/* call wrap_mmu_context when next >= max */
		39	unsigned long *bitmap; /* bitmap size is max_ctx+1 */
		40	unsigned long *flushmap;/* pending rid to be flushed */
37	};	41	};
38		42
39	extern struct ia64_ctx ia64_ctx;	43	extern struct ia64_ctx ia64_ctx;
40	DECLARE_PER_CPU(u8, ia64_need_tlb_flush);	44	DECLARE_PER_CPU(u8, ia64_need_tlb_flush);
41		45
		46	extern void mmu_context_init (void);
42	extern void wrap_mmu_context (struct mm_struct *mm);	47	extern void wrap_mmu_context (struct mm_struct *mm);
43		48
44	static inline void	49	static inline void
@@ -47,10 +52,10 @@ enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk)
47	}	52	}
48		53
49	/*	54	/*
50	* When the context counter wraps around all TLBs need to be flushed because an old	55	* When the context counter wraps around all TLBs need to be flushed because
51	* context number might have been reused. This is signalled by the ia64_need_tlb_flush	56	* an old context number might have been reused. This is signalled by the
52	* per-CPU variable, which is checked in the routine below. Called by activate_mm().	57	* ia64_need_tlb_flush per-CPU variable, which is checked in the routine
53	* <efocht@ess.nec.de>	58	* below. Called by activate_mm(). <efocht@ess.nec.de>
54	*/	59	*/
55	static inline void	60	static inline void
56	delayed_tlb_flush (void)	61	delayed_tlb_flush (void)
@@ -60,11 +65,9 @@ delayed_tlb_flush (void)
60		65
61	if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {	66	if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
62	spin_lock_irqsave(&ia64_ctx.lock, flags);	67	spin_lock_irqsave(&ia64_ctx.lock, flags);
63	{	68	if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
64	if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {	69	local_flush_tlb_all();
65	local_flush_tlb_all();	70	__ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
66	__ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
67	}
68	}	71	}
69	spin_unlock_irqrestore(&ia64_ctx.lock, flags);	72	spin_unlock_irqrestore(&ia64_ctx.lock, flags);
70	}	73	}
@@ -76,20 +79,27 @@ get_mmu_context (struct mm_struct *mm)
76	unsigned long flags;	79	unsigned long flags;
77	nv_mm_context_t context = mm->context;	80	nv_mm_context_t context = mm->context;
78		81
79	if (unlikely(!context)) {	82	if (likely(context))
80	spin_lock_irqsave(&ia64_ctx.lock, flags);	83	goto out;
81	{	84
82	/* re-check, now that we've got the lock: */	85	spin_lock_irqsave(&ia64_ctx.lock, flags);
83	context = mm->context;	86	/* re-check, now that we've got the lock: */
84	if (context == 0) {	87	context = mm->context;
85	cpus_clear(mm->cpu_vm_mask);	88	if (context == 0) {
86	if (ia64_ctx.next >= ia64_ctx.limit)	89	cpus_clear(mm->cpu_vm_mask);
87	wrap_mmu_context(mm);	90	if (ia64_ctx.next >= ia64_ctx.limit) {
88	mm->context = context = ia64_ctx.next++;	91	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
89	}	92	ia64_ctx.max_ctx, ia64_ctx.next);
		93	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
		94	ia64_ctx.max_ctx, ia64_ctx.next);
		95	if (ia64_ctx.next >= ia64_ctx.max_ctx)
		96	wrap_mmu_context(mm);
90	}	97	}
91	spin_unlock_irqrestore(&ia64_ctx.lock, flags);	98	mm->context = context = ia64_ctx.next++;
		99	__set_bit(context, ia64_ctx.bitmap);
92	}	100	}
		101	spin_unlock_irqrestore(&ia64_ctx.lock, flags);
		102	out:
93	/*	103	/*
94	* Ensure we're not starting to use "context" before any old	104	* Ensure we're not starting to use "context" before any old
95	* uses of it are gone from our TLB.	105	* uses of it are gone from our TLB.
@@ -100,8 +110,8 @@ get_mmu_context (struct mm_struct *mm)
100	}	110	}
101		111
102	/*	112	/*
103	* Initialize context number to some sane value. MM is guaranteed to be a brand-new	113	* Initialize context number to some sane value. MM is guaranteed to be a
104	* address-space, so no TLB flushing is needed, ever.	114	* brand-new address-space, so no TLB flushing is needed, ever.
105	*/	115	*/
106	static inline int	116	static inline int
107	init_new_context (struct task_struct *p, struct mm_struct *mm)	117	init_new_context (struct task_struct *p, struct mm_struct *mm)
@@ -162,7 +172,10 @@ activate_context (struct mm_struct *mm)
162	if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))	172	if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
163	cpu_set(smp_processor_id(), mm->cpu_vm_mask);	173	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
164	reload_context(context);	174	reload_context(context);
165	/* in the unlikely event of a TLB-flush by another thread, redo the load: */	175	/*
		176	* in the unlikely event of a TLB-flush by another thread,
		177	* redo the load.
		178	*/
166	} while (unlikely(context != mm->context));	179	} while (unlikely(context != mm->context));
167	}	180	}
168		181
@@ -175,8 +188,8 @@ static inline void
175	activate_mm (struct mm_struct *prev, struct mm_struct *next)	188	activate_mm (struct mm_struct *prev, struct mm_struct *next)
176	{	189	{
177	/*	190	/*
178	* We may get interrupts here, but that's OK because interrupt handlers cannot	191	* We may get interrupts here, but that's OK because interrupt
179	* touch user-space.	192	* handlers cannot touch user-space.
180	*/	193	*/
181	ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));	194	ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));
182	activate_context(next);	195	activate_context(next);


diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h index b65c6270272..a35b323bae4 100644 --- a/include/asm-ia64/tlbflush.h +++ b/include/asm-ia64/tlbflush.h
@@ -51,6 +51,7 @@ flush_tlb_mm (struct mm_struct *mm)
51	if (!mm)	51	if (!mm)
52	return;	52	return;
53		53
		54	set_bit(mm->context, ia64_ctx.flushmap);
54	mm->context = 0;	55	mm->context = 0;
55		56
56	if (atomic_read(&mm->mm_users) == 0)	57	if (atomic_read(&mm->mm_users) == 0)