1 files changed, 53 insertions, 47 deletions
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 464557e4ed82..41105d454423 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
 *              Modified RID allocation for SMP
 *          Goutham Rao <goutham.rao@intel.com>
 *              IPI based ptc implementation and A-step IPI implementation.
+ * Rohit Seth <rohit.seth@intel.com>
+ * Ken Chen <kenneth.w.chen@intel.com>
 */
 #include <linux/config.h>
 #include <linux/module.h>
@@ -16,80 +18,83 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/bootmem.h>
 #include <asm/delay.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
+#include <asm/dma.h>
 static struct {
        unsigned long mask;     /* mask of supported purge page-sizes */
-        unsigned long max_bits; /* log2() of largest supported purge page-size */
+        unsigned long max_bits; /* log2 of largest supported purge page-size */
 } purge;
 struct ia64_ctx ia64_ctx = {
        .lock =         SPIN_LOCK_UNLOCKED,
        .next =         1,
-        .limit =        (1 << 15) - 1,          /* start out with the safe (architected) limit */
        .max_ctx =      ~0U
 };
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
 /*
+ * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
+ * Called after cpu_init() has setup ia64_ctx.max_ctx based on
+ * maximum RID that is supported by boot CPU.
+ */
+void __init
+mmu_context_init (void)
+{
+        ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+        ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+}
+/*
 * Acquire the ia64_ctx.lock before calling this function!
 */
 void
 wrap_mmu_context (struct mm_struct *mm)
 {
-        unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
+        int i, cpu;
-        struct task_struct *tsk;
+        unsigned long flush_bit;
-        int i;
-        if (ia64_ctx.next > max_ctx)
+        for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
-                ia64_ctx.next = 300;    /* skip daemons */
+                flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
-        ia64_ctx.limit = max_ctx + 1;
+                ia64_ctx.bitmap[i] ^= flush_bit;
+        }
+ 
+        /* use offset at 300 to skip daemons */
+        ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+                                ia64_ctx.max_ctx, 300);
+        ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+                                ia64_ctx.max_ctx, ia64_ctx.next);
        /*
-         * Scan all the task's mm->context and set proper safe range
+         * can't call flush_tlb_all() here because of race condition
+         * with O(1) scheduler [EF]
         */
+        cpu = get_cpu(); /* prevent preemption/migration */
-        read_lock(&tasklist_lock);
+        for_each_online_cpu(i)
-  repeat:
+                if (i != cpu)
-        for_each_process(tsk) {
+                        per_cpu(ia64_need_tlb_flush, i) = 1;
-                if (!tsk->mm)
+        put_cpu();
-                        continue;
-                tsk_context = tsk->mm->context;
-                if (tsk_context == ia64_ctx.next) {
-                        if (++ia64_ctx.next >= ia64_ctx.limit) {
-                                /* empty range: reset the range limit and start over */
-                                if (ia64_ctx.next > max_ctx)
-                                        ia64_ctx.next = 300;
-                                ia64_ctx.limit = max_ctx + 1;
-                                goto repeat;
-                        }
-                }
-                if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
-                        ia64_ctx.limit = tsk_context;
-        }
-        read_unlock(&tasklist_lock);
-        /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
-        {
-                int cpu = get_cpu(); /* prevent preemption/migration */
-                for (i = 0; i < NR_CPUS; ++i)
-                        if (cpu_online(i) && (i != cpu))
-                                per_cpu(ia64_need_tlb_flush, i) = 1;
-                put_cpu();
-        }
        local_flush_tlb_all();
 }
 void
-ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
+                       unsigned long end, unsigned long nbits)
 {
        static DEFINE_SPINLOCK(ptcg_lock);
+        if (mm != current->active_mm) {
+                flush_tlb_all();
+                return;
+        }
        /* HW requires global serialization of ptc.ga.  */
        spin_lock(&ptcg_lock);
        {
@@ -129,36 +134,37 @@ local_flush_tlb_all (void)
 }
 void
-flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
+flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+                 unsigned long end)
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long size = end - start;
        unsigned long nbits;
+#ifndef CONFIG_SMP
        if (mm != current->active_mm) {
-                /* this does happen, but perhaps it's not worth optimizing for? */
-#ifdef CONFIG_SMP
-                flush_tlb_all();
-#else
                mm->context = 0;
-#endif
                return;
        }
+#endif
        nbits = ia64_fls(size + 0xfff);
-        while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
+        while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
+                        (nbits < purge.max_bits))
                ++nbits;
        if (nbits > purge.max_bits)
                nbits = purge.max_bits;
        start &= ~((1UL << nbits) - 1);
 # ifdef CONFIG_SMP
-        platform_global_tlb_purge(start, end, nbits);
+        platform_global_tlb_purge(mm, start, end, nbits);
 # else
+        preempt_disable();
        do {
                ia64_ptcl(start, (nbits<<2));
                start += (1UL << nbits);
        } while (start < end);
+        preempt_enable();
 # endif
        ia64_srlz_i();                  /* srlz.i implies srlz.d */
@@ -186,5 +192,5 @@ ia64_tlb_init (void)
        local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
        local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
-        local_flush_tlb_all();          /* nuke left overs from bootstrapping... */
+        local_flush_tlb_all();  /* nuke left overs from bootstrapping... */
 }

diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 464557e4ed82..41105d454423 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
8	* Modified RID allocation for SMP	8	* Modified RID allocation for SMP
9	* Goutham Rao <goutham.rao@intel.com>	9	* Goutham Rao <goutham.rao@intel.com>
10	* IPI based ptc implementation and A-step IPI implementation.	10	* IPI based ptc implementation and A-step IPI implementation.
		11	* Rohit Seth <rohit.seth@intel.com>
		12	* Ken Chen <kenneth.w.chen@intel.com>
11	*/	13	*/
12	#include <linux/config.h>	14	#include <linux/config.h>
13	#include <linux/module.h>	15	#include <linux/module.h>
@@ -16,80 +18,83 @@
16	#include <linux/sched.h>	18	#include <linux/sched.h>
17	#include <linux/smp.h>	19	#include <linux/smp.h>
18	#include <linux/mm.h>	20	#include <linux/mm.h>
		21	#include <linux/bootmem.h>
19		22
20	#include <asm/delay.h>	23	#include <asm/delay.h>
21	#include <asm/mmu_context.h>	24	#include <asm/mmu_context.h>
22	#include <asm/pgalloc.h>	25	#include <asm/pgalloc.h>
23	#include <asm/pal.h>	26	#include <asm/pal.h>
24	#include <asm/tlbflush.h>	27	#include <asm/tlbflush.h>
		28	#include <asm/dma.h>
25		29
26	static struct {	30	static struct {
27	unsigned long mask; /* mask of supported purge page-sizes */	31	unsigned long mask; /* mask of supported purge page-sizes */
28	unsigned long max_bits; /* log2() of largest supported purge page-size */	32	unsigned long max_bits; /* log2 of largest supported purge page-size */
29	} purge;	33	} purge;
30		34
31	struct ia64_ctx ia64_ctx = {	35	struct ia64_ctx ia64_ctx = {
32	.lock = SPIN_LOCK_UNLOCKED,	36	.lock = SPIN_LOCK_UNLOCKED,
33	.next = 1,	37	.next = 1,
34	.limit = (1 << 15) - 1, /* start out with the safe (architected) limit */
35	.max_ctx = ~0U	38	.max_ctx = ~0U
36	};	39	};
37		40
38	DEFINE_PER_CPU(u8, ia64_need_tlb_flush);	41	DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
39		42
40	/*	43	/*
		44	* Initializes the ia64_ctx.bitmap array based on max_ctx+1.
		45	* Called after cpu_init() has setup ia64_ctx.max_ctx based on
		46	* maximum RID that is supported by boot CPU.
		47	*/
		48	void __init
		49	mmu_context_init (void)
		50	{
		51	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
		52	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
		53	}
		54
		55	/*
41	* Acquire the ia64_ctx.lock before calling this function!	56	* Acquire the ia64_ctx.lock before calling this function!
42	*/	57	*/
43	void	58	void
44	wrap_mmu_context (struct mm_struct *mm)	59	wrap_mmu_context (struct mm_struct *mm)
45	{	60	{
46	unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;	61	int i, cpu;
47	struct task_struct *tsk;	62	unsigned long flush_bit;
48	int i;
49		63
50	if (ia64_ctx.next > max_ctx)	64	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
51	ia64_ctx.next = 300; /* skip daemons */	65	flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
52	ia64_ctx.limit = max_ctx + 1;	66	ia64_ctx.bitmap[i] ^= flush_bit;
		67	}
		68
		69	/* use offset at 300 to skip daemons */
		70	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
		71	ia64_ctx.max_ctx, 300);
		72	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
		73	ia64_ctx.max_ctx, ia64_ctx.next);
53		74
54	/*	75	/*
55	* Scan all the task's mm->context and set proper safe range	76	* can't call flush_tlb_all() here because of race condition
		77	* with O(1) scheduler [EF]
56	*/	78	*/
57		79	cpu = get_cpu(); /* prevent preemption/migration */
58	read_lock(&tasklist_lock);	80	for_each_online_cpu(i)
59	repeat:	81	if (i != cpu)
60	for_each_process(tsk) {	82	per_cpu(ia64_need_tlb_flush, i) = 1;
61	if (!tsk->mm)	83	put_cpu();
62	continue;
63	tsk_context = tsk->mm->context;
64	if (tsk_context == ia64_ctx.next) {
65	if (++ia64_ctx.next >= ia64_ctx.limit) {
66	/* empty range: reset the range limit and start over */
67	if (ia64_ctx.next > max_ctx)
68	ia64_ctx.next = 300;
69	ia64_ctx.limit = max_ctx + 1;
70	goto repeat;
71	}
72	}
73	if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
74	ia64_ctx.limit = tsk_context;
75	}
76	read_unlock(&tasklist_lock);
77	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
78	{
79	int cpu = get_cpu(); /* prevent preemption/migration */
80	for (i = 0; i < NR_CPUS; ++i)
81	if (cpu_online(i) && (i != cpu))
82	per_cpu(ia64_need_tlb_flush, i) = 1;
83	put_cpu();
84	}
85	local_flush_tlb_all();	84	local_flush_tlb_all();
86	}	85	}
87		86
88	void	87	void
89	ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)	88	ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
		89	unsigned long end, unsigned long nbits)
90	{	90	{
91	static DEFINE_SPINLOCK(ptcg_lock);	91	static DEFINE_SPINLOCK(ptcg_lock);
92		92
		93	if (mm != current->active_mm) {
		94	flush_tlb_all();
		95	return;
		96	}
		97
93	/* HW requires global serialization of ptc.ga. */	98	/* HW requires global serialization of ptc.ga. */
94	spin_lock(&ptcg_lock);	99	spin_lock(&ptcg_lock);
95	{	100	{
@@ -129,36 +134,37 @@ local_flush_tlb_all (void)
129	}	134	}
130		135
131	void	136	void
132	flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)	137	flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
		138	unsigned long end)
133	{	139	{
134	struct mm_struct *mm = vma->vm_mm;	140	struct mm_struct *mm = vma->vm_mm;
135	unsigned long size = end - start;	141	unsigned long size = end - start;
136	unsigned long nbits;	142	unsigned long nbits;
137		143
		144	#ifndef CONFIG_SMP
138	if (mm != current->active_mm) {	145	if (mm != current->active_mm) {
139	/* this does happen, but perhaps it's not worth optimizing for? */
140	#ifdef CONFIG_SMP
141	flush_tlb_all();
142	#else
143	mm->context = 0;	146	mm->context = 0;
144	#endif
145	return;	147	return;
146	}	148	}
		149	#endif
147		150
148	nbits = ia64_fls(size + 0xfff);	151	nbits = ia64_fls(size + 0xfff);
149	while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))	152	while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
		153	(nbits < purge.max_bits))
150	++nbits;	154	++nbits;
151	if (nbits > purge.max_bits)	155	if (nbits > purge.max_bits)
152	nbits = purge.max_bits;	156	nbits = purge.max_bits;
153	start &= ~((1UL << nbits) - 1);	157	start &= ~((1UL << nbits) - 1);
154		158
155	# ifdef CONFIG_SMP	159	# ifdef CONFIG_SMP
156	platform_global_tlb_purge(start, end, nbits);	160	platform_global_tlb_purge(mm, start, end, nbits);
157	# else	161	# else
		162	preempt_disable();
158	do {	163	do {
159	ia64_ptcl(start, (nbits<<2));	164	ia64_ptcl(start, (nbits<<2));
160	start += (1UL << nbits);	165	start += (1UL << nbits);
161	} while (start < end);	166	} while (start < end);
		167	preempt_enable();
162	# endif	168	# endif
163		169
164	ia64_srlz_i(); /* srlz.i implies srlz.d */	170	ia64_srlz_i(); /* srlz.i implies srlz.d */
@@ -186,5 +192,5 @@ ia64_tlb_init (void)
186	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];	192	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
187	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];	193	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
188		194
189	local_flush_tlb_all(); /* nuke left overs from bootstrapping... */	195	local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
190	}	196	}