[IA64] Use bitmaps for efficient context allocation/free

Corrects the very inefficient method of finding free context_ids in get_mmu_context(). Instead of walking the task_list of all processes, 2 bitmaps are used to efficiently store and look up state: in use and needs flushing. The entire rid address space is now used before calling wrap_mmu_context and global tlb flushing. Special thanks to Ken and Rohit for their review and modifications in using a bit flushmap. Signed-off-by: Peter Keilty <peter.keilty@hp.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
author: Peter Keilty <peter.keilty@hp.com> 2005-10-31 16:44:47 -0500
committer: Tony Luck <tony.luck@intel.com> 2005-10-31 17:36:05 -0500
commit: dcc17d1baef3721d1574e5b2f4f2d4607514bcff (patch)
tree: 78b19a9b54f57aa010f50201e7639786b0e5f770
parent: f2c84c0e84bfa637a7161eac10157cf3b05b4a73 (diff)
4 files changed, 44 insertions, 33 deletions
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index fc56ca2da35..c9388a92cf4 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -454,6 +454,7 @@ setup_arch (char **cmdline_p)
 #endif
        cpu_init();     /* initialize the bootstrap CPU */
+        mmu_context_init();     /* initialize context_id bitmap */
 #ifdef CONFIG_ACPI
        acpi_boot_init();
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index c79a9b96d02..39628fca274 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
 *              Modified RID allocation for SMP
 *          Goutham Rao <goutham.rao@intel.com>
 *              IPI based ptc implementation and A-step IPI implementation.
+ * Rohit Seth <rohit.seth@intel.com>
+ * Ken Chen <kenneth.w.chen@intel.com>
 */
 #include <linux/config.h>
 #include <linux/module.h>
@@ -16,12 +18,14 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/bootmem.h>
 #include <asm/delay.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
+#include <asm/dma.h>
 static struct {
        unsigned long mask;     /* mask of supported purge page-sizes */
@@ -31,49 +35,43 @@ static struct {
 struct ia64_ctx ia64_ctx = {
        .lock =         SPIN_LOCK_UNLOCKED,
        .next =         1,
-        .limit =        (1 << 15) - 1,          /* start out with the safe (architected) limit */
        .max_ctx =      ~0U
 };
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
 /*
+ * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
+ * Called after cpu_init() has setup ia64_ctx.max_ctx based on
+ * maximum RID that is supported by boot CPU.
+ */
+void __init
+mmu_context_init (void)
+{
+        ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+        ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+}
+/*
 * Acquire the ia64_ctx.lock before calling this function!
 */
 void
 wrap_mmu_context (struct mm_struct *mm)
 {
-        unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
-        struct task_struct *tsk;
        int i;
+        unsigned long flush_bit;
-        if (ia64_ctx.next > max_ctx)
+        for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
-                ia64_ctx.next = 300;    /* skip daemons */
+                flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
-        ia64_ctx.limit = max_ctx + 1;
+                ia64_ctx.bitmap[i] ^= flush_bit;
-        /*
-         * Scan all the task's mm->context and set proper safe range
-         */
-        read_lock(&tasklist_lock);
-  repeat:
-        for_each_process(tsk) {
-                if (!tsk->mm)
-                        continue;
-                tsk_context = tsk->mm->context;
-                if (tsk_context == ia64_ctx.next) {
-                        if (++ia64_ctx.next >= ia64_ctx.limit) {
-                                /* empty range: reset the range limit and start over */
-                                if (ia64_ctx.next > max_ctx)
-                                        ia64_ctx.next = 300;
-                                ia64_ctx.limit = max_ctx + 1;
-                                goto repeat;
-                        }
-                }
-                if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
-                        ia64_ctx.limit = tsk_context;
        }
-        read_unlock(&tasklist_lock);
+ 
+        /* use offset at 300 to skip daemons */
+        ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+                                ia64_ctx.max_ctx, 300);
+        ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+                                ia64_ctx.max_ctx, ia64_ctx.next);
        /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
        {
                int cpu = get_cpu(); /* prevent preemption/migration */
diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
index 8d6e72f7b08..8d9b30b5f7d 100644
--- a/include/asm-ia64/mmu_context.h
+++ b/include/asm-ia64/mmu_context.h
@@ -32,13 +32,17 @@
 struct ia64_ctx {
        spinlock_t lock;
        unsigned int next;      /* next context number to use */
-        unsigned int limit;     /* next >= limit => must call wrap_mmu_context() */
+        unsigned int limit;     /* available free range */
-        unsigned int max_ctx;   /* max. context value supported by all CPUs */
+        unsigned int max_ctx;   /* max. context value supported by all CPUs */
+                                /* call wrap_mmu_context when next >= max */
+        unsigned long *bitmap;  /* bitmap size is max_ctx+1 */
+        unsigned long *flushmap;/* pending rid to be flushed */
 };
 extern struct ia64_ctx ia64_ctx;
 DECLARE_PER_CPU(u8, ia64_need_tlb_flush);
+extern void mmu_context_init (void);
 extern void wrap_mmu_context (struct mm_struct *mm);
 static inline void
@@ -83,9 +87,16 @@ get_mmu_context (struct mm_struct *mm)
                        context = mm->context;
                        if (context == 0) {
                                cpus_clear(mm->cpu_vm_mask);
-                                if (ia64_ctx.next >= ia64_ctx.limit)
+                                if (ia64_ctx.next >= ia64_ctx.limit) {
-                                        wrap_mmu_context(mm);
+                                        ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+                                                        ia64_ctx.max_ctx, ia64_ctx.next);
+                                        ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+                                                        ia64_ctx.max_ctx, ia64_ctx.next);
+                                        if (ia64_ctx.next >= ia64_ctx.max_ctx)
+                                                wrap_mmu_context(mm);
+                                }
                                mm->context = context = ia64_ctx.next++;
+                                __set_bit(context, ia64_ctx.bitmap);
                        }
                }
                spin_unlock_irqrestore(&ia64_ctx.lock, flags);
diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h
index b65c6270272..a35b323bae4 100644
--- a/include/asm-ia64/tlbflush.h
+++ b/include/asm-ia64/tlbflush.h
@@ -51,6 +51,7 @@ flush_tlb_mm (struct mm_struct *mm)
        if (!mm)
                return;
+        set_bit(mm->context, ia64_ctx.flushmap);
        mm->context = 0;
        if (atomic_read(&mm->mm_users) == 0)
author	Peter Keilty <peter.keilty@hp.com>	2005-10-31 16:44:47 -0500
committer	Tony Luck <tony.luck@intel.com>	2005-10-31 17:36:05 -0500
commit	dcc17d1baef3721d1574e5b2f4f2d4607514bcff (patch)
tree	78b19a9b54f57aa010f50201e7639786b0e5f770
parent	f2c84c0e84bfa637a7161eac10157cf3b05b4a73 (diff)

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index fc56ca2da35..c9388a92cf4 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c
@@ -454,6 +454,7 @@ setup_arch (char **cmdline_p)
454	#endif	454	#endif
455		455
456	cpu_init(); /* initialize the bootstrap CPU */	456	cpu_init(); /* initialize the bootstrap CPU */
		457	mmu_context_init(); /* initialize context_id bitmap */
457		458
458	#ifdef CONFIG_ACPI	459	#ifdef CONFIG_ACPI
459	acpi_boot_init();	460	acpi_boot_init();


diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index c79a9b96d02..39628fca274 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
8	* Modified RID allocation for SMP	8	* Modified RID allocation for SMP
9	* Goutham Rao <goutham.rao@intel.com>	9	* Goutham Rao <goutham.rao@intel.com>
10	* IPI based ptc implementation and A-step IPI implementation.	10	* IPI based ptc implementation and A-step IPI implementation.
		11	* Rohit Seth <rohit.seth@intel.com>
		12	* Ken Chen <kenneth.w.chen@intel.com>
11	*/	13	*/
12	#include <linux/config.h>	14	#include <linux/config.h>
13	#include <linux/module.h>	15	#include <linux/module.h>
@@ -16,12 +18,14 @@
16	#include <linux/sched.h>	18	#include <linux/sched.h>
17	#include <linux/smp.h>	19	#include <linux/smp.h>
18	#include <linux/mm.h>	20	#include <linux/mm.h>
		21	#include <linux/bootmem.h>
19		22
20	#include <asm/delay.h>	23	#include <asm/delay.h>
21	#include <asm/mmu_context.h>	24	#include <asm/mmu_context.h>
22	#include <asm/pgalloc.h>	25	#include <asm/pgalloc.h>
23	#include <asm/pal.h>	26	#include <asm/pal.h>
24	#include <asm/tlbflush.h>	27	#include <asm/tlbflush.h>
		28	#include <asm/dma.h>
25		29
26	static struct {	30	static struct {
27	unsigned long mask; /* mask of supported purge page-sizes */	31	unsigned long mask; /* mask of supported purge page-sizes */
@@ -31,49 +35,43 @@ static struct {
31	struct ia64_ctx ia64_ctx = {	35	struct ia64_ctx ia64_ctx = {
32	.lock = SPIN_LOCK_UNLOCKED,	36	.lock = SPIN_LOCK_UNLOCKED,
33	.next = 1,	37	.next = 1,
34	.limit = (1 << 15) - 1, /* start out with the safe (architected) limit */
35	.max_ctx = ~0U	38	.max_ctx = ~0U
36	};	39	};
37		40
38	DEFINE_PER_CPU(u8, ia64_need_tlb_flush);	41	DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
39		42
40	/*	43	/*
		44	* Initializes the ia64_ctx.bitmap array based on max_ctx+1.
		45	* Called after cpu_init() has setup ia64_ctx.max_ctx based on
		46	* maximum RID that is supported by boot CPU.
		47	*/
		48	void __init
		49	mmu_context_init (void)
		50	{
		51	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
		52	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
		53	}
		54
		55	/*
41	* Acquire the ia64_ctx.lock before calling this function!	56	* Acquire the ia64_ctx.lock before calling this function!
42	*/	57	*/
43	void	58	void
44	wrap_mmu_context (struct mm_struct *mm)	59	wrap_mmu_context (struct mm_struct *mm)
45	{	60	{
46	unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
47	struct task_struct *tsk;
48	int i;	61	int i;
		62	unsigned long flush_bit;
49		63
50	if (ia64_ctx.next > max_ctx)	64	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
51	ia64_ctx.next = 300; /* skip daemons */	65	flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
52	ia64_ctx.limit = max_ctx + 1;	66	ia64_ctx.bitmap[i] ^= flush_bit;
53
54	/*
55	* Scan all the task's mm->context and set proper safe range
56	*/
57
58	read_lock(&tasklist_lock);
59	repeat:
60	for_each_process(tsk) {
61	if (!tsk->mm)
62	continue;
63	tsk_context = tsk->mm->context;
64	if (tsk_context == ia64_ctx.next) {
65	if (++ia64_ctx.next >= ia64_ctx.limit) {
66	/* empty range: reset the range limit and start over */
67	if (ia64_ctx.next > max_ctx)
68	ia64_ctx.next = 300;
69	ia64_ctx.limit = max_ctx + 1;
70	goto repeat;
71	}
72	}
73	if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
74	ia64_ctx.limit = tsk_context;
75	}	67	}
76	read_unlock(&tasklist_lock);	68
		69	/* use offset at 300 to skip daemons */
		70	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
		71	ia64_ctx.max_ctx, 300);
		72	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
		73	ia64_ctx.max_ctx, ia64_ctx.next);
		74
77	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */	75	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
78	{	76	{
79	int cpu = get_cpu(); /* prevent preemption/migration */	77	int cpu = get_cpu(); /* prevent preemption/migration */


diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index 8d6e72f7b08..8d9b30b5f7d 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h
@@ -32,13 +32,17 @@
32	struct ia64_ctx {	32	struct ia64_ctx {
33	spinlock_t lock;	33	spinlock_t lock;
34	unsigned int next; /* next context number to use */	34	unsigned int next; /* next context number to use */
35	unsigned int limit; /* next >= limit => must call wrap_mmu_context() */	35	unsigned int limit; /* available free range */
36	unsigned int max_ctx; /* max. context value supported by all CPUs */	36	unsigned int max_ctx; /* max. context value supported by all CPUs */
		37	/* call wrap_mmu_context when next >= max */
		38	unsigned long *bitmap; /* bitmap size is max_ctx+1 */
		39	unsigned long *flushmap;/* pending rid to be flushed */
37	};	40	};
38		41
39	extern struct ia64_ctx ia64_ctx;	42	extern struct ia64_ctx ia64_ctx;
40	DECLARE_PER_CPU(u8, ia64_need_tlb_flush);	43	DECLARE_PER_CPU(u8, ia64_need_tlb_flush);
41		44
		45	extern void mmu_context_init (void);
42	extern void wrap_mmu_context (struct mm_struct *mm);	46	extern void wrap_mmu_context (struct mm_struct *mm);
43		47
44	static inline void	48	static inline void
@@ -83,9 +87,16 @@ get_mmu_context (struct mm_struct *mm)
83	context = mm->context;	87	context = mm->context;
84	if (context == 0) {	88	if (context == 0) {
85	cpus_clear(mm->cpu_vm_mask);	89	cpus_clear(mm->cpu_vm_mask);
86	if (ia64_ctx.next >= ia64_ctx.limit)	90	if (ia64_ctx.next >= ia64_ctx.limit) {
87	wrap_mmu_context(mm);	91	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
		92	ia64_ctx.max_ctx, ia64_ctx.next);
		93	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
		94	ia64_ctx.max_ctx, ia64_ctx.next);
		95	if (ia64_ctx.next >= ia64_ctx.max_ctx)
		96	wrap_mmu_context(mm);
		97	}
88	mm->context = context = ia64_ctx.next++;	98	mm->context = context = ia64_ctx.next++;
		99	__set_bit(context, ia64_ctx.bitmap);
89	}	100	}
90	}	101	}
91	spin_unlock_irqrestore(&ia64_ctx.lock, flags);	102	spin_unlock_irqrestore(&ia64_ctx.lock, flags);


diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h index b65c6270272..a35b323bae4 100644 --- a/include/asm-ia64/tlbflush.h +++ b/include/asm-ia64/tlbflush.h
@@ -51,6 +51,7 @@ flush_tlb_mm (struct mm_struct *mm)
51	if (!mm)	51	if (!mm)
52	return;	52	return;
53		53
		54	set_bit(mm->context, ia64_ctx.flushmap);
54	mm->context = 0;	55	mm->context = 0;
55		56
56	if (atomic_read(&mm->mm_users) == 0)	57	if (atomic_read(&mm->mm_users) == 0)