author	Sukadev Bhattiprolu <sukadev@us.ibm.com>	2006-10-02 05:17:24 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-10-02 10:57:15 -0400
commit	3fbc96486459324e182717b03c50c90c880be6ec (patch)
tree	c2b5ccb3f64913daeb040c21652e4b421cc76bca
parent	aa5a6662f93f52605b6c447ba6f7291e92f515c5 (diff)
[PATCH] Define struct pspace
Define a per-container pid space object, and create one instance of this
object, init_pspace, to define the entire pid space. Subsequent patches
will provide/use interfaces to create/destroy pid spaces.

It's a subset/rework of Eric Biederman's patch
http://lkml.org/lkml/2006/2/6/285 .

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Andrey Savochkin <saw@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--	fs/proc/proc_misc.c	3
-rw-r--r--	include/linux/pspace.h	7
-rw-r--r--	include/linux/sched.h	1
-rw-r--r--	kernel/pid.c	53
4 files changed, 40 insertions(+), 24 deletions(-)
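Before the diff itself, a minimal user-space sketch of what the patch builds: one struct pspace bundling the pid bitmap and last_pid, with the allocator taking the pid space as an explicit argument. This is illustration only, not kernel code -- the page array is embedded instead of lazily allocated, the bit operation is not atomic, and there is no locking or wrap-around past pid_max -- but the pspace/pidmap layout and the mk_pid() arithmetic mirror the patch. MODEL_PAGE_SIZE is a made-up stand-in for PAGE_SIZE.

#include <limits.h>
#include <stdio.h>

#define MODEL_PAGE_SIZE	4096			/* stand-in for PAGE_SIZE */
#define BITS_PER_PAGE	(MODEL_PAGE_SIZE * 8)
#define PID_MAX_LIMIT	(4 * 1024 * 1024)	/* "up to 4 million PIDs" */
#define PIDMAP_ENTRIES	((PID_MAX_LIMIT + BITS_PER_PAGE - 1) / BITS_PER_PAGE)

struct pidmap {
	int nr_free;
	unsigned char page[MODEL_PAGE_SIZE];	/* the kernel allocates this lazily */
};

struct pspace {
	struct pidmap pidmap[PIDMAP_ENTRIES];
	int last_pid;
};

/* One instance describes one pid space, as init_pspace does in the patch. */
static struct pspace init_pspace;

/* Same arithmetic as the patch's mk_pid(): map index times bits-per-page, plus offset. */
static int mk_pid(struct pspace *pspace, struct pidmap *map, int off)
{
	return (int)(map - pspace->pidmap) * BITS_PER_PAGE + off;
}

/* Plain single-threaded stand-in for the kernel's atomic test_and_set_bit(). */
static int test_and_set_bit(int off, unsigned char *page)
{
	unsigned char mask = 1u << (off % CHAR_BIT);
	int was_set = (page[off / CHAR_BIT] & mask) != 0;

	page[off / CHAR_BIT] |= mask;
	return was_set;
}

/* Grossly simplified alloc_pidmap(): scan forward from last_pid, no wrap, no locking. */
static int alloc_pidmap(struct pspace *pspace)
{
	int pid;

	for (pid = pspace->last_pid + 1; pid < PID_MAX_LIMIT; pid++) {
		struct pidmap *map = &pspace->pidmap[pid / BITS_PER_PAGE];
		int offset = pid & (BITS_PER_PAGE - 1);

		if (!test_and_set_bit(offset, map->page)) {
			map->nr_free--;
			pspace->last_pid = mk_pid(pspace, map, offset);
			return pspace->last_pid;
		}
	}
	return -1;
}

int main(void)
{
	int i, a, b, c;

	for (i = 0; i < PIDMAP_ENTRIES; i++)
		init_pspace.pidmap[i].nr_free = BITS_PER_PAGE;

	test_and_set_bit(0, init_pspace.pidmap[0].page);	/* reserve PID 0, as pidmap_init() does */
	init_pspace.pidmap[0].nr_free--;

	a = alloc_pidmap(&init_pspace);
	b = alloc_pidmap(&init_pspace);
	c = alloc_pidmap(&init_pspace);
	printf("%d %d %d\n", a, b, c);	/* prints: 1 2 3 */
	return 0;
}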
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 66bc425f2f3d..8d88e58ed5cc 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -45,6 +45,7 @@
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
 #include <linux/crash_dump.h>
+#include <linux/pspace.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -91,7 +92,7 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, last_pid);
+		nr_running(), nr_threads, init_pspace.last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
diff --git a/include/linux/pspace.h b/include/linux/pspace.h
index a8a064b0ad18..91d48b8b2d99 100644
--- a/include/linux/pspace.h
+++ b/include/linux/pspace.h
@@ -13,4 +13,11 @@ struct pidmap {
 
 #define PIDMAP_ENTRIES	((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
 
+struct pspace {
+	struct pidmap pidmap[PIDMAP_ENTRIES];
+	int last_pid;
+};
+
+extern struct pspace init_pspace;
+
 #endif /* _LINUX_PSPACE_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dd6c2164e4a4..3b7c99265ace 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,7 +118,6 @@ extern unsigned long avenrun[]; /* Load averages */
 
 extern unsigned long total_forks;
 extern int nr_threads;
-extern int last_pid;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
diff --git a/kernel/pid.c b/kernel/pid.c
index 8234bd08a3cb..89107b7481af 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -34,7 +34,6 @@ static int pidhash_shift;
 static kmem_cache_t *pid_cachep;
 
 int pid_max = PID_MAX_DEFAULT;
-int last_pid;
 
 #define RESERVED_PIDS		300
 
@@ -43,7 +42,12 @@ int pid_max_max = PID_MAX_LIMIT;
 
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
-#define mk_pid(map, off)	(((map) - pidmap_array)*BITS_PER_PAGE + (off))
+
+static inline int mk_pid(struct pspace *pspace, struct pidmap *map, int off)
+{
+	return (map - pspace->pidmap)*BITS_PER_PAGE + off;
+}
+
 #define find_next_offset(map, off)					\
 		find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
 
@@ -53,8 +57,12 @@ int pid_max_max = PID_MAX_LIMIT;
  * value does not cause lots of bitmaps to be allocated, but
  * the scheme scales to up to 4 million PIDs, runtime.
  */
-static struct pidmap pidmap_array[PIDMAP_ENTRIES] =
-	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
+struct pspace init_pspace = {
+	.pidmap = {
+		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
+	},
+	.last_pid = 0
+};
 
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
@@ -69,40 +77,41 @@ static struct pidmap pidmap_array[PIDMAP_ENTRIES] =
  * irq handlers that take it we can leave the interrupts enabled.
  * For now it is easier to be safe than to prove it can't happen.
  */
+
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-static fastcall void free_pidmap(int pid)
+static fastcall void free_pidmap(struct pspace *pspace, int pid)
 {
-	struct pidmap *map = pidmap_array + pid / BITS_PER_PAGE;
+	struct pidmap *map = pspace->pidmap + pid / BITS_PER_PAGE;
 	int offset = pid & BITS_PER_PAGE_MASK;
 
 	clear_bit(offset, map->page);
 	atomic_inc(&map->nr_free);
 }
 
-static int alloc_pidmap(void)
+static int alloc_pidmap(struct pspace *pspace)
 {
-	int i, offset, max_scan, pid, last = last_pid;
+	int i, offset, max_scan, pid, last = pspace->last_pid;
 	struct pidmap *map;
 
 	pid = last + 1;
 	if (pid >= pid_max)
 		pid = RESERVED_PIDS;
 	offset = pid & BITS_PER_PAGE_MASK;
-	map = &pidmap_array[pid/BITS_PER_PAGE];
+	map = &pspace->pidmap[pid/BITS_PER_PAGE];
 	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
 	for (i = 0; i <= max_scan; ++i) {
 		if (unlikely(!map->page)) {
-			unsigned long page = get_zeroed_page(GFP_KERNEL);
+			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 			/*
 			 * Free the page if someone raced with us
 			 * installing it:
 			 */
 			spin_lock_irq(&pidmap_lock);
 			if (map->page)
-				free_page(page);
+				kfree(page);
 			else
-				map->page = (void *)page;
+				map->page = page;
 			spin_unlock_irq(&pidmap_lock);
 			if (unlikely(!map->page))
 				break;
@@ -111,11 +120,11 @@ static int alloc_pidmap(void)
 			do {
 				if (!test_and_set_bit(offset, map->page)) {
 					atomic_dec(&map->nr_free);
-					last_pid = pid;
+					pspace->last_pid = pid;
 					return pid;
 				}
 				offset = find_next_offset(map, offset);
-				pid = mk_pid(map, offset);
+				pid = mk_pid(pspace, map, offset);
 			/*
 			 * find_next_offset() found a bit, the pid from it
 			 * is in-bounds, and if we fell back to the last
@@ -126,16 +135,16 @@ static int alloc_pidmap(void)
 				(i != max_scan || pid < last ||
 				    !((last+1) & BITS_PER_PAGE_MASK)));
 		}
-		if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
+		if (map < &pspace->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
 			++map;
 			offset = 0;
 		} else {
-			map = &pidmap_array[0];
+			map = &pspace->pidmap[0];
 			offset = RESERVED_PIDS;
 			if (unlikely(last == offset))
 				break;
 		}
-		pid = mk_pid(map, offset);
+		pid = mk_pid(pspace, map, offset);
 	}
 	return -1;
 }
@@ -182,7 +191,7 @@ fastcall void free_pid(struct pid *pid)
 	hlist_del_rcu(&pid->pid_chain);
 	spin_unlock_irqrestore(&pidmap_lock, flags);
 
-	free_pidmap(pid->nr);
+	free_pidmap(&init_pspace, pid->nr);
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
 
@@ -196,7 +205,7 @@ struct pid *alloc_pid(void)
 	if (!pid)
 		goto out;
 
-	nr = alloc_pidmap();
+	nr = alloc_pidmap(&init_pspace);
 	if (nr < 0)
 		goto out_free;
 
@@ -363,10 +372,10 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
-	pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
+	init_pspace.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	/* Reserve PID 0. We never call free_pidmap(0) */
-	set_bit(0, pidmap_array->page);
-	atomic_dec(&pidmap_array->nr_free);
+	set_bit(0, init_pspace.pidmap[0].page);
+	atomic_dec(&init_pspace.pidmap[0].nr_free);
 
 	pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
 			__alignof__(struct pid),
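One detail the alloc_pidmap() hunk preserves across the get_zeroed_page() to kzalloc() switch: the zeroed page is allocated outside pidmap_lock (a GFP_KERNEL allocation may sleep), installed under the lock, and freed again if another task raced in and installed its own page first. A rough user-space analogue of that publish-or-discard pattern, with a pthread mutex standing in for pidmap_lock (illustration only; install_page() is a made-up name, not a kernel function):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t pidmap_lock = PTHREAD_MUTEX_INITIALIZER;

/* Allocate outside the lock, publish under it, discard our copy on a lost race. */
static void *install_page(void **slot, size_t size)
{
	void *page = calloc(1, size);	/* may block or fail: kept outside the lock */
	void *ret;

	pthread_mutex_lock(&pidmap_lock);
	if (*slot)
		free(page);		/* someone beat us; free(NULL) is also fine */
	else
		*slot = page;		/* we won the race: publish the zeroed page */
	ret = *slot;
	pthread_mutex_unlock(&pidmap_lock);

	return ret;			/* NULL only if every racer's allocation failed */
}

int main(void)
{
	void *slot = NULL;

	printf("installed: %p\n", install_page(&slot, 4096));
	printf("reused:    %p\n", install_page(&slot, 4096));	/* same pointer */
	free(slot);
	return 0;
}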