Diffstat (limited to 'kernel/pid.c')
-rw-r--r--  kernel/pid.c  212
1 file changed, 146 insertions(+), 66 deletions(-)
diff --git a/kernel/pid.c b/kernel/pid.c
index a9f2dfd006d2..eeb836b65ca4 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -28,8 +28,9 @@
 #include <linux/hash.h>
 
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
-static struct hlist_head *pid_hash[PIDTYPE_MAX];
+static struct hlist_head *pid_hash;
 static int pidhash_shift;
+static kmem_cache_t *pid_cachep;
 
 int pid_max = PID_MAX_DEFAULT;
 int last_pid;
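This hunk collapses the per-type hash tables into a single table of struct pid objects, allocated from the new pid_cachep slab. struct pid itself lives in include/linux/pid.h, which this diff does not show; inferred from the fields the code below touches (count, nr, pid_chain, tasks[type], rcu, and the link->node/link->pid pairs in attach_pid()/detach_pid()), the companion header change presumably looks roughly like:

/* Reconstruction of the companion include/linux/pid.h change --
 * inferred from the fields used in this file, not copied from
 * the diff itself. */
struct pid
{
	atomic_t count;				/* reference count, see put_pid() */
	int nr;					/* the pid number itself */
	struct hlist_node pid_chain;		/* link in pid_hash[pid_hashfn(nr)] */
	struct hlist_head tasks[PIDTYPE_MAX];	/* tasks using this pid, per type */
	struct rcu_head rcu;			/* for delayed_put_pid() */
};

struct pid_link			/* embedded in task_struct as pids[type] */
{
	struct hlist_node node;	/* link in pid->tasks[type] */
	struct pid *pid;
};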
@@ -60,9 +61,22 @@ typedef struct pidmap {
 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
 
+/*
+ * Note: disable interrupts while the pidmap_lock is held as an
+ * interrupt might come in and do read_lock(&tasklist_lock).
+ *
+ * If we don't disable interrupts there is a nasty deadlock between
+ * detach_pid()->free_pid() and another cpu that does
+ * spin_lock(&pidmap_lock) followed by an interrupt routine that does
+ * read_lock(&tasklist_lock);
+ *
+ * After we clean up the tasklist_lock and know there are no
+ * irq handlers that take it we can leave the interrupts enabled.
+ * For now it is easier to be safe than to prove it can't happen.
+ */
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-fastcall void free_pidmap(int pid)
+static fastcall void free_pidmap(int pid)
 {
 	pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
 	int offset = pid & BITS_PER_PAGE_MASK;
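The new comment compresses an ABBA deadlock into prose. Spelled out as a two-CPU interleaving (a sketch of the scenario it describes, not code from the patch; free_pid() in a later hunk takes pidmap_lock while tasklist_lock is already write-held):

/*
 * CPU A (detach_pid -> free_pid)     CPU B (e.g. alloc_pidmap path)
 * -------------------------------    ------------------------------
 * write_lock_irq(&tasklist_lock);
 *                                    spin_lock(&pidmap_lock);
 * spin_lock(&pidmap_lock);           <interrupt arrives on CPU B>
 *   ... spins waiting for B ...      read_lock(&tasklist_lock);
 *                                      ... spins on A's write lock ...
 *
 * A cannot get pidmap_lock until B's interrupt handler returns, and
 * the handler cannot get tasklist_lock until A drops it.  Disabling
 * interrupts while pidmap_lock is held (spin_lock_irq /
 * spin_lock_irqsave) delays the handler until the lock is released,
 * breaking the cycle.
 */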
@@ -71,7 +85,7 @@ fastcall void free_pidmap(int pid)
 	atomic_inc(&map->nr_free);
 }
 
-int alloc_pidmap(void)
+static int alloc_pidmap(void)
 {
 	int i, offset, max_scan, pid, last = last_pid;
 	pidmap_t *map;
@@ -89,12 +103,12 @@ int alloc_pidmap(void)
			 * Free the page if someone raced with us
			 * installing it:
			 */
-			spin_lock(&pidmap_lock);
+			spin_lock_irq(&pidmap_lock);
 			if (map->page)
 				free_page(page);
 			else
 				map->page = (void *)page;
-			spin_unlock(&pidmap_lock);
+			spin_unlock_irq(&pidmap_lock);
 			if (unlikely(!map->page))
 				break;
 		}
@@ -131,13 +145,73 @@ int alloc_pidmap(void)
 	return -1;
 }
 
-struct pid * fastcall find_pid(enum pid_type type, int nr)
+fastcall void put_pid(struct pid *pid)
+{
+	if (!pid)
+		return;
+	if ((atomic_read(&pid->count) == 1) ||
+	     atomic_dec_and_test(&pid->count))
+		kmem_cache_free(pid_cachep, pid);
+}
+
+static void delayed_put_pid(struct rcu_head *rhp)
+{
+	struct pid *pid = container_of(rhp, struct pid, rcu);
+	put_pid(pid);
+}
+
+fastcall void free_pid(struct pid *pid)
+{
+	/* We can be called with write_lock_irq(&tasklist_lock) held */
+	unsigned long flags;
+
+	spin_lock_irqsave(&pidmap_lock, flags);
+	hlist_del_rcu(&pid->pid_chain);
+	spin_unlock_irqrestore(&pidmap_lock, flags);
+
+	free_pidmap(pid->nr);
+	call_rcu(&pid->rcu, delayed_put_pid);
+}
+
+struct pid *alloc_pid(void)
+{
+	struct pid *pid;
+	enum pid_type type;
+	int nr = -1;
+
+	pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
+	if (!pid)
+		goto out;
+
+	nr = alloc_pidmap();
+	if (nr < 0)
+		goto out_free;
+
+	atomic_set(&pid->count, 1);
+	pid->nr = nr;
+	for (type = 0; type < PIDTYPE_MAX; ++type)
+		INIT_HLIST_HEAD(&pid->tasks[type]);
+
+	spin_lock_irq(&pidmap_lock);
+	hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
+	spin_unlock_irq(&pidmap_lock);
+
+out:
+	return pid;
+
+out_free:
+	kmem_cache_free(pid_cachep, pid);
+	pid = NULL;
+	goto out;
+}
+
+struct pid * fastcall find_pid(int nr)
 {
 	struct hlist_node *elem;
 	struct pid *pid;
 
 	hlist_for_each_entry_rcu(pid, elem,
-			&pid_hash[type][pid_hashfn(nr)], pid_chain) {
+			&pid_hash[pid_hashfn(nr)], pid_chain) {
 		if (pid->nr == nr)
 			return pid;
 	}
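Taken together, put_pid()/free_pid()/alloc_pid() give struct pid a reference-counted, RCU-freed lifecycle. A minimal sketch of a caller under the new API -- hypothetical and simplified; the real callers are presumably the fork/exit paths touched elsewhere in this patchset:

/* Hypothetical caller sketch; error paths and the tasklist
 * locking around attach/detach are elided. */
struct pid *pid;

pid = alloc_pid();		/* hashed, nr reserved, count == 1 */
if (!pid)
	return -ENOMEM;		/* slab or pidmap exhausted */

attach_pid(task, PIDTYPE_PID, pid->nr);	/* task joins pid->tasks[PIDTYPE_PID] */

/* ... task runs ... */

detach_pid(task, PIDTYPE_PID);	/* last user gone: free_pid() unhashes
				 * the pid, returns nr to the pidmap and
				 * drops the final reference after a
				 * grace period via delayed_put_pid() */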
@@ -146,77 +220,82 @@ struct pid * fastcall find_pid(enum pid_type type, int nr)
 
 int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 {
-	struct pid *pid, *task_pid;
-
-	task_pid = &task->pids[type];
-	pid = find_pid(type, nr);
-	task_pid->nr = nr;
-	if (pid == NULL) {
-		INIT_LIST_HEAD(&task_pid->pid_list);
-		hlist_add_head_rcu(&task_pid->pid_chain,
-				&pid_hash[type][pid_hashfn(nr)]);
-	} else {
-		INIT_HLIST_NODE(&task_pid->pid_chain);
-		list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list);
-	}
+	struct pid_link *link;
+	struct pid *pid;
+
+	WARN_ON(!task->pid); /* to be removed soon */
+	WARN_ON(!nr); /* to be removed soon */
+
+	link = &task->pids[type];
+	link->pid = pid = find_pid(nr);
+	hlist_add_head_rcu(&link->node, &pid->tasks[type]);
 
 	return 0;
 }
 
-static fastcall int __detach_pid(task_t *task, enum pid_type type)
+void fastcall detach_pid(task_t *task, enum pid_type type)
 {
-	struct pid *pid, *pid_next;
-	int nr = 0;
+	struct pid_link *link;
+	struct pid *pid;
+	int tmp;
 
-	pid = &task->pids[type];
-	if (!hlist_unhashed(&pid->pid_chain)) {
+	link = &task->pids[type];
+	pid = link->pid;
 
-		if (list_empty(&pid->pid_list)) {
-			nr = pid->nr;
-			hlist_del_rcu(&pid->pid_chain);
-		} else {
-			pid_next = list_entry(pid->pid_list.next,
-						struct pid, pid_list);
-			/* insert next pid from pid_list to hash */
-			hlist_replace_rcu(&pid->pid_chain,
-						&pid_next->pid_chain);
-		}
-	}
+	hlist_del_rcu(&link->node);
+	link->pid = NULL;
 
-	list_del_rcu(&pid->pid_list);
-	pid->nr = 0;
+	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
+		if (!hlist_empty(&pid->tasks[tmp]))
+			return;
 
-	return nr;
+	free_pid(pid);
 }
 
-void fastcall detach_pid(task_t *task, enum pid_type type)
+struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
 {
-	int tmp, nr;
+	struct task_struct *result = NULL;
+	if (pid) {
+		struct hlist_node *first;
+		first = rcu_dereference(pid->tasks[type].first);
+		if (first)
+			result = hlist_entry(first, struct task_struct, pids[(type)].node);
+	}
+	return result;
+}
 
-	nr = __detach_pid(task, type);
-	if (!nr)
-		return;
+/*
+ * Must be called under rcu_read_lock() or with tasklist_lock read-held.
+ */
+task_t *find_task_by_pid_type(int type, int nr)
+{
+	return pid_task(find_pid(nr), type);
+}
 
-	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
-		if (tmp != type && find_pid(tmp, nr))
-			return;
+EXPORT_SYMBOL(find_task_by_pid_type);
 
-	free_pidmap(nr);
+struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type)
+{
+	struct task_struct *result;
+	rcu_read_lock();
+	result = pid_task(pid, type);
+	if (result)
+		get_task_struct(result);
+	rcu_read_unlock();
+	return result;
 }
 
-task_t *find_task_by_pid_type(int type, int nr)
+struct pid *find_get_pid(pid_t nr)
 {
 	struct pid *pid;
 
-	pid = find_pid(type, nr);
-	if (!pid)
-		return NULL;
+	rcu_read_lock();
+	pid = get_pid(find_pid(nr));
+	rcu_read_unlock();
 
-	return pid_task(&pid->pid_list, type);
+	return pid;
 }
 
-EXPORT_SYMBOL(find_task_by_pid_type);
-
 /*
  * The pid hash table is scaled according to the amount of memory in the
  * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
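On the lookup side, the same refcount lets a caller pin a pid or task without holding tasklist_lock, as the comment on find_task_by_pid_type requires. A usage sketch (hypothetical caller; the helpers are the ones added above, written C89-style to match the file):

/* Resolve a numeric pid to a task and use it safely. */
struct pid *pid;
struct task_struct *task;

pid = find_get_pid(nr);		/* NULL, or pid with its count bumped */
if (!pid)
	return -ESRCH;

task = get_pid_task(pid, PIDTYPE_PID);	/* takes rcu_read_lock() itself */
if (task) {
	/* ... use task; our reference keeps it alive ... */
	put_task_struct(task);
}
put_pid(pid);			/* drop our pid reference */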
@@ -224,7 +303,7 @@ EXPORT_SYMBOL(find_task_by_pid_type);
  */
 void __init pidhash_init(void)
 {
-	int i, j, pidhash_size;
+	int i, pidhash_size;
 	unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
 
 	pidhash_shift = max(4, fls(megabytes * 4));
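As a worked example of the sizing line just above (assuming the elided next line computes pidhash_size as 1 << pidhash_shift, which the printk and the 16-slot minimum suggest): with 256 MB worth of kernel pages, megabytes = 256, so fls(256 * 4) = fls(1024) = 11 and the table gets 2^11 = 2048 slots; on very small machines the max(4, ...) enforces the 16-slot (2^4) floor the comment mentions.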
@@ -233,16 +312,13 @@ void __init pidhash_init(void)
 
 	printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
 		pidhash_size, pidhash_shift,
-		PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));
+		pidhash_size * sizeof(struct hlist_head));
 
-	for (i = 0; i < PIDTYPE_MAX; i++) {
-		pid_hash[i] = alloc_bootmem(pidhash_size *
-					sizeof(*(pid_hash[i])));
-		if (!pid_hash[i])
-			panic("Could not alloc pidhash!\n");
-		for (j = 0; j < pidhash_size; j++)
-			INIT_HLIST_HEAD(&pid_hash[i][j]);
-	}
+	pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
+	if (!pid_hash)
+		panic("Could not alloc pidhash!\n");
+	for (i = 0; i < pidhash_size; i++)
+		INIT_HLIST_HEAD(&pid_hash[i]);
 }
 
 void __init pidmap_init(void)
| 248 | void __init pidmap_init(void) | 324 | void __init pidmap_init(void) |
| @@ -251,4 +327,8 @@ void __init pidmap_init(void) | |||
| 251 | /* Reserve PID 0. We never call free_pidmap(0) */ | 327 | /* Reserve PID 0. We never call free_pidmap(0) */ |
| 252 | set_bit(0, pidmap_array->page); | 328 | set_bit(0, pidmap_array->page); |
| 253 | atomic_dec(&pidmap_array->nr_free); | 329 | atomic_dec(&pidmap_array->nr_free); |
| 330 | |||
| 331 | pid_cachep = kmem_cache_create("pid", sizeof(struct pid), | ||
| 332 | __alignof__(struct pid), | ||
| 333 | SLAB_PANIC, NULL, NULL); | ||
| 254 | } | 334 | } |
