Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/acct.c | 26 |
| -rw-r--r-- | kernel/auditsc.c | 6 |
| -rw-r--r-- | kernel/cpuset.c | 104 |
| -rw-r--r-- | kernel/exit.c | 75 |
| -rw-r--r-- | kernel/fork.c | 83 |
| -rw-r--r-- | kernel/futex.c | 10 |
| -rw-r--r-- | kernel/irq/proc.c | 3 |
| -rw-r--r-- | kernel/kallsyms.c | 16 |
| -rw-r--r-- | kernel/kmod.c | 2 |
| -rw-r--r-- | kernel/lockdep.c | 203 |
| -rw-r--r-- | kernel/module.c | 25 |
| -rw-r--r-- | kernel/mutex.c | 9 |
| -rw-r--r-- | kernel/nsproxy.c | 38 |
| -rw-r--r-- | kernel/pid.c | 75 |
| -rw-r--r-- | kernel/power/Kconfig | 9 |
| -rw-r--r-- | kernel/power/disk.c | 8 |
| -rw-r--r-- | kernel/power/main.c | 2 |
| -rw-r--r-- | kernel/power/process.c | 21 |
| -rw-r--r-- | kernel/relay.c | 8 |
| -rw-r--r-- | kernel/sched.c | 515 |
| -rw-r--r-- | kernel/signal.c | 17 |
| -rw-r--r-- | kernel/sys.c | 23 |
| -rw-r--r-- | kernel/sysctl.c | 390 |
| -rw-r--r-- | kernel/time/clocksource.c | 8 |
| -rw-r--r-- | kernel/timer.c | 162 |
| -rw-r--r-- | kernel/tsacct.c | 9 |
| -rw-r--r-- | kernel/workqueue.c | 21 |
27 files changed, 1132 insertions, 736 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index dc12db8600e7..70d0d88e5554 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
| @@ -118,7 +118,7 @@ static int check_free_space(struct file *file) | |||
| 118 | spin_unlock(&acct_globals.lock); | 118 | spin_unlock(&acct_globals.lock); |
| 119 | 119 | ||
| 120 | /* May block */ | 120 | /* May block */ |
| 121 | if (vfs_statfs(file->f_dentry, &sbuf)) | 121 | if (vfs_statfs(file->f_path.dentry, &sbuf)) |
| 122 | return res; | 122 | return res; |
| 123 | suspend = sbuf.f_blocks * SUSPEND; | 123 | suspend = sbuf.f_blocks * SUSPEND; |
| 124 | resume = sbuf.f_blocks * RESUME; | 124 | resume = sbuf.f_blocks * RESUME; |
| @@ -194,7 +194,7 @@ static void acct_file_reopen(struct file *file) | |||
| 194 | add_timer(&acct_globals.timer); | 194 | add_timer(&acct_globals.timer); |
| 195 | } | 195 | } |
| 196 | if (old_acct) { | 196 | if (old_acct) { |
| 197 | mnt_unpin(old_acct->f_vfsmnt); | 197 | mnt_unpin(old_acct->f_path.mnt); |
| 198 | spin_unlock(&acct_globals.lock); | 198 | spin_unlock(&acct_globals.lock); |
| 199 | do_acct_process(old_acct); | 199 | do_acct_process(old_acct); |
| 200 | filp_close(old_acct, NULL); | 200 | filp_close(old_acct, NULL); |
| @@ -212,7 +212,7 @@ static int acct_on(char *name) | |||
| 212 | if (IS_ERR(file)) | 212 | if (IS_ERR(file)) |
| 213 | return PTR_ERR(file); | 213 | return PTR_ERR(file); |
| 214 | 214 | ||
| 215 | if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { | 215 | if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) { |
| 216 | filp_close(file, NULL); | 216 | filp_close(file, NULL); |
| 217 | return -EACCES; | 217 | return -EACCES; |
| 218 | } | 218 | } |
| @@ -229,11 +229,11 @@ static int acct_on(char *name) | |||
| 229 | } | 229 | } |
| 230 | 230 | ||
| 231 | spin_lock(&acct_globals.lock); | 231 | spin_lock(&acct_globals.lock); |
| 232 | mnt_pin(file->f_vfsmnt); | 232 | mnt_pin(file->f_path.mnt); |
| 233 | acct_file_reopen(file); | 233 | acct_file_reopen(file); |
| 234 | spin_unlock(&acct_globals.lock); | 234 | spin_unlock(&acct_globals.lock); |
| 235 | 235 | ||
| 236 | mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */ | 236 | mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ |
| 237 | 237 | ||
| 238 | return 0; | 238 | return 0; |
| 239 | } | 239 | } |
| @@ -283,7 +283,7 @@ asmlinkage long sys_acct(const char __user *name) | |||
| 283 | void acct_auto_close_mnt(struct vfsmount *m) | 283 | void acct_auto_close_mnt(struct vfsmount *m) |
| 284 | { | 284 | { |
| 285 | spin_lock(&acct_globals.lock); | 285 | spin_lock(&acct_globals.lock); |
| 286 | if (acct_globals.file && acct_globals.file->f_vfsmnt == m) | 286 | if (acct_globals.file && acct_globals.file->f_path.mnt == m) |
| 287 | acct_file_reopen(NULL); | 287 | acct_file_reopen(NULL); |
| 288 | spin_unlock(&acct_globals.lock); | 288 | spin_unlock(&acct_globals.lock); |
| 289 | } | 289 | } |
| @@ -299,7 +299,7 @@ void acct_auto_close(struct super_block *sb) | |||
| 299 | { | 299 | { |
| 300 | spin_lock(&acct_globals.lock); | 300 | spin_lock(&acct_globals.lock); |
| 301 | if (acct_globals.file && | 301 | if (acct_globals.file && |
| 302 | acct_globals.file->f_vfsmnt->mnt_sb == sb) { | 302 | acct_globals.file->f_path.mnt->mnt_sb == sb) { |
| 303 | acct_file_reopen(NULL); | 303 | acct_file_reopen(NULL); |
| 304 | } | 304 | } |
| 305 | spin_unlock(&acct_globals.lock); | 305 | spin_unlock(&acct_globals.lock); |
| @@ -428,6 +428,7 @@ static void do_acct_process(struct file *file) | |||
| 428 | u64 elapsed; | 428 | u64 elapsed; |
| 429 | u64 run_time; | 429 | u64 run_time; |
| 430 | struct timespec uptime; | 430 | struct timespec uptime; |
| 431 | struct tty_struct *tty; | ||
| 431 | 432 | ||
| 432 | /* | 433 | /* |
| 433 | * First check to see if there is enough free_space to continue | 434 | * First check to see if there is enough free_space to continue |
| @@ -484,16 +485,9 @@ static void do_acct_process(struct file *file) | |||
| 484 | ac.ac_ppid = current->parent->tgid; | 485 | ac.ac_ppid = current->parent->tgid; |
| 485 | #endif | 486 | #endif |
| 486 | 487 | ||
| 487 | mutex_lock(&tty_mutex); | ||
| 488 | /* FIXME: Whoever is responsible for current->signal locking needs | ||
| 489 | to use the same locking all over the kernel and document it */ | ||
| 490 | read_lock(&tasklist_lock); | ||
| 491 | ac.ac_tty = current->signal->tty ? | ||
| 492 | old_encode_dev(tty_devnum(current->signal->tty)) : 0; | ||
| 493 | read_unlock(&tasklist_lock); | ||
| 494 | mutex_unlock(&tty_mutex); | ||
| 495 | |||
| 496 | spin_lock_irq(¤t->sighand->siglock); | 488 | spin_lock_irq(¤t->sighand->siglock); |
| 489 | tty = current->signal->tty; | ||
| 490 | ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0; | ||
| 497 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); | 491 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); |
| 498 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); | 492 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); |
| 499 | ac.ac_flag = pacct->ac_flag; | 493 | ac.ac_flag = pacct->ac_flag; |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 40722e26de98..298897559ca4 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
| @@ -781,8 +781,8 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk | |||
| 781 | if ((vma->vm_flags & VM_EXECUTABLE) && | 781 | if ((vma->vm_flags & VM_EXECUTABLE) && |
| 782 | vma->vm_file) { | 782 | vma->vm_file) { |
| 783 | audit_log_d_path(ab, "exe=", | 783 | audit_log_d_path(ab, "exe=", |
| 784 | vma->vm_file->f_dentry, | 784 | vma->vm_file->f_path.dentry, |
| 785 | vma->vm_file->f_vfsmnt); | 785 | vma->vm_file->f_path.mnt); |
| 786 | break; | 786 | break; |
| 787 | } | 787 | } |
| 788 | vma = vma->vm_next; | 788 | vma = vma->vm_next; |
| @@ -826,10 +826,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
| 826 | context->return_code); | 826 | context->return_code); |
| 827 | 827 | ||
| 828 | mutex_lock(&tty_mutex); | 828 | mutex_lock(&tty_mutex); |
| 829 | read_lock(&tasklist_lock); | ||
| 829 | if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) | 830 | if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) |
| 830 | tty = tsk->signal->tty->name; | 831 | tty = tsk->signal->tty->name; |
| 831 | else | 832 | else |
| 832 | tty = "(none)"; | 833 | tty = "(none)"; |
| 834 | read_unlock(&tasklist_lock); | ||
| 833 | audit_log_format(ab, | 835 | audit_log_format(ab, |
| 834 | " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" | 836 | " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" |
| 835 | " ppid=%d pid=%d auid=%u uid=%u gid=%u" | 837 | " ppid=%d pid=%d auid=%u uid=%u gid=%u" |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 0a6b4d89f9a0..232aed2b10f9 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -413,8 +413,8 @@ static struct file_system_type cpuset_fs_type = { | |||
| 413 | * | 413 | * |
| 414 | * | 414 | * |
| 415 | * When reading/writing to a file: | 415 | * When reading/writing to a file: |
| 416 | * - the cpuset to use in file->f_dentry->d_parent->d_fsdata | 416 | * - the cpuset to use in file->f_path.dentry->d_parent->d_fsdata |
| 417 | * - the 'cftype' of the file is file->f_dentry->d_fsdata | 417 | * - the 'cftype' of the file is file->f_path.dentry->d_fsdata |
| 418 | */ | 418 | */ |
| 419 | 419 | ||
| 420 | struct cftype { | 420 | struct cftype { |
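The comment above states the convention the cpuset filesystem relies on: the parent directory dentry's d_fsdata carries the cpuset, and the file dentry's d_fsdata carries the cftype. A minimal userspace model of the __d_cs()/__d_cft() helpers used in the hunks below — an assumption about their shape, not a copy of the kernel definitions:

```c
#include <stdio.h>

/* Userspace model of the d_fsdata convention described above (assumed
 * helper shape, not the kernel code): the directory dentry carries the
 * cpuset, the file's own dentry carries the cftype. */
struct dentry { void *d_fsdata; struct dentry *d_parent; };
struct cpuset { const char *name; };
struct cftype { const char *name; };

static struct cpuset *__d_cs(struct dentry *dir)   { return dir->d_fsdata; }
static struct cftype *__d_cft(struct dentry *file) { return file->d_fsdata; }

int main(void)
{
	struct cpuset cs  = { "top_cpuset" };
	struct cftype cft = { "tasks" };
	struct dentry dir  = { &cs, NULL };
	struct dentry file = { &cft, &dir };

	/* Mirrors the file->f_path.dentry->d_parent / f_path.dentry use below. */
	printf("cpuset=%s cftype=%s\n",
	       __d_cs(file.d_parent)->name, __d_cft(&file)->name);
	return 0;
}
```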
| @@ -1284,8 +1284,8 @@ static ssize_t cpuset_common_file_write(struct file *file, | |||
| 1284 | const char __user *userbuf, | 1284 | const char __user *userbuf, |
| 1285 | size_t nbytes, loff_t *unused_ppos) | 1285 | size_t nbytes, loff_t *unused_ppos) |
| 1286 | { | 1286 | { |
| 1287 | struct cpuset *cs = __d_cs(file->f_dentry->d_parent); | 1287 | struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); |
| 1288 | struct cftype *cft = __d_cft(file->f_dentry); | 1288 | struct cftype *cft = __d_cft(file->f_path.dentry); |
| 1289 | cpuset_filetype_t type = cft->private; | 1289 | cpuset_filetype_t type = cft->private; |
| 1290 | char *buffer; | 1290 | char *buffer; |
| 1291 | char *pathbuf = NULL; | 1291 | char *pathbuf = NULL; |
| @@ -1367,7 +1367,7 @@ static ssize_t cpuset_file_write(struct file *file, const char __user *buf, | |||
| 1367 | size_t nbytes, loff_t *ppos) | 1367 | size_t nbytes, loff_t *ppos) |
| 1368 | { | 1368 | { |
| 1369 | ssize_t retval = 0; | 1369 | ssize_t retval = 0; |
| 1370 | struct cftype *cft = __d_cft(file->f_dentry); | 1370 | struct cftype *cft = __d_cft(file->f_path.dentry); |
| 1371 | if (!cft) | 1371 | if (!cft) |
| 1372 | return -ENODEV; | 1372 | return -ENODEV; |
| 1373 | 1373 | ||
| @@ -1417,8 +1417,8 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) | |||
| 1417 | static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, | 1417 | static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, |
| 1418 | size_t nbytes, loff_t *ppos) | 1418 | size_t nbytes, loff_t *ppos) |
| 1419 | { | 1419 | { |
| 1420 | struct cftype *cft = __d_cft(file->f_dentry); | 1420 | struct cftype *cft = __d_cft(file->f_path.dentry); |
| 1421 | struct cpuset *cs = __d_cs(file->f_dentry->d_parent); | 1421 | struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); |
| 1422 | cpuset_filetype_t type = cft->private; | 1422 | cpuset_filetype_t type = cft->private; |
| 1423 | char *page; | 1423 | char *page; |
| 1424 | ssize_t retval = 0; | 1424 | ssize_t retval = 0; |
| @@ -1476,7 +1476,7 @@ static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbyt | |||
| 1476 | loff_t *ppos) | 1476 | loff_t *ppos) |
| 1477 | { | 1477 | { |
| 1478 | ssize_t retval = 0; | 1478 | ssize_t retval = 0; |
| 1479 | struct cftype *cft = __d_cft(file->f_dentry); | 1479 | struct cftype *cft = __d_cft(file->f_path.dentry); |
| 1480 | if (!cft) | 1480 | if (!cft) |
| 1481 | return -ENODEV; | 1481 | return -ENODEV; |
| 1482 | 1482 | ||
| @@ -1498,7 +1498,7 @@ static int cpuset_file_open(struct inode *inode, struct file *file) | |||
| 1498 | if (err) | 1498 | if (err) |
| 1499 | return err; | 1499 | return err; |
| 1500 | 1500 | ||
| 1501 | cft = __d_cft(file->f_dentry); | 1501 | cft = __d_cft(file->f_path.dentry); |
| 1502 | if (!cft) | 1502 | if (!cft) |
| 1503 | return -ENODEV; | 1503 | return -ENODEV; |
| 1504 | if (cft->open) | 1504 | if (cft->open) |
| @@ -1511,7 +1511,7 @@ static int cpuset_file_open(struct inode *inode, struct file *file) | |||
| 1511 | 1511 | ||
| 1512 | static int cpuset_file_release(struct inode *inode, struct file *file) | 1512 | static int cpuset_file_release(struct inode *inode, struct file *file) |
| 1513 | { | 1513 | { |
| 1514 | struct cftype *cft = __d_cft(file->f_dentry); | 1514 | struct cftype *cft = __d_cft(file->f_path.dentry); |
| 1515 | if (cft->release) | 1515 | if (cft->release) |
| 1516 | return cft->release(inode, file); | 1516 | return cft->release(inode, file); |
| 1517 | return 0; | 1517 | return 0; |
| @@ -1700,7 +1700,7 @@ static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids) | |||
| 1700 | */ | 1700 | */ |
| 1701 | static int cpuset_tasks_open(struct inode *unused, struct file *file) | 1701 | static int cpuset_tasks_open(struct inode *unused, struct file *file) |
| 1702 | { | 1702 | { |
| 1703 | struct cpuset *cs = __d_cs(file->f_dentry->d_parent); | 1703 | struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); |
| 1704 | struct ctr_struct *ctr; | 1704 | struct ctr_struct *ctr; |
| 1705 | pid_t *pidarray; | 1705 | pid_t *pidarray; |
| 1706 | int npids; | 1706 | int npids; |
| @@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
| 2342 | } | 2342 | } |
| 2343 | 2343 | ||
| 2344 | /** | 2344 | /** |
| 2345 | * cpuset_zone_allowed - Can we allocate memory on zone z's memory node? | 2345 | * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node? |
| 2346 | * @z: is this zone on an allowed node? | 2346 | * @z: is this zone on an allowed node? |
| 2347 | * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL) | 2347 | * @gfp_mask: memory allocation flags |
| 2348 | * | 2348 | * |
| 2349 | * If we're in interrupt, yes, we can always allocate. If zone | 2349 | * If we're in interrupt, yes, we can always allocate. If |
| 2350 | * __GFP_THISNODE is set, yes, we can always allocate. If zone | ||
| 2350 | * z's node is in our tasks mems_allowed, yes. If it's not a | 2351 | * z's node is in our tasks mems_allowed, yes. If it's not a |
| 2351 | * __GFP_HARDWALL request and this zone's nodes is in the nearest | 2352 | * __GFP_HARDWALL request and this zone's nodes is in the nearest |
| 2352 | * mem_exclusive cpuset ancestor to this tasks cpuset, yes. | 2353 | * mem_exclusive cpuset ancestor to this tasks cpuset, yes. |
| 2353 | * Otherwise, no. | 2354 | * Otherwise, no. |
| 2354 | * | 2355 | * |
| 2356 | * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall() | ||
| 2357 | * reduces to cpuset_zone_allowed_hardwall(). Otherwise, | ||
| 2358 | * cpuset_zone_allowed_softwall() might sleep, and might allow a zone | ||
| 2359 | * from an enclosing cpuset. | ||
| 2360 | * | ||
| 2361 | * cpuset_zone_allowed_hardwall() only handles the simpler case of | ||
| 2362 | * hardwall cpusets, and never sleeps. | ||
| 2363 | * | ||
| 2364 | * The __GFP_THISNODE placement logic is really handled elsewhere, | ||
| 2365 | * by forcibly using a zonelist starting at a specified node, and by | ||
| 2366 | * (in get_page_from_freelist()) refusing to consider the zones for | ||
| 2367 | * any node on the zonelist except the first. By the time any such | ||
| 2368 | * calls get to this routine, we should just shut up and say 'yes'. | ||
| 2369 | * | ||
| 2355 | * GFP_USER allocations are marked with the __GFP_HARDWALL bit, | 2370 | * GFP_USER allocations are marked with the __GFP_HARDWALL bit, |
| 2356 | * and do not allow allocations outside the current tasks cpuset. | 2371 | * and do not allow allocations outside the current tasks cpuset. |
| 2357 | * GFP_KERNEL allocations are not so marked, so can escape to the | 2372 | * GFP_KERNEL allocations are not so marked, so can escape to the |
| 2358 | * nearest mem_exclusive ancestor cpuset. | 2373 | * nearest enclosing mem_exclusive ancestor cpuset. |
| 2359 | * | 2374 | * |
| 2360 | * Scanning up parent cpusets requires callback_mutex. The __alloc_pages() | 2375 | * Scanning up parent cpusets requires callback_mutex. The |
| 2361 | * routine only calls here with __GFP_HARDWALL bit _not_ set if | 2376 | * __alloc_pages() routine only calls here with __GFP_HARDWALL bit |
| 2362 | * it's a GFP_KERNEL allocation, and all nodes in the current tasks | 2377 | * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the |
| 2363 | * mems_allowed came up empty on the first pass over the zonelist. | 2378 | * current tasks mems_allowed came up empty on the first pass over |
| 2364 | * So only GFP_KERNEL allocations, if all nodes in the cpuset are | 2379 | * the zonelist. So only GFP_KERNEL allocations, if all nodes in the |
| 2365 | * short of memory, might require taking the callback_mutex mutex. | 2380 | * cpuset are short of memory, might require taking the callback_mutex |
| 2381 | * mutex. | ||
| 2366 | * | 2382 | * |
| 2367 | * The first call here from mm/page_alloc:get_page_from_freelist() | 2383 | * The first call here from mm/page_alloc:get_page_from_freelist() |
| 2368 | * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so | 2384 | * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, |
| 2369 | * no allocation on a node outside the cpuset is allowed (unless in | 2385 | * so no allocation on a node outside the cpuset is allowed (unless |
| 2370 | * interrupt, of course). | 2386 | * in interrupt, of course). |
| 2371 | * | 2387 | * |
| 2372 | * The second pass through get_page_from_freelist() doesn't even call | 2388 | * The second pass through get_page_from_freelist() doesn't even call |
| 2373 | * here for GFP_ATOMIC calls. For those calls, the __alloc_pages() | 2389 | * here for GFP_ATOMIC calls. For those calls, the __alloc_pages() |
| @@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
| 2380 | * GFP_USER - only nodes in current tasks mems allowed ok. | 2396 | * GFP_USER - only nodes in current tasks mems allowed ok. |
| 2381 | * | 2397 | * |
| 2382 | * Rule: | 2398 | * Rule: |
| 2383 | * Don't call cpuset_zone_allowed() if you can't sleep, unless you | 2399 | * Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you |
| 2384 | * pass in the __GFP_HARDWALL flag set in gfp_flag, which disables | 2400 | * pass in the __GFP_HARDWALL flag set in gfp_flag, which disables |
| 2385 | * the code that might scan up ancestor cpusets and sleep. | 2401 | * the code that might scan up ancestor cpusets and sleep. |
| 2386 | **/ | 2402 | */ |
| 2387 | 2403 | ||
| 2388 | int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) | 2404 | int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) |
| 2389 | { | 2405 | { |
| 2390 | int node; /* node that zone z is on */ | 2406 | int node; /* node that zone z is on */ |
| 2391 | const struct cpuset *cs; /* current cpuset ancestors */ | 2407 | const struct cpuset *cs; /* current cpuset ancestors */ |
| @@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) | |||
| 2415 | return allowed; | 2431 | return allowed; |
| 2416 | } | 2432 | } |
| 2417 | 2433 | ||
| 2434 | /* | ||
| 2435 | * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node? | ||
| 2436 | * @z: is this zone on an allowed node? | ||
| 2437 | * @gfp_mask: memory allocation flags | ||
| 2438 | * | ||
| 2439 | * If we're in interrupt, yes, we can always allocate. | ||
| 2440 | * If __GFP_THISNODE is set, yes, we can always allocate. If zone | ||
| 2441 | * z's node is in our tasks mems_allowed, yes. Otherwise, no. | ||
| 2442 | * | ||
| 2443 | * The __GFP_THISNODE placement logic is really handled elsewhere, | ||
| 2444 | * by forcibly using a zonelist starting at a specified node, and by | ||
| 2445 | * (in get_page_from_freelist()) refusing to consider the zones for | ||
| 2446 | * any node on the zonelist except the first. By the time any such | ||
| 2447 | * calls get to this routine, we should just shut up and say 'yes'. | ||
| 2448 | * | ||
| 2449 | * Unlike the cpuset_zone_allowed_softwall() variant, above, | ||
| 2450 | * this variant requires that the zone be in the current tasks | ||
| 2451 | * mems_allowed or that we're in interrupt. It does not scan up the | ||
| 2452 | * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset. | ||
| 2453 | * It never sleeps. | ||
| 2454 | */ | ||
| 2455 | |||
| 2456 | int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask) | ||
| 2457 | { | ||
| 2458 | int node; /* node that zone z is on */ | ||
| 2459 | |||
| 2460 | if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) | ||
| 2461 | return 1; | ||
| 2462 | node = zone_to_nid(z); | ||
| 2463 | if (node_isset(node, current->mems_allowed)) | ||
| 2464 | return 1; | ||
| 2465 | return 0; | ||
| 2466 | } | ||
| 2467 | |||
| 2418 | /** | 2468 | /** |
| 2419 | * cpuset_lock - lock out any changes to cpuset structures | 2469 | * cpuset_lock - lock out any changes to cpuset structures |
| 2420 | * | 2470 | * |
diff --git a/kernel/exit.c b/kernel/exit.c index 4e3f919edc48..122fadb972fc 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | #include <linux/completion.h> | 13 | #include <linux/completion.h> |
| 14 | #include <linux/personality.h> | 14 | #include <linux/personality.h> |
| 15 | #include <linux/tty.h> | 15 | #include <linux/tty.h> |
| 16 | #include <linux/namespace.h> | 16 | #include <linux/mnt_namespace.h> |
| 17 | #include <linux/key.h> | 17 | #include <linux/key.h> |
| 18 | #include <linux/security.h> | 18 | #include <linux/security.h> |
| 19 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/file.h> | 22 | #include <linux/file.h> |
| 23 | #include <linux/binfmts.h> | 23 | #include <linux/binfmts.h> |
| 24 | #include <linux/nsproxy.h> | 24 | #include <linux/nsproxy.h> |
| 25 | #include <linux/pid_namespace.h> | ||
| 25 | #include <linux/ptrace.h> | 26 | #include <linux/ptrace.h> |
| 26 | #include <linux/profile.h> | 27 | #include <linux/profile.h> |
| 27 | #include <linux/mount.h> | 28 | #include <linux/mount.h> |
| @@ -48,7 +49,6 @@ | |||
| 48 | #include <asm/mmu_context.h> | 49 | #include <asm/mmu_context.h> |
| 49 | 50 | ||
| 50 | extern void sem_exit (void); | 51 | extern void sem_exit (void); |
| 51 | extern struct task_struct *child_reaper; | ||
| 52 | 52 | ||
| 53 | static void exit_mm(struct task_struct * tsk); | 53 | static void exit_mm(struct task_struct * tsk); |
| 54 | 54 | ||
| @@ -189,21 +189,18 @@ repeat: | |||
| 189 | int session_of_pgrp(int pgrp) | 189 | int session_of_pgrp(int pgrp) |
| 190 | { | 190 | { |
| 191 | struct task_struct *p; | 191 | struct task_struct *p; |
| 192 | int sid = -1; | 192 | int sid = 0; |
| 193 | 193 | ||
| 194 | read_lock(&tasklist_lock); | 194 | read_lock(&tasklist_lock); |
| 195 | do_each_task_pid(pgrp, PIDTYPE_PGID, p) { | 195 | |
| 196 | if (p->signal->session > 0) { | 196 | p = find_task_by_pid_type(PIDTYPE_PGID, pgrp); |
| 197 | sid = p->signal->session; | 197 | if (p == NULL) |
| 198 | goto out; | 198 | p = find_task_by_pid(pgrp); |
| 199 | } | 199 | if (p != NULL) |
| 200 | } while_each_task_pid(pgrp, PIDTYPE_PGID, p); | 200 | sid = process_session(p); |
| 201 | p = find_task_by_pid(pgrp); | 201 | |
| 202 | if (p) | ||
| 203 | sid = p->signal->session; | ||
| 204 | out: | ||
| 205 | read_unlock(&tasklist_lock); | 202 | read_unlock(&tasklist_lock); |
| 206 | 203 | ||
| 207 | return sid; | 204 | return sid; |
| 208 | } | 205 | } |
| 209 | 206 | ||
| @@ -225,8 +222,8 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task) | |||
| 225 | || p->exit_state | 222 | || p->exit_state |
| 226 | || is_init(p->real_parent)) | 223 | || is_init(p->real_parent)) |
| 227 | continue; | 224 | continue; |
| 228 | if (process_group(p->real_parent) != pgrp | 225 | if (process_group(p->real_parent) != pgrp && |
| 229 | && p->real_parent->signal->session == p->signal->session) { | 226 | process_session(p->real_parent) == process_session(p)) { |
| 230 | ret = 0; | 227 | ret = 0; |
| 231 | break; | 228 | break; |
| 232 | } | 229 | } |
| @@ -260,7 +257,8 @@ static int has_stopped_jobs(int pgrp) | |||
| 260 | } | 257 | } |
| 261 | 258 | ||
| 262 | /** | 259 | /** |
| 263 | * reparent_to_init - Reparent the calling kernel thread to the init task. | 260 | * reparent_to_init - Reparent the calling kernel thread to the init task |
| 261 | * of the pid space that the thread belongs to. | ||
| 264 | * | 262 | * |
| 265 | * If a kernel thread is launched as a result of a system call, or if | 263 | * If a kernel thread is launched as a result of a system call, or if |
| 266 | * it ever exits, it should generally reparent itself to init so that | 264 | * it ever exits, it should generally reparent itself to init so that |
| @@ -278,8 +276,8 @@ static void reparent_to_init(void) | |||
| 278 | ptrace_unlink(current); | 276 | ptrace_unlink(current); |
| 279 | /* Reparent to init */ | 277 | /* Reparent to init */ |
| 280 | remove_parent(current); | 278 | remove_parent(current); |
| 281 | current->parent = child_reaper; | 279 | current->parent = child_reaper(current); |
| 282 | current->real_parent = child_reaper; | 280 | current->real_parent = child_reaper(current); |
| 283 | add_parent(current); | 281 | add_parent(current); |
| 284 | 282 | ||
| 285 | /* Set the exit signal to SIGCHLD so we signal init on exit */ | 283 | /* Set the exit signal to SIGCHLD so we signal init on exit */ |
| @@ -302,9 +300,9 @@ void __set_special_pids(pid_t session, pid_t pgrp) | |||
| 302 | { | 300 | { |
| 303 | struct task_struct *curr = current->group_leader; | 301 | struct task_struct *curr = current->group_leader; |
| 304 | 302 | ||
| 305 | if (curr->signal->session != session) { | 303 | if (process_session(curr) != session) { |
| 306 | detach_pid(curr, PIDTYPE_SID); | 304 | detach_pid(curr, PIDTYPE_SID); |
| 307 | curr->signal->session = session; | 305 | set_signal_session(curr->signal, session); |
| 308 | attach_pid(curr, PIDTYPE_SID, session); | 306 | attach_pid(curr, PIDTYPE_SID, session); |
| 309 | } | 307 | } |
| 310 | if (process_group(curr) != pgrp) { | 308 | if (process_group(curr) != pgrp) { |
| @@ -314,7 +312,7 @@ void __set_special_pids(pid_t session, pid_t pgrp) | |||
| 314 | } | 312 | } |
| 315 | } | 313 | } |
| 316 | 314 | ||
| 317 | void set_special_pids(pid_t session, pid_t pgrp) | 315 | static void set_special_pids(pid_t session, pid_t pgrp) |
| 318 | { | 316 | { |
| 319 | write_lock_irq(&tasklist_lock); | 317 | write_lock_irq(&tasklist_lock); |
| 320 | __set_special_pids(session, pgrp); | 318 | __set_special_pids(session, pgrp); |
| @@ -384,9 +382,7 @@ void daemonize(const char *name, ...) | |||
| 384 | exit_mm(current); | 382 | exit_mm(current); |
| 385 | 383 | ||
| 386 | set_special_pids(1, 1); | 384 | set_special_pids(1, 1); |
| 387 | mutex_lock(&tty_mutex); | 385 | proc_clear_tty(current); |
| 388 | current->signal->tty = NULL; | ||
| 389 | mutex_unlock(&tty_mutex); | ||
| 390 | 386 | ||
| 391 | /* Block and flush all signals */ | 387 | /* Block and flush all signals */ |
| 392 | sigfillset(&blocked); | 388 | sigfillset(&blocked); |
| @@ -429,7 +425,7 @@ static void close_files(struct files_struct * files) | |||
| 429 | for (;;) { | 425 | for (;;) { |
| 430 | unsigned long set; | 426 | unsigned long set; |
| 431 | i = j * __NFDBITS; | 427 | i = j * __NFDBITS; |
| 432 | if (i >= fdt->max_fdset || i >= fdt->max_fds) | 428 | if (i >= fdt->max_fds) |
| 433 | break; | 429 | break; |
| 434 | set = fdt->open_fds->fds_bits[j++]; | 430 | set = fdt->open_fds->fds_bits[j++]; |
| 435 | while (set) { | 431 | while (set) { |
| @@ -470,11 +466,9 @@ void fastcall put_files_struct(struct files_struct *files) | |||
| 470 | * you can free files immediately. | 466 | * you can free files immediately. |
| 471 | */ | 467 | */ |
| 472 | fdt = files_fdtable(files); | 468 | fdt = files_fdtable(files); |
| 473 | if (fdt == &files->fdtab) | 469 | if (fdt != &files->fdtab) |
| 474 | fdt->free_files = files; | ||
| 475 | else | ||
| 476 | kmem_cache_free(files_cachep, files); | 470 | kmem_cache_free(files_cachep, files); |
| 477 | free_fdtable(fdt); | 471 | call_rcu(&fdt->rcu, free_fdtable_rcu); |
| 478 | } | 472 | } |
| 479 | } | 473 | } |
| 480 | 474 | ||
| @@ -649,10 +643,11 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) | |||
| 649 | * outside, so the child pgrp is now orphaned. | 643 | * outside, so the child pgrp is now orphaned. |
| 650 | */ | 644 | */ |
| 651 | if ((process_group(p) != process_group(father)) && | 645 | if ((process_group(p) != process_group(father)) && |
| 652 | (p->signal->session == father->signal->session)) { | 646 | (process_session(p) == process_session(father))) { |
| 653 | int pgrp = process_group(p); | 647 | int pgrp = process_group(p); |
| 654 | 648 | ||
| 655 | if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) { | 649 | if (will_become_orphaned_pgrp(pgrp, NULL) && |
| 650 | has_stopped_jobs(pgrp)) { | ||
| 656 | __kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp); | 651 | __kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp); |
| 657 | __kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp); | 652 | __kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp); |
| 658 | } | 653 | } |
| @@ -663,7 +658,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) | |||
| 663 | * When we die, we re-parent all our children. | 658 | * When we die, we re-parent all our children. |
| 664 | * Try to give them to another thread in our thread | 659 | * Try to give them to another thread in our thread |
| 665 | * group, and if no such member exists, give it to | 660 | * group, and if no such member exists, give it to |
| 666 | * the global child reaper process (ie "init") | 661 | * the child reaper process (ie "init") in our pid |
| 662 | * space. | ||
| 667 | */ | 663 | */ |
| 668 | static void | 664 | static void |
| 669 | forget_original_parent(struct task_struct *father, struct list_head *to_release) | 665 | forget_original_parent(struct task_struct *father, struct list_head *to_release) |
| @@ -674,7 +670,7 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release) | |||
| 674 | do { | 670 | do { |
| 675 | reaper = next_thread(reaper); | 671 | reaper = next_thread(reaper); |
| 676 | if (reaper == father) { | 672 | if (reaper == father) { |
| 677 | reaper = child_reaper; | 673 | reaper = child_reaper(father); |
| 678 | break; | 674 | break; |
| 679 | } | 675 | } |
| 680 | } while (reaper->exit_state); | 676 | } while (reaper->exit_state); |
| @@ -786,7 +782,7 @@ static void exit_notify(struct task_struct *tsk) | |||
| 786 | t = tsk->real_parent; | 782 | t = tsk->real_parent; |
| 787 | 783 | ||
| 788 | if ((process_group(t) != process_group(tsk)) && | 784 | if ((process_group(t) != process_group(tsk)) && |
| 789 | (t->signal->session == tsk->signal->session) && | 785 | (process_session(t) == process_session(tsk)) && |
| 790 | will_become_orphaned_pgrp(process_group(tsk), tsk) && | 786 | will_become_orphaned_pgrp(process_group(tsk), tsk) && |
| 791 | has_stopped_jobs(process_group(tsk))) { | 787 | has_stopped_jobs(process_group(tsk))) { |
| 792 | __kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk)); | 788 | __kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk)); |
| @@ -860,8 +856,13 @@ fastcall NORET_TYPE void do_exit(long code) | |||
| 860 | panic("Aiee, killing interrupt handler!"); | 856 | panic("Aiee, killing interrupt handler!"); |
| 861 | if (unlikely(!tsk->pid)) | 857 | if (unlikely(!tsk->pid)) |
| 862 | panic("Attempted to kill the idle task!"); | 858 | panic("Attempted to kill the idle task!"); |
| 863 | if (unlikely(tsk == child_reaper)) | 859 | if (unlikely(tsk == child_reaper(tsk))) { |
| 864 | panic("Attempted to kill init!"); | 860 | if (tsk->nsproxy->pid_ns != &init_pid_ns) |
| 861 | tsk->nsproxy->pid_ns->child_reaper = init_pid_ns.child_reaper; | ||
| 862 | else | ||
| 863 | panic("Attempted to kill init!"); | ||
| 864 | } | ||
| 865 | |||
| 865 | 866 | ||
| 866 | if (unlikely(current->ptrace & PT_TRACE_EXIT)) { | 867 | if (unlikely(current->ptrace & PT_TRACE_EXIT)) { |
| 867 | current->ptrace_message = code; | 868 | current->ptrace_message = code; |
diff --git a/kernel/fork.c b/kernel/fork.c index 7f2e31ba33af..fc723e595cd5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
| 19 | #include <linux/vmalloc.h> | 19 | #include <linux/vmalloc.h> |
| 20 | #include <linux/completion.h> | 20 | #include <linux/completion.h> |
| 21 | #include <linux/namespace.h> | 21 | #include <linux/mnt_namespace.h> |
| 22 | #include <linux/personality.h> | 22 | #include <linux/personality.h> |
| 23 | #include <linux/mempolicy.h> | 23 | #include <linux/mempolicy.h> |
| 24 | #include <linux/sem.h> | 24 | #include <linux/sem.h> |
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <linux/syscalls.h> | 36 | #include <linux/syscalls.h> |
| 37 | #include <linux/jiffies.h> | 37 | #include <linux/jiffies.h> |
| 38 | #include <linux/futex.h> | 38 | #include <linux/futex.h> |
| 39 | #include <linux/task_io_accounting_ops.h> | ||
| 39 | #include <linux/rcupdate.h> | 40 | #include <linux/rcupdate.h> |
| 40 | #include <linux/ptrace.h> | 41 | #include <linux/ptrace.h> |
| 41 | #include <linux/mount.h> | 42 | #include <linux/mount.h> |
| @@ -202,7 +203,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
| 202 | struct mempolicy *pol; | 203 | struct mempolicy *pol; |
| 203 | 204 | ||
| 204 | down_write(&oldmm->mmap_sem); | 205 | down_write(&oldmm->mmap_sem); |
| 205 | flush_cache_mm(oldmm); | 206 | flush_cache_dup_mm(oldmm); |
| 206 | /* | 207 | /* |
| 207 | * Not linked in yet - no deadlock potential: | 208 | * Not linked in yet - no deadlock potential: |
| 208 | */ | 209 | */ |
| @@ -252,7 +253,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
| 252 | anon_vma_link(tmp); | 253 | anon_vma_link(tmp); |
| 253 | file = tmp->vm_file; | 254 | file = tmp->vm_file; |
| 254 | if (file) { | 255 | if (file) { |
| 255 | struct inode *inode = file->f_dentry->d_inode; | 256 | struct inode *inode = file->f_path.dentry->d_inode; |
| 256 | get_file(file); | 257 | get_file(file); |
| 257 | if (tmp->vm_flags & VM_DENYWRITE) | 258 | if (tmp->vm_flags & VM_DENYWRITE) |
| 258 | atomic_dec(&inode->i_writecount); | 259 | atomic_dec(&inode->i_writecount); |
| @@ -613,7 +614,7 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) | |||
| 613 | 614 | ||
| 614 | static int count_open_files(struct fdtable *fdt) | 615 | static int count_open_files(struct fdtable *fdt) |
| 615 | { | 616 | { |
| 616 | int size = fdt->max_fdset; | 617 | int size = fdt->max_fds; |
| 617 | int i; | 618 | int i; |
| 618 | 619 | ||
| 619 | /* Find the last open fd */ | 620 | /* Find the last open fd */ |
| @@ -640,12 +641,10 @@ static struct files_struct *alloc_files(void) | |||
| 640 | newf->next_fd = 0; | 641 | newf->next_fd = 0; |
| 641 | fdt = &newf->fdtab; | 642 | fdt = &newf->fdtab; |
| 642 | fdt->max_fds = NR_OPEN_DEFAULT; | 643 | fdt->max_fds = NR_OPEN_DEFAULT; |
| 643 | fdt->max_fdset = EMBEDDED_FD_SET_SIZE; | ||
| 644 | fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; | 644 | fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; |
| 645 | fdt->open_fds = (fd_set *)&newf->open_fds_init; | 645 | fdt->open_fds = (fd_set *)&newf->open_fds_init; |
| 646 | fdt->fd = &newf->fd_array[0]; | 646 | fdt->fd = &newf->fd_array[0]; |
| 647 | INIT_RCU_HEAD(&fdt->rcu); | 647 | INIT_RCU_HEAD(&fdt->rcu); |
| 648 | fdt->free_files = NULL; | ||
| 649 | fdt->next = NULL; | 648 | fdt->next = NULL; |
| 650 | rcu_assign_pointer(newf->fdt, fdt); | 649 | rcu_assign_pointer(newf->fdt, fdt); |
| 651 | out: | 650 | out: |
| @@ -661,7 +660,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 661 | { | 660 | { |
| 662 | struct files_struct *newf; | 661 | struct files_struct *newf; |
| 663 | struct file **old_fds, **new_fds; | 662 | struct file **old_fds, **new_fds; |
| 664 | int open_files, size, i, expand; | 663 | int open_files, size, i; |
| 665 | struct fdtable *old_fdt, *new_fdt; | 664 | struct fdtable *old_fdt, *new_fdt; |
| 666 | 665 | ||
| 667 | *errorp = -ENOMEM; | 666 | *errorp = -ENOMEM; |
| @@ -672,25 +671,14 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 672 | spin_lock(&oldf->file_lock); | 671 | spin_lock(&oldf->file_lock); |
| 673 | old_fdt = files_fdtable(oldf); | 672 | old_fdt = files_fdtable(oldf); |
| 674 | new_fdt = files_fdtable(newf); | 673 | new_fdt = files_fdtable(newf); |
| 675 | size = old_fdt->max_fdset; | ||
| 676 | open_files = count_open_files(old_fdt); | 674 | open_files = count_open_files(old_fdt); |
| 677 | expand = 0; | ||
| 678 | 675 | ||
| 679 | /* | 676 | /* |
| 680 | * Check whether we need to allocate a larger fd array or fd set. | 677 | * Check whether we need to allocate a larger fd array and fd set. |
| 681 | * Note: we're not a clone task, so the open count won't change. | 678 | * Note: we're not a clone task, so the open count won't change. |
| 682 | */ | 679 | */ |
| 683 | if (open_files > new_fdt->max_fdset) { | ||
| 684 | new_fdt->max_fdset = 0; | ||
| 685 | expand = 1; | ||
| 686 | } | ||
| 687 | if (open_files > new_fdt->max_fds) { | 680 | if (open_files > new_fdt->max_fds) { |
| 688 | new_fdt->max_fds = 0; | 681 | new_fdt->max_fds = 0; |
| 689 | expand = 1; | ||
| 690 | } | ||
| 691 | |||
| 692 | /* if the old fdset gets grown now, we'll only copy up to "size" fds */ | ||
| 693 | if (expand) { | ||
| 694 | spin_unlock(&oldf->file_lock); | 682 | spin_unlock(&oldf->file_lock); |
| 695 | spin_lock(&newf->file_lock); | 683 | spin_lock(&newf->file_lock); |
| 696 | *errorp = expand_files(newf, open_files-1); | 684 | *errorp = expand_files(newf, open_files-1); |
| @@ -710,8 +698,10 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 710 | old_fds = old_fdt->fd; | 698 | old_fds = old_fdt->fd; |
| 711 | new_fds = new_fdt->fd; | 699 | new_fds = new_fdt->fd; |
| 712 | 700 | ||
| 713 | memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8); | 701 | memcpy(new_fdt->open_fds->fds_bits, |
| 714 | memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8); | 702 | old_fdt->open_fds->fds_bits, open_files/8); |
| 703 | memcpy(new_fdt->close_on_exec->fds_bits, | ||
| 704 | old_fdt->close_on_exec->fds_bits, open_files/8); | ||
| 715 | 705 | ||
| 716 | for (i = open_files; i != 0; i--) { | 706 | for (i = open_files; i != 0; i--) { |
| 717 | struct file *f = *old_fds++; | 707 | struct file *f = *old_fds++; |
| @@ -736,22 +726,19 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 736 | /* This is long word aligned thus could use a optimized version */ | 726 | /* This is long word aligned thus could use a optimized version */ |
| 737 | memset(new_fds, 0, size); | 727 | memset(new_fds, 0, size); |
| 738 | 728 | ||
| 739 | if (new_fdt->max_fdset > open_files) { | 729 | if (new_fdt->max_fds > open_files) { |
| 740 | int left = (new_fdt->max_fdset-open_files)/8; | 730 | int left = (new_fdt->max_fds-open_files)/8; |
| 741 | int start = open_files / (8 * sizeof(unsigned long)); | 731 | int start = open_files / (8 * sizeof(unsigned long)); |
| 742 | 732 | ||
| 743 | memset(&new_fdt->open_fds->fds_bits[start], 0, left); | 733 | memset(&new_fdt->open_fds->fds_bits[start], 0, left); |
| 744 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); | 734 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); |
| 745 | } | 735 | } |
| 746 | 736 | ||
| 747 | out: | ||
| 748 | return newf; | 737 | return newf; |
| 749 | 738 | ||
| 750 | out_release: | 739 | out_release: |
| 751 | free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); | ||
| 752 | free_fdset (new_fdt->open_fds, new_fdt->max_fdset); | ||
| 753 | free_fd_array(new_fdt->fd, new_fdt->max_fds); | ||
| 754 | kmem_cache_free(files_cachep, newf); | 740 | kmem_cache_free(files_cachep, newf); |
| 741 | out: | ||
| 755 | return NULL; | 742 | return NULL; |
| 756 | } | 743 | } |
| 757 | 744 | ||
| @@ -1055,6 +1042,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1055 | p->wchar = 0; /* I/O counter: bytes written */ | 1042 | p->wchar = 0; /* I/O counter: bytes written */ |
| 1056 | p->syscr = 0; /* I/O counter: read syscalls */ | 1043 | p->syscr = 0; /* I/O counter: read syscalls */ |
| 1057 | p->syscw = 0; /* I/O counter: write syscalls */ | 1044 | p->syscw = 0; /* I/O counter: write syscalls */ |
| 1045 | task_io_accounting_init(p); | ||
| 1058 | acct_clear_integrals(p); | 1046 | acct_clear_integrals(p); |
| 1059 | 1047 | ||
| 1060 | p->it_virt_expires = cputime_zero; | 1048 | p->it_virt_expires = cputime_zero; |
| @@ -1259,9 +1247,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1259 | if (thread_group_leader(p)) { | 1247 | if (thread_group_leader(p)) { |
| 1260 | p->signal->tty = current->signal->tty; | 1248 | p->signal->tty = current->signal->tty; |
| 1261 | p->signal->pgrp = process_group(current); | 1249 | p->signal->pgrp = process_group(current); |
| 1262 | p->signal->session = current->signal->session; | 1250 | set_signal_session(p->signal, process_session(current)); |
| 1263 | attach_pid(p, PIDTYPE_PGID, process_group(p)); | 1251 | attach_pid(p, PIDTYPE_PGID, process_group(p)); |
| 1264 | attach_pid(p, PIDTYPE_SID, p->signal->session); | 1252 | attach_pid(p, PIDTYPE_SID, process_session(p)); |
| 1265 | 1253 | ||
| 1266 | list_add_tail_rcu(&p->tasks, &init_task.tasks); | 1254 | list_add_tail_rcu(&p->tasks, &init_task.tasks); |
| 1267 | __get_cpu_var(process_counts)++; | 1255 | __get_cpu_var(process_counts)++; |
| @@ -1525,17 +1513,18 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) | |||
| 1525 | } | 1513 | } |
| 1526 | 1514 | ||
| 1527 | /* | 1515 | /* |
| 1528 | * Unshare the namespace structure if it is being shared | 1516 | * Unshare the mnt_namespace structure if it is being shared |
| 1529 | */ | 1517 | */ |
| 1530 | static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs) | 1518 | static int unshare_mnt_namespace(unsigned long unshare_flags, |
| 1519 | struct mnt_namespace **new_nsp, struct fs_struct *new_fs) | ||
| 1531 | { | 1520 | { |
| 1532 | struct namespace *ns = current->nsproxy->namespace; | 1521 | struct mnt_namespace *ns = current->nsproxy->mnt_ns; |
| 1533 | 1522 | ||
| 1534 | if ((unshare_flags & CLONE_NEWNS) && ns) { | 1523 | if ((unshare_flags & CLONE_NEWNS) && ns) { |
| 1535 | if (!capable(CAP_SYS_ADMIN)) | 1524 | if (!capable(CAP_SYS_ADMIN)) |
| 1536 | return -EPERM; | 1525 | return -EPERM; |
| 1537 | 1526 | ||
| 1538 | *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs); | 1527 | *new_nsp = dup_mnt_ns(current, new_fs ? new_fs : current->fs); |
| 1539 | if (!*new_nsp) | 1528 | if (!*new_nsp) |
| 1540 | return -ENOMEM; | 1529 | return -ENOMEM; |
| 1541 | } | 1530 | } |
| @@ -1544,15 +1533,13 @@ static int unshare_namespace(unsigned long unshare_flags, struct namespace **new | |||
| 1544 | } | 1533 | } |
| 1545 | 1534 | ||
| 1546 | /* | 1535 | /* |
| 1547 | * Unsharing of sighand for tasks created with CLONE_SIGHAND is not | 1536 | * Unsharing of sighand is not supported yet |
| 1548 | * supported yet | ||
| 1549 | */ | 1537 | */ |
| 1550 | static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) | 1538 | static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) |
| 1551 | { | 1539 | { |
| 1552 | struct sighand_struct *sigh = current->sighand; | 1540 | struct sighand_struct *sigh = current->sighand; |
| 1553 | 1541 | ||
| 1554 | if ((unshare_flags & CLONE_SIGHAND) && | 1542 | if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1) |
| 1555 | (sigh && atomic_read(&sigh->count) > 1)) | ||
| 1556 | return -EINVAL; | 1543 | return -EINVAL; |
| 1557 | else | 1544 | else |
| 1558 | return 0; | 1545 | return 0; |
| @@ -1625,8 +1612,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
| 1625 | { | 1612 | { |
| 1626 | int err = 0; | 1613 | int err = 0; |
| 1627 | struct fs_struct *fs, *new_fs = NULL; | 1614 | struct fs_struct *fs, *new_fs = NULL; |
| 1628 | struct namespace *ns, *new_ns = NULL; | 1615 | struct mnt_namespace *ns, *new_ns = NULL; |
| 1629 | struct sighand_struct *sigh, *new_sigh = NULL; | 1616 | struct sighand_struct *new_sigh = NULL; |
| 1630 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; | 1617 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; |
| 1631 | struct files_struct *fd, *new_fd = NULL; | 1618 | struct files_struct *fd, *new_fd = NULL; |
| 1632 | struct sem_undo_list *new_ulist = NULL; | 1619 | struct sem_undo_list *new_ulist = NULL; |
| @@ -1647,7 +1634,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
| 1647 | goto bad_unshare_out; | 1634 | goto bad_unshare_out; |
| 1648 | if ((err = unshare_fs(unshare_flags, &new_fs))) | 1635 | if ((err = unshare_fs(unshare_flags, &new_fs))) |
| 1649 | goto bad_unshare_cleanup_thread; | 1636 | goto bad_unshare_cleanup_thread; |
| 1650 | if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs))) | 1637 | if ((err = unshare_mnt_namespace(unshare_flags, &new_ns, new_fs))) |
| 1651 | goto bad_unshare_cleanup_fs; | 1638 | goto bad_unshare_cleanup_fs; |
| 1652 | if ((err = unshare_sighand(unshare_flags, &new_sigh))) | 1639 | if ((err = unshare_sighand(unshare_flags, &new_sigh))) |
| 1653 | goto bad_unshare_cleanup_ns; | 1640 | goto bad_unshare_cleanup_ns; |
| @@ -1671,7 +1658,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
| 1671 | } | 1658 | } |
| 1672 | } | 1659 | } |
| 1673 | 1660 | ||
| 1674 | if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist || | 1661 | if (new_fs || new_ns || new_mm || new_fd || new_ulist || |
| 1675 | new_uts || new_ipc) { | 1662 | new_uts || new_ipc) { |
| 1676 | 1663 | ||
| 1677 | task_lock(current); | 1664 | task_lock(current); |
| @@ -1688,17 +1675,11 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
| 1688 | } | 1675 | } |
| 1689 | 1676 | ||
| 1690 | if (new_ns) { | 1677 | if (new_ns) { |
| 1691 | ns = current->nsproxy->namespace; | 1678 | ns = current->nsproxy->mnt_ns; |
| 1692 | current->nsproxy->namespace = new_ns; | 1679 | current->nsproxy->mnt_ns = new_ns; |
| 1693 | new_ns = ns; | 1680 | new_ns = ns; |
| 1694 | } | 1681 | } |
| 1695 | 1682 | ||
| 1696 | if (new_sigh) { | ||
| 1697 | sigh = current->sighand; | ||
| 1698 | rcu_assign_pointer(current->sighand, new_sigh); | ||
| 1699 | new_sigh = sigh; | ||
| 1700 | } | ||
| 1701 | |||
| 1702 | if (new_mm) { | 1683 | if (new_mm) { |
| 1703 | mm = current->mm; | 1684 | mm = current->mm; |
| 1704 | active_mm = current->active_mm; | 1685 | active_mm = current->active_mm; |
| @@ -1756,7 +1737,7 @@ bad_unshare_cleanup_sigh: | |||
| 1756 | 1737 | ||
| 1757 | bad_unshare_cleanup_ns: | 1738 | bad_unshare_cleanup_ns: |
| 1758 | if (new_ns) | 1739 | if (new_ns) |
| 1759 | put_namespace(new_ns); | 1740 | put_mnt_ns(new_ns); |
| 1760 | 1741 | ||
| 1761 | bad_unshare_cleanup_fs: | 1742 | bad_unshare_cleanup_fs: |
| 1762 | if (new_fs) | 1743 | if (new_fs) |
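With max_fdset gone, a single max_fds bound covers both the fd array and the fd bitmaps, so count_open_files() only needs to find the highest non-zero word of open_fds to know how many slots dup_fd() must copy. A userspace sketch of that scan; the array here is a stand-in for fdt->open_fds->fds_bits, not the kernel type:

```c
#include <stdio.h>
#include <string.h>

#define BITS_PER_LONG	(8 * (int)sizeof(unsigned long))
#define MODEL_MAX_FDS	256

/* Stand-in for the open-fd bitmap (assumption about the model, not fdtable). */
static unsigned long open_fds[MODEL_MAX_FDS / (8 * sizeof(unsigned long))];

/* Find the last open fd, rounded up to a whole bitmap word -- this is
 * the number of fd slots dup_fd() ends up copying. */
static int count_open_files(int max_fds)
{
	int i;

	for (i = max_fds / BITS_PER_LONG; i > 0; ) {
		if (open_fds[--i])
			break;
	}
	return (i + 1) * BITS_PER_LONG;
}

int main(void)
{
	memset(open_fds, 0, sizeof(open_fds));
	open_fds[0] = 0x7;	/* fds 0, 1, 2 open */
	open_fds[1] = 0x1;	/* one fd open in the second bitmap word */
	printf("slots to copy: %d\n", count_open_files(MODEL_MAX_FDS));
	return 0;
}
```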
diff --git a/kernel/futex.c b/kernel/futex.c index 95989a3b4168..5a737de857d3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -166,7 +166,7 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2) | |||
| 166 | /* | 166 | /* |
| 167 | * Get parameters which are the keys for a futex. | 167 | * Get parameters which are the keys for a futex. |
| 168 | * | 168 | * |
| 169 | * For shared mappings, it's (page->index, vma->vm_file->f_dentry->d_inode, | 169 | * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode, |
| 170 | * offset_within_page). For private mappings, it's (uaddr, current->mm). | 170 | * offset_within_page). For private mappings, it's (uaddr, current->mm). |
| 171 | * We can usually work out the index without swapping in the page. | 171 | * We can usually work out the index without swapping in the page. |
| 172 | * | 172 | * |
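The comment above gives the two key forms: (inode, page index, offset-in-page) for shared mappings and (mm, uaddr) for private ones, compared through the overlapping "both" view. A userspace model of that union, with field names taken from how the hunks use it (key->shared.inode, key->shared.pgoff, key->both.offset); the exact member types are an assumption:

```c
#include <stdio.h>

/* Model of union futex_key as the comment above describes it; member
 * types are placeholders, only the three overlapping views matter. */
union futex_key {
	struct { unsigned long pgoff;   void *inode; int offset; } shared;
	struct { unsigned long address; void *mm;    int offset; } private;
	struct { unsigned long word;    void *ptr;   int offset; } both;
};

/* Two futexes are the same when their keys match through the 'both' view. */
static int match_futex(const union futex_key *k1, const union futex_key *k2)
{
	return k1->both.word   == k2->both.word
	    && k1->both.ptr    == k2->both.ptr
	    && k1->both.offset == k2->both.offset;
}

int main(void)
{
	union futex_key a = { .shared = { 7, (void *)0x1000, 1 } };
	union futex_key b = { .shared = { 7, (void *)0x1000, 1 } };

	printf("match=%d\n", match_futex(&a, &b));
	return 0;
}
```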
| @@ -223,7 +223,7 @@ static int get_futex_key(u32 __user *uaddr, union futex_key *key) | |||
| 223 | /* | 223 | /* |
| 224 | * Linear file mappings are also simple. | 224 | * Linear file mappings are also simple. |
| 225 | */ | 225 | */ |
| 226 | key->shared.inode = vma->vm_file->f_dentry->d_inode; | 226 | key->shared.inode = vma->vm_file->f_path.dentry->d_inode; |
| 227 | key->both.offset++; /* Bit 0 of offset indicates inode-based key. */ | 227 | key->both.offset++; /* Bit 0 of offset indicates inode-based key. */ |
| 228 | if (likely(!(vma->vm_flags & VM_NONLINEAR))) { | 228 | if (likely(!(vma->vm_flags & VM_NONLINEAR))) { |
| 229 | key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) | 229 | key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) |
| @@ -1528,9 +1528,9 @@ static int futex_fd(u32 __user *uaddr, int signal) | |||
| 1528 | goto out; | 1528 | goto out; |
| 1529 | } | 1529 | } |
| 1530 | filp->f_op = &futex_fops; | 1530 | filp->f_op = &futex_fops; |
| 1531 | filp->f_vfsmnt = mntget(futex_mnt); | 1531 | filp->f_path.mnt = mntget(futex_mnt); |
| 1532 | filp->f_dentry = dget(futex_mnt->mnt_root); | 1532 | filp->f_path.dentry = dget(futex_mnt->mnt_root); |
| 1533 | filp->f_mapping = filp->f_dentry->d_inode->i_mapping; | 1533 | filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; |
| 1534 | 1534 | ||
| 1535 | if (signal) { | 1535 | if (signal) { |
| 1536 | err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1); | 1536 | err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1); |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 9a352667007c..61f5c717a8f5 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
| @@ -54,7 +54,8 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, | |||
| 54 | unsigned int irq = (int)(long)data, full_count = count, err; | 54 | unsigned int irq = (int)(long)data, full_count = count, err; |
| 55 | cpumask_t new_value, tmp; | 55 | cpumask_t new_value, tmp; |
| 56 | 56 | ||
| 57 | if (!irq_desc[irq].chip->set_affinity || no_irq_affinity) | 57 | if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || |
| 58 | CHECK_IRQ_PER_CPU(irq_desc[irq].status)) | ||
| 58 | return -EIO; | 59 | return -EIO; |
| 59 | 60 | ||
| 60 | err = cpumask_parse_user(buffer, count, new_value); | 61 | err = cpumask_parse_user(buffer, count, new_value); |
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index ab63cfc42992..6f294ff4f9ee 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
| @@ -31,14 +31,14 @@ | |||
| 31 | #endif | 31 | #endif |
| 32 | 32 | ||
| 33 | /* These will be re-linked against their real values during the second link stage */ | 33 | /* These will be re-linked against their real values during the second link stage */ |
| 34 | extern unsigned long kallsyms_addresses[] __attribute__((weak)); | 34 | extern const unsigned long kallsyms_addresses[] __attribute__((weak)); |
| 35 | extern unsigned long kallsyms_num_syms __attribute__((weak,section("data"))); | 35 | extern const unsigned long kallsyms_num_syms __attribute__((weak)); |
| 36 | extern u8 kallsyms_names[] __attribute__((weak)); | 36 | extern const u8 kallsyms_names[] __attribute__((weak)); |
| 37 | 37 | ||
| 38 | extern u8 kallsyms_token_table[] __attribute__((weak)); | 38 | extern const u8 kallsyms_token_table[] __attribute__((weak)); |
| 39 | extern u16 kallsyms_token_index[] __attribute__((weak)); | 39 | extern const u16 kallsyms_token_index[] __attribute__((weak)); |
| 40 | 40 | ||
| 41 | extern unsigned long kallsyms_markers[] __attribute__((weak)); | 41 | extern const unsigned long kallsyms_markers[] __attribute__((weak)); |
| 42 | 42 | ||
| 43 | static inline int is_kernel_inittext(unsigned long addr) | 43 | static inline int is_kernel_inittext(unsigned long addr) |
| 44 | { | 44 | { |
| @@ -84,7 +84,7 @@ static int is_ksym_addr(unsigned long addr) | |||
| 84 | static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) | 84 | static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) |
| 85 | { | 85 | { |
| 86 | int len, skipped_first = 0; | 86 | int len, skipped_first = 0; |
| 87 | u8 *tptr, *data; | 87 | const u8 *tptr, *data; |
| 88 | 88 | ||
| 89 | /* get the compressed symbol length from the first symbol byte */ | 89 | /* get the compressed symbol length from the first symbol byte */ |
| 90 | data = &kallsyms_names[off]; | 90 | data = &kallsyms_names[off]; |
| @@ -132,7 +132,7 @@ static char kallsyms_get_symbol_type(unsigned int off) | |||
| 132 | * kallsyms array */ | 132 | * kallsyms array */ |
| 133 | static unsigned int get_symbol_offset(unsigned long pos) | 133 | static unsigned int get_symbol_offset(unsigned long pos) |
| 134 | { | 134 | { |
| 135 | u8 *name; | 135 | const u8 *name; |
| 136 | int i; | 136 | int i; |
| 137 | 137 | ||
| 138 | /* use the closest marker we have. We have markers every 256 positions, | 138 | /* use the closest marker we have. We have markers every 256 positions, |
diff --git a/kernel/kmod.c b/kernel/kmod.c index 8d2bea09a4ec..3a7379aa31ca 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
| @@ -25,7 +25,7 @@ | |||
| 25 | #include <linux/kmod.h> | 25 | #include <linux/kmod.h> |
| 26 | #include <linux/smp_lock.h> | 26 | #include <linux/smp_lock.h> |
| 27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
| 28 | #include <linux/namespace.h> | 28 | #include <linux/mnt_namespace.h> |
| 29 | #include <linux/completion.h> | 29 | #include <linux/completion.h> |
| 30 | #include <linux/file.h> | 30 | #include <linux/file.h> |
| 31 | #include <linux/workqueue.h> | 31 | #include <linux/workqueue.h> |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index b02032476dc2..01e750559034 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
| @@ -43,13 +43,49 @@ | |||
| 43 | #include "lockdep_internals.h" | 43 | #include "lockdep_internals.h" |
| 44 | 44 | ||
| 45 | /* | 45 | /* |
| 46 | * hash_lock: protects the lockdep hashes and class/list/hash allocators. | 46 | * lockdep_lock: protects the lockdep graph, the hashes and the |
| 47 | * class/list/hash allocators. | ||
| 47 | * | 48 | * |
| 48 | * This is one of the rare exceptions where it's justified | 49 | * This is one of the rare exceptions where it's justified |
| 49 | * to use a raw spinlock - we really dont want the spinlock | 50 | * to use a raw spinlock - we really dont want the spinlock |
| 50 | * code to recurse back into the lockdep code. | 51 | * code to recurse back into the lockdep code... |
| 51 | */ | 52 | */ |
| 52 | static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 53 | static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; |
| 54 | |||
| 55 | static int graph_lock(void) | ||
| 56 | { | ||
| 57 | __raw_spin_lock(&lockdep_lock); | ||
| 58 | /* | ||
| 59 | * Make sure that if another CPU detected a bug while | ||
| 60 | * walking the graph we dont change it (while the other | ||
| 61 | * CPU is busy printing out stuff with the graph lock | ||
| 62 | * dropped already) | ||
| 63 | */ | ||
| 64 | if (!debug_locks) { | ||
| 65 | __raw_spin_unlock(&lockdep_lock); | ||
| 66 | return 0; | ||
| 67 | } | ||
| 68 | return 1; | ||
| 69 | } | ||
| 70 | |||
| 71 | static inline int graph_unlock(void) | ||
| 72 | { | ||
| 73 | __raw_spin_unlock(&lockdep_lock); | ||
| 74 | return 0; | ||
| 75 | } | ||
| 76 | |||
| 77 | /* | ||
| 78 | * Turn lock debugging off and return with 0 if it was off already, | ||
| 79 | * and also release the graph lock: | ||
| 80 | */ | ||
| 81 | static inline int debug_locks_off_graph_unlock(void) | ||
| 82 | { | ||
| 83 | int ret = debug_locks_off(); | ||
| 84 | |||
| 85 | __raw_spin_unlock(&lockdep_lock); | ||
| 86 | |||
| 87 | return ret; | ||
| 88 | } | ||
| 53 | 89 | ||
| 54 | static int lockdep_initialized; | 90 | static int lockdep_initialized; |
| 55 | 91 | ||
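The block above renames hash_lock to lockdep_lock and funnels every graph operation through graph_lock(), which backs off if another CPU already disabled lock debugging while printing a report, with debug_locks_off_graph_unlock() combining the error path's "turn debugging off and drop the lock" step. A pthread-based userspace model of that pattern; the mutex and the flag are stand-ins for the raw spinlock and the real debug_locks:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t graph_mutex = PTHREAD_MUTEX_INITIALIZER;
static int debug_locks = 1;	/* stand-in for the global debug_locks flag */

/* Take the graph lock, but refuse to proceed if debugging was already
 * turned off by a CPU that is busy printing a report. */
static int graph_lock(void)
{
	pthread_mutex_lock(&graph_mutex);
	if (!debug_locks) {
		pthread_mutex_unlock(&graph_mutex);
		return 0;
	}
	return 1;
}

static int graph_unlock(void)
{
	pthread_mutex_unlock(&graph_mutex);
	return 0;
}

/* Error path: disable debugging and drop the graph lock in one helper;
 * returns non-zero only for the caller that actually turned it off. */
static int debug_locks_off_graph_unlock(void)
{
	int ret = debug_locks;

	debug_locks = 0;
	pthread_mutex_unlock(&graph_mutex);
	return ret;
}

int main(void)
{
	if (graph_lock()) {
		/* ... walk or extend the dependency graph here ... */
		graph_unlock();
	}
	if (graph_lock()) {
		/* hit a limit: report once, then stay quiet */
		if (debug_locks_off_graph_unlock())
			printf("BUG: limit too low, validator disabled\n");
	}
	printf("debug_locks=%d\n", debug_locks);
	return 0;
}
```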
| @@ -57,14 +93,15 @@ unsigned long nr_list_entries; | |||
| 57 | static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; | 93 | static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; |
| 58 | 94 | ||
| 59 | /* | 95 | /* |
| 60 | * Allocate a lockdep entry. (assumes hash_lock held, returns | 96 | * Allocate a lockdep entry. (assumes the graph_lock held, returns |
| 61 | * with NULL on failure) | 97 | * with NULL on failure) |
| 62 | */ | 98 | */ |
| 63 | static struct lock_list *alloc_list_entry(void) | 99 | static struct lock_list *alloc_list_entry(void) |
| 64 | { | 100 | { |
| 65 | if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { | 101 | if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { |
| 66 | __raw_spin_unlock(&hash_lock); | 102 | if (!debug_locks_off_graph_unlock()) |
| 67 | debug_locks_off(); | 103 | return NULL; |
| 104 | |||
| 68 | printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); | 105 | printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); |
| 69 | printk("turning off the locking correctness validator.\n"); | 106 | printk("turning off the locking correctness validator.\n"); |
| 70 | return NULL; | 107 | return NULL; |
| @@ -145,9 +182,7 @@ EXPORT_SYMBOL(lockdep_on); | |||
| 145 | */ | 182 | */ |
| 146 | 183 | ||
| 147 | #define VERBOSE 0 | 184 | #define VERBOSE 0 |
| 148 | #ifdef VERBOSE | 185 | #define VERY_VERBOSE 0 |
| 149 | # define VERY_VERBOSE 0 | ||
| 150 | #endif | ||
| 151 | 186 | ||
| 152 | #if VERBOSE | 187 | #if VERBOSE |
| 153 | # define HARDIRQ_VERBOSE 1 | 188 | # define HARDIRQ_VERBOSE 1 |
| @@ -172,8 +207,8 @@ static int class_filter(struct lock_class *class) | |||
| 172 | !strcmp(class->name, "&struct->lockfield")) | 207 | !strcmp(class->name, "&struct->lockfield")) |
| 173 | return 1; | 208 | return 1; |
| 174 | #endif | 209 | #endif |
| 175 | /* Allow everything else. 0 would be filter everything else */ | 210 | /* Filter everything else. 1 would be to allow everything else */ |
| 176 | return 1; | 211 | return 0; |
| 177 | } | 212 | } |
| 178 | #endif | 213 | #endif |
| 179 | 214 | ||
| @@ -207,7 +242,7 @@ static int softirq_verbose(struct lock_class *class) | |||
| 207 | 242 | ||
| 208 | /* | 243 | /* |
| 209 | * Stack-trace: tightly packed array of stack backtrace | 244 | * Stack-trace: tightly packed array of stack backtrace |
| 210 | * addresses. Protected by the hash_lock. | 245 | * addresses. Protected by the graph_lock. |
| 211 | */ | 246 | */ |
| 212 | unsigned long nr_stack_trace_entries; | 247 | unsigned long nr_stack_trace_entries; |
| 213 | static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; | 248 | static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; |
| @@ -226,18 +261,15 @@ static int save_trace(struct stack_trace *trace) | |||
| 226 | trace->max_entries = trace->nr_entries; | 261 | trace->max_entries = trace->nr_entries; |
| 227 | 262 | ||
| 228 | nr_stack_trace_entries += trace->nr_entries; | 263 | nr_stack_trace_entries += trace->nr_entries; |
| 229 | if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) { | ||
| 230 | __raw_spin_unlock(&hash_lock); | ||
| 231 | return 0; | ||
| 232 | } | ||
| 233 | 264 | ||
| 234 | if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) { | 265 | if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) { |
| 235 | __raw_spin_unlock(&hash_lock); | 266 | if (!debug_locks_off_graph_unlock()) |
| 236 | if (debug_locks_off()) { | 267 | return 0; |
| 237 | printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n"); | 268 | |
| 238 | printk("turning off the locking correctness validator.\n"); | 269 | printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n"); |
| 239 | dump_stack(); | 270 | printk("turning off the locking correctness validator.\n"); |
| 240 | } | 271 | dump_stack(); |
| 272 | |||
| 241 | return 0; | 273 | return 0; |
| 242 | } | 274 | } |
| 243 | 275 | ||
| @@ -526,9 +558,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth) | |||
| 526 | { | 558 | { |
| 527 | struct task_struct *curr = current; | 559 | struct task_struct *curr = current; |
| 528 | 560 | ||
| 529 | __raw_spin_unlock(&hash_lock); | 561 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 530 | debug_locks_off(); | ||
| 531 | if (debug_locks_silent) | ||
| 532 | return 0; | 562 | return 0; |
| 533 | 563 | ||
| 534 | printk("\n=======================================================\n"); | 564 | printk("\n=======================================================\n"); |
| @@ -556,12 +586,10 @@ static noinline int print_circular_bug_tail(void) | |||
| 556 | if (debug_locks_silent) | 586 | if (debug_locks_silent) |
| 557 | return 0; | 587 | return 0; |
| 558 | 588 | ||
| 559 | /* hash_lock unlocked by the header */ | ||
| 560 | __raw_spin_lock(&hash_lock); | ||
| 561 | this.class = check_source->class; | 589 | this.class = check_source->class; |
| 562 | if (!save_trace(&this.trace)) | 590 | if (!save_trace(&this.trace)) |
| 563 | return 0; | 591 | return 0; |
| 564 | __raw_spin_unlock(&hash_lock); | 592 | |
| 565 | print_circular_bug_entry(&this, 0); | 593 | print_circular_bug_entry(&this, 0); |
| 566 | 594 | ||
| 567 | printk("\nother info that might help us debug this:\n\n"); | 595 | printk("\nother info that might help us debug this:\n\n"); |
| @@ -577,8 +605,10 @@ static noinline int print_circular_bug_tail(void) | |||
| 577 | 605 | ||
| 578 | static int noinline print_infinite_recursion_bug(void) | 606 | static int noinline print_infinite_recursion_bug(void) |
| 579 | { | 607 | { |
| 580 | __raw_spin_unlock(&hash_lock); | 608 | if (!debug_locks_off_graph_unlock()) |
| 581 | DEBUG_LOCKS_WARN_ON(1); | 609 | return 0; |
| 610 | |||
| 611 | WARN_ON(1); | ||
| 582 | 612 | ||
| 583 | return 0; | 613 | return 0; |
| 584 | } | 614 | } |
| @@ -713,9 +743,7 @@ print_bad_irq_dependency(struct task_struct *curr, | |||
| 713 | enum lock_usage_bit bit2, | 743 | enum lock_usage_bit bit2, |
| 714 | const char *irqclass) | 744 | const char *irqclass) |
| 715 | { | 745 | { |
| 716 | __raw_spin_unlock(&hash_lock); | 746 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 717 | debug_locks_off(); | ||
| 718 | if (debug_locks_silent) | ||
| 719 | return 0; | 747 | return 0; |
| 720 | 748 | ||
| 721 | printk("\n======================================================\n"); | 749 | printk("\n======================================================\n"); |
| @@ -796,9 +824,7 @@ static int | |||
| 796 | print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, | 824 | print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, |
| 797 | struct held_lock *next) | 825 | struct held_lock *next) |
| 798 | { | 826 | { |
| 799 | debug_locks_off(); | 827 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 800 | __raw_spin_unlock(&hash_lock); | ||
| 801 | if (debug_locks_silent) | ||
| 802 | return 0; | 828 | return 0; |
| 803 | 829 | ||
| 804 | printk("\n=============================================\n"); | 830 | printk("\n=============================================\n"); |
| @@ -974,14 +1000,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
| 974 | * Debugging printouts: | 1000 | * Debugging printouts: |
| 975 | */ | 1001 | */ |
| 976 | if (verbose(prev->class) || verbose(next->class)) { | 1002 | if (verbose(prev->class) || verbose(next->class)) { |
| 977 | __raw_spin_unlock(&hash_lock); | 1003 | graph_unlock(); |
| 978 | printk("\n new dependency: "); | 1004 | printk("\n new dependency: "); |
| 979 | print_lock_name(prev->class); | 1005 | print_lock_name(prev->class); |
| 980 | printk(" => "); | 1006 | printk(" => "); |
| 981 | print_lock_name(next->class); | 1007 | print_lock_name(next->class); |
| 982 | printk("\n"); | 1008 | printk("\n"); |
| 983 | dump_stack(); | 1009 | dump_stack(); |
| 984 | __raw_spin_lock(&hash_lock); | 1010 | return graph_lock(); |
| 985 | } | 1011 | } |
| 986 | return 1; | 1012 | return 1; |
| 987 | } | 1013 | } |
| @@ -1046,8 +1072,10 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) | |||
| 1046 | } | 1072 | } |
| 1047 | return 1; | 1073 | return 1; |
| 1048 | out_bug: | 1074 | out_bug: |
| 1049 | __raw_spin_unlock(&hash_lock); | 1075 | if (!debug_locks_off_graph_unlock()) |
| 1050 | DEBUG_LOCKS_WARN_ON(1); | 1076 | return 0; |
| 1077 | |||
| 1078 | WARN_ON(1); | ||
| 1051 | 1079 | ||
| 1052 | return 0; | 1080 | return 0; |
| 1053 | } | 1081 | } |
| @@ -1201,7 +1229,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | |||
| 1201 | hash_head = classhashentry(key); | 1229 | hash_head = classhashentry(key); |
| 1202 | 1230 | ||
| 1203 | raw_local_irq_save(flags); | 1231 | raw_local_irq_save(flags); |
| 1204 | __raw_spin_lock(&hash_lock); | 1232 | if (!graph_lock()) { |
| 1233 | raw_local_irq_restore(flags); | ||
| 1234 | return NULL; | ||
| 1235 | } | ||
| 1205 | /* | 1236 | /* |
| 1206 | * We have to do the hash-walk again, to avoid races | 1237 | * We have to do the hash-walk again, to avoid races |
| 1207 | * with another CPU: | 1238 | * with another CPU: |
| @@ -1214,9 +1245,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | |||
| 1214 | * the hash: | 1245 | * the hash: |
| 1215 | */ | 1246 | */ |
| 1216 | if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { | 1247 | if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { |
| 1217 | __raw_spin_unlock(&hash_lock); | 1248 | if (!debug_locks_off_graph_unlock()) { |
| 1249 | raw_local_irq_restore(flags); | ||
| 1250 | return NULL; | ||
| 1251 | } | ||
| 1218 | raw_local_irq_restore(flags); | 1252 | raw_local_irq_restore(flags); |
| 1219 | debug_locks_off(); | 1253 | |
| 1220 | printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); | 1254 | printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); |
| 1221 | printk("turning off the locking correctness validator.\n"); | 1255 | printk("turning off the locking correctness validator.\n"); |
| 1222 | return NULL; | 1256 | return NULL; |
| @@ -1237,18 +1271,23 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | |||
| 1237 | list_add_tail_rcu(&class->hash_entry, hash_head); | 1271 | list_add_tail_rcu(&class->hash_entry, hash_head); |
| 1238 | 1272 | ||
| 1239 | if (verbose(class)) { | 1273 | if (verbose(class)) { |
| 1240 | __raw_spin_unlock(&hash_lock); | 1274 | graph_unlock(); |
| 1241 | raw_local_irq_restore(flags); | 1275 | raw_local_irq_restore(flags); |
| 1276 | |||
| 1242 | printk("\nnew class %p: %s", class->key, class->name); | 1277 | printk("\nnew class %p: %s", class->key, class->name); |
| 1243 | if (class->name_version > 1) | 1278 | if (class->name_version > 1) |
| 1244 | printk("#%d", class->name_version); | 1279 | printk("#%d", class->name_version); |
| 1245 | printk("\n"); | 1280 | printk("\n"); |
| 1246 | dump_stack(); | 1281 | dump_stack(); |
| 1282 | |||
| 1247 | raw_local_irq_save(flags); | 1283 | raw_local_irq_save(flags); |
| 1248 | __raw_spin_lock(&hash_lock); | 1284 | if (!graph_lock()) { |
| 1285 | raw_local_irq_restore(flags); | ||
| 1286 | return NULL; | ||
| 1287 | } | ||
| 1249 | } | 1288 | } |
| 1250 | out_unlock_set: | 1289 | out_unlock_set: |
| 1251 | __raw_spin_unlock(&hash_lock); | 1290 | graph_unlock(); |
| 1252 | raw_local_irq_restore(flags); | 1291 | raw_local_irq_restore(flags); |
| 1253 | 1292 | ||
| 1254 | if (!subclass || force) | 1293 | if (!subclass || force) |
| @@ -1264,7 +1303,7 @@ out_unlock_set: | |||
| 1264 | * add it and return 0 - in this case the new dependency chain is | 1303 | * add it and return 0 - in this case the new dependency chain is |
| 1265 | * validated. If the key is already hashed, return 1. | 1304 | * validated. If the key is already hashed, return 1. |
| 1266 | */ | 1305 | */ |
| 1267 | static inline int lookup_chain_cache(u64 chain_key) | 1306 | static inline int lookup_chain_cache(u64 chain_key, struct lock_class *class) |
| 1268 | { | 1307 | { |
| 1269 | struct list_head *hash_head = chainhashentry(chain_key); | 1308 | struct list_head *hash_head = chainhashentry(chain_key); |
| 1270 | struct lock_chain *chain; | 1309 | struct lock_chain *chain; |
| @@ -1278,34 +1317,32 @@ static inline int lookup_chain_cache(u64 chain_key) | |||
| 1278 | if (chain->chain_key == chain_key) { | 1317 | if (chain->chain_key == chain_key) { |
| 1279 | cache_hit: | 1318 | cache_hit: |
| 1280 | debug_atomic_inc(&chain_lookup_hits); | 1319 | debug_atomic_inc(&chain_lookup_hits); |
| 1281 | /* | 1320 | if (very_verbose(class)) |
| 1282 | * In the debugging case, force redundant checking | 1321 | printk("\nhash chain already cached, key: %016Lx tail class: [%p] %s\n", chain_key, class->key, class->name); |
| 1283 | * by returning 1: | ||
| 1284 | */ | ||
| 1285 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
| 1286 | __raw_spin_lock(&hash_lock); | ||
| 1287 | return 1; | ||
| 1288 | #endif | ||
| 1289 | return 0; | 1322 | return 0; |
| 1290 | } | 1323 | } |
| 1291 | } | 1324 | } |
| 1325 | if (very_verbose(class)) | ||
| 1326 | printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", chain_key, class->key, class->name); | ||
| 1292 | /* | 1327 | /* |
| 1293 | * Allocate a new chain entry from the static array, and add | 1328 | * Allocate a new chain entry from the static array, and add |
| 1294 | * it to the hash: | 1329 | * it to the hash: |
| 1295 | */ | 1330 | */ |
| 1296 | __raw_spin_lock(&hash_lock); | 1331 | if (!graph_lock()) |
| 1332 | return 0; | ||
| 1297 | /* | 1333 | /* |
| 1298 | * We have to walk the chain again locked - to avoid duplicates: | 1334 | * We have to walk the chain again locked - to avoid duplicates: |
| 1299 | */ | 1335 | */ |
| 1300 | list_for_each_entry(chain, hash_head, entry) { | 1336 | list_for_each_entry(chain, hash_head, entry) { |
| 1301 | if (chain->chain_key == chain_key) { | 1337 | if (chain->chain_key == chain_key) { |
| 1302 | __raw_spin_unlock(&hash_lock); | 1338 | graph_unlock(); |
| 1303 | goto cache_hit; | 1339 | goto cache_hit; |
| 1304 | } | 1340 | } |
| 1305 | } | 1341 | } |
| 1306 | if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { | 1342 | if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { |
| 1307 | __raw_spin_unlock(&hash_lock); | 1343 | if (!debug_locks_off_graph_unlock()) |
| 1308 | debug_locks_off(); | 1344 | return 0; |
| 1345 | |||
| 1309 | printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); | 1346 | printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); |
| 1310 | printk("turning off the locking correctness validator.\n"); | 1347 | printk("turning off the locking correctness validator.\n"); |
| 1311 | return 0; | 1348 | return 0; |
| @@ -1381,9 +1418,7 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other, | |||
| 1381 | struct held_lock *this, int forwards, | 1418 | struct held_lock *this, int forwards, |
| 1382 | const char *irqclass) | 1419 | const char *irqclass) |
| 1383 | { | 1420 | { |
| 1384 | __raw_spin_unlock(&hash_lock); | 1421 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 1385 | debug_locks_off(); | ||
| 1386 | if (debug_locks_silent) | ||
| 1387 | return 0; | 1422 | return 0; |
| 1388 | 1423 | ||
| 1389 | printk("\n=========================================================\n"); | 1424 | printk("\n=========================================================\n"); |
| @@ -1453,7 +1488,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, | |||
| 1453 | return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass); | 1488 | return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass); |
| 1454 | } | 1489 | } |
| 1455 | 1490 | ||
| 1456 | static inline void print_irqtrace_events(struct task_struct *curr) | 1491 | void print_irqtrace_events(struct task_struct *curr) |
| 1457 | { | 1492 | { |
| 1458 | printk("irq event stamp: %u\n", curr->irq_events); | 1493 | printk("irq event stamp: %u\n", curr->irq_events); |
| 1459 | printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event); | 1494 | printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event); |
| @@ -1466,19 +1501,13 @@ static inline void print_irqtrace_events(struct task_struct *curr) | |||
| 1466 | print_ip_sym(curr->softirq_disable_ip); | 1501 | print_ip_sym(curr->softirq_disable_ip); |
| 1467 | } | 1502 | } |
| 1468 | 1503 | ||
| 1469 | #else | ||
| 1470 | static inline void print_irqtrace_events(struct task_struct *curr) | ||
| 1471 | { | ||
| 1472 | } | ||
| 1473 | #endif | 1504 | #endif |
| 1474 | 1505 | ||
| 1475 | static int | 1506 | static int |
| 1476 | print_usage_bug(struct task_struct *curr, struct held_lock *this, | 1507 | print_usage_bug(struct task_struct *curr, struct held_lock *this, |
| 1477 | enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) | 1508 | enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) |
| 1478 | { | 1509 | { |
| 1479 | __raw_spin_unlock(&hash_lock); | 1510 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 1480 | debug_locks_off(); | ||
| 1481 | if (debug_locks_silent) | ||
| 1482 | return 0; | 1511 | return 0; |
| 1483 | 1512 | ||
| 1484 | printk("\n=================================\n"); | 1513 | printk("\n=================================\n"); |
| @@ -1539,12 +1568,13 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
| 1539 | if (likely(this->class->usage_mask & new_mask)) | 1568 | if (likely(this->class->usage_mask & new_mask)) |
| 1540 | return 1; | 1569 | return 1; |
| 1541 | 1570 | ||
| 1542 | __raw_spin_lock(&hash_lock); | 1571 | if (!graph_lock()) |
| 1572 | return 0; | ||
| 1543 | /* | 1573 | /* |
| 1544 | * Make sure we didnt race: | 1574 | * Make sure we didnt race: |
| 1545 | */ | 1575 | */ |
| 1546 | if (unlikely(this->class->usage_mask & new_mask)) { | 1576 | if (unlikely(this->class->usage_mask & new_mask)) { |
| 1547 | __raw_spin_unlock(&hash_lock); | 1577 | graph_unlock(); |
| 1548 | return 1; | 1578 | return 1; |
| 1549 | } | 1579 | } |
| 1550 | 1580 | ||
| @@ -1730,16 +1760,16 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
| 1730 | debug_atomic_dec(&nr_unused_locks); | 1760 | debug_atomic_dec(&nr_unused_locks); |
| 1731 | break; | 1761 | break; |
| 1732 | default: | 1762 | default: |
| 1733 | __raw_spin_unlock(&hash_lock); | 1763 | if (!debug_locks_off_graph_unlock()) |
| 1734 | debug_locks_off(); | 1764 | return 0; |
| 1735 | WARN_ON(1); | 1765 | WARN_ON(1); |
| 1736 | return 0; | 1766 | return 0; |
| 1737 | } | 1767 | } |
| 1738 | 1768 | ||
| 1739 | __raw_spin_unlock(&hash_lock); | 1769 | graph_unlock(); |
| 1740 | 1770 | ||
| 1741 | /* | 1771 | /* |
| 1742 | * We must printk outside of the hash_lock: | 1772 | * We must printk outside of the graph_lock: |
| 1743 | */ | 1773 | */ |
| 1744 | if (ret == 2) { | 1774 | if (ret == 2) { |
| 1745 | printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); | 1775 | printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); |
| @@ -2137,9 +2167,9 @@ out_calc_hash: | |||
| 2137 | * We look up the chain_key and do the O(N^2) check and update of | 2167 | * We look up the chain_key and do the O(N^2) check and update of |
| 2138 | * the dependencies only if this is a new dependency chain. | 2168 | * the dependencies only if this is a new dependency chain. |
| 2139 | * (If lookup_chain_cache() returns with 1 it acquires | 2169 | * (If lookup_chain_cache() returns with 1 it acquires |
| 2140 | * hash_lock for us) | 2170 | * graph_lock for us) |
| 2141 | */ | 2171 | */ |
| 2142 | if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) { | 2172 | if (!trylock && (check == 2) && lookup_chain_cache(chain_key, class)) { |
| 2143 | /* | 2173 | /* |
| 2144 | * Check whether last held lock: | 2174 | * Check whether last held lock: |
| 2145 | * | 2175 | * |
| @@ -2170,7 +2200,7 @@ out_calc_hash: | |||
| 2170 | if (!chain_head && ret != 2) | 2200 | if (!chain_head && ret != 2) |
| 2171 | if (!check_prevs_add(curr, hlock)) | 2201 | if (!check_prevs_add(curr, hlock)) |
| 2172 | return 0; | 2202 | return 0; |
| 2173 | __raw_spin_unlock(&hash_lock); | 2203 | graph_unlock(); |
| 2174 | } | 2204 | } |
| 2175 | curr->lockdep_depth++; | 2205 | curr->lockdep_depth++; |
| 2176 | check_chain_key(curr); | 2206 | check_chain_key(curr); |
| @@ -2433,6 +2463,7 @@ EXPORT_SYMBOL_GPL(lock_release); | |||
| 2433 | void lockdep_reset(void) | 2463 | void lockdep_reset(void) |
| 2434 | { | 2464 | { |
| 2435 | unsigned long flags; | 2465 | unsigned long flags; |
| 2466 | int i; | ||
| 2436 | 2467 | ||
| 2437 | raw_local_irq_save(flags); | 2468 | raw_local_irq_save(flags); |
| 2438 | current->curr_chain_key = 0; | 2469 | current->curr_chain_key = 0; |
| @@ -2443,6 +2474,8 @@ void lockdep_reset(void) | |||
| 2443 | nr_softirq_chains = 0; | 2474 | nr_softirq_chains = 0; |
| 2444 | nr_process_chains = 0; | 2475 | nr_process_chains = 0; |
| 2445 | debug_locks = 1; | 2476 | debug_locks = 1; |
| 2477 | for (i = 0; i < CHAINHASH_SIZE; i++) | ||
| 2478 | INIT_LIST_HEAD(chainhash_table + i); | ||
| 2446 | raw_local_irq_restore(flags); | 2479 | raw_local_irq_restore(flags); |
| 2447 | } | 2480 | } |
| 2448 | 2481 | ||
| @@ -2479,7 +2512,7 @@ void lockdep_free_key_range(void *start, unsigned long size) | |||
| 2479 | int i; | 2512 | int i; |
| 2480 | 2513 | ||
| 2481 | raw_local_irq_save(flags); | 2514 | raw_local_irq_save(flags); |
| 2482 | __raw_spin_lock(&hash_lock); | 2515 | graph_lock(); |
| 2483 | 2516 | ||
| 2484 | /* | 2517 | /* |
| 2485 | * Unhash all classes that were created by this module: | 2518 | * Unhash all classes that were created by this module: |
| @@ -2493,7 +2526,7 @@ void lockdep_free_key_range(void *start, unsigned long size) | |||
| 2493 | zap_class(class); | 2526 | zap_class(class); |
| 2494 | } | 2527 | } |
| 2495 | 2528 | ||
| 2496 | __raw_spin_unlock(&hash_lock); | 2529 | graph_unlock(); |
| 2497 | raw_local_irq_restore(flags); | 2530 | raw_local_irq_restore(flags); |
| 2498 | } | 2531 | } |
| 2499 | 2532 | ||
| @@ -2521,20 +2554,20 @@ void lockdep_reset_lock(struct lockdep_map *lock) | |||
| 2521 | * Debug check: in the end all mapped classes should | 2554 | * Debug check: in the end all mapped classes should |
| 2522 | * be gone. | 2555 | * be gone. |
| 2523 | */ | 2556 | */ |
| 2524 | __raw_spin_lock(&hash_lock); | 2557 | graph_lock(); |
| 2525 | for (i = 0; i < CLASSHASH_SIZE; i++) { | 2558 | for (i = 0; i < CLASSHASH_SIZE; i++) { |
| 2526 | head = classhash_table + i; | 2559 | head = classhash_table + i; |
| 2527 | if (list_empty(head)) | 2560 | if (list_empty(head)) |
| 2528 | continue; | 2561 | continue; |
| 2529 | list_for_each_entry_safe(class, next, head, hash_entry) { | 2562 | list_for_each_entry_safe(class, next, head, hash_entry) { |
| 2530 | if (unlikely(class == lock->class_cache)) { | 2563 | if (unlikely(class == lock->class_cache)) { |
| 2531 | __raw_spin_unlock(&hash_lock); | 2564 | if (debug_locks_off_graph_unlock()) |
| 2532 | DEBUG_LOCKS_WARN_ON(1); | 2565 | WARN_ON(1); |
| 2533 | goto out_restore; | 2566 | goto out_restore; |
| 2534 | } | 2567 | } |
| 2535 | } | 2568 | } |
| 2536 | } | 2569 | } |
| 2537 | __raw_spin_unlock(&hash_lock); | 2570 | graph_unlock(); |
| 2538 | 2571 | ||
| 2539 | out_restore: | 2572 | out_restore: |
| 2540 | raw_local_irq_restore(flags); | 2573 | raw_local_irq_restore(flags); |
diff --git a/kernel/module.c b/kernel/module.c index d9eae45d0145..b565eaeff7e6 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -824,9 +824,34 @@ static inline void module_unload_init(struct module *mod) | |||
| 824 | } | 824 | } |
| 825 | #endif /* CONFIG_MODULE_UNLOAD */ | 825 | #endif /* CONFIG_MODULE_UNLOAD */ |
| 826 | 826 | ||
| 827 | static ssize_t show_initstate(struct module_attribute *mattr, | ||
| 828 | struct module *mod, char *buffer) | ||
| 829 | { | ||
| 830 | const char *state = "unknown"; | ||
| 831 | |||
| 832 | switch (mod->state) { | ||
| 833 | case MODULE_STATE_LIVE: | ||
| 834 | state = "live"; | ||
| 835 | break; | ||
| 836 | case MODULE_STATE_COMING: | ||
| 837 | state = "coming"; | ||
| 838 | break; | ||
| 839 | case MODULE_STATE_GOING: | ||
| 840 | state = "going"; | ||
| 841 | break; | ||
| 842 | } | ||
| 843 | return sprintf(buffer, "%s\n", state); | ||
| 844 | } | ||
| 845 | |||
| 846 | static struct module_attribute initstate = { | ||
| 847 | .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE }, | ||
| 848 | .show = show_initstate, | ||
| 849 | }; | ||
| 850 | |||
| 827 | static struct module_attribute *modinfo_attrs[] = { | 851 | static struct module_attribute *modinfo_attrs[] = { |
| 828 | &modinfo_version, | 852 | &modinfo_version, |
| 829 | &modinfo_srcversion, | 853 | &modinfo_srcversion, |
| 854 | &initstate, | ||
| 830 | #ifdef CONFIG_MODULE_UNLOAD | 855 | #ifdef CONFIG_MODULE_UNLOAD |
| 831 | &refcnt, | 856 | &refcnt, |
| 832 | #endif | 857 | #endif |
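
Editor's note: with this attribute registered, each loaded module gains /sys/module/<name>/initstate, which reads back "live", "coming" or "going". A small user-space reader, using the 'loop' module purely as an example name:

    #include <stdio.h>

    int main(void)
    {
        char state[32] = "";
        FILE *f = fopen("/sys/module/loop/initstate", "r");

        if (!f) {
            perror("initstate");
            return 1;
        }
        if (fgets(state, sizeof(state), f))
            printf("loop is %s", state);    /* e.g. "live" */
        fclose(f);
        return 0;
    }
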
diff --git a/kernel/mutex.c b/kernel/mutex.c index 8c71cf72a497..e7cbbb82765b 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
| @@ -206,6 +206,15 @@ mutex_lock_nested(struct mutex *lock, unsigned int subclass) | |||
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | EXPORT_SYMBOL_GPL(mutex_lock_nested); | 208 | EXPORT_SYMBOL_GPL(mutex_lock_nested); |
| 209 | |||
| 210 | int __sched | ||
| 211 | mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) | ||
| 212 | { | ||
| 213 | might_sleep(); | ||
| 214 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass); | ||
| 215 | } | ||
| 216 | |||
| 217 | EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); | ||
| 209 | #endif | 218 | #endif |
| 210 | 219 | ||
| 211 | /* | 220 | /* |
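
Editor's note: mutex_lock_interruptible_nested() completes the _nested API: the subclass only feeds the lockdep annotation, and the return value is 0 or -EINTR exactly as with mutex_lock_interruptible(). A sketch of a hypothetical caller nesting two locks of the same class (struct foo and the locking order are made up for illustration):

    static int foo_transfer(struct foo *src, struct foo *dst)
    {
        int err;

        mutex_lock_nested(&src->lock, 0);       /* outer lock, subclass 0 */

        /* inner lock of the same class, distinct subclass for lockdep */
        err = mutex_lock_interruptible_nested(&dst->lock, SINGLE_DEPTH_NESTING);
        if (err) {                              /* -EINTR if a signal arrived */
            mutex_unlock(&src->lock);
            return err;
        }

        /* ... move state from src to dst ... */

        mutex_unlock(&dst->lock);
        mutex_unlock(&src->lock);
        return 0;
    }
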
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 674aceb7335a..f5b9ee6f6bbb 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
| @@ -17,8 +17,9 @@ | |||
| 17 | #include <linux/version.h> | 17 | #include <linux/version.h> |
| 18 | #include <linux/nsproxy.h> | 18 | #include <linux/nsproxy.h> |
| 19 | #include <linux/init_task.h> | 19 | #include <linux/init_task.h> |
| 20 | #include <linux/namespace.h> | 20 | #include <linux/mnt_namespace.h> |
| 21 | #include <linux/utsname.h> | 21 | #include <linux/utsname.h> |
| 22 | #include <linux/pid_namespace.h> | ||
| 22 | 23 | ||
| 23 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); | 24 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); |
| 24 | 25 | ||
| @@ -60,12 +61,14 @@ struct nsproxy *dup_namespaces(struct nsproxy *orig) | |||
| 60 | struct nsproxy *ns = clone_namespaces(orig); | 61 | struct nsproxy *ns = clone_namespaces(orig); |
| 61 | 62 | ||
| 62 | if (ns) { | 63 | if (ns) { |
| 63 | if (ns->namespace) | 64 | if (ns->mnt_ns) |
| 64 | get_namespace(ns->namespace); | 65 | get_mnt_ns(ns->mnt_ns); |
| 65 | if (ns->uts_ns) | 66 | if (ns->uts_ns) |
| 66 | get_uts_ns(ns->uts_ns); | 67 | get_uts_ns(ns->uts_ns); |
| 67 | if (ns->ipc_ns) | 68 | if (ns->ipc_ns) |
| 68 | get_ipc_ns(ns->ipc_ns); | 69 | get_ipc_ns(ns->ipc_ns); |
| 70 | if (ns->pid_ns) | ||
| 71 | get_pid_ns(ns->pid_ns); | ||
| 69 | } | 72 | } |
| 70 | 73 | ||
| 71 | return ns; | 74 | return ns; |
| @@ -97,7 +100,7 @@ int copy_namespaces(int flags, struct task_struct *tsk) | |||
| 97 | 100 | ||
| 98 | tsk->nsproxy = new_ns; | 101 | tsk->nsproxy = new_ns; |
| 99 | 102 | ||
| 100 | err = copy_namespace(flags, tsk); | 103 | err = copy_mnt_ns(flags, tsk); |
| 101 | if (err) | 104 | if (err) |
| 102 | goto out_ns; | 105 | goto out_ns; |
| 103 | 106 | ||
| @@ -109,16 +112,23 @@ int copy_namespaces(int flags, struct task_struct *tsk) | |||
| 109 | if (err) | 112 | if (err) |
| 110 | goto out_ipc; | 113 | goto out_ipc; |
| 111 | 114 | ||
| 115 | err = copy_pid_ns(flags, tsk); | ||
| 116 | if (err) | ||
| 117 | goto out_pid; | ||
| 118 | |||
| 112 | out: | 119 | out: |
| 113 | put_nsproxy(old_ns); | 120 | put_nsproxy(old_ns); |
| 114 | return err; | 121 | return err; |
| 115 | 122 | ||
| 123 | out_pid: | ||
| 124 | if (new_ns->ipc_ns) | ||
| 125 | put_ipc_ns(new_ns->ipc_ns); | ||
| 116 | out_ipc: | 126 | out_ipc: |
| 117 | if (new_ns->uts_ns) | 127 | if (new_ns->uts_ns) |
| 118 | put_uts_ns(new_ns->uts_ns); | 128 | put_uts_ns(new_ns->uts_ns); |
| 119 | out_uts: | 129 | out_uts: |
| 120 | if (new_ns->namespace) | 130 | if (new_ns->mnt_ns) |
| 121 | put_namespace(new_ns->namespace); | 131 | put_mnt_ns(new_ns->mnt_ns); |
| 122 | out_ns: | 132 | out_ns: |
| 123 | tsk->nsproxy = old_ns; | 133 | tsk->nsproxy = old_ns; |
| 124 | kfree(new_ns); | 134 | kfree(new_ns); |
| @@ -127,11 +137,13 @@ out_ns: | |||
| 127 | 137 | ||
| 128 | void free_nsproxy(struct nsproxy *ns) | 138 | void free_nsproxy(struct nsproxy *ns) |
| 129 | { | 139 | { |
| 130 | if (ns->namespace) | 140 | if (ns->mnt_ns) |
| 131 | put_namespace(ns->namespace); | 141 | put_mnt_ns(ns->mnt_ns); |
| 132 | if (ns->uts_ns) | 142 | if (ns->uts_ns) |
| 133 | put_uts_ns(ns->uts_ns); | 143 | put_uts_ns(ns->uts_ns); |
| 134 | if (ns->ipc_ns) | 144 | if (ns->ipc_ns) |
| 135 | put_ipc_ns(ns->ipc_ns); | 145 | put_ipc_ns(ns->ipc_ns); |
| 136 | kfree(ns); | 146 | if (ns->pid_ns) |
| 147 | put_pid_ns(ns->pid_ns); | ||
| 148 | kfree(ns); | ||
| 137 | } | 149 | } |
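
Editor's note: the pid namespace pointer follows the same lifetime pattern as the other members of nsproxy: copy_pid_ns() takes a reference on clone, free_nsproxy() drops it, and the final put ends up in free_pid_ns() via the kref added to struct pid_namespace. A sketch of what the get/put helpers amount to, assuming they are the usual thin kref wrappers:

    static inline void get_pid_ns(struct pid_namespace *ns)
    {
        kref_get(&ns->kref);
    }

    static inline void put_pid_ns(struct pid_namespace *ns)
    {
        /* free_pid_ns() runs only when the last reference is dropped */
        kref_put(&ns->kref, free_pid_ns);
    }
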
diff --git a/kernel/pid.c b/kernel/pid.c index a48879b0b921..2efe9d8d367b 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
| @@ -26,7 +26,7 @@ | |||
| 26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
| 27 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
| 28 | #include <linux/hash.h> | 28 | #include <linux/hash.h> |
| 29 | #include <linux/pspace.h> | 29 | #include <linux/pid_namespace.h> |
| 30 | 30 | ||
| 31 | #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) | 31 | #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) |
| 32 | static struct hlist_head *pid_hash; | 32 | static struct hlist_head *pid_hash; |
| @@ -43,9 +43,10 @@ int pid_max_max = PID_MAX_LIMIT; | |||
| 43 | #define BITS_PER_PAGE (PAGE_SIZE*8) | 43 | #define BITS_PER_PAGE (PAGE_SIZE*8) |
| 44 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) | 44 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) |
| 45 | 45 | ||
| 46 | static inline int mk_pid(struct pspace *pspace, struct pidmap *map, int off) | 46 | static inline int mk_pid(struct pid_namespace *pid_ns, |
| 47 | struct pidmap *map, int off) | ||
| 47 | { | 48 | { |
| 48 | return (map - pspace->pidmap)*BITS_PER_PAGE + off; | 49 | return (map - pid_ns->pidmap)*BITS_PER_PAGE + off; |
| 49 | } | 50 | } |
| 50 | 51 | ||
| 51 | #define find_next_offset(map, off) \ | 52 | #define find_next_offset(map, off) \ |
| @@ -57,11 +58,15 @@ static inline int mk_pid(struct pspace *pspace, struct pidmap *map, int off) | |||
| 57 | * value does not cause lots of bitmaps to be allocated, but | 58 | * value does not cause lots of bitmaps to be allocated, but |
| 58 | * the scheme scales to up to 4 million PIDs, runtime. | 59 | * the scheme scales to up to 4 million PIDs, runtime. |
| 59 | */ | 60 | */ |
| 60 | struct pspace init_pspace = { | 61 | struct pid_namespace init_pid_ns = { |
| 62 | .kref = { | ||
| 63 | .refcount = ATOMIC_INIT(2), | ||
| 64 | }, | ||
| 61 | .pidmap = { | 65 | .pidmap = { |
| 62 | [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } | 66 | [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } |
| 63 | }, | 67 | }, |
| 64 | .last_pid = 0 | 68 | .last_pid = 0, |
| 69 | .child_reaper = &init_task | ||
| 65 | }; | 70 | }; |
| 66 | 71 | ||
| 67 | /* | 72 | /* |
| @@ -80,25 +85,25 @@ struct pspace init_pspace = { | |||
| 80 | 85 | ||
| 81 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); | 86 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); |
| 82 | 87 | ||
| 83 | static fastcall void free_pidmap(struct pspace *pspace, int pid) | 88 | static fastcall void free_pidmap(struct pid_namespace *pid_ns, int pid) |
| 84 | { | 89 | { |
| 85 | struct pidmap *map = pspace->pidmap + pid / BITS_PER_PAGE; | 90 | struct pidmap *map = pid_ns->pidmap + pid / BITS_PER_PAGE; |
| 86 | int offset = pid & BITS_PER_PAGE_MASK; | 91 | int offset = pid & BITS_PER_PAGE_MASK; |
| 87 | 92 | ||
| 88 | clear_bit(offset, map->page); | 93 | clear_bit(offset, map->page); |
| 89 | atomic_inc(&map->nr_free); | 94 | atomic_inc(&map->nr_free); |
| 90 | } | 95 | } |
| 91 | 96 | ||
| 92 | static int alloc_pidmap(struct pspace *pspace) | 97 | static int alloc_pidmap(struct pid_namespace *pid_ns) |
| 93 | { | 98 | { |
| 94 | int i, offset, max_scan, pid, last = pspace->last_pid; | 99 | int i, offset, max_scan, pid, last = pid_ns->last_pid; |
| 95 | struct pidmap *map; | 100 | struct pidmap *map; |
| 96 | 101 | ||
| 97 | pid = last + 1; | 102 | pid = last + 1; |
| 98 | if (pid >= pid_max) | 103 | if (pid >= pid_max) |
| 99 | pid = RESERVED_PIDS; | 104 | pid = RESERVED_PIDS; |
| 100 | offset = pid & BITS_PER_PAGE_MASK; | 105 | offset = pid & BITS_PER_PAGE_MASK; |
| 101 | map = &pspace->pidmap[pid/BITS_PER_PAGE]; | 106 | map = &pid_ns->pidmap[pid/BITS_PER_PAGE]; |
| 102 | max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset; | 107 | max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset; |
| 103 | for (i = 0; i <= max_scan; ++i) { | 108 | for (i = 0; i <= max_scan; ++i) { |
| 104 | if (unlikely(!map->page)) { | 109 | if (unlikely(!map->page)) { |
| @@ -120,11 +125,11 @@ static int alloc_pidmap(struct pspace *pspace) | |||
| 120 | do { | 125 | do { |
| 121 | if (!test_and_set_bit(offset, map->page)) { | 126 | if (!test_and_set_bit(offset, map->page)) { |
| 122 | atomic_dec(&map->nr_free); | 127 | atomic_dec(&map->nr_free); |
| 123 | pspace->last_pid = pid; | 128 | pid_ns->last_pid = pid; |
| 124 | return pid; | 129 | return pid; |
| 125 | } | 130 | } |
| 126 | offset = find_next_offset(map, offset); | 131 | offset = find_next_offset(map, offset); |
| 127 | pid = mk_pid(pspace, map, offset); | 132 | pid = mk_pid(pid_ns, map, offset); |
| 128 | /* | 133 | /* |
| 129 | * find_next_offset() found a bit, the pid from it | 134 | * find_next_offset() found a bit, the pid from it |
| 130 | * is in-bounds, and if we fell back to the last | 135 | * is in-bounds, and if we fell back to the last |
| @@ -135,34 +140,34 @@ static int alloc_pidmap(struct pspace *pspace) | |||
| 135 | (i != max_scan || pid < last || | 140 | (i != max_scan || pid < last || |
| 136 | !((last+1) & BITS_PER_PAGE_MASK))); | 141 | !((last+1) & BITS_PER_PAGE_MASK))); |
| 137 | } | 142 | } |
| 138 | if (map < &pspace->pidmap[(pid_max-1)/BITS_PER_PAGE]) { | 143 | if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) { |
| 139 | ++map; | 144 | ++map; |
| 140 | offset = 0; | 145 | offset = 0; |
| 141 | } else { | 146 | } else { |
| 142 | map = &pspace->pidmap[0]; | 147 | map = &pid_ns->pidmap[0]; |
| 143 | offset = RESERVED_PIDS; | 148 | offset = RESERVED_PIDS; |
| 144 | if (unlikely(last == offset)) | 149 | if (unlikely(last == offset)) |
| 145 | break; | 150 | break; |
| 146 | } | 151 | } |
| 147 | pid = mk_pid(pspace, map, offset); | 152 | pid = mk_pid(pid_ns, map, offset); |
| 148 | } | 153 | } |
| 149 | return -1; | 154 | return -1; |
| 150 | } | 155 | } |
| 151 | 156 | ||
| 152 | static int next_pidmap(struct pspace *pspace, int last) | 157 | static int next_pidmap(struct pid_namespace *pid_ns, int last) |
| 153 | { | 158 | { |
| 154 | int offset; | 159 | int offset; |
| 155 | struct pidmap *map, *end; | 160 | struct pidmap *map, *end; |
| 156 | 161 | ||
| 157 | offset = (last + 1) & BITS_PER_PAGE_MASK; | 162 | offset = (last + 1) & BITS_PER_PAGE_MASK; |
| 158 | map = &pspace->pidmap[(last + 1)/BITS_PER_PAGE]; | 163 | map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE]; |
| 159 | end = &pspace->pidmap[PIDMAP_ENTRIES]; | 164 | end = &pid_ns->pidmap[PIDMAP_ENTRIES]; |
| 160 | for (; map < end; map++, offset = 0) { | 165 | for (; map < end; map++, offset = 0) { |
| 161 | if (unlikely(!map->page)) | 166 | if (unlikely(!map->page)) |
| 162 | continue; | 167 | continue; |
| 163 | offset = find_next_bit((map)->page, BITS_PER_PAGE, offset); | 168 | offset = find_next_bit((map)->page, BITS_PER_PAGE, offset); |
| 164 | if (offset < BITS_PER_PAGE) | 169 | if (offset < BITS_PER_PAGE) |
| 165 | return mk_pid(pspace, map, offset); | 170 | return mk_pid(pid_ns, map, offset); |
| 166 | } | 171 | } |
| 167 | return -1; | 172 | return -1; |
| 168 | } | 173 | } |
| @@ -192,7 +197,7 @@ fastcall void free_pid(struct pid *pid) | |||
| 192 | hlist_del_rcu(&pid->pid_chain); | 197 | hlist_del_rcu(&pid->pid_chain); |
| 193 | spin_unlock_irqrestore(&pidmap_lock, flags); | 198 | spin_unlock_irqrestore(&pidmap_lock, flags); |
| 194 | 199 | ||
| 195 | free_pidmap(&init_pspace, pid->nr); | 200 | free_pidmap(current->nsproxy->pid_ns, pid->nr); |
| 196 | call_rcu(&pid->rcu, delayed_put_pid); | 201 | call_rcu(&pid->rcu, delayed_put_pid); |
| 197 | } | 202 | } |
| 198 | 203 | ||
| @@ -206,7 +211,7 @@ struct pid *alloc_pid(void) | |||
| 206 | if (!pid) | 211 | if (!pid) |
| 207 | goto out; | 212 | goto out; |
| 208 | 213 | ||
| 209 | nr = alloc_pidmap(&init_pspace); | 214 | nr = alloc_pidmap(current->nsproxy->pid_ns); |
| 210 | if (nr < 0) | 215 | if (nr < 0) |
| 211 | goto out_free; | 216 | goto out_free; |
| 212 | 217 | ||
| @@ -348,13 +353,33 @@ struct pid *find_ge_pid(int nr) | |||
| 348 | pid = find_pid(nr); | 353 | pid = find_pid(nr); |
| 349 | if (pid) | 354 | if (pid) |
| 350 | break; | 355 | break; |
| 351 | nr = next_pidmap(&init_pspace, nr); | 356 | nr = next_pidmap(current->nsproxy->pid_ns, nr); |
| 352 | } while (nr > 0); | 357 | } while (nr > 0); |
| 353 | 358 | ||
| 354 | return pid; | 359 | return pid; |
| 355 | } | 360 | } |
| 356 | EXPORT_SYMBOL_GPL(find_get_pid); | 361 | EXPORT_SYMBOL_GPL(find_get_pid); |
| 357 | 362 | ||
| 363 | int copy_pid_ns(int flags, struct task_struct *tsk) | ||
| 364 | { | ||
| 365 | struct pid_namespace *old_ns = tsk->nsproxy->pid_ns; | ||
| 366 | int err = 0; | ||
| 367 | |||
| 368 | if (!old_ns) | ||
| 369 | return 0; | ||
| 370 | |||
| 371 | get_pid_ns(old_ns); | ||
| 372 | return err; | ||
| 373 | } | ||
| 374 | |||
| 375 | void free_pid_ns(struct kref *kref) | ||
| 376 | { | ||
| 377 | struct pid_namespace *ns; | ||
| 378 | |||
| 379 | ns = container_of(kref, struct pid_namespace, kref); | ||
| 380 | kfree(ns); | ||
| 381 | } | ||
| 382 | |||
| 358 | /* | 383 | /* |
| 359 | * The pid hash table is scaled according to the amount of memory in the | 384 | * The pid hash table is scaled according to the amount of memory in the |
| 360 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or | 385 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or |
| @@ -382,10 +407,10 @@ void __init pidhash_init(void) | |||
| 382 | 407 | ||
| 383 | void __init pidmap_init(void) | 408 | void __init pidmap_init(void) |
| 384 | { | 409 | { |
| 385 | init_pspace.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); | 410 | init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); |
| 386 | /* Reserve PID 0. We never call free_pidmap(0) */ | 411 | /* Reserve PID 0. We never call free_pidmap(0) */ |
| 387 | set_bit(0, init_pspace.pidmap[0].page); | 412 | set_bit(0, init_pid_ns.pidmap[0].page); |
| 388 | atomic_dec(&init_pspace.pidmap[0].nr_free); | 413 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); |
| 389 | 414 | ||
| 390 | pid_cachep = kmem_cache_create("pid", sizeof(struct pid), | 415 | pid_cachep = kmem_cache_create("pid", sizeof(struct pid), |
| 391 | __alignof__(struct pid), | 416 | __alignof__(struct pid), |
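
Editor's note: apart from the pspace -> pid_namespace rename, the allocator itself is untouched: each namespace owns an array of page-sized bitmaps, and a PID is simply the bitmap index scaled by BITS_PER_PAGE plus the bit offset, which is all mk_pid() computes. A stand-alone user-space analogue of that encoding, with the sizes shrunk and error handling omitted:

    #include <stdio.h>

    #define BITS_PER_MAP 64                 /* the kernel uses PAGE_SIZE * 8 */
    #define NR_MAPS      4

    static unsigned long maps[NR_MAPS];     /* one word per "pidmap page" */

    static int mk_id(int map, int off)
    {
        return map * BITS_PER_MAP + off;    /* same idea as mk_pid() */
    }

    static int alloc_id(void)
    {
        for (int map = 0; map < NR_MAPS; map++)
            for (int off = 0; off < BITS_PER_MAP; off++)
                if (!(maps[map] & (1UL << off))) {
                    maps[map] |= 1UL << off;
                    return mk_id(map, off);
                }
        return -1;                          /* all ids in use */
    }

    int main(void)
    {
        maps[0] |= 1;                       /* reserve id 0, like PID 0 */
        printf("%d %d\n", alloc_id(), alloc_id());  /* prints "1 2" */
        return 0;
    }
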
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 710ed084e7c5..ed296225dcd4 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
| @@ -20,13 +20,14 @@ config PM | |||
| 20 | sending the processor to sleep and saving power. | 20 | sending the processor to sleep and saving power. |
| 21 | 21 | ||
| 22 | config PM_LEGACY | 22 | config PM_LEGACY |
| 23 | bool "Legacy Power Management API" | 23 | bool "Legacy Power Management API (DEPRECATED)" |
| 24 | depends on PM | 24 | depends on PM |
| 25 | default y | 25 | default n |
| 26 | ---help--- | 26 | ---help--- |
| 27 | Support for pm_register() and friends. | 27 | Support for pm_register() and friends. This old API is obsoleted |
| 28 | by the driver model. | ||
| 28 | 29 | ||
| 29 | If unsure, say Y. | 30 | If unsure, say N. |
| 30 | 31 | ||
| 31 | config PM_DEBUG | 32 | config PM_DEBUG |
| 32 | bool "Power Management Debug Support" | 33 | bool "Power Management Debug Support" |
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 0b00f56c2ad0..88fc5d7ac737 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
| @@ -60,9 +60,11 @@ static void power_down(suspend_disk_method_t mode) | |||
| 60 | { | 60 | { |
| 61 | switch(mode) { | 61 | switch(mode) { |
| 62 | case PM_DISK_PLATFORM: | 62 | case PM_DISK_PLATFORM: |
| 63 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | 63 | if (pm_ops && pm_ops->enter) { |
| 64 | pm_ops->enter(PM_SUSPEND_DISK); | 64 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); |
| 65 | break; | 65 | pm_ops->enter(PM_SUSPEND_DISK); |
| 66 | break; | ||
| 67 | } | ||
| 66 | case PM_DISK_SHUTDOWN: | 68 | case PM_DISK_SHUTDOWN: |
| 67 | kernel_power_off(); | 69 | kernel_power_off(); |
| 68 | break; | 70 | break; |
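
Editor's note: the PM_DISK_PLATFORM case now only calls the platform hook when pm_ops->enter is actually set; otherwise it deliberately falls through to PM_DISK_SHUTDOWN so the machine still powers off. The control flow in miniature (a sketch, not kernel code):

    #include <stdio.h>

    enum mode { PLATFORM, SHUTDOWN };

    static void power_down(enum mode mode, int have_platform_hook)
    {
        switch (mode) {
        case PLATFORM:
            if (have_platform_hook) {
                puts("enter platform sleep");
                break;
            }
            /* no hook registered: fall through and shut down instead */
        case SHUTDOWN:
            puts("power off");
            break;
        }
    }

    int main(void)
    {
        power_down(PLATFORM, 0);    /* prints "power off" */
        power_down(PLATFORM, 1);    /* prints "enter platform sleep" */
        return 0;
    }
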
diff --git a/kernel/power/main.c b/kernel/power/main.c index 500eb87f643d..ff3a6182f5f0 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
| @@ -29,7 +29,7 @@ | |||
| 29 | DEFINE_MUTEX(pm_mutex); | 29 | DEFINE_MUTEX(pm_mutex); |
| 30 | 30 | ||
| 31 | struct pm_ops *pm_ops; | 31 | struct pm_ops *pm_ops; |
| 32 | suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; | 32 | suspend_disk_method_t pm_disk_mode = PM_DISK_PLATFORM; |
| 33 | 33 | ||
| 34 | /** | 34 | /** |
| 35 | * pm_set_ops - Set the global power method table. | 35 | * pm_set_ops - Set the global power method table. |
diff --git a/kernel/power/process.c b/kernel/power/process.c index 99eeb119b06d..6d566bf7085c 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
| @@ -28,8 +28,7 @@ static inline int freezeable(struct task_struct * p) | |||
| 28 | if ((p == current) || | 28 | if ((p == current) || |
| 29 | (p->flags & PF_NOFREEZE) || | 29 | (p->flags & PF_NOFREEZE) || |
| 30 | (p->exit_state == EXIT_ZOMBIE) || | 30 | (p->exit_state == EXIT_ZOMBIE) || |
| 31 | (p->exit_state == EXIT_DEAD) || | 31 | (p->exit_state == EXIT_DEAD)) |
| 32 | (p->state == TASK_STOPPED)) | ||
| 33 | return 0; | 32 | return 0; |
| 34 | return 1; | 33 | return 1; |
| 35 | } | 34 | } |
| @@ -61,10 +60,16 @@ static inline void freeze_process(struct task_struct *p) | |||
| 61 | unsigned long flags; | 60 | unsigned long flags; |
| 62 | 61 | ||
| 63 | if (!freezing(p)) { | 62 | if (!freezing(p)) { |
| 64 | freeze(p); | 63 | rmb(); |
| 65 | spin_lock_irqsave(&p->sighand->siglock, flags); | 64 | if (!frozen(p)) { |
| 66 | signal_wake_up(p, 0); | 65 | if (p->state == TASK_STOPPED) |
| 67 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 66 | force_sig_specific(SIGSTOP, p); |
| 67 | |||
| 68 | freeze(p); | ||
| 69 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
| 70 | signal_wake_up(p, p->state == TASK_STOPPED); | ||
| 71 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
| 72 | } | ||
| 68 | } | 73 | } |
| 69 | } | 74 | } |
| 70 | 75 | ||
| @@ -103,9 +108,7 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) | |||
| 103 | if (frozen(p)) | 108 | if (frozen(p)) |
| 104 | continue; | 109 | continue; |
| 105 | 110 | ||
| 106 | if (p->state == TASK_TRACED && | 111 | if (p->state == TASK_TRACED && frozen(p->parent)) { |
| 107 | (frozen(p->parent) || | ||
| 108 | p->parent->state == TASK_STOPPED)) { | ||
| 109 | cancel_freezing(p); | 112 | cancel_freezing(p); |
| 110 | continue; | 113 | continue; |
| 111 | } | 114 | } |
diff --git a/kernel/relay.c b/kernel/relay.c index 75a3a9a7efc2..a4701e7ba7d0 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
| @@ -138,7 +138,7 @@ depopulate: | |||
| 138 | */ | 138 | */ |
| 139 | struct rchan_buf *relay_create_buf(struct rchan *chan) | 139 | struct rchan_buf *relay_create_buf(struct rchan *chan) |
| 140 | { | 140 | { |
| 141 | struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL); | 141 | struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); |
| 142 | if (!buf) | 142 | if (!buf) |
| 143 | return NULL; | 143 | return NULL; |
| 144 | 144 | ||
| @@ -479,7 +479,7 @@ struct rchan *relay_open(const char *base_filename, | |||
| 479 | if (!(subbuf_size && n_subbufs)) | 479 | if (!(subbuf_size && n_subbufs)) |
| 480 | return NULL; | 480 | return NULL; |
| 481 | 481 | ||
| 482 | chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL); | 482 | chan = kzalloc(sizeof(struct rchan), GFP_KERNEL); |
| 483 | if (!chan) | 483 | if (!chan) |
| 484 | return NULL; | 484 | return NULL; |
| 485 | 485 | ||
| @@ -959,7 +959,7 @@ static inline ssize_t relay_file_read_subbufs(struct file *filp, | |||
| 959 | if (!desc->count) | 959 | if (!desc->count) |
| 960 | return 0; | 960 | return 0; |
| 961 | 961 | ||
| 962 | mutex_lock(&filp->f_dentry->d_inode->i_mutex); | 962 | mutex_lock(&filp->f_path.dentry->d_inode->i_mutex); |
| 963 | do { | 963 | do { |
| 964 | if (!relay_file_read_avail(buf, *ppos)) | 964 | if (!relay_file_read_avail(buf, *ppos)) |
| 965 | break; | 965 | break; |
| @@ -979,7 +979,7 @@ static inline ssize_t relay_file_read_subbufs(struct file *filp, | |||
| 979 | *ppos = relay_file_read_end_pos(buf, read_start, ret); | 979 | *ppos = relay_file_read_end_pos(buf, read_start, ret); |
| 980 | } | 980 | } |
| 981 | } while (desc->count && ret); | 981 | } while (desc->count && ret); |
| 982 | mutex_unlock(&filp->f_dentry->d_inode->i_mutex); | 982 | mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); |
| 983 | 983 | ||
| 984 | return desc->written; | 984 | return desc->written; |
| 985 | } | 985 | } |
diff --git a/kernel/sched.c b/kernel/sched.c index f385eff4682d..5cd833bc2173 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -225,8 +225,10 @@ struct rq { | |||
| 225 | unsigned long nr_uninterruptible; | 225 | unsigned long nr_uninterruptible; |
| 226 | 226 | ||
| 227 | unsigned long expired_timestamp; | 227 | unsigned long expired_timestamp; |
| 228 | unsigned long long timestamp_last_tick; | 228 | /* Cached timestamp set by update_cpu_clock() */ |
| 229 | unsigned long long most_recent_timestamp; | ||
| 229 | struct task_struct *curr, *idle; | 230 | struct task_struct *curr, *idle; |
| 231 | unsigned long next_balance; | ||
| 230 | struct mm_struct *prev_mm; | 232 | struct mm_struct *prev_mm; |
| 231 | struct prio_array *active, *expired, arrays[2]; | 233 | struct prio_array *active, *expired, arrays[2]; |
| 232 | int best_expired_prio; | 234 | int best_expired_prio; |
| @@ -426,7 +428,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | |||
| 426 | * bump this up when changing the output format or the meaning of an existing | 428 | * bump this up when changing the output format or the meaning of an existing |
| 427 | * format, so that tools can adapt (or abort) | 429 | * format, so that tools can adapt (or abort) |
| 428 | */ | 430 | */ |
| 429 | #define SCHEDSTAT_VERSION 12 | 431 | #define SCHEDSTAT_VERSION 14 |
| 430 | 432 | ||
| 431 | static int show_schedstat(struct seq_file *seq, void *v) | 433 | static int show_schedstat(struct seq_file *seq, void *v) |
| 432 | { | 434 | { |
| @@ -464,7 +466,8 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
| 464 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); | 466 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); |
| 465 | for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; | 467 | for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; |
| 466 | itype++) { | 468 | itype++) { |
| 467 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu", | 469 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu " |
| 470 | "%lu", | ||
| 468 | sd->lb_cnt[itype], | 471 | sd->lb_cnt[itype], |
| 469 | sd->lb_balanced[itype], | 472 | sd->lb_balanced[itype], |
| 470 | sd->lb_failed[itype], | 473 | sd->lb_failed[itype], |
| @@ -474,11 +477,13 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
| 474 | sd->lb_nobusyq[itype], | 477 | sd->lb_nobusyq[itype], |
| 475 | sd->lb_nobusyg[itype]); | 478 | sd->lb_nobusyg[itype]); |
| 476 | } | 479 | } |
| 477 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", | 480 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu" |
| 481 | " %lu %lu %lu\n", | ||
| 478 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, | 482 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, |
| 479 | sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, | 483 | sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, |
| 480 | sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, | 484 | sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, |
| 481 | sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); | 485 | sd->ttwu_wake_remote, sd->ttwu_move_affine, |
| 486 | sd->ttwu_move_balance); | ||
| 482 | } | 487 | } |
| 483 | preempt_enable(); | 488 | preempt_enable(); |
| 484 | #endif | 489 | #endif |
| @@ -547,7 +552,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) | |||
| 547 | #endif | 552 | #endif |
| 548 | 553 | ||
| 549 | /* | 554 | /* |
| 550 | * rq_lock - lock a given runqueue and disable interrupts. | 555 | * this_rq_lock - lock this runqueue and disable interrupts. |
| 551 | */ | 556 | */ |
| 552 | static inline struct rq *this_rq_lock(void) | 557 | static inline struct rq *this_rq_lock(void) |
| 553 | __acquires(rq->lock) | 558 | __acquires(rq->lock) |
| @@ -938,13 +943,16 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) | |||
| 938 | { | 943 | { |
| 939 | unsigned long long now; | 944 | unsigned long long now; |
| 940 | 945 | ||
| 946 | if (rt_task(p)) | ||
| 947 | goto out; | ||
| 948 | |||
| 941 | now = sched_clock(); | 949 | now = sched_clock(); |
| 942 | #ifdef CONFIG_SMP | 950 | #ifdef CONFIG_SMP |
| 943 | if (!local) { | 951 | if (!local) { |
| 944 | /* Compensate for drifting sched_clock */ | 952 | /* Compensate for drifting sched_clock */ |
| 945 | struct rq *this_rq = this_rq(); | 953 | struct rq *this_rq = this_rq(); |
| 946 | now = (now - this_rq->timestamp_last_tick) | 954 | now = (now - this_rq->most_recent_timestamp) |
| 947 | + rq->timestamp_last_tick; | 955 | + rq->most_recent_timestamp; |
| 948 | } | 956 | } |
| 949 | #endif | 957 | #endif |
| 950 | 958 | ||
| @@ -959,8 +967,7 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) | |||
| 959 | (now - p->timestamp) >> 20); | 967 | (now - p->timestamp) >> 20); |
| 960 | } | 968 | } |
| 961 | 969 | ||
| 962 | if (!rt_task(p)) | 970 | p->prio = recalc_task_prio(p, now); |
| 963 | p->prio = recalc_task_prio(p, now); | ||
| 964 | 971 | ||
| 965 | /* | 972 | /* |
| 966 | * This checks to make sure it's not an uninterruptible task | 973 | * This checks to make sure it's not an uninterruptible task |
| @@ -985,7 +992,7 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) | |||
| 985 | } | 992 | } |
| 986 | } | 993 | } |
| 987 | p->timestamp = now; | 994 | p->timestamp = now; |
| 988 | 995 | out: | |
| 989 | __activate_task(p, rq); | 996 | __activate_task(p, rq); |
| 990 | } | 997 | } |
| 991 | 998 | ||
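
Editor's note: the rename from timestamp_last_tick to most_recent_timestamp does not change the compensation used here and in pull_task()/wake_up_new_task(): sched_clock() can drift between CPUs, so a value read against one runqueue's clock is rebased onto another's by subtracting the local runqueue's latest timestamp and adding the remote one's. The arithmetic with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
        /* hypothetical per-cpu clocks, drifted apart by 300 units */
        unsigned long long local_ts  = 10300;   /* this_rq->most_recent_timestamp */
        unsigned long long remote_ts = 10000;   /* rq->most_recent_timestamp */
        unsigned long long now       = 10350;   /* sched_clock() on this CPU */

        /* "50 units after the last update", expressed in the remote clock */
        unsigned long long rebased = (now - local_ts) + remote_ts;

        printf("%llu\n", rebased);              /* prints 10050 */
        return 0;
    }
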
| @@ -1450,7 +1457,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
| 1450 | 1457 | ||
| 1451 | if (this_sd->flags & SD_WAKE_AFFINE) { | 1458 | if (this_sd->flags & SD_WAKE_AFFINE) { |
| 1452 | unsigned long tl = this_load; | 1459 | unsigned long tl = this_load; |
| 1453 | unsigned long tl_per_task = cpu_avg_load_per_task(this_cpu); | 1460 | unsigned long tl_per_task; |
| 1461 | |||
| 1462 | tl_per_task = cpu_avg_load_per_task(this_cpu); | ||
| 1454 | 1463 | ||
| 1455 | /* | 1464 | /* |
| 1456 | * If sync wakeup then subtract the (maximum possible) | 1465 | * If sync wakeup then subtract the (maximum possible) |
| @@ -1688,8 +1697,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
| 1688 | * Not the local CPU - must adjust timestamp. This should | 1697 | * Not the local CPU - must adjust timestamp. This should |
| 1689 | * get optimised away in the !CONFIG_SMP case. | 1698 | * get optimised away in the !CONFIG_SMP case. |
| 1690 | */ | 1699 | */ |
| 1691 | p->timestamp = (p->timestamp - this_rq->timestamp_last_tick) | 1700 | p->timestamp = (p->timestamp - this_rq->most_recent_timestamp) |
| 1692 | + rq->timestamp_last_tick; | 1701 | + rq->most_recent_timestamp; |
| 1693 | __activate_task(p, rq); | 1702 | __activate_task(p, rq); |
| 1694 | if (TASK_PREEMPTS_CURR(p, rq)) | 1703 | if (TASK_PREEMPTS_CURR(p, rq)) |
| 1695 | resched_task(rq->curr); | 1704 | resched_task(rq->curr); |
| @@ -1952,6 +1961,7 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) | |||
| 1952 | __acquires(rq1->lock) | 1961 | __acquires(rq1->lock) |
| 1953 | __acquires(rq2->lock) | 1962 | __acquires(rq2->lock) |
| 1954 | { | 1963 | { |
| 1964 | BUG_ON(!irqs_disabled()); | ||
| 1955 | if (rq1 == rq2) { | 1965 | if (rq1 == rq2) { |
| 1956 | spin_lock(&rq1->lock); | 1966 | spin_lock(&rq1->lock); |
| 1957 | __acquire(rq2->lock); /* Fake it out ;) */ | 1967 | __acquire(rq2->lock); /* Fake it out ;) */ |
| @@ -1991,6 +2001,11 @@ static void double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
| 1991 | __acquires(busiest->lock) | 2001 | __acquires(busiest->lock) |
| 1992 | __acquires(this_rq->lock) | 2002 | __acquires(this_rq->lock) |
| 1993 | { | 2003 | { |
| 2004 | if (unlikely(!irqs_disabled())) { | ||
| 2005 | /* printk() doesn't work good under rq->lock */ | ||
| 2006 | spin_unlock(&this_rq->lock); | ||
| 2007 | BUG_ON(1); | ||
| 2008 | } | ||
| 1994 | if (unlikely(!spin_trylock(&busiest->lock))) { | 2009 | if (unlikely(!spin_trylock(&busiest->lock))) { |
| 1995 | if (busiest < this_rq) { | 2010 | if (busiest < this_rq) { |
| 1996 | spin_unlock(&this_rq->lock); | 2011 | spin_unlock(&this_rq->lock); |
| @@ -2061,8 +2076,8 @@ static void pull_task(struct rq *src_rq, struct prio_array *src_array, | |||
| 2061 | set_task_cpu(p, this_cpu); | 2076 | set_task_cpu(p, this_cpu); |
| 2062 | inc_nr_running(p, this_rq); | 2077 | inc_nr_running(p, this_rq); |
| 2063 | enqueue_task(p, this_array); | 2078 | enqueue_task(p, this_array); |
| 2064 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) | 2079 | p->timestamp = (p->timestamp - src_rq->most_recent_timestamp) |
| 2065 | + this_rq->timestamp_last_tick; | 2080 | + this_rq->most_recent_timestamp; |
| 2066 | /* | 2081 | /* |
| 2067 | * Note that idle threads have a prio of MAX_PRIO, for this test | 2082 | * Note that idle threads have a prio of MAX_PRIO, for this test |
| 2068 | * to be always true for them. | 2083 | * to be always true for them. |
| @@ -2098,10 +2113,15 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
| 2098 | * 2) too many balance attempts have failed. | 2113 | * 2) too many balance attempts have failed. |
| 2099 | */ | 2114 | */ |
| 2100 | 2115 | ||
| 2101 | if (sd->nr_balance_failed > sd->cache_nice_tries) | 2116 | if (sd->nr_balance_failed > sd->cache_nice_tries) { |
| 2117 | #ifdef CONFIG_SCHEDSTATS | ||
| 2118 | if (task_hot(p, rq->most_recent_timestamp, sd)) | ||
| 2119 | schedstat_inc(sd, lb_hot_gained[idle]); | ||
| 2120 | #endif | ||
| 2102 | return 1; | 2121 | return 1; |
| 2122 | } | ||
| 2103 | 2123 | ||
| 2104 | if (task_hot(p, rq->timestamp_last_tick, sd)) | 2124 | if (task_hot(p, rq->most_recent_timestamp, sd)) |
| 2105 | return 0; | 2125 | return 0; |
| 2106 | return 1; | 2126 | return 1; |
| 2107 | } | 2127 | } |
| @@ -2199,11 +2219,6 @@ skip_queue: | |||
| 2199 | goto skip_bitmap; | 2219 | goto skip_bitmap; |
| 2200 | } | 2220 | } |
| 2201 | 2221 | ||
| 2202 | #ifdef CONFIG_SCHEDSTATS | ||
| 2203 | if (task_hot(tmp, busiest->timestamp_last_tick, sd)) | ||
| 2204 | schedstat_inc(sd, lb_hot_gained[idle]); | ||
| 2205 | #endif | ||
| 2206 | |||
| 2207 | pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu); | 2222 | pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu); |
| 2208 | pulled++; | 2223 | pulled++; |
| 2209 | rem_load_move -= tmp->load_weight; | 2224 | rem_load_move -= tmp->load_weight; |
| @@ -2241,7 +2256,7 @@ out: | |||
| 2241 | static struct sched_group * | 2256 | static struct sched_group * |
| 2242 | find_busiest_group(struct sched_domain *sd, int this_cpu, | 2257 | find_busiest_group(struct sched_domain *sd, int this_cpu, |
| 2243 | unsigned long *imbalance, enum idle_type idle, int *sd_idle, | 2258 | unsigned long *imbalance, enum idle_type idle, int *sd_idle, |
| 2244 | cpumask_t *cpus) | 2259 | cpumask_t *cpus, int *balance) |
| 2245 | { | 2260 | { |
| 2246 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; | 2261 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; |
| 2247 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; | 2262 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; |
| @@ -2270,10 +2285,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2270 | unsigned long load, group_capacity; | 2285 | unsigned long load, group_capacity; |
| 2271 | int local_group; | 2286 | int local_group; |
| 2272 | int i; | 2287 | int i; |
| 2288 | unsigned int balance_cpu = -1, first_idle_cpu = 0; | ||
| 2273 | unsigned long sum_nr_running, sum_weighted_load; | 2289 | unsigned long sum_nr_running, sum_weighted_load; |
| 2274 | 2290 | ||
| 2275 | local_group = cpu_isset(this_cpu, group->cpumask); | 2291 | local_group = cpu_isset(this_cpu, group->cpumask); |
| 2276 | 2292 | ||
| 2293 | if (local_group) | ||
| 2294 | balance_cpu = first_cpu(group->cpumask); | ||
| 2295 | |||
| 2277 | /* Tally up the load of all CPUs in the group */ | 2296 | /* Tally up the load of all CPUs in the group */ |
| 2278 | sum_weighted_load = sum_nr_running = avg_load = 0; | 2297 | sum_weighted_load = sum_nr_running = avg_load = 0; |
| 2279 | 2298 | ||
| @@ -2289,9 +2308,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2289 | *sd_idle = 0; | 2308 | *sd_idle = 0; |
| 2290 | 2309 | ||
| 2291 | /* Bias balancing toward cpus of our domain */ | 2310 | /* Bias balancing toward cpus of our domain */ |
| 2292 | if (local_group) | 2311 | if (local_group) { |
| 2312 | if (idle_cpu(i) && !first_idle_cpu) { | ||
| 2313 | first_idle_cpu = 1; | ||
| 2314 | balance_cpu = i; | ||
| 2315 | } | ||
| 2316 | |||
| 2293 | load = target_load(i, load_idx); | 2317 | load = target_load(i, load_idx); |
| 2294 | else | 2318 | } else |
| 2295 | load = source_load(i, load_idx); | 2319 | load = source_load(i, load_idx); |
| 2296 | 2320 | ||
| 2297 | avg_load += load; | 2321 | avg_load += load; |
| @@ -2299,6 +2323,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2299 | sum_weighted_load += rq->raw_weighted_load; | 2323 | sum_weighted_load += rq->raw_weighted_load; |
| 2300 | } | 2324 | } |
| 2301 | 2325 | ||
| 2326 | /* | ||
| 2327 | * First idle cpu or the first cpu (busiest) in this sched group | ||
| 2328 | * is eligible for doing load balancing at this and above | ||
| 2329 | * domains. | ||
| 2330 | */ | ||
| 2331 | if (local_group && balance_cpu != this_cpu && balance) { | ||
| 2332 | *balance = 0; | ||
| 2333 | goto ret; | ||
| 2334 | } | ||
| 2335 | |||
| 2302 | total_load += avg_load; | 2336 | total_load += avg_load; |
| 2303 | total_pwr += group->cpu_power; | 2337 | total_pwr += group->cpu_power; |
| 2304 | 2338 | ||
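The find_busiest_group() hunks above introduce a balance_cpu election: within the local group the first idle CPU, or failing that the first CPU of the group, is the only one allowed to balance at this domain level; any other CPU sets *balance = 0 and backs off. A rough sketch of that election with plain arrays standing in for cpumask_t (not the kernel API):

#include <stdio.h>

/* Toy model: group[] lists the CPUs of the local sched group,
 * idle[] says which of them are idle. */
static int pick_balance_cpu(const int *group, const int *idle, int n)
{
    int balance_cpu = group[0];          /* default: first CPU in the group */
    int first_idle_seen = 0;

    for (int i = 0; i < n; i++) {
        if (idle[i] && !first_idle_seen) {
            first_idle_seen = 1;
            balance_cpu = group[i];      /* prefer the first idle CPU */
        }
    }
    return balance_cpu;
}

int main(void)
{
    int group[] = { 4, 5, 6, 7 };
    int idle[]  = { 0, 0, 1, 1 };
    int this_cpu = 7;
    int balance_cpu = pick_balance_cpu(group, idle, 4);

    /* Mirrors: if (local_group && balance_cpu != this_cpu) { *balance = 0; } */
    printf("balance_cpu=%d, cpu %d %s balance here\n", balance_cpu, this_cpu,
           balance_cpu == this_cpu ? "may" : "must not");
    return 0;
}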
| @@ -2458,18 +2492,21 @@ small_imbalance: | |||
| 2458 | pwr_now /= SCHED_LOAD_SCALE; | 2492 | pwr_now /= SCHED_LOAD_SCALE; |
| 2459 | 2493 | ||
| 2460 | /* Amount of load we'd subtract */ | 2494 | /* Amount of load we'd subtract */ |
| 2461 | tmp = busiest_load_per_task*SCHED_LOAD_SCALE/busiest->cpu_power; | 2495 | tmp = busiest_load_per_task * SCHED_LOAD_SCALE / |
| 2496 | busiest->cpu_power; | ||
| 2462 | if (max_load > tmp) | 2497 | if (max_load > tmp) |
| 2463 | pwr_move += busiest->cpu_power * | 2498 | pwr_move += busiest->cpu_power * |
| 2464 | min(busiest_load_per_task, max_load - tmp); | 2499 | min(busiest_load_per_task, max_load - tmp); |
| 2465 | 2500 | ||
| 2466 | /* Amount of load we'd add */ | 2501 | /* Amount of load we'd add */ |
| 2467 | if (max_load*busiest->cpu_power < | 2502 | if (max_load * busiest->cpu_power < |
| 2468 | busiest_load_per_task*SCHED_LOAD_SCALE) | 2503 | busiest_load_per_task * SCHED_LOAD_SCALE) |
| 2469 | tmp = max_load*busiest->cpu_power/this->cpu_power; | 2504 | tmp = max_load * busiest->cpu_power / this->cpu_power; |
| 2470 | else | 2505 | else |
| 2471 | tmp = busiest_load_per_task*SCHED_LOAD_SCALE/this->cpu_power; | 2506 | tmp = busiest_load_per_task * SCHED_LOAD_SCALE / |
| 2472 | pwr_move += this->cpu_power*min(this_load_per_task, this_load + tmp); | 2507 | this->cpu_power; |
| 2508 | pwr_move += this->cpu_power * | ||
| 2509 | min(this_load_per_task, this_load + tmp); | ||
| 2473 | pwr_move /= SCHED_LOAD_SCALE; | 2510 | pwr_move /= SCHED_LOAD_SCALE; |
| 2474 | 2511 | ||
| 2475 | /* Move if we gain throughput */ | 2512 | /* Move if we gain throughput */ |
| @@ -2490,8 +2527,8 @@ out_balanced: | |||
| 2490 | *imbalance = min_load_per_task; | 2527 | *imbalance = min_load_per_task; |
| 2491 | return group_min; | 2528 | return group_min; |
| 2492 | } | 2529 | } |
| 2493 | ret: | ||
| 2494 | #endif | 2530 | #endif |
| 2531 | ret: | ||
| 2495 | *imbalance = 0; | 2532 | *imbalance = 0; |
| 2496 | return NULL; | 2533 | return NULL; |
| 2497 | } | 2534 | } |
| @@ -2540,17 +2577,17 @@ static inline unsigned long minus_1_or_zero(unsigned long n) | |||
| 2540 | /* | 2577 | /* |
| 2541 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 2578 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
| 2542 | * tasks if there is an imbalance. | 2579 | * tasks if there is an imbalance. |
| 2543 | * | ||
| 2544 | * Called with this_rq unlocked. | ||
| 2545 | */ | 2580 | */ |
| 2546 | static int load_balance(int this_cpu, struct rq *this_rq, | 2581 | static int load_balance(int this_cpu, struct rq *this_rq, |
| 2547 | struct sched_domain *sd, enum idle_type idle) | 2582 | struct sched_domain *sd, enum idle_type idle, |
| 2583 | int *balance) | ||
| 2548 | { | 2584 | { |
| 2549 | int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; | 2585 | int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; |
| 2550 | struct sched_group *group; | 2586 | struct sched_group *group; |
| 2551 | unsigned long imbalance; | 2587 | unsigned long imbalance; |
| 2552 | struct rq *busiest; | 2588 | struct rq *busiest; |
| 2553 | cpumask_t cpus = CPU_MASK_ALL; | 2589 | cpumask_t cpus = CPU_MASK_ALL; |
| 2590 | unsigned long flags; | ||
| 2554 | 2591 | ||
| 2555 | /* | 2592 | /* |
| 2556 | * When power savings policy is enabled for the parent domain, idle | 2593 | * When power savings policy is enabled for the parent domain, idle |
| @@ -2566,7 +2603,11 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
| 2566 | 2603 | ||
| 2567 | redo: | 2604 | redo: |
| 2568 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | 2605 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, |
| 2569 | &cpus); | 2606 | &cpus, balance); |
| 2607 | |||
| 2608 | if (*balance == 0) | ||
| 2609 | goto out_balanced; | ||
| 2610 | |||
| 2570 | if (!group) { | 2611 | if (!group) { |
| 2571 | schedstat_inc(sd, lb_nobusyg[idle]); | 2612 | schedstat_inc(sd, lb_nobusyg[idle]); |
| 2572 | goto out_balanced; | 2613 | goto out_balanced; |
| @@ -2590,11 +2631,13 @@ redo: | |||
| 2590 | * still unbalanced. nr_moved simply stays zero, so it is | 2631 | * still unbalanced. nr_moved simply stays zero, so it is |
| 2591 | * correctly treated as an imbalance. | 2632 | * correctly treated as an imbalance. |
| 2592 | */ | 2633 | */ |
| 2634 | local_irq_save(flags); | ||
| 2593 | double_rq_lock(this_rq, busiest); | 2635 | double_rq_lock(this_rq, busiest); |
| 2594 | nr_moved = move_tasks(this_rq, this_cpu, busiest, | 2636 | nr_moved = move_tasks(this_rq, this_cpu, busiest, |
| 2595 | minus_1_or_zero(busiest->nr_running), | 2637 | minus_1_or_zero(busiest->nr_running), |
| 2596 | imbalance, sd, idle, &all_pinned); | 2638 | imbalance, sd, idle, &all_pinned); |
| 2597 | double_rq_unlock(this_rq, busiest); | 2639 | double_rq_unlock(this_rq, busiest); |
| 2640 | local_irq_restore(flags); | ||
| 2598 | 2641 | ||
| 2599 | /* All tasks on this runqueue were pinned by CPU affinity */ | 2642 | /* All tasks on this runqueue were pinned by CPU affinity */ |
| 2600 | if (unlikely(all_pinned)) { | 2643 | if (unlikely(all_pinned)) { |
| @@ -2611,13 +2654,13 @@ redo: | |||
| 2611 | 2654 | ||
| 2612 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { | 2655 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { |
| 2613 | 2656 | ||
| 2614 | spin_lock(&busiest->lock); | 2657 | spin_lock_irqsave(&busiest->lock, flags); |
| 2615 | 2658 | ||
| 2616 | /* don't kick the migration_thread, if the curr | 2659 | /* don't kick the migration_thread, if the curr |
| 2617 | * task on busiest cpu can't be moved to this_cpu | 2660 | * task on busiest cpu can't be moved to this_cpu |
| 2618 | */ | 2661 | */ |
| 2619 | if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { | 2662 | if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { |
| 2620 | spin_unlock(&busiest->lock); | 2663 | spin_unlock_irqrestore(&busiest->lock, flags); |
| 2621 | all_pinned = 1; | 2664 | all_pinned = 1; |
| 2622 | goto out_one_pinned; | 2665 | goto out_one_pinned; |
| 2623 | } | 2666 | } |
| @@ -2627,7 +2670,7 @@ redo: | |||
| 2627 | busiest->push_cpu = this_cpu; | 2670 | busiest->push_cpu = this_cpu; |
| 2628 | active_balance = 1; | 2671 | active_balance = 1; |
| 2629 | } | 2672 | } |
| 2630 | spin_unlock(&busiest->lock); | 2673 | spin_unlock_irqrestore(&busiest->lock, flags); |
| 2631 | if (active_balance) | 2674 | if (active_balance) |
| 2632 | wake_up_process(busiest->migration_thread); | 2675 | wake_up_process(busiest->migration_thread); |
| 2633 | 2676 | ||
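The load_balance() hunk above adds explicit interrupt disabling: local_irq_save() around double_rq_lock()/move_tasks(), and spin_lock_irqsave() for busiest->lock. The likely reason, given the softirq-driven rebalancing introduced later in this patch, is that the caller no longer guarantees IRQs are off. A toy model of the save/restore bracketing only; these stand-ins are not the kernel primitives, which are macros operating on the flags variable directly:

#include <stdio.h>

static int irqs_enabled = 1;    /* fake CPU interrupt state */

static void local_irq_save(unsigned long *flags) { *flags = irqs_enabled; irqs_enabled = 0; }
static void local_irq_restore(unsigned long flags) { irqs_enabled = (int)flags; }

int main(void)
{
    unsigned long flags;

    /* load_balance() now brackets the runqueue locking itself: */
    local_irq_save(&flags);
    /* double_rq_lock(this_rq, busiest); move_tasks(...); double_rq_unlock(...); */
    local_irq_restore(flags);

    printf("irqs re-enabled: %d\n", irqs_enabled);
    return 0;
}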
| @@ -2706,7 +2749,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
| 2706 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); | 2749 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); |
| 2707 | redo: | 2750 | redo: |
| 2708 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, | 2751 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, |
| 2709 | &sd_idle, &cpus); | 2752 | &sd_idle, &cpus, NULL); |
| 2710 | if (!group) { | 2753 | if (!group) { |
| 2711 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); | 2754 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); |
| 2712 | goto out_balanced; | 2755 | goto out_balanced; |
| @@ -2766,14 +2809,28 @@ out_balanced: | |||
| 2766 | static void idle_balance(int this_cpu, struct rq *this_rq) | 2809 | static void idle_balance(int this_cpu, struct rq *this_rq) |
| 2767 | { | 2810 | { |
| 2768 | struct sched_domain *sd; | 2811 | struct sched_domain *sd; |
| 2812 | int pulled_task = 0; | ||
| 2813 | unsigned long next_balance = jiffies + 60 * HZ; | ||
| 2769 | 2814 | ||
| 2770 | for_each_domain(this_cpu, sd) { | 2815 | for_each_domain(this_cpu, sd) { |
| 2771 | if (sd->flags & SD_BALANCE_NEWIDLE) { | 2816 | if (sd->flags & SD_BALANCE_NEWIDLE) { |
| 2772 | /* If we've pulled tasks over stop searching: */ | 2817 | /* If we've pulled tasks over stop searching: */ |
| 2773 | if (load_balance_newidle(this_cpu, this_rq, sd)) | 2818 | pulled_task = load_balance_newidle(this_cpu, |
| 2819 | this_rq, sd); | ||
| 2820 | if (time_after(next_balance, | ||
| 2821 | sd->last_balance + sd->balance_interval)) | ||
| 2822 | next_balance = sd->last_balance | ||
| 2823 | + sd->balance_interval; | ||
| 2824 | if (pulled_task) | ||
| 2774 | break; | 2825 | break; |
| 2775 | } | 2826 | } |
| 2776 | } | 2827 | } |
| 2828 | if (!pulled_task) | ||
| 2829 | /* | ||
| 2830 | * We are going idle. next_balance may be set based on | ||
| 2831 | * a busy processor. So reset next_balance. | ||
| 2832 | */ | ||
| 2833 | this_rq->next_balance = next_balance; | ||
| 2777 | } | 2834 | } |
| 2778 | 2835 | ||
| 2779 | /* | 2836 | /* |
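idle_balance() above now also computes the earliest (last_balance + balance_interval) over its new-idle domains and, when nothing was pulled, stores it in rq->next_balance so the tick knows when balancing is next due. A small sketch of that minimum-of-deadlines computation, ignoring jiffies wraparound and using made-up values:

#include <stdio.h>

struct dom { unsigned long last_balance, balance_interval; };

/* Earliest (last_balance + interval) over all new-idle domains; mirrors the
 * time_after() comparison in the hunk above. */
static unsigned long earliest_next_balance(const struct dom *d, int n,
                                           unsigned long jiffies, unsigned long hz)
{
    unsigned long next = jiffies + 60 * hz;   /* same "far future" default */

    for (int i = 0; i < n; i++)
        if (d[i].last_balance + d[i].balance_interval < next)
            next = d[i].last_balance + d[i].balance_interval;
    return next;
}

int main(void)
{
    struct dom doms[] = { { 1000, 8 }, { 1000, 64 } };

    printf("next_balance = %lu\n", earliest_next_balance(doms, 2, 1000, 250));
    return 0;
}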
| @@ -2826,26 +2883,9 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
| 2826 | spin_unlock(&target_rq->lock); | 2883 | spin_unlock(&target_rq->lock); |
| 2827 | } | 2884 | } |
| 2828 | 2885 | ||
| 2829 | /* | 2886 | static void update_load(struct rq *this_rq) |
| 2830 | * rebalance_tick will get called every timer tick, on every CPU. | ||
| 2831 | * | ||
| 2832 | * It checks each scheduling domain to see if it is due to be balanced, | ||
| 2833 | * and initiates a balancing operation if so. | ||
| 2834 | * | ||
| 2835 | * Balancing parameters are set up in arch_init_sched_domains. | ||
| 2836 | */ | ||
| 2837 | |||
| 2838 | /* Don't have all balancing operations going off at once: */ | ||
| 2839 | static inline unsigned long cpu_offset(int cpu) | ||
| 2840 | { | 2887 | { |
| 2841 | return jiffies + cpu * HZ / NR_CPUS; | 2888 | unsigned long this_load; |
| 2842 | } | ||
| 2843 | |||
| 2844 | static void | ||
| 2845 | rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) | ||
| 2846 | { | ||
| 2847 | unsigned long this_load, interval, j = cpu_offset(this_cpu); | ||
| 2848 | struct sched_domain *sd; | ||
| 2849 | int i, scale; | 2889 | int i, scale; |
| 2850 | 2890 | ||
| 2851 | this_load = this_rq->raw_weighted_load; | 2891 | this_load = this_rq->raw_weighted_load; |
| @@ -2865,6 +2905,32 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) | |||
| 2865 | new_load += scale-1; | 2905 | new_load += scale-1; |
| 2866 | this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; | 2906 | this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; |
| 2867 | } | 2907 | } |
| 2908 | } | ||
| 2909 | |||
| 2910 | /* | ||
| 2911 | * run_rebalance_domains is triggered when needed from the scheduler tick. | ||
| 2912 | * | ||
| 2913 | * It checks each scheduling domain to see if it is due to be balanced, | ||
| 2914 | * and initiates a balancing operation if so. | ||
| 2915 | * | ||
| 2916 | * Balancing parameters are set up in arch_init_sched_domains. | ||
| 2917 | */ | ||
| 2918 | static DEFINE_SPINLOCK(balancing); | ||
| 2919 | |||
| 2920 | static void run_rebalance_domains(struct softirq_action *h) | ||
| 2921 | { | ||
| 2922 | int this_cpu = smp_processor_id(), balance = 1; | ||
| 2923 | struct rq *this_rq = cpu_rq(this_cpu); | ||
| 2924 | unsigned long interval; | ||
| 2925 | struct sched_domain *sd; | ||
| 2926 | /* | ||
| 2927 | * We are idle if there are no processes running. This | ||
| 2928 | * is valid even if we are the idle process (SMT). | ||
| 2929 | */ | ||
| 2930 | enum idle_type idle = !this_rq->nr_running ? | ||
| 2931 | SCHED_IDLE : NOT_IDLE; | ||
| 2932 | /* Earliest time when we have to call run_rebalance_domains again */ | ||
| 2933 | unsigned long next_balance = jiffies + 60*HZ; | ||
| 2868 | 2934 | ||
| 2869 | for_each_domain(this_cpu, sd) { | 2935 | for_each_domain(this_cpu, sd) { |
| 2870 | if (!(sd->flags & SD_LOAD_BALANCE)) | 2936 | if (!(sd->flags & SD_LOAD_BALANCE)) |
| @@ -2879,8 +2945,13 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) | |||
| 2879 | if (unlikely(!interval)) | 2945 | if (unlikely(!interval)) |
| 2880 | interval = 1; | 2946 | interval = 1; |
| 2881 | 2947 | ||
| 2882 | if (j - sd->last_balance >= interval) { | 2948 | if (sd->flags & SD_SERIALIZE) { |
| 2883 | if (load_balance(this_cpu, this_rq, sd, idle)) { | 2949 | if (!spin_trylock(&balancing)) |
| 2950 | goto out; | ||
| 2951 | } | ||
| 2952 | |||
| 2953 | if (time_after_eq(jiffies, sd->last_balance + interval)) { | ||
| 2954 | if (load_balance(this_cpu, this_rq, sd, idle, &balance)) { | ||
| 2884 | /* | 2955 | /* |
| 2885 | * We've pulled tasks over so either we're no | 2956 | * We've pulled tasks over so either we're no |
| 2886 | * longer idle, or one of our SMT siblings is | 2957 | * longer idle, or one of our SMT siblings is |
| @@ -2888,39 +2959,48 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) | |||
| 2888 | */ | 2959 | */ |
| 2889 | idle = NOT_IDLE; | 2960 | idle = NOT_IDLE; |
| 2890 | } | 2961 | } |
| 2891 | sd->last_balance += interval; | 2962 | sd->last_balance = jiffies; |
| 2892 | } | 2963 | } |
| 2964 | if (sd->flags & SD_SERIALIZE) | ||
| 2965 | spin_unlock(&balancing); | ||
| 2966 | out: | ||
| 2967 | if (time_after(next_balance, sd->last_balance + interval)) | ||
| 2968 | next_balance = sd->last_balance + interval; | ||
| 2969 | |||
| 2970 | /* | ||
| 2971 | * Stop the load balance at this level. There is another | ||
| 2972 | * CPU in our sched group which is doing load balancing more | ||
| 2973 | * actively. | ||
| 2974 | */ | ||
| 2975 | if (!balance) | ||
| 2976 | break; | ||
| 2893 | } | 2977 | } |
| 2978 | this_rq->next_balance = next_balance; | ||
| 2894 | } | 2979 | } |
| 2895 | #else | 2980 | #else |
| 2896 | /* | 2981 | /* |
| 2897 | * on UP we do not need to balance between CPUs: | 2982 | * on UP we do not need to balance between CPUs: |
| 2898 | */ | 2983 | */ |
| 2899 | static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle) | ||
| 2900 | { | ||
| 2901 | } | ||
| 2902 | static inline void idle_balance(int cpu, struct rq *rq) | 2984 | static inline void idle_balance(int cpu, struct rq *rq) |
| 2903 | { | 2985 | { |
| 2904 | } | 2986 | } |
| 2905 | #endif | 2987 | #endif |
| 2906 | 2988 | ||
| 2907 | static inline int wake_priority_sleeper(struct rq *rq) | 2989 | static inline void wake_priority_sleeper(struct rq *rq) |
| 2908 | { | 2990 | { |
| 2909 | int ret = 0; | ||
| 2910 | |||
| 2911 | #ifdef CONFIG_SCHED_SMT | 2991 | #ifdef CONFIG_SCHED_SMT |
| 2992 | if (!rq->nr_running) | ||
| 2993 | return; | ||
| 2994 | |||
| 2912 | spin_lock(&rq->lock); | 2995 | spin_lock(&rq->lock); |
| 2913 | /* | 2996 | /* |
| 2914 | * If an SMT sibling task has been put to sleep for priority | 2997 | * If an SMT sibling task has been put to sleep for priority |
| 2915 | * reasons reschedule the idle task to see if it can now run. | 2998 | * reasons reschedule the idle task to see if it can now run. |
| 2916 | */ | 2999 | */ |
| 2917 | if (rq->nr_running) { | 3000 | if (rq->nr_running) |
| 2918 | resched_task(rq->idle); | 3001 | resched_task(rq->idle); |
| 2919 | ret = 1; | ||
| 2920 | } | ||
| 2921 | spin_unlock(&rq->lock); | 3002 | spin_unlock(&rq->lock); |
| 2922 | #endif | 3003 | #endif |
| 2923 | return ret; | ||
| 2924 | } | 3004 | } |
| 2925 | 3005 | ||
| 2926 | DEFINE_PER_CPU(struct kernel_stat, kstat); | 3006 | DEFINE_PER_CPU(struct kernel_stat, kstat); |
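Taken together, the hunks above replace rebalance_tick() with a SCHED_SOFTIRQ handler: run_rebalance_domains() walks the CPU's domains, serializes SD_SERIALIZE domains through a global trylock, balances any domain whose interval has expired, tracks the earliest future deadline in next_balance, and stops early once another CPU in the local group owns balancing. A condensed user-space model of that loop; the lock, types, and HZ value are stand-ins:

#include <stdio.h>

struct dom {
    unsigned long last_balance, interval;
    int serialize;                 /* models SD_SERIALIZE */
};

static int balancing_lock;         /* models the global "balancing" spinlock */

static int trylock(int *l) { if (*l) return 0; *l = 1; return 1; }
static void unlock(int *l)  { *l = 0; }

/* One pass of the handler over one CPU's domain list. */
static unsigned long rebalance(struct dom *d, int n, unsigned long jiffies)
{
    unsigned long next_balance = jiffies + 60 * 250;   /* far-future default */
    int balance = 1;               /* would be cleared via find_busiest_group() */

    for (int i = 0; i < n; i++) {
        int locked = 0;

        if (d[i].serialize) {
            if (!trylock(&balancing_lock))
                goto out;          /* someone else is serializing: skip */
            locked = 1;
        }
        if (jiffies >= d[i].last_balance + d[i].interval) {
            /* load_balance(..., &balance) would run here */
            d[i].last_balance = jiffies;
        }
        if (locked)
            unlock(&balancing_lock);
out:
        if (d[i].last_balance + d[i].interval < next_balance)
            next_balance = d[i].last_balance + d[i].interval;
        if (!balance)
            break;                 /* another CPU in our group balances */
    }
    return next_balance;           /* stored in rq->next_balance */
}

int main(void)
{
    struct dom doms[] = { { 0, 4, 0 }, { 0, 64, 1 } };

    printf("next_balance = %lu\n", rebalance(doms, 2, 100));
    return 0;
}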
| @@ -2934,7 +3014,8 @@ EXPORT_PER_CPU_SYMBOL(kstat); | |||
| 2934 | static inline void | 3014 | static inline void |
| 2935 | update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now) | 3015 | update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now) |
| 2936 | { | 3016 | { |
| 2937 | p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick); | 3017 | p->sched_time += now - p->last_ran; |
| 3018 | p->last_ran = rq->most_recent_timestamp = now; | ||
| 2938 | } | 3019 | } |
| 2939 | 3020 | ||
| 2940 | /* | 3021 | /* |
| @@ -2947,8 +3028,7 @@ unsigned long long current_sched_time(const struct task_struct *p) | |||
| 2947 | unsigned long flags; | 3028 | unsigned long flags; |
| 2948 | 3029 | ||
| 2949 | local_irq_save(flags); | 3030 | local_irq_save(flags); |
| 2950 | ns = max(p->timestamp, task_rq(p)->timestamp_last_tick); | 3031 | ns = p->sched_time + sched_clock() - p->last_ran; |
| 2951 | ns = p->sched_time + sched_clock() - ns; | ||
| 2952 | local_irq_restore(flags); | 3032 | local_irq_restore(flags); |
| 2953 | 3033 | ||
| 2954 | return ns; | 3034 | return ns; |
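The two hunks above simplify per-task time accounting: each task keeps last_ran, the runqueue keeps most_recent_timestamp, and current_sched_time() reduces to sched_time + (sched_clock() - last_ran). A toy model of that bookkeeping, with sched_clock() faked as a settable counter:

#include <stdio.h>

static unsigned long long clock_ns;                      /* fake monotonic clock */
static unsigned long long sched_clock(void) { return clock_ns; }

struct task { unsigned long long sched_time, last_ran; };

/* Mirrors update_cpu_clock(): charge the delta since last_ran, advance last_ran. */
static void update_cpu_clock(struct task *p, unsigned long long now)
{
    p->sched_time += now - p->last_ran;
    p->last_ran = now;
}

/* Mirrors the simplified current_sched_time(). */
static unsigned long long current_sched_time(const struct task *p)
{
    return p->sched_time + sched_clock() - p->last_ran;
}

int main(void)
{
    struct task t = { 0, 0 };

    clock_ns = 1000; update_cpu_clock(&t, clock_ns);     /* tick at t=1000ns */
    clock_ns = 1600;                                     /* 600ns later, no tick yet */
    printf("sched_time now: %llu\n", current_sched_time(&t));
    return 0;
}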
| @@ -3048,35 +3128,12 @@ void account_steal_time(struct task_struct *p, cputime_t steal) | |||
| 3048 | cpustat->steal = cputime64_add(cpustat->steal, tmp); | 3128 | cpustat->steal = cputime64_add(cpustat->steal, tmp); |
| 3049 | } | 3129 | } |
| 3050 | 3130 | ||
| 3051 | /* | 3131 | static void task_running_tick(struct rq *rq, struct task_struct *p) |
| 3052 | * This function gets called by the timer code, with HZ frequency. | ||
| 3053 | * We call it with interrupts disabled. | ||
| 3054 | * | ||
| 3055 | * It also gets called by the fork code, when changing the parent's | ||
| 3056 | * timeslices. | ||
| 3057 | */ | ||
| 3058 | void scheduler_tick(void) | ||
| 3059 | { | 3132 | { |
| 3060 | unsigned long long now = sched_clock(); | ||
| 3061 | struct task_struct *p = current; | ||
| 3062 | int cpu = smp_processor_id(); | ||
| 3063 | struct rq *rq = cpu_rq(cpu); | ||
| 3064 | |||
| 3065 | update_cpu_clock(p, rq, now); | ||
| 3066 | |||
| 3067 | rq->timestamp_last_tick = now; | ||
| 3068 | |||
| 3069 | if (p == rq->idle) { | ||
| 3070 | if (wake_priority_sleeper(rq)) | ||
| 3071 | goto out; | ||
| 3072 | rebalance_tick(cpu, rq, SCHED_IDLE); | ||
| 3073 | return; | ||
| 3074 | } | ||
| 3075 | |||
| 3076 | /* Task might have expired already, but not scheduled off yet */ | ||
| 3077 | if (p->array != rq->active) { | 3133 | if (p->array != rq->active) { |
| 3134 | /* Task has expired but was not scheduled yet */ | ||
| 3078 | set_tsk_need_resched(p); | 3135 | set_tsk_need_resched(p); |
| 3079 | goto out; | 3136 | return; |
| 3080 | } | 3137 | } |
| 3081 | spin_lock(&rq->lock); | 3138 | spin_lock(&rq->lock); |
| 3082 | /* | 3139 | /* |
| @@ -3144,8 +3201,34 @@ void scheduler_tick(void) | |||
| 3144 | } | 3201 | } |
| 3145 | out_unlock: | 3202 | out_unlock: |
| 3146 | spin_unlock(&rq->lock); | 3203 | spin_unlock(&rq->lock); |
| 3147 | out: | 3204 | } |
| 3148 | rebalance_tick(cpu, rq, NOT_IDLE); | 3205 | |
| 3206 | /* | ||
| 3207 | * This function gets called by the timer code, with HZ frequency. | ||
| 3208 | * We call it with interrupts disabled. | ||
| 3209 | * | ||
| 3210 | * It also gets called by the fork code, when changing the parent's | ||
| 3211 | * timeslices. | ||
| 3212 | */ | ||
| 3213 | void scheduler_tick(void) | ||
| 3214 | { | ||
| 3215 | unsigned long long now = sched_clock(); | ||
| 3216 | struct task_struct *p = current; | ||
| 3217 | int cpu = smp_processor_id(); | ||
| 3218 | struct rq *rq = cpu_rq(cpu); | ||
| 3219 | |||
| 3220 | update_cpu_clock(p, rq, now); | ||
| 3221 | |||
| 3222 | if (p == rq->idle) | ||
| 3223 | /* Task on the idle queue */ | ||
| 3224 | wake_priority_sleeper(rq); | ||
| 3225 | else | ||
| 3226 | task_running_tick(rq, p); | ||
| 3227 | #ifdef CONFIG_SMP | ||
| 3228 | update_load(rq); | ||
| 3229 | if (time_after_eq(jiffies, rq->next_balance)) | ||
| 3230 | raise_softirq(SCHED_SOFTIRQ); | ||
| 3231 | #endif | ||
| 3149 | } | 3232 | } |
| 3150 | 3233 | ||
| 3151 | #ifdef CONFIG_SCHED_SMT | 3234 | #ifdef CONFIG_SCHED_SMT |
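With the split above, scheduler_tick() only updates the clock, runs task_running_tick() (or wake_priority_sleeper() for the idle task), and on SMP raises SCHED_SOFTIRQ once jiffies reaches rq->next_balance; the handler is registered via open_softirq() in sched_init() near the end of this diff. A bare-bones sketch of that tick shape, with every name here a stand-in:

#include <stdio.h>

static int sched_softirq_pending;
static void raise_softirq(void) { sched_softirq_pending = 1; }

struct rq { unsigned long next_balance; int curr_is_idle; };

/* Per-task accounting stays in the tick; domain balancing is deferred. */
static void scheduler_tick(struct rq *rq, unsigned long jiffies)
{
    /* update_cpu_clock(curr, rq, now); */
    if (rq->curr_is_idle) {
        /* wake_priority_sleeper(rq); */
    } else {
        /* task_running_tick(rq, curr); */
    }

    /* update_load(rq); */
    if (jiffies >= rq->next_balance)
        raise_softirq();           /* run_rebalance_domains() runs later */
}

int main(void)
{
    struct rq rq = { .next_balance = 100, .curr_is_idle = 0 };

    scheduler_tick(&rq, 120);
    printf("SCHED_SOFTIRQ pending: %d\n", sched_softirq_pending);
    return 0;
}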
| @@ -3291,7 +3374,8 @@ void fastcall add_preempt_count(int val) | |||
| 3291 | /* | 3374 | /* |
| 3292 | * Spinlock count overflowing soon? | 3375 | * Spinlock count overflowing soon? |
| 3293 | */ | 3376 | */ |
| 3294 | DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); | 3377 | DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= |
| 3378 | PREEMPT_MASK - 10); | ||
| 3295 | } | 3379 | } |
| 3296 | EXPORT_SYMBOL(add_preempt_count); | 3380 | EXPORT_SYMBOL(add_preempt_count); |
| 3297 | 3381 | ||
| @@ -3345,6 +3429,8 @@ asmlinkage void __sched schedule(void) | |||
| 3345 | "%s/0x%08x/%d\n", | 3429 | "%s/0x%08x/%d\n", |
| 3346 | current->comm, preempt_count(), current->pid); | 3430 | current->comm, preempt_count(), current->pid); |
| 3347 | debug_show_held_locks(current); | 3431 | debug_show_held_locks(current); |
| 3432 | if (irqs_disabled()) | ||
| 3433 | print_irqtrace_events(current); | ||
| 3348 | dump_stack(); | 3434 | dump_stack(); |
| 3349 | } | 3435 | } |
| 3350 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 3436 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
| @@ -4990,8 +5076,8 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
| 4990 | * afterwards, and pretending it was a local activate. | 5076 | * afterwards, and pretending it was a local activate. |
| 4991 | * This way is cleaner and logically correct. | 5077 | * This way is cleaner and logically correct. |
| 4992 | */ | 5078 | */ |
| 4993 | p->timestamp = p->timestamp - rq_src->timestamp_last_tick | 5079 | p->timestamp = p->timestamp - rq_src->most_recent_timestamp |
| 4994 | + rq_dest->timestamp_last_tick; | 5080 | + rq_dest->most_recent_timestamp; |
| 4995 | deactivate_task(p, rq_src); | 5081 | deactivate_task(p, rq_src); |
| 4996 | __activate_task(p, rq_dest); | 5082 | __activate_task(p, rq_dest); |
| 4997 | if (TASK_PREEMPTS_CURR(p, rq_dest)) | 5083 | if (TASK_PREEMPTS_CURR(p, rq_dest)) |
| @@ -5067,7 +5153,10 @@ wait_to_die: | |||
| 5067 | } | 5153 | } |
| 5068 | 5154 | ||
| 5069 | #ifdef CONFIG_HOTPLUG_CPU | 5155 | #ifdef CONFIG_HOTPLUG_CPU |
| 5070 | /* Figure out where task on dead CPU should go, use force if neccessary. */ | 5156 | /* |
| 5157 | * Figure out where task on dead CPU should go, use force if neccessary. | ||
| 5158 | * NOTE: interrupts should be disabled by the caller | ||
| 5159 | */ | ||
| 5071 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | 5160 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) |
| 5072 | { | 5161 | { |
| 5073 | unsigned long flags; | 5162 | unsigned long flags; |
| @@ -5187,6 +5276,7 @@ void idle_task_exit(void) | |||
| 5187 | mmdrop(mm); | 5276 | mmdrop(mm); |
| 5188 | } | 5277 | } |
| 5189 | 5278 | ||
| 5279 | /* called under rq->lock with disabled interrupts */ | ||
| 5190 | static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | 5280 | static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) |
| 5191 | { | 5281 | { |
| 5192 | struct rq *rq = cpu_rq(dead_cpu); | 5282 | struct rq *rq = cpu_rq(dead_cpu); |
| @@ -5203,10 +5293,11 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | |||
| 5203 | * Drop lock around migration; if someone else moves it, | 5293 | * Drop lock around migration; if someone else moves it, |
| 5204 | * that's OK. No task can be added to this CPU, so iteration is | 5294 | * that's OK. No task can be added to this CPU, so iteration is |
| 5205 | * fine. | 5295 | * fine. |
| 5296 | * NOTE: interrupts should be left disabled --dev@ | ||
| 5206 | */ | 5297 | */ |
| 5207 | spin_unlock_irq(&rq->lock); | 5298 | spin_unlock(&rq->lock); |
| 5208 | move_task_off_dead_cpu(dead_cpu, p); | 5299 | move_task_off_dead_cpu(dead_cpu, p); |
| 5209 | spin_lock_irq(&rq->lock); | 5300 | spin_lock(&rq->lock); |
| 5210 | 5301 | ||
| 5211 | put_task_struct(p); | 5302 | put_task_struct(p); |
| 5212 | } | 5303 | } |
| @@ -5359,16 +5450,19 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
| 5359 | if (!(sd->flags & SD_LOAD_BALANCE)) { | 5450 | if (!(sd->flags & SD_LOAD_BALANCE)) { |
| 5360 | printk("does not load-balance\n"); | 5451 | printk("does not load-balance\n"); |
| 5361 | if (sd->parent) | 5452 | if (sd->parent) |
| 5362 | printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent"); | 5453 | printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" |
| 5454 | " has parent"); | ||
| 5363 | break; | 5455 | break; |
| 5364 | } | 5456 | } |
| 5365 | 5457 | ||
| 5366 | printk("span %s\n", str); | 5458 | printk("span %s\n", str); |
| 5367 | 5459 | ||
| 5368 | if (!cpu_isset(cpu, sd->span)) | 5460 | if (!cpu_isset(cpu, sd->span)) |
| 5369 | printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); | 5461 | printk(KERN_ERR "ERROR: domain->span does not contain " |
| 5462 | "CPU%d\n", cpu); | ||
| 5370 | if (!cpu_isset(cpu, group->cpumask)) | 5463 | if (!cpu_isset(cpu, group->cpumask)) |
| 5371 | printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); | 5464 | printk(KERN_ERR "ERROR: domain->groups does not contain" |
| 5465 | " CPU%d\n", cpu); | ||
| 5372 | 5466 | ||
| 5373 | printk(KERN_DEBUG); | 5467 | printk(KERN_DEBUG); |
| 5374 | for (i = 0; i < level + 2; i++) | 5468 | for (i = 0; i < level + 2; i++) |
| @@ -5383,7 +5477,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
| 5383 | 5477 | ||
| 5384 | if (!group->cpu_power) { | 5478 | if (!group->cpu_power) { |
| 5385 | printk("\n"); | 5479 | printk("\n"); |
| 5386 | printk(KERN_ERR "ERROR: domain->cpu_power not set\n"); | 5480 | printk(KERN_ERR "ERROR: domain->cpu_power not " |
| 5481 | "set\n"); | ||
| 5387 | } | 5482 | } |
| 5388 | 5483 | ||
| 5389 | if (!cpus_weight(group->cpumask)) { | 5484 | if (!cpus_weight(group->cpumask)) { |
| @@ -5406,15 +5501,17 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
| 5406 | printk("\n"); | 5501 | printk("\n"); |
| 5407 | 5502 | ||
| 5408 | if (!cpus_equal(sd->span, groupmask)) | 5503 | if (!cpus_equal(sd->span, groupmask)) |
| 5409 | printk(KERN_ERR "ERROR: groups don't span domain->span\n"); | 5504 | printk(KERN_ERR "ERROR: groups don't span " |
| 5505 | "domain->span\n"); | ||
| 5410 | 5506 | ||
| 5411 | level++; | 5507 | level++; |
| 5412 | sd = sd->parent; | 5508 | sd = sd->parent; |
| 5509 | if (!sd) | ||
| 5510 | continue; | ||
| 5413 | 5511 | ||
| 5414 | if (sd) { | 5512 | if (!cpus_subset(groupmask, sd->span)) |
| 5415 | if (!cpus_subset(groupmask, sd->span)) | 5513 | printk(KERN_ERR "ERROR: parent span is not a superset " |
| 5416 | printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n"); | 5514 | "of domain->span\n"); |
| 5417 | } | ||
| 5418 | 5515 | ||
| 5419 | } while (sd); | 5516 | } while (sd); |
| 5420 | } | 5517 | } |
| @@ -5528,28 +5625,27 @@ static int __init isolated_cpu_setup(char *str) | |||
| 5528 | __setup ("isolcpus=", isolated_cpu_setup); | 5625 | __setup ("isolcpus=", isolated_cpu_setup); |
| 5529 | 5626 | ||
| 5530 | /* | 5627 | /* |
| 5531 | * init_sched_build_groups takes an array of groups, the cpumask we wish | 5628 | * init_sched_build_groups takes the cpumask we wish to span, and a pointer |
| 5532 | * to span, and a pointer to a function which identifies what group a CPU | 5629 | * to a function which identifies what group (along with sched group) a CPU |
| 5533 | * belongs to. The return value of group_fn must be a valid index into the | 5630 | * belongs to. The return value of group_fn must be >= 0 and < NR_CPUS |
| 5534 | * groups[] array, and must be >= 0 and < NR_CPUS (due to the fact that we | 5631 | * (due to the fact that we keep track of groups covered with a cpumask_t). |
| 5535 | * keep track of groups covered with a cpumask_t). | ||
| 5536 | * | 5632 | * |
| 5537 | * init_sched_build_groups will build a circular linked list of the groups | 5633 | * init_sched_build_groups will build a circular linked list of the groups |
| 5538 | * covered by the given span, and will set each group's ->cpumask correctly, | 5634 | * covered by the given span, and will set each group's ->cpumask correctly, |
| 5539 | * and ->cpu_power to 0. | 5635 | * and ->cpu_power to 0. |
| 5540 | */ | 5636 | */ |
| 5541 | static void | 5637 | static void |
| 5542 | init_sched_build_groups(struct sched_group groups[], cpumask_t span, | 5638 | init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map, |
| 5543 | const cpumask_t *cpu_map, | 5639 | int (*group_fn)(int cpu, const cpumask_t *cpu_map, |
| 5544 | int (*group_fn)(int cpu, const cpumask_t *cpu_map)) | 5640 | struct sched_group **sg)) |
| 5545 | { | 5641 | { |
| 5546 | struct sched_group *first = NULL, *last = NULL; | 5642 | struct sched_group *first = NULL, *last = NULL; |
| 5547 | cpumask_t covered = CPU_MASK_NONE; | 5643 | cpumask_t covered = CPU_MASK_NONE; |
| 5548 | int i; | 5644 | int i; |
| 5549 | 5645 | ||
| 5550 | for_each_cpu_mask(i, span) { | 5646 | for_each_cpu_mask(i, span) { |
| 5551 | int group = group_fn(i, cpu_map); | 5647 | struct sched_group *sg; |
| 5552 | struct sched_group *sg = &groups[group]; | 5648 | int group = group_fn(i, cpu_map, &sg); |
| 5553 | int j; | 5649 | int j; |
| 5554 | 5650 | ||
| 5555 | if (cpu_isset(i, covered)) | 5651 | if (cpu_isset(i, covered)) |
| @@ -5559,7 +5655,7 @@ init_sched_build_groups(struct sched_group groups[], cpumask_t span, | |||
| 5559 | sg->cpu_power = 0; | 5655 | sg->cpu_power = 0; |
| 5560 | 5656 | ||
| 5561 | for_each_cpu_mask(j, span) { | 5657 | for_each_cpu_mask(j, span) { |
| 5562 | if (group_fn(j, cpu_map) != group) | 5658 | if (group_fn(j, cpu_map, NULL) != group) |
| 5563 | continue; | 5659 | continue; |
| 5564 | 5660 | ||
| 5565 | cpu_set(j, covered); | 5661 | cpu_set(j, covered); |
| @@ -5733,8 +5829,9 @@ __setup("max_cache_size=", setup_max_cache_size); | |||
| 5733 | */ | 5829 | */ |
| 5734 | static void touch_cache(void *__cache, unsigned long __size) | 5830 | static void touch_cache(void *__cache, unsigned long __size) |
| 5735 | { | 5831 | { |
| 5736 | unsigned long size = __size/sizeof(long), chunk1 = size/3, | 5832 | unsigned long size = __size / sizeof(long); |
| 5737 | chunk2 = 2*size/3; | 5833 | unsigned long chunk1 = size / 3; |
| 5834 | unsigned long chunk2 = 2 * size / 3; | ||
| 5738 | unsigned long *cache = __cache; | 5835 | unsigned long *cache = __cache; |
| 5739 | int i; | 5836 | int i; |
| 5740 | 5837 | ||
| @@ -5843,11 +5940,11 @@ measure_cost(int cpu1, int cpu2, void *cache, unsigned int size) | |||
| 5843 | */ | 5940 | */ |
| 5844 | measure_one(cache, size, cpu1, cpu2); | 5941 | measure_one(cache, size, cpu1, cpu2); |
| 5845 | for (i = 0; i < ITERATIONS; i++) | 5942 | for (i = 0; i < ITERATIONS; i++) |
| 5846 | cost1 += measure_one(cache, size - i*1024, cpu1, cpu2); | 5943 | cost1 += measure_one(cache, size - i * 1024, cpu1, cpu2); |
| 5847 | 5944 | ||
| 5848 | measure_one(cache, size, cpu2, cpu1); | 5945 | measure_one(cache, size, cpu2, cpu1); |
| 5849 | for (i = 0; i < ITERATIONS; i++) | 5946 | for (i = 0; i < ITERATIONS; i++) |
| 5850 | cost1 += measure_one(cache, size - i*1024, cpu2, cpu1); | 5947 | cost1 += measure_one(cache, size - i * 1024, cpu2, cpu1); |
| 5851 | 5948 | ||
| 5852 | /* | 5949 | /* |
| 5853 | * (We measure the non-migrating [cached] cost on both | 5950 | * (We measure the non-migrating [cached] cost on both |
| @@ -5857,17 +5954,17 @@ measure_cost(int cpu1, int cpu2, void *cache, unsigned int size) | |||
| 5857 | 5954 | ||
| 5858 | measure_one(cache, size, cpu1, cpu1); | 5955 | measure_one(cache, size, cpu1, cpu1); |
| 5859 | for (i = 0; i < ITERATIONS; i++) | 5956 | for (i = 0; i < ITERATIONS; i++) |
| 5860 | cost2 += measure_one(cache, size - i*1024, cpu1, cpu1); | 5957 | cost2 += measure_one(cache, size - i * 1024, cpu1, cpu1); |
| 5861 | 5958 | ||
| 5862 | measure_one(cache, size, cpu2, cpu2); | 5959 | measure_one(cache, size, cpu2, cpu2); |
| 5863 | for (i = 0; i < ITERATIONS; i++) | 5960 | for (i = 0; i < ITERATIONS; i++) |
| 5864 | cost2 += measure_one(cache, size - i*1024, cpu2, cpu2); | 5961 | cost2 += measure_one(cache, size - i * 1024, cpu2, cpu2); |
| 5865 | 5962 | ||
| 5866 | /* | 5963 | /* |
| 5867 | * Get the per-iteration migration cost: | 5964 | * Get the per-iteration migration cost: |
| 5868 | */ | 5965 | */ |
| 5869 | do_div(cost1, 2*ITERATIONS); | 5966 | do_div(cost1, 2 * ITERATIONS); |
| 5870 | do_div(cost2, 2*ITERATIONS); | 5967 | do_div(cost2, 2 * ITERATIONS); |
| 5871 | 5968 | ||
| 5872 | return cost1 - cost2; | 5969 | return cost1 - cost2; |
| 5873 | } | 5970 | } |
| @@ -5905,7 +6002,7 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2) | |||
| 5905 | */ | 6002 | */ |
| 5906 | cache = vmalloc(max_size); | 6003 | cache = vmalloc(max_size); |
| 5907 | if (!cache) { | 6004 | if (!cache) { |
| 5908 | printk("could not vmalloc %d bytes for cache!\n", 2*max_size); | 6005 | printk("could not vmalloc %d bytes for cache!\n", 2 * max_size); |
| 5909 | return 1000000; /* return 1 msec on very small boxen */ | 6006 | return 1000000; /* return 1 msec on very small boxen */ |
| 5910 | } | 6007 | } |
| 5911 | 6008 | ||
| @@ -5930,7 +6027,8 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2) | |||
| 5930 | avg_fluct = (avg_fluct + fluct)/2; | 6027 | avg_fluct = (avg_fluct + fluct)/2; |
| 5931 | 6028 | ||
| 5932 | if (migration_debug) | 6029 | if (migration_debug) |
| 5933 | printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): (%8Ld %8Ld)\n", | 6030 | printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): " |
| 6031 | "(%8Ld %8Ld)\n", | ||
| 5934 | cpu1, cpu2, size, | 6032 | cpu1, cpu2, size, |
| 5935 | (long)cost / 1000000, | 6033 | (long)cost / 1000000, |
| 5936 | ((long)cost / 100000) % 10, | 6034 | ((long)cost / 100000) % 10, |
| @@ -6025,20 +6123,18 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map) | |||
| 6025 | -1 | 6123 | -1 |
| 6026 | #endif | 6124 | #endif |
| 6027 | ); | 6125 | ); |
| 6028 | if (system_state == SYSTEM_BOOTING) { | 6126 | if (system_state == SYSTEM_BOOTING && num_online_cpus() > 1) { |
| 6029 | if (num_online_cpus() > 1) { | 6127 | printk("migration_cost="); |
| 6030 | printk("migration_cost="); | 6128 | for (distance = 0; distance <= max_distance; distance++) { |
| 6031 | for (distance = 0; distance <= max_distance; distance++) { | 6129 | if (distance) |
| 6032 | if (distance) | 6130 | printk(","); |
| 6033 | printk(","); | 6131 | printk("%ld", (long)migration_cost[distance] / 1000); |
| 6034 | printk("%ld", (long)migration_cost[distance] / 1000); | ||
| 6035 | } | ||
| 6036 | printk("\n"); | ||
| 6037 | } | 6132 | } |
| 6133 | printk("\n"); | ||
| 6038 | } | 6134 | } |
| 6039 | j1 = jiffies; | 6135 | j1 = jiffies; |
| 6040 | if (migration_debug) | 6136 | if (migration_debug) |
| 6041 | printk("migration: %ld seconds\n", (j1-j0)/HZ); | 6137 | printk("migration: %ld seconds\n", (j1-j0) / HZ); |
| 6042 | 6138 | ||
| 6043 | /* | 6139 | /* |
| 6044 | * Move back to the original CPU. NUMA-Q gets confused | 6140 | * Move back to the original CPU. NUMA-Q gets confused |
| @@ -6135,10 +6231,13 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | |||
| 6135 | */ | 6231 | */ |
| 6136 | #ifdef CONFIG_SCHED_SMT | 6232 | #ifdef CONFIG_SCHED_SMT |
| 6137 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); | 6233 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); |
| 6138 | static struct sched_group sched_group_cpus[NR_CPUS]; | 6234 | static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); |
| 6139 | 6235 | ||
| 6140 | static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map) | 6236 | static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, |
| 6237 | struct sched_group **sg) | ||
| 6141 | { | 6238 | { |
| 6239 | if (sg) | ||
| 6240 | *sg = &per_cpu(sched_group_cpus, cpu); | ||
| 6142 | return cpu; | 6241 | return cpu; |
| 6143 | } | 6242 | } |
| 6144 | #endif | 6243 | #endif |
| @@ -6148,39 +6247,52 @@ static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map) | |||
| 6148 | */ | 6247 | */ |
| 6149 | #ifdef CONFIG_SCHED_MC | 6248 | #ifdef CONFIG_SCHED_MC |
| 6150 | static DEFINE_PER_CPU(struct sched_domain, core_domains); | 6249 | static DEFINE_PER_CPU(struct sched_domain, core_domains); |
| 6151 | static struct sched_group sched_group_core[NR_CPUS]; | 6250 | static DEFINE_PER_CPU(struct sched_group, sched_group_core); |
| 6152 | #endif | 6251 | #endif |
| 6153 | 6252 | ||
| 6154 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | 6253 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) |
| 6155 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) | 6254 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, |
| 6255 | struct sched_group **sg) | ||
| 6156 | { | 6256 | { |
| 6257 | int group; | ||
| 6157 | cpumask_t mask = cpu_sibling_map[cpu]; | 6258 | cpumask_t mask = cpu_sibling_map[cpu]; |
| 6158 | cpus_and(mask, mask, *cpu_map); | 6259 | cpus_and(mask, mask, *cpu_map); |
| 6159 | return first_cpu(mask); | 6260 | group = first_cpu(mask); |
| 6261 | if (sg) | ||
| 6262 | *sg = &per_cpu(sched_group_core, group); | ||
| 6263 | return group; | ||
| 6160 | } | 6264 | } |
| 6161 | #elif defined(CONFIG_SCHED_MC) | 6265 | #elif defined(CONFIG_SCHED_MC) |
| 6162 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) | 6266 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, |
| 6267 | struct sched_group **sg) | ||
| 6163 | { | 6268 | { |
| 6269 | if (sg) | ||
| 6270 | *sg = &per_cpu(sched_group_core, cpu); | ||
| 6164 | return cpu; | 6271 | return cpu; |
| 6165 | } | 6272 | } |
| 6166 | #endif | 6273 | #endif |
| 6167 | 6274 | ||
| 6168 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); | 6275 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); |
| 6169 | static struct sched_group sched_group_phys[NR_CPUS]; | 6276 | static DEFINE_PER_CPU(struct sched_group, sched_group_phys); |
| 6170 | 6277 | ||
| 6171 | static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map) | 6278 | static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, |
| 6279 | struct sched_group **sg) | ||
| 6172 | { | 6280 | { |
| 6281 | int group; | ||
| 6173 | #ifdef CONFIG_SCHED_MC | 6282 | #ifdef CONFIG_SCHED_MC |
| 6174 | cpumask_t mask = cpu_coregroup_map(cpu); | 6283 | cpumask_t mask = cpu_coregroup_map(cpu); |
| 6175 | cpus_and(mask, mask, *cpu_map); | 6284 | cpus_and(mask, mask, *cpu_map); |
| 6176 | return first_cpu(mask); | 6285 | group = first_cpu(mask); |
| 6177 | #elif defined(CONFIG_SCHED_SMT) | 6286 | #elif defined(CONFIG_SCHED_SMT) |
| 6178 | cpumask_t mask = cpu_sibling_map[cpu]; | 6287 | cpumask_t mask = cpu_sibling_map[cpu]; |
| 6179 | cpus_and(mask, mask, *cpu_map); | 6288 | cpus_and(mask, mask, *cpu_map); |
| 6180 | return first_cpu(mask); | 6289 | group = first_cpu(mask); |
| 6181 | #else | 6290 | #else |
| 6182 | return cpu; | 6291 | group = cpu; |
| 6183 | #endif | 6292 | #endif |
| 6293 | if (sg) | ||
| 6294 | *sg = &per_cpu(sched_group_phys, group); | ||
| 6295 | return group; | ||
| 6184 | } | 6296 | } |
| 6185 | 6297 | ||
| 6186 | #ifdef CONFIG_NUMA | 6298 | #ifdef CONFIG_NUMA |
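The cpu_to_*_group() hunks above change the group_fn convention: each helper now returns the group's representative CPU and, when the caller passes a non-NULL struct sched_group **, also hands back a pointer into per-CPU group storage, replacing the old NR_CPUS-sized arrays. A stripped-down model of the new calling convention; NCPUS and the struct layout are illustrative only:

#include <stdio.h>

#define NCPUS 4

struct sched_group { int cpu_power; };

/* Per-"CPU" storage, standing in for DEFINE_PER_CPU(struct sched_group, ...). */
static struct sched_group sched_group_cpus[NCPUS];

/* New-style group_fn: returns the group's representative CPU and optionally
 * fills *sg; callers that only need the index pass sg == NULL. */
static int cpu_to_cpu_group(int cpu, struct sched_group **sg)
{
    if (sg)
        *sg = &sched_group_cpus[cpu];
    return cpu;
}

int main(void)
{
    struct sched_group *sg;
    int group = cpu_to_cpu_group(2, &sg);

    printf("group %d -> storage slot %td\n", group, sg - sched_group_cpus);
    printf("index-only call: %d\n", cpu_to_cpu_group(3, NULL));
    return 0;
}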
| @@ -6193,12 +6305,22 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains); | |||
| 6193 | static struct sched_group **sched_group_nodes_bycpu[NR_CPUS]; | 6305 | static struct sched_group **sched_group_nodes_bycpu[NR_CPUS]; |
| 6194 | 6306 | ||
| 6195 | static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); | 6307 | static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); |
| 6196 | static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS]; | 6308 | static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); |
| 6197 | 6309 | ||
| 6198 | static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map) | 6310 | static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, |
| 6311 | struct sched_group **sg) | ||
| 6199 | { | 6312 | { |
| 6200 | return cpu_to_node(cpu); | 6313 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(cpu)); |
| 6314 | int group; | ||
| 6315 | |||
| 6316 | cpus_and(nodemask, nodemask, *cpu_map); | ||
| 6317 | group = first_cpu(nodemask); | ||
| 6318 | |||
| 6319 | if (sg) | ||
| 6320 | *sg = &per_cpu(sched_group_allnodes, group); | ||
| 6321 | return group; | ||
| 6201 | } | 6322 | } |
| 6323 | |||
| 6202 | static void init_numa_sched_groups_power(struct sched_group *group_head) | 6324 | static void init_numa_sched_groups_power(struct sched_group *group_head) |
| 6203 | { | 6325 | { |
| 6204 | struct sched_group *sg = group_head; | 6326 | struct sched_group *sg = group_head; |
| @@ -6234,16 +6356,9 @@ static void free_sched_groups(const cpumask_t *cpu_map) | |||
| 6234 | int cpu, i; | 6356 | int cpu, i; |
| 6235 | 6357 | ||
| 6236 | for_each_cpu_mask(cpu, *cpu_map) { | 6358 | for_each_cpu_mask(cpu, *cpu_map) { |
| 6237 | struct sched_group *sched_group_allnodes | ||
| 6238 | = sched_group_allnodes_bycpu[cpu]; | ||
| 6239 | struct sched_group **sched_group_nodes | 6359 | struct sched_group **sched_group_nodes |
| 6240 | = sched_group_nodes_bycpu[cpu]; | 6360 | = sched_group_nodes_bycpu[cpu]; |
| 6241 | 6361 | ||
| 6242 | if (sched_group_allnodes) { | ||
| 6243 | kfree(sched_group_allnodes); | ||
| 6244 | sched_group_allnodes_bycpu[cpu] = NULL; | ||
| 6245 | } | ||
| 6246 | |||
| 6247 | if (!sched_group_nodes) | 6362 | if (!sched_group_nodes) |
| 6248 | continue; | 6363 | continue; |
| 6249 | 6364 | ||
| @@ -6337,7 +6452,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6337 | struct sched_domain *sd; | 6452 | struct sched_domain *sd; |
| 6338 | #ifdef CONFIG_NUMA | 6453 | #ifdef CONFIG_NUMA |
| 6339 | struct sched_group **sched_group_nodes = NULL; | 6454 | struct sched_group **sched_group_nodes = NULL; |
| 6340 | struct sched_group *sched_group_allnodes = NULL; | 6455 | int sd_allnodes = 0; |
| 6341 | 6456 | ||
| 6342 | /* | 6457 | /* |
| 6343 | * Allocate the per-node list of sched groups | 6458 | * Allocate the per-node list of sched groups |
| @@ -6355,7 +6470,6 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6355 | * Set up domains for cpus specified by the cpu_map. | 6470 | * Set up domains for cpus specified by the cpu_map. |
| 6356 | */ | 6471 | */ |
| 6357 | for_each_cpu_mask(i, *cpu_map) { | 6472 | for_each_cpu_mask(i, *cpu_map) { |
| 6358 | int group; | ||
| 6359 | struct sched_domain *sd = NULL, *p; | 6473 | struct sched_domain *sd = NULL, *p; |
| 6360 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); | 6474 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); |
| 6361 | 6475 | ||
| @@ -6364,26 +6478,12 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6364 | #ifdef CONFIG_NUMA | 6478 | #ifdef CONFIG_NUMA |
| 6365 | if (cpus_weight(*cpu_map) | 6479 | if (cpus_weight(*cpu_map) |
| 6366 | > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { | 6480 | > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { |
| 6367 | if (!sched_group_allnodes) { | ||
| 6368 | sched_group_allnodes | ||
| 6369 | = kmalloc_node(sizeof(struct sched_group) | ||
| 6370 | * MAX_NUMNODES, | ||
| 6371 | GFP_KERNEL, | ||
| 6372 | cpu_to_node(i)); | ||
| 6373 | if (!sched_group_allnodes) { | ||
| 6374 | printk(KERN_WARNING | ||
| 6375 | "Can not alloc allnodes sched group\n"); | ||
| 6376 | goto error; | ||
| 6377 | } | ||
| 6378 | sched_group_allnodes_bycpu[i] | ||
| 6379 | = sched_group_allnodes; | ||
| 6380 | } | ||
| 6381 | sd = &per_cpu(allnodes_domains, i); | 6481 | sd = &per_cpu(allnodes_domains, i); |
| 6382 | *sd = SD_ALLNODES_INIT; | 6482 | *sd = SD_ALLNODES_INIT; |
| 6383 | sd->span = *cpu_map; | 6483 | sd->span = *cpu_map; |
| 6384 | group = cpu_to_allnodes_group(i, cpu_map); | 6484 | cpu_to_allnodes_group(i, cpu_map, &sd->groups); |
| 6385 | sd->groups = &sched_group_allnodes[group]; | ||
| 6386 | p = sd; | 6485 | p = sd; |
| 6486 | sd_allnodes = 1; | ||
| 6387 | } else | 6487 | } else |
| 6388 | p = NULL; | 6488 | p = NULL; |
| 6389 | 6489 | ||
| @@ -6398,36 +6498,33 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6398 | 6498 | ||
| 6399 | p = sd; | 6499 | p = sd; |
| 6400 | sd = &per_cpu(phys_domains, i); | 6500 | sd = &per_cpu(phys_domains, i); |
| 6401 | group = cpu_to_phys_group(i, cpu_map); | ||
| 6402 | *sd = SD_CPU_INIT; | 6501 | *sd = SD_CPU_INIT; |
| 6403 | sd->span = nodemask; | 6502 | sd->span = nodemask; |
| 6404 | sd->parent = p; | 6503 | sd->parent = p; |
| 6405 | if (p) | 6504 | if (p) |
| 6406 | p->child = sd; | 6505 | p->child = sd; |
| 6407 | sd->groups = &sched_group_phys[group]; | 6506 | cpu_to_phys_group(i, cpu_map, &sd->groups); |
| 6408 | 6507 | ||
| 6409 | #ifdef CONFIG_SCHED_MC | 6508 | #ifdef CONFIG_SCHED_MC |
| 6410 | p = sd; | 6509 | p = sd; |
| 6411 | sd = &per_cpu(core_domains, i); | 6510 | sd = &per_cpu(core_domains, i); |
| 6412 | group = cpu_to_core_group(i, cpu_map); | ||
| 6413 | *sd = SD_MC_INIT; | 6511 | *sd = SD_MC_INIT; |
| 6414 | sd->span = cpu_coregroup_map(i); | 6512 | sd->span = cpu_coregroup_map(i); |
| 6415 | cpus_and(sd->span, sd->span, *cpu_map); | 6513 | cpus_and(sd->span, sd->span, *cpu_map); |
| 6416 | sd->parent = p; | 6514 | sd->parent = p; |
| 6417 | p->child = sd; | 6515 | p->child = sd; |
| 6418 | sd->groups = &sched_group_core[group]; | 6516 | cpu_to_core_group(i, cpu_map, &sd->groups); |
| 6419 | #endif | 6517 | #endif |
| 6420 | 6518 | ||
| 6421 | #ifdef CONFIG_SCHED_SMT | 6519 | #ifdef CONFIG_SCHED_SMT |
| 6422 | p = sd; | 6520 | p = sd; |
| 6423 | sd = &per_cpu(cpu_domains, i); | 6521 | sd = &per_cpu(cpu_domains, i); |
| 6424 | group = cpu_to_cpu_group(i, cpu_map); | ||
| 6425 | *sd = SD_SIBLING_INIT; | 6522 | *sd = SD_SIBLING_INIT; |
| 6426 | sd->span = cpu_sibling_map[i]; | 6523 | sd->span = cpu_sibling_map[i]; |
| 6427 | cpus_and(sd->span, sd->span, *cpu_map); | 6524 | cpus_and(sd->span, sd->span, *cpu_map); |
| 6428 | sd->parent = p; | 6525 | sd->parent = p; |
| 6429 | p->child = sd; | 6526 | p->child = sd; |
| 6430 | sd->groups = &sched_group_cpus[group]; | 6527 | cpu_to_cpu_group(i, cpu_map, &sd->groups); |
| 6431 | #endif | 6528 | #endif |
| 6432 | } | 6529 | } |
| 6433 | 6530 | ||
| @@ -6439,8 +6536,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6439 | if (i != first_cpu(this_sibling_map)) | 6536 | if (i != first_cpu(this_sibling_map)) |
| 6440 | continue; | 6537 | continue; |
| 6441 | 6538 | ||
| 6442 | init_sched_build_groups(sched_group_cpus, this_sibling_map, | 6539 | init_sched_build_groups(this_sibling_map, cpu_map, &cpu_to_cpu_group); |
| 6443 | cpu_map, &cpu_to_cpu_group); | ||
| 6444 | } | 6540 | } |
| 6445 | #endif | 6541 | #endif |
| 6446 | 6542 | ||
| @@ -6451,8 +6547,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6451 | cpus_and(this_core_map, this_core_map, *cpu_map); | 6547 | cpus_and(this_core_map, this_core_map, *cpu_map); |
| 6452 | if (i != first_cpu(this_core_map)) | 6548 | if (i != first_cpu(this_core_map)) |
| 6453 | continue; | 6549 | continue; |
| 6454 | init_sched_build_groups(sched_group_core, this_core_map, | 6550 | init_sched_build_groups(this_core_map, cpu_map, &cpu_to_core_group); |
| 6455 | cpu_map, &cpu_to_core_group); | ||
| 6456 | } | 6551 | } |
| 6457 | #endif | 6552 | #endif |
| 6458 | 6553 | ||
| @@ -6465,15 +6560,13 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6465 | if (cpus_empty(nodemask)) | 6560 | if (cpus_empty(nodemask)) |
| 6466 | continue; | 6561 | continue; |
| 6467 | 6562 | ||
| 6468 | init_sched_build_groups(sched_group_phys, nodemask, | 6563 | init_sched_build_groups(nodemask, cpu_map, &cpu_to_phys_group); |
| 6469 | cpu_map, &cpu_to_phys_group); | ||
| 6470 | } | 6564 | } |
| 6471 | 6565 | ||
| 6472 | #ifdef CONFIG_NUMA | 6566 | #ifdef CONFIG_NUMA |
| 6473 | /* Set up node groups */ | 6567 | /* Set up node groups */ |
| 6474 | if (sched_group_allnodes) | 6568 | if (sd_allnodes) |
| 6475 | init_sched_build_groups(sched_group_allnodes, *cpu_map, | 6569 | init_sched_build_groups(*cpu_map, cpu_map, &cpu_to_allnodes_group); |
| 6476 | cpu_map, &cpu_to_allnodes_group); | ||
| 6477 | 6570 | ||
| 6478 | for (i = 0; i < MAX_NUMNODES; i++) { | 6571 | for (i = 0; i < MAX_NUMNODES; i++) { |
| 6479 | /* Set up node groups */ | 6572 | /* Set up node groups */ |
| @@ -6565,10 +6658,10 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6565 | for (i = 0; i < MAX_NUMNODES; i++) | 6658 | for (i = 0; i < MAX_NUMNODES; i++) |
| 6566 | init_numa_sched_groups_power(sched_group_nodes[i]); | 6659 | init_numa_sched_groups_power(sched_group_nodes[i]); |
| 6567 | 6660 | ||
| 6568 | if (sched_group_allnodes) { | 6661 | if (sd_allnodes) { |
| 6569 | int group = cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map); | 6662 | struct sched_group *sg; |
| 6570 | struct sched_group *sg = &sched_group_allnodes[group]; | ||
| 6571 | 6663 | ||
| 6664 | cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg); | ||
| 6572 | init_numa_sched_groups_power(sg); | 6665 | init_numa_sched_groups_power(sg); |
| 6573 | } | 6666 | } |
| 6574 | #endif | 6667 | #endif |
| @@ -6847,6 +6940,10 @@ void __init sched_init(void) | |||
| 6847 | 6940 | ||
| 6848 | set_load_weight(&init_task); | 6941 | set_load_weight(&init_task); |
| 6849 | 6942 | ||
| 6943 | #ifdef CONFIG_SMP | ||
| 6944 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); | ||
| 6945 | #endif | ||
| 6946 | |||
| 6850 | #ifdef CONFIG_RT_MUTEXES | 6947 | #ifdef CONFIG_RT_MUTEXES |
| 6851 | plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); | 6948 | plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); |
| 6852 | #endif | 6949 | #endif |
| @@ -6882,6 +6979,8 @@ void __might_sleep(char *file, int line) | |||
| 6882 | printk("in_atomic():%d, irqs_disabled():%d\n", | 6979 | printk("in_atomic():%d, irqs_disabled():%d\n", |
| 6883 | in_atomic(), irqs_disabled()); | 6980 | in_atomic(), irqs_disabled()); |
| 6884 | debug_show_held_locks(current); | 6981 | debug_show_held_locks(current); |
| 6982 | if (irqs_disabled()) | ||
| 6983 | print_irqtrace_events(current); | ||
| 6885 | dump_stack(); | 6984 | dump_stack(); |
| 6886 | } | 6985 | } |
| 6887 | #endif | 6986 | #endif |
diff --git a/kernel/signal.c b/kernel/signal.c index ec81defde339..5630255d2e2a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -24,6 +24,9 @@ | |||
| 24 | #include <linux/signal.h> | 24 | #include <linux/signal.h> |
| 25 | #include <linux/capability.h> | 25 | #include <linux/capability.h> |
| 26 | #include <linux/freezer.h> | 26 | #include <linux/freezer.h> |
| 27 | #include <linux/pid_namespace.h> | ||
| 28 | #include <linux/nsproxy.h> | ||
| 29 | |||
| 27 | #include <asm/param.h> | 30 | #include <asm/param.h> |
| 28 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
| 29 | #include <asm/unistd.h> | 32 | #include <asm/unistd.h> |
| @@ -583,7 +586,7 @@ static int check_kill_permission(int sig, struct siginfo *info, | |||
| 583 | error = -EPERM; | 586 | error = -EPERM; |
| 584 | if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) | 587 | if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) |
| 585 | && ((sig != SIGCONT) || | 588 | && ((sig != SIGCONT) || |
| 586 | (current->signal->session != t->signal->session)) | 589 | (process_session(current) != process_session(t))) |
| 587 | && (current->euid ^ t->suid) && (current->euid ^ t->uid) | 590 | && (current->euid ^ t->suid) && (current->euid ^ t->uid) |
| 588 | && (current->uid ^ t->suid) && (current->uid ^ t->uid) | 591 | && (current->uid ^ t->suid) && (current->uid ^ t->uid) |
| 589 | && !capable(CAP_KILL)) | 592 | && !capable(CAP_KILL)) |
| @@ -1702,7 +1705,9 @@ finish_stop(int stop_count) | |||
| 1702 | read_unlock(&tasklist_lock); | 1705 | read_unlock(&tasklist_lock); |
| 1703 | } | 1706 | } |
| 1704 | 1707 | ||
| 1705 | schedule(); | 1708 | do { |
| 1709 | schedule(); | ||
| 1710 | } while (try_to_freeze()); | ||
| 1706 | /* | 1711 | /* |
| 1707 | * Now we don't run again until continued. | 1712 | * Now we don't run again until continued. |
| 1708 | */ | 1713 | */ |
| @@ -1877,8 +1882,12 @@ relock: | |||
| 1877 | if (sig_kernel_ignore(signr)) /* Default is nothing. */ | 1882 | if (sig_kernel_ignore(signr)) /* Default is nothing. */ |
| 1878 | continue; | 1883 | continue; |
| 1879 | 1884 | ||
| 1880 | /* Init gets no signals it doesn't want. */ | 1885 | /* |
| 1881 | if (current == child_reaper) | 1886 | * Init of a pid space gets no signals it doesn't want from |
| 1887 | * within that pid space. It can of course get signals from | ||
| 1888 | * its parent pid space. | ||
| 1889 | */ | ||
| 1890 | if (current == child_reaper(current)) | ||
| 1882 | continue; | 1891 | continue; |
| 1883 | 1892 | ||
| 1884 | if (sig_kernel_stop(signr)) { | 1893 | if (sig_kernel_stop(signr)) { |
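In the signal.c hunk above, finish_stop() loops schedule() through try_to_freeze(), so a stopped task that is woken only by the freezer (for suspend or hibernate) goes back to sleep after thawing instead of resuming early. A toy sketch of that loop shape, with both helpers faked:

#include <stdio.h>

/* Stand-ins: schedule() "sleeps" until woken; try_to_freeze() reports whether
 * the wakeup came from the freezer rather than a real continue. */
static int freezer_wakeups = 2;

static void schedule(void) { /* task sleeps here until woken */ }

static int try_to_freeze(void)
{
    if (freezer_wakeups > 0) {
        freezer_wakeups--;         /* pretend we froze, thawed, and returned */
        return 1;
    }
    return 0;
}

int main(void)
{
    int loops = 0;

    do {
        schedule();
        loops++;
    } while (try_to_freeze());     /* same shape as the finish_stop() change */

    printf("slept %d times before a real continue\n", loops);
    return 0;
}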
diff --git a/kernel/sys.c b/kernel/sys.c index a0c1a29a507f..c7675c1bfdf2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -1381,7 +1381,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) | |||
| 1381 | 1381 | ||
| 1382 | if (p->real_parent == group_leader) { | 1382 | if (p->real_parent == group_leader) { |
| 1383 | err = -EPERM; | 1383 | err = -EPERM; |
| 1384 | if (p->signal->session != group_leader->signal->session) | 1384 | if (process_session(p) != process_session(group_leader)) |
| 1385 | goto out; | 1385 | goto out; |
| 1386 | err = -EACCES; | 1386 | err = -EACCES; |
| 1387 | if (p->did_exec) | 1387 | if (p->did_exec) |
| @@ -1397,16 +1397,13 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) | |||
| 1397 | goto out; | 1397 | goto out; |
| 1398 | 1398 | ||
| 1399 | if (pgid != pid) { | 1399 | if (pgid != pid) { |
| 1400 | struct task_struct *p; | 1400 | struct task_struct *g = |
| 1401 | find_task_by_pid_type(PIDTYPE_PGID, pgid); | ||
| 1401 | 1402 | ||
| 1402 | do_each_task_pid(pgid, PIDTYPE_PGID, p) { | 1403 | if (!g || process_session(g) != process_session(group_leader)) |
| 1403 | if (p->signal->session == group_leader->signal->session) | 1404 | goto out; |
| 1404 | goto ok_pgid; | ||
| 1405 | } while_each_task_pid(pgid, PIDTYPE_PGID, p); | ||
| 1406 | goto out; | ||
| 1407 | } | 1405 | } |
| 1408 | 1406 | ||
| 1409 | ok_pgid: | ||
| 1410 | err = security_task_setpgid(p, pgid); | 1407 | err = security_task_setpgid(p, pgid); |
| 1411 | if (err) | 1408 | if (err) |
| 1412 | goto out; | 1409 | goto out; |
| @@ -1459,7 +1456,7 @@ asmlinkage long sys_getpgrp(void) | |||
| 1459 | asmlinkage long sys_getsid(pid_t pid) | 1456 | asmlinkage long sys_getsid(pid_t pid) |
| 1460 | { | 1457 | { |
| 1461 | if (!pid) | 1458 | if (!pid) |
| 1462 | return current->signal->session; | 1459 | return process_session(current); |
| 1463 | else { | 1460 | else { |
| 1464 | int retval; | 1461 | int retval; |
| 1465 | struct task_struct *p; | 1462 | struct task_struct *p; |
| @@ -1471,7 +1468,7 @@ asmlinkage long sys_getsid(pid_t pid) | |||
| 1471 | if (p) { | 1468 | if (p) { |
| 1472 | retval = security_task_getsid(p); | 1469 | retval = security_task_getsid(p); |
| 1473 | if (!retval) | 1470 | if (!retval) |
| 1474 | retval = p->signal->session; | 1471 | retval = process_session(p); |
| 1475 | } | 1472 | } |
| 1476 | read_unlock(&tasklist_lock); | 1473 | read_unlock(&tasklist_lock); |
| 1477 | return retval; | 1474 | return retval; |
| @@ -1484,7 +1481,6 @@ asmlinkage long sys_setsid(void) | |||
| 1484 | pid_t session; | 1481 | pid_t session; |
| 1485 | int err = -EPERM; | 1482 | int err = -EPERM; |
| 1486 | 1483 | ||
| 1487 | mutex_lock(&tty_mutex); | ||
| 1488 | write_lock_irq(&tasklist_lock); | 1484 | write_lock_irq(&tasklist_lock); |
| 1489 | 1485 | ||
| 1490 | /* Fail if I am already a session leader */ | 1486 | /* Fail if I am already a session leader */ |
| @@ -1504,12 +1500,15 @@ asmlinkage long sys_setsid(void) | |||
| 1504 | 1500 | ||
| 1505 | group_leader->signal->leader = 1; | 1501 | group_leader->signal->leader = 1; |
| 1506 | __set_special_pids(session, session); | 1502 | __set_special_pids(session, session); |
| 1503 | |||
| 1504 | spin_lock(&group_leader->sighand->siglock); | ||
| 1507 | group_leader->signal->tty = NULL; | 1505 | group_leader->signal->tty = NULL; |
| 1508 | group_leader->signal->tty_old_pgrp = 0; | 1506 | group_leader->signal->tty_old_pgrp = 0; |
| 1507 | spin_unlock(&group_leader->sighand->siglock); | ||
| 1508 | |||
| 1509 | err = process_group(group_leader); | 1509 | err = process_group(group_leader); |
| 1510 | out: | 1510 | out: |
| 1511 | write_unlock_irq(&tasklist_lock); | 1511 | write_unlock_irq(&tasklist_lock); |
| 1512 | mutex_unlock(&tty_mutex); | ||
| 1513 | return err; | 1512 | return err; |
| 1514 | } | 1513 | } |
| 1515 | 1514 | ||
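The sys.c hunks above replace direct reads of p->signal->session with a process_session() helper and move the tty-field clearing in sys_setsid() under the sighand siglock instead of tty_mutex. The helper itself is defined outside this diff; as a rough illustration of why such a one-line accessor is worth having, a minimal userspace sketch (the struct layout and field name below are stand-ins, not the kernel's) could look like this:

	/* Illustrative only: a thin accessor in the style the sys.c hunks rely on.
	 * The real helper lives in a header outside this diff; the struct layout
	 * here is a stand-in, not the kernel's. */
	#include <stdio.h>

	struct signal_struct { int session; };          /* stand-in */
	struct task_struct  { struct signal_struct *signal; };

	static inline int process_session(struct task_struct *tsk)
	{
		/* One place to read the session id, so a later change (e.g. making
		 * it per-pid-namespace) only has to touch this helper. */
		return tsk->signal->session;
	}

	int main(void)
	{
		struct signal_struct sig = { .session = 42 };
		struct task_struct task = { .signal = &sig };
		printf("session: %d\n", process_session(&task));
		return 0;
	}

Funnelling every read through one helper is what lets later patches change how the session id is stored, for example per pid namespace, without revisiting each call site again.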
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8e9f00fd6d18..600b33358ded 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -65,7 +65,6 @@ extern int sysctl_overcommit_memory; | |||
| 65 | extern int sysctl_overcommit_ratio; | 65 | extern int sysctl_overcommit_ratio; |
| 66 | extern int sysctl_panic_on_oom; | 66 | extern int sysctl_panic_on_oom; |
| 67 | extern int max_threads; | 67 | extern int max_threads; |
| 68 | extern int sysrq_enabled; | ||
| 69 | extern int core_uses_pid; | 68 | extern int core_uses_pid; |
| 70 | extern int suid_dumpable; | 69 | extern int suid_dumpable; |
| 71 | extern char core_pattern[]; | 70 | extern char core_pattern[]; |
| @@ -92,7 +91,9 @@ extern char modprobe_path[]; | |||
| 92 | extern int sg_big_buff; | 91 | extern int sg_big_buff; |
| 93 | #endif | 92 | #endif |
| 94 | #ifdef CONFIG_SYSVIPC | 93 | #ifdef CONFIG_SYSVIPC |
| 95 | static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, | 94 | static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, |
| 95 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
| 96 | static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, | ||
| 96 | void __user *buffer, size_t *lenp, loff_t *ppos); | 97 | void __user *buffer, size_t *lenp, loff_t *ppos); |
| 97 | #endif | 98 | #endif |
| 98 | 99 | ||
| @@ -131,12 +132,22 @@ extern int max_lock_depth; | |||
| 131 | 132 | ||
| 132 | #ifdef CONFIG_SYSCTL_SYSCALL | 133 | #ifdef CONFIG_SYSCTL_SYSCALL |
| 133 | static int parse_table(int __user *, int, void __user *, size_t __user *, | 134 | static int parse_table(int __user *, int, void __user *, size_t __user *, |
| 134 | void __user *, size_t, ctl_table *, void **); | 135 | void __user *, size_t, ctl_table *); |
| 135 | #endif | 136 | #endif |
| 136 | 137 | ||
| 137 | static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, | 138 | static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, |
| 138 | void __user *buffer, size_t *lenp, loff_t *ppos); | 139 | void __user *buffer, size_t *lenp, loff_t *ppos); |
| 139 | 140 | ||
| 141 | static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, | ||
| 142 | void __user *oldval, size_t __user *oldlenp, | ||
| 143 | void __user *newval, size_t newlen); | ||
| 144 | |||
| 145 | #ifdef CONFIG_SYSVIPC | ||
| 146 | static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, | ||
| 147 | void __user *oldval, size_t __user *oldlenp, | ||
| 148 | void __user *newval, size_t newlen); | ||
| 149 | #endif | ||
| 150 | |||
| 140 | #ifdef CONFIG_PROC_SYSCTL | 151 | #ifdef CONFIG_PROC_SYSCTL |
| 141 | static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, | 152 | static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, |
| 142 | void __user *buffer, size_t *lenp, loff_t *ppos); | 153 | void __user *buffer, size_t *lenp, loff_t *ppos); |
| @@ -163,6 +174,40 @@ extern ctl_table inotify_table[]; | |||
| 163 | int sysctl_legacy_va_layout; | 174 | int sysctl_legacy_va_layout; |
| 164 | #endif | 175 | #endif |
| 165 | 176 | ||
| 177 | static void *get_uts(ctl_table *table, int write) | ||
| 178 | { | ||
| 179 | char *which = table->data; | ||
| 180 | #ifdef CONFIG_UTS_NS | ||
| 181 | struct uts_namespace *uts_ns = current->nsproxy->uts_ns; | ||
| 182 | which = (which - (char *)&init_uts_ns) + (char *)uts_ns; | ||
| 183 | #endif | ||
| 184 | if (!write) | ||
| 185 | down_read(&uts_sem); | ||
| 186 | else | ||
| 187 | down_write(&uts_sem); | ||
| 188 | return which; | ||
| 189 | } | ||
| 190 | |||
| 191 | static void put_uts(ctl_table *table, int write, void *which) | ||
| 192 | { | ||
| 193 | if (!write) | ||
| 194 | up_read(&uts_sem); | ||
| 195 | else | ||
| 196 | up_write(&uts_sem); | ||
| 197 | } | ||
| 198 | |||
| 199 | #ifdef CONFIG_SYSVIPC | ||
| 200 | static void *get_ipc(ctl_table *table, int write) | ||
| 201 | { | ||
| 202 | char *which = table->data; | ||
| 203 | struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; | ||
| 204 | which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; | ||
| 205 | return which; | ||
| 206 | } | ||
| 207 | #else | ||
| 208 | #define get_ipc(T,W) ((T)->data) | ||
| 209 | #endif | ||
| 210 | |||
| 166 | /* /proc declarations: */ | 211 | /* /proc declarations: */ |
| 167 | 212 | ||
| 168 | #ifdef CONFIG_PROC_SYSCTL | 213 | #ifdef CONFIG_PROC_SYSCTL |
| @@ -229,7 +274,6 @@ static ctl_table root_table[] = { | |||
| 229 | }; | 274 | }; |
| 230 | 275 | ||
| 231 | static ctl_table kern_table[] = { | 276 | static ctl_table kern_table[] = { |
| 232 | #ifndef CONFIG_UTS_NS | ||
| 233 | { | 277 | { |
| 234 | .ctl_name = KERN_OSTYPE, | 278 | .ctl_name = KERN_OSTYPE, |
| 235 | .procname = "ostype", | 279 | .procname = "ostype", |
| @@ -237,7 +281,7 @@ static ctl_table kern_table[] = { | |||
| 237 | .maxlen = sizeof(init_uts_ns.name.sysname), | 281 | .maxlen = sizeof(init_uts_ns.name.sysname), |
| 238 | .mode = 0444, | 282 | .mode = 0444, |
| 239 | .proc_handler = &proc_do_uts_string, | 283 | .proc_handler = &proc_do_uts_string, |
| 240 | .strategy = &sysctl_string, | 284 | .strategy = &sysctl_uts_string, |
| 241 | }, | 285 | }, |
| 242 | { | 286 | { |
| 243 | .ctl_name = KERN_OSRELEASE, | 287 | .ctl_name = KERN_OSRELEASE, |
| @@ -246,7 +290,7 @@ static ctl_table kern_table[] = { | |||
| 246 | .maxlen = sizeof(init_uts_ns.name.release), | 290 | .maxlen = sizeof(init_uts_ns.name.release), |
| 247 | .mode = 0444, | 291 | .mode = 0444, |
| 248 | .proc_handler = &proc_do_uts_string, | 292 | .proc_handler = &proc_do_uts_string, |
| 249 | .strategy = &sysctl_string, | 293 | .strategy = &sysctl_uts_string, |
| 250 | }, | 294 | }, |
| 251 | { | 295 | { |
| 252 | .ctl_name = KERN_VERSION, | 296 | .ctl_name = KERN_VERSION, |
| @@ -255,7 +299,7 @@ static ctl_table kern_table[] = { | |||
| 255 | .maxlen = sizeof(init_uts_ns.name.version), | 299 | .maxlen = sizeof(init_uts_ns.name.version), |
| 256 | .mode = 0444, | 300 | .mode = 0444, |
| 257 | .proc_handler = &proc_do_uts_string, | 301 | .proc_handler = &proc_do_uts_string, |
| 258 | .strategy = &sysctl_string, | 302 | .strategy = &sysctl_uts_string, |
| 259 | }, | 303 | }, |
| 260 | { | 304 | { |
| 261 | .ctl_name = KERN_NODENAME, | 305 | .ctl_name = KERN_NODENAME, |
| @@ -264,7 +308,7 @@ static ctl_table kern_table[] = { | |||
| 264 | .maxlen = sizeof(init_uts_ns.name.nodename), | 308 | .maxlen = sizeof(init_uts_ns.name.nodename), |
| 265 | .mode = 0644, | 309 | .mode = 0644, |
| 266 | .proc_handler = &proc_do_uts_string, | 310 | .proc_handler = &proc_do_uts_string, |
| 267 | .strategy = &sysctl_string, | 311 | .strategy = &sysctl_uts_string, |
| 268 | }, | 312 | }, |
| 269 | { | 313 | { |
| 270 | .ctl_name = KERN_DOMAINNAME, | 314 | .ctl_name = KERN_DOMAINNAME, |
| @@ -273,56 +317,8 @@ static ctl_table kern_table[] = { | |||
| 273 | .maxlen = sizeof(init_uts_ns.name.domainname), | 317 | .maxlen = sizeof(init_uts_ns.name.domainname), |
| 274 | .mode = 0644, | 318 | .mode = 0644, |
| 275 | .proc_handler = &proc_do_uts_string, | 319 | .proc_handler = &proc_do_uts_string, |
| 276 | .strategy = &sysctl_string, | 320 | .strategy = &sysctl_uts_string, |
| 277 | }, | ||
| 278 | #else /* !CONFIG_UTS_NS */ | ||
| 279 | { | ||
| 280 | .ctl_name = KERN_OSTYPE, | ||
| 281 | .procname = "ostype", | ||
| 282 | .data = NULL, | ||
| 283 | /* could maybe use __NEW_UTS_LEN here? */ | ||
| 284 | .maxlen = FIELD_SIZEOF(struct new_utsname, sysname), | ||
| 285 | .mode = 0444, | ||
| 286 | .proc_handler = &proc_do_uts_string, | ||
| 287 | .strategy = &sysctl_string, | ||
| 288 | }, | ||
| 289 | { | ||
| 290 | .ctl_name = KERN_OSRELEASE, | ||
| 291 | .procname = "osrelease", | ||
| 292 | .data = NULL, | ||
| 293 | .maxlen = FIELD_SIZEOF(struct new_utsname, release), | ||
| 294 | .mode = 0444, | ||
| 295 | .proc_handler = &proc_do_uts_string, | ||
| 296 | .strategy = &sysctl_string, | ||
| 297 | }, | ||
| 298 | { | ||
| 299 | .ctl_name = KERN_VERSION, | ||
| 300 | .procname = "version", | ||
| 301 | .data = NULL, | ||
| 302 | .maxlen = FIELD_SIZEOF(struct new_utsname, version), | ||
| 303 | .mode = 0444, | ||
| 304 | .proc_handler = &proc_do_uts_string, | ||
| 305 | .strategy = &sysctl_string, | ||
| 306 | }, | ||
| 307 | { | ||
| 308 | .ctl_name = KERN_NODENAME, | ||
| 309 | .procname = "hostname", | ||
| 310 | .data = NULL, | ||
| 311 | .maxlen = FIELD_SIZEOF(struct new_utsname, nodename), | ||
| 312 | .mode = 0644, | ||
| 313 | .proc_handler = &proc_do_uts_string, | ||
| 314 | .strategy = &sysctl_string, | ||
| 315 | }, | ||
| 316 | { | ||
| 317 | .ctl_name = KERN_DOMAINNAME, | ||
| 318 | .procname = "domainname", | ||
| 319 | .data = NULL, | ||
| 320 | .maxlen = FIELD_SIZEOF(struct new_utsname, domainname), | ||
| 321 | .mode = 0644, | ||
| 322 | .proc_handler = &proc_do_uts_string, | ||
| 323 | .strategy = &sysctl_string, | ||
| 324 | }, | 321 | }, |
| 325 | #endif /* !CONFIG_UTS_NS */ | ||
| 326 | { | 322 | { |
| 327 | .ctl_name = KERN_PANIC, | 323 | .ctl_name = KERN_PANIC, |
| 328 | .procname = "panic", | 324 | .procname = "panic", |
| @@ -481,65 +477,72 @@ static ctl_table kern_table[] = { | |||
| 481 | { | 477 | { |
| 482 | .ctl_name = KERN_SHMMAX, | 478 | .ctl_name = KERN_SHMMAX, |
| 483 | .procname = "shmmax", | 479 | .procname = "shmmax", |
| 484 | .data = NULL, | 480 | .data = &init_ipc_ns.shm_ctlmax, |
| 485 | .maxlen = sizeof (size_t), | 481 | .maxlen = sizeof (init_ipc_ns.shm_ctlmax), |
| 486 | .mode = 0644, | 482 | .mode = 0644, |
| 487 | .proc_handler = &proc_do_ipc_string, | 483 | .proc_handler = &proc_ipc_doulongvec_minmax, |
| 484 | .strategy = sysctl_ipc_data, | ||
| 488 | }, | 485 | }, |
| 489 | { | 486 | { |
| 490 | .ctl_name = KERN_SHMALL, | 487 | .ctl_name = KERN_SHMALL, |
| 491 | .procname = "shmall", | 488 | .procname = "shmall", |
| 492 | .data = NULL, | 489 | .data = &init_ipc_ns.shm_ctlall, |
| 493 | .maxlen = sizeof (size_t), | 490 | .maxlen = sizeof (init_ipc_ns.shm_ctlall), |
| 494 | .mode = 0644, | 491 | .mode = 0644, |
| 495 | .proc_handler = &proc_do_ipc_string, | 492 | .proc_handler = &proc_ipc_doulongvec_minmax, |
| 493 | .strategy = sysctl_ipc_data, | ||
| 496 | }, | 494 | }, |
| 497 | { | 495 | { |
| 498 | .ctl_name = KERN_SHMMNI, | 496 | .ctl_name = KERN_SHMMNI, |
| 499 | .procname = "shmmni", | 497 | .procname = "shmmni", |
| 500 | .data = NULL, | 498 | .data = &init_ipc_ns.shm_ctlmni, |
| 501 | .maxlen = sizeof (int), | 499 | .maxlen = sizeof (init_ipc_ns.shm_ctlmni), |
| 502 | .mode = 0644, | 500 | .mode = 0644, |
| 503 | .proc_handler = &proc_do_ipc_string, | 501 | .proc_handler = &proc_ipc_dointvec, |
| 502 | .strategy = sysctl_ipc_data, | ||
| 504 | }, | 503 | }, |
| 505 | { | 504 | { |
| 506 | .ctl_name = KERN_MSGMAX, | 505 | .ctl_name = KERN_MSGMAX, |
| 507 | .procname = "msgmax", | 506 | .procname = "msgmax", |
| 508 | .data = NULL, | 507 | .data = &init_ipc_ns.msg_ctlmax, |
| 509 | .maxlen = sizeof (int), | 508 | .maxlen = sizeof (init_ipc_ns.msg_ctlmax), |
| 510 | .mode = 0644, | 509 | .mode = 0644, |
| 511 | .proc_handler = &proc_do_ipc_string, | 510 | .proc_handler = &proc_ipc_dointvec, |
| 511 | .strategy = sysctl_ipc_data, | ||
| 512 | }, | 512 | }, |
| 513 | { | 513 | { |
| 514 | .ctl_name = KERN_MSGMNI, | 514 | .ctl_name = KERN_MSGMNI, |
| 515 | .procname = "msgmni", | 515 | .procname = "msgmni", |
| 516 | .data = NULL, | 516 | .data = &init_ipc_ns.msg_ctlmni, |
| 517 | .maxlen = sizeof (int), | 517 | .maxlen = sizeof (init_ipc_ns.msg_ctlmni), |
| 518 | .mode = 0644, | 518 | .mode = 0644, |
| 519 | .proc_handler = &proc_do_ipc_string, | 519 | .proc_handler = &proc_ipc_dointvec, |
| 520 | .strategy = sysctl_ipc_data, | ||
| 520 | }, | 521 | }, |
| 521 | { | 522 | { |
| 522 | .ctl_name = KERN_MSGMNB, | 523 | .ctl_name = KERN_MSGMNB, |
| 523 | .procname = "msgmnb", | 524 | .procname = "msgmnb", |
| 524 | .data = NULL, | 525 | .data = &init_ipc_ns.msg_ctlmnb, |
| 525 | .maxlen = sizeof (int), | 526 | .maxlen = sizeof (init_ipc_ns.msg_ctlmnb), |
| 526 | .mode = 0644, | 527 | .mode = 0644, |
| 527 | .proc_handler = &proc_do_ipc_string, | 528 | .proc_handler = &proc_ipc_dointvec, |
| 529 | .strategy = sysctl_ipc_data, | ||
| 528 | }, | 530 | }, |
| 529 | { | 531 | { |
| 530 | .ctl_name = KERN_SEM, | 532 | .ctl_name = KERN_SEM, |
| 531 | .procname = "sem", | 533 | .procname = "sem", |
| 532 | .data = NULL, | 534 | .data = &init_ipc_ns.sem_ctls, |
| 533 | .maxlen = 4*sizeof (int), | 535 | .maxlen = 4*sizeof (int), |
| 534 | .mode = 0644, | 536 | .mode = 0644, |
| 535 | .proc_handler = &proc_do_ipc_string, | 537 | .proc_handler = &proc_ipc_dointvec, |
| 538 | .strategy = sysctl_ipc_data, | ||
| 536 | }, | 539 | }, |
| 537 | #endif | 540 | #endif |
| 538 | #ifdef CONFIG_MAGIC_SYSRQ | 541 | #ifdef CONFIG_MAGIC_SYSRQ |
| 539 | { | 542 | { |
| 540 | .ctl_name = KERN_SYSRQ, | 543 | .ctl_name = KERN_SYSRQ, |
| 541 | .procname = "sysrq", | 544 | .procname = "sysrq", |
| 542 | .data = &sysrq_enabled, | 545 | .data = &__sysrq_enabled, |
| 543 | .maxlen = sizeof (int), | 546 | .maxlen = sizeof (int), |
| 544 | .mode = 0644, | 547 | .mode = 0644, |
| 545 | .proc_handler = &proc_dointvec, | 548 | .proc_handler = &proc_dointvec, |
| @@ -1239,7 +1242,6 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol | |||
| 1239 | do { | 1242 | do { |
| 1240 | struct ctl_table_header *head = | 1243 | struct ctl_table_header *head = |
| 1241 | list_entry(tmp, struct ctl_table_header, ctl_entry); | 1244 | list_entry(tmp, struct ctl_table_header, ctl_entry); |
| 1242 | void *context = NULL; | ||
| 1243 | 1245 | ||
| 1244 | if (!use_table(head)) | 1246 | if (!use_table(head)) |
| 1245 | continue; | 1247 | continue; |
| @@ -1247,9 +1249,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol | |||
| 1247 | spin_unlock(&sysctl_lock); | 1249 | spin_unlock(&sysctl_lock); |
| 1248 | 1250 | ||
| 1249 | error = parse_table(name, nlen, oldval, oldlenp, | 1251 | error = parse_table(name, nlen, oldval, oldlenp, |
| 1250 | newval, newlen, head->ctl_table, | 1252 | newval, newlen, head->ctl_table); |
| 1251 | &context); | ||
| 1252 | kfree(context); | ||
| 1253 | 1253 | ||
| 1254 | spin_lock(&sysctl_lock); | 1254 | spin_lock(&sysctl_lock); |
| 1255 | unuse_table(head); | 1255 | unuse_table(head); |
| @@ -1305,7 +1305,7 @@ static inline int ctl_perm(ctl_table *table, int op) | |||
| 1305 | static int parse_table(int __user *name, int nlen, | 1305 | static int parse_table(int __user *name, int nlen, |
| 1306 | void __user *oldval, size_t __user *oldlenp, | 1306 | void __user *oldval, size_t __user *oldlenp, |
| 1307 | void __user *newval, size_t newlen, | 1307 | void __user *newval, size_t newlen, |
| 1308 | ctl_table *table, void **context) | 1308 | ctl_table *table) |
| 1309 | { | 1309 | { |
| 1310 | int n; | 1310 | int n; |
| 1311 | repeat: | 1311 | repeat: |
| @@ -1325,7 +1325,7 @@ repeat: | |||
| 1325 | error = table->strategy( | 1325 | error = table->strategy( |
| 1326 | table, name, nlen, | 1326 | table, name, nlen, |
| 1327 | oldval, oldlenp, | 1327 | oldval, oldlenp, |
| 1328 | newval, newlen, context); | 1328 | newval, newlen); |
| 1329 | if (error) | 1329 | if (error) |
| 1330 | return error; | 1330 | return error; |
| 1331 | } | 1331 | } |
| @@ -1336,7 +1336,7 @@ repeat: | |||
| 1336 | } | 1336 | } |
| 1337 | error = do_sysctl_strategy(table, name, nlen, | 1337 | error = do_sysctl_strategy(table, name, nlen, |
| 1338 | oldval, oldlenp, | 1338 | oldval, oldlenp, |
| 1339 | newval, newlen, context); | 1339 | newval, newlen); |
| 1340 | return error; | 1340 | return error; |
| 1341 | } | 1341 | } |
| 1342 | } | 1342 | } |
| @@ -1347,7 +1347,7 @@ repeat: | |||
| 1347 | int do_sysctl_strategy (ctl_table *table, | 1347 | int do_sysctl_strategy (ctl_table *table, |
| 1348 | int __user *name, int nlen, | 1348 | int __user *name, int nlen, |
| 1349 | void __user *oldval, size_t __user *oldlenp, | 1349 | void __user *oldval, size_t __user *oldlenp, |
| 1350 | void __user *newval, size_t newlen, void **context) | 1350 | void __user *newval, size_t newlen) |
| 1351 | { | 1351 | { |
| 1352 | int op = 0, rc; | 1352 | int op = 0, rc; |
| 1353 | size_t len; | 1353 | size_t len; |
| @@ -1361,7 +1361,7 @@ int do_sysctl_strategy (ctl_table *table, | |||
| 1361 | 1361 | ||
| 1362 | if (table->strategy) { | 1362 | if (table->strategy) { |
| 1363 | rc = table->strategy(table, name, nlen, oldval, oldlenp, | 1363 | rc = table->strategy(table, name, nlen, oldval, oldlenp, |
| 1364 | newval, newlen, context); | 1364 | newval, newlen); |
| 1365 | if (rc < 0) | 1365 | if (rc < 0) |
| 1366 | return rc; | 1366 | return rc; |
| 1367 | if (rc > 0) | 1367 | if (rc > 0) |
| @@ -1614,7 +1614,7 @@ static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, | |||
| 1614 | size_t count, loff_t *ppos) | 1614 | size_t count, loff_t *ppos) |
| 1615 | { | 1615 | { |
| 1616 | int op; | 1616 | int op; |
| 1617 | struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); | 1617 | struct proc_dir_entry *de = PDE(file->f_path.dentry->d_inode); |
| 1618 | struct ctl_table *table; | 1618 | struct ctl_table *table; |
| 1619 | size_t res; | 1619 | size_t res; |
| 1620 | ssize_t error = -ENOTDIR; | 1620 | ssize_t error = -ENOTDIR; |
| @@ -1753,66 +1753,17 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, | |||
| 1753 | * Special case of dostring for the UTS structure. This has locks | 1753 | * Special case of dostring for the UTS structure. This has locks |
| 1754 | * to observe. Should this be in kernel/sys.c ???? | 1754 | * to observe. Should this be in kernel/sys.c ???? |
| 1755 | */ | 1755 | */ |
| 1756 | |||
| 1757 | #ifndef CONFIG_UTS_NS | ||
| 1758 | static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, | ||
| 1759 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 1760 | { | ||
| 1761 | int r; | ||
| 1762 | 1756 | ||
| 1763 | if (!write) { | ||
| 1764 | down_read(&uts_sem); | ||
| 1765 | r=proc_dostring(table,0,filp,buffer,lenp, ppos); | ||
| 1766 | up_read(&uts_sem); | ||
| 1767 | } else { | ||
| 1768 | down_write(&uts_sem); | ||
| 1769 | r=proc_dostring(table,1,filp,buffer,lenp, ppos); | ||
| 1770 | up_write(&uts_sem); | ||
| 1771 | } | ||
| 1772 | return r; | ||
| 1773 | } | ||
| 1774 | #else /* !CONFIG_UTS_NS */ | ||
| 1775 | static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, | 1757 | static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, |
| 1776 | void __user *buffer, size_t *lenp, loff_t *ppos) | 1758 | void __user *buffer, size_t *lenp, loff_t *ppos) |
| 1777 | { | 1759 | { |
| 1778 | int r; | 1760 | int r; |
| 1779 | struct uts_namespace* uts_ns = current->nsproxy->uts_ns; | 1761 | void *which; |
| 1780 | char* which; | 1762 | which = get_uts(table, write); |
| 1781 | 1763 | r = _proc_do_string(which, table->maxlen,write,filp,buffer,lenp, ppos); | |
| 1782 | switch (table->ctl_name) { | 1764 | put_uts(table, write, which); |
| 1783 | case KERN_OSTYPE: | ||
| 1784 | which = uts_ns->name.sysname; | ||
| 1785 | break; | ||
| 1786 | case KERN_NODENAME: | ||
| 1787 | which = uts_ns->name.nodename; | ||
| 1788 | break; | ||
| 1789 | case KERN_OSRELEASE: | ||
| 1790 | which = uts_ns->name.release; | ||
| 1791 | break; | ||
| 1792 | case KERN_VERSION: | ||
| 1793 | which = uts_ns->name.version; | ||
| 1794 | break; | ||
| 1795 | case KERN_DOMAINNAME: | ||
| 1796 | which = uts_ns->name.domainname; | ||
| 1797 | break; | ||
| 1798 | default: | ||
| 1799 | r = -EINVAL; | ||
| 1800 | goto out; | ||
| 1801 | } | ||
| 1802 | |||
| 1803 | if (!write) { | ||
| 1804 | down_read(&uts_sem); | ||
| 1805 | r=_proc_do_string(which,table->maxlen,0,filp,buffer,lenp, ppos); | ||
| 1806 | up_read(&uts_sem); | ||
| 1807 | } else { | ||
| 1808 | down_write(&uts_sem); | ||
| 1809 | r=_proc_do_string(which,table->maxlen,1,filp,buffer,lenp, ppos); | ||
| 1810 | up_write(&uts_sem); | ||
| 1811 | } | ||
| 1812 | out: | ||
| 1813 | return r; | 1765 | return r; |
| 1814 | } | 1766 | } |
| 1815 | #endif /* !CONFIG_UTS_NS */ | ||
| 1816 | 1767 | ||
| 1817 | static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, | 1768 | static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, |
| 1818 | int *valp, | 1769 | int *valp, |
| @@ -1976,9 +1927,6 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp, | |||
| 1976 | 1927 | ||
| 1977 | #define OP_SET 0 | 1928 | #define OP_SET 0 |
| 1978 | #define OP_AND 1 | 1929 | #define OP_AND 1 |
| 1979 | #define OP_OR 2 | ||
| 1980 | #define OP_MAX 3 | ||
| 1981 | #define OP_MIN 4 | ||
| 1982 | 1930 | ||
| 1983 | static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, | 1931 | static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, |
| 1984 | int *valp, | 1932 | int *valp, |
| @@ -1990,13 +1938,6 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, | |||
| 1990 | switch(op) { | 1938 | switch(op) { |
| 1991 | case OP_SET: *valp = val; break; | 1939 | case OP_SET: *valp = val; break; |
| 1992 | case OP_AND: *valp &= val; break; | 1940 | case OP_AND: *valp &= val; break; |
| 1993 | case OP_OR: *valp |= val; break; | ||
| 1994 | case OP_MAX: if(*valp < val) | ||
| 1995 | *valp = val; | ||
| 1996 | break; | ||
| 1997 | case OP_MIN: if(*valp > val) | ||
| 1998 | *valp = val; | ||
| 1999 | break; | ||
| 2000 | } | 1941 | } |
| 2001 | } else { | 1942 | } else { |
| 2002 | int val = *valp; | 1943 | int val = *valp; |
| @@ -2391,46 +2332,24 @@ int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, | |||
| 2391 | } | 2332 | } |
| 2392 | 2333 | ||
| 2393 | #ifdef CONFIG_SYSVIPC | 2334 | #ifdef CONFIG_SYSVIPC |
| 2394 | static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, | 2335 | static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, |
| 2395 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2336 | void __user *buffer, size_t *lenp, loff_t *ppos) |
| 2396 | { | 2337 | { |
| 2397 | void *data; | 2338 | void *which; |
| 2398 | struct ipc_namespace *ns; | 2339 | which = get_ipc(table, write); |
| 2399 | 2340 | return __do_proc_dointvec(which, table, write, filp, buffer, | |
| 2400 | ns = current->nsproxy->ipc_ns; | ||
| 2401 | |||
| 2402 | switch (table->ctl_name) { | ||
| 2403 | case KERN_SHMMAX: | ||
| 2404 | data = &ns->shm_ctlmax; | ||
| 2405 | goto proc_minmax; | ||
| 2406 | case KERN_SHMALL: | ||
| 2407 | data = &ns->shm_ctlall; | ||
| 2408 | goto proc_minmax; | ||
| 2409 | case KERN_SHMMNI: | ||
| 2410 | data = &ns->shm_ctlmni; | ||
| 2411 | break; | ||
| 2412 | case KERN_MSGMAX: | ||
| 2413 | data = &ns->msg_ctlmax; | ||
| 2414 | break; | ||
| 2415 | case KERN_MSGMNI: | ||
| 2416 | data = &ns->msg_ctlmni; | ||
| 2417 | break; | ||
| 2418 | case KERN_MSGMNB: | ||
| 2419 | data = &ns->msg_ctlmnb; | ||
| 2420 | break; | ||
| 2421 | case KERN_SEM: | ||
| 2422 | data = &ns->sem_ctls; | ||
| 2423 | break; | ||
| 2424 | default: | ||
| 2425 | return -EINVAL; | ||
| 2426 | } | ||
| 2427 | |||
| 2428 | return __do_proc_dointvec(data, table, write, filp, buffer, | ||
| 2429 | lenp, ppos, NULL, NULL); | 2341 | lenp, ppos, NULL, NULL); |
| 2430 | proc_minmax: | 2342 | } |
| 2431 | return __do_proc_doulongvec_minmax(data, table, write, filp, buffer, | 2343 | |
| 2344 | static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, | ||
| 2345 | struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 2346 | { | ||
| 2347 | void *which; | ||
| 2348 | which = get_ipc(table, write); | ||
| 2349 | return __do_proc_doulongvec_minmax(which, table, write, filp, buffer, | ||
| 2432 | lenp, ppos, 1l, 1l); | 2350 | lenp, ppos, 1l, 1l); |
| 2433 | } | 2351 | } |
| 2352 | |||
| 2434 | #endif | 2353 | #endif |
| 2435 | 2354 | ||
| 2436 | static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, | 2355 | static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, |
| @@ -2475,6 +2394,17 @@ static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, | |||
| 2475 | { | 2394 | { |
| 2476 | return -ENOSYS; | 2395 | return -ENOSYS; |
| 2477 | } | 2396 | } |
| 2397 | static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, | ||
| 2398 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 2399 | { | ||
| 2400 | return -ENOSYS; | ||
| 2401 | } | ||
| 2402 | static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, | ||
| 2403 | struct file *filp, void __user *buffer, | ||
| 2404 | size_t *lenp, loff_t *ppos) | ||
| 2405 | { | ||
| 2406 | return -ENOSYS; | ||
| 2407 | } | ||
| 2478 | #endif | 2408 | #endif |
| 2479 | 2409 | ||
| 2480 | int proc_dointvec(ctl_table *table, int write, struct file *filp, | 2410 | int proc_dointvec(ctl_table *table, int write, struct file *filp, |
| @@ -2539,7 +2469,7 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, | |||
| 2539 | /* The generic string strategy routine: */ | 2469 | /* The generic string strategy routine: */ |
| 2540 | int sysctl_string(ctl_table *table, int __user *name, int nlen, | 2470 | int sysctl_string(ctl_table *table, int __user *name, int nlen, |
| 2541 | void __user *oldval, size_t __user *oldlenp, | 2471 | void __user *oldval, size_t __user *oldlenp, |
| 2542 | void __user *newval, size_t newlen, void **context) | 2472 | void __user *newval, size_t newlen) |
| 2543 | { | 2473 | { |
| 2544 | if (!table->data || !table->maxlen) | 2474 | if (!table->data || !table->maxlen) |
| 2545 | return -ENOTDIR; | 2475 | return -ENOTDIR; |
| @@ -2585,7 +2515,7 @@ int sysctl_string(ctl_table *table, int __user *name, int nlen, | |||
| 2585 | */ | 2515 | */ |
| 2586 | int sysctl_intvec(ctl_table *table, int __user *name, int nlen, | 2516 | int sysctl_intvec(ctl_table *table, int __user *name, int nlen, |
| 2587 | void __user *oldval, size_t __user *oldlenp, | 2517 | void __user *oldval, size_t __user *oldlenp, |
| 2588 | void __user *newval, size_t newlen, void **context) | 2518 | void __user *newval, size_t newlen) |
| 2589 | { | 2519 | { |
| 2590 | 2520 | ||
| 2591 | if (newval && newlen) { | 2521 | if (newval && newlen) { |
| @@ -2621,7 +2551,7 @@ int sysctl_intvec(ctl_table *table, int __user *name, int nlen, | |||
| 2621 | /* Strategy function to convert jiffies to seconds */ | 2551 | /* Strategy function to convert jiffies to seconds */ |
| 2622 | int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, | 2552 | int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, |
| 2623 | void __user *oldval, size_t __user *oldlenp, | 2553 | void __user *oldval, size_t __user *oldlenp, |
| 2624 | void __user *newval, size_t newlen, void **context) | 2554 | void __user *newval, size_t newlen) |
| 2625 | { | 2555 | { |
| 2626 | if (oldval) { | 2556 | if (oldval) { |
| 2627 | size_t olen; | 2557 | size_t olen; |
| @@ -2649,7 +2579,7 @@ int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, | |||
| 2649 | /* Strategy function to convert jiffies to seconds */ | 2579 | /* Strategy function to convert jiffies to seconds */ |
| 2650 | int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, | 2580 | int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, |
| 2651 | void __user *oldval, size_t __user *oldlenp, | 2581 | void __user *oldval, size_t __user *oldlenp, |
| 2652 | void __user *newval, size_t newlen, void **context) | 2582 | void __user *newval, size_t newlen) |
| 2653 | { | 2583 | { |
| 2654 | if (oldval) { | 2584 | if (oldval) { |
| 2655 | size_t olen; | 2585 | size_t olen; |
| @@ -2674,6 +2604,64 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, | |||
| 2674 | return 1; | 2604 | return 1; |
| 2675 | } | 2605 | } |
| 2676 | 2606 | ||
| 2607 | |||
| 2608 | /* The generic string strategy routine: */ | ||
| 2609 | static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, | ||
| 2610 | void __user *oldval, size_t __user *oldlenp, | ||
| 2611 | void __user *newval, size_t newlen) | ||
| 2612 | { | ||
| 2613 | struct ctl_table uts_table; | ||
| 2614 | int r, write; | ||
| 2615 | write = newval && newlen; | ||
| 2616 | memcpy(&uts_table, table, sizeof(uts_table)); | ||
| 2617 | uts_table.data = get_uts(table, write); | ||
| 2618 | r = sysctl_string(&uts_table, name, nlen, | ||
| 2619 | oldval, oldlenp, newval, newlen); | ||
| 2620 | put_uts(table, write, uts_table.data); | ||
| 2621 | return r; | ||
| 2622 | } | ||
| 2623 | |||
| 2624 | #ifdef CONFIG_SYSVIPC | ||
| 2625 | /* The generic sysctl ipc data routine. */ | ||
| 2626 | static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, | ||
| 2627 | void __user *oldval, size_t __user *oldlenp, | ||
| 2628 | void __user *newval, size_t newlen) | ||
| 2629 | { | ||
| 2630 | size_t len; | ||
| 2631 | void *data; | ||
| 2632 | |||
| 2633 | /* Get out of I don't have a variable */ | ||
| 2634 | if (!table->data || !table->maxlen) | ||
| 2635 | return -ENOTDIR; | ||
| 2636 | |||
| 2637 | data = get_ipc(table, 1); | ||
| 2638 | if (!data) | ||
| 2639 | return -ENOTDIR; | ||
| 2640 | |||
| 2641 | if (oldval && oldlenp) { | ||
| 2642 | if (get_user(len, oldlenp)) | ||
| 2643 | return -EFAULT; | ||
| 2644 | if (len) { | ||
| 2645 | if (len > table->maxlen) | ||
| 2646 | len = table->maxlen; | ||
| 2647 | if (copy_to_user(oldval, data, len)) | ||
| 2648 | return -EFAULT; | ||
| 2649 | if (put_user(len, oldlenp)) | ||
| 2650 | return -EFAULT; | ||
| 2651 | } | ||
| 2652 | } | ||
| 2653 | |||
| 2654 | if (newval && newlen) { | ||
| 2655 | if (newlen > table->maxlen) | ||
| 2656 | newlen = table->maxlen; | ||
| 2657 | |||
| 2658 | if (copy_from_user(data, newval, newlen)) | ||
| 2659 | return -EFAULT; | ||
| 2660 | } | ||
| 2661 | return 1; | ||
| 2662 | } | ||
| 2663 | #endif | ||
| 2664 | |||
| 2677 | #else /* CONFIG_SYSCTL_SYSCALL */ | 2665 | #else /* CONFIG_SYSCTL_SYSCALL */ |
| 2678 | 2666 | ||
| 2679 | 2667 | ||
| @@ -2712,32 +2700,44 @@ out: | |||
| 2712 | 2700 | ||
| 2713 | int sysctl_string(ctl_table *table, int __user *name, int nlen, | 2701 | int sysctl_string(ctl_table *table, int __user *name, int nlen, |
| 2714 | void __user *oldval, size_t __user *oldlenp, | 2702 | void __user *oldval, size_t __user *oldlenp, |
| 2715 | void __user *newval, size_t newlen, void **context) | 2703 | void __user *newval, size_t newlen) |
| 2716 | { | 2704 | { |
| 2717 | return -ENOSYS; | 2705 | return -ENOSYS; |
| 2718 | } | 2706 | } |
| 2719 | 2707 | ||
| 2720 | int sysctl_intvec(ctl_table *table, int __user *name, int nlen, | 2708 | int sysctl_intvec(ctl_table *table, int __user *name, int nlen, |
| 2721 | void __user *oldval, size_t __user *oldlenp, | 2709 | void __user *oldval, size_t __user *oldlenp, |
| 2722 | void __user *newval, size_t newlen, void **context) | 2710 | void __user *newval, size_t newlen) |
| 2723 | { | 2711 | { |
| 2724 | return -ENOSYS; | 2712 | return -ENOSYS; |
| 2725 | } | 2713 | } |
| 2726 | 2714 | ||
| 2727 | int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, | 2715 | int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, |
| 2728 | void __user *oldval, size_t __user *oldlenp, | 2716 | void __user *oldval, size_t __user *oldlenp, |
| 2729 | void __user *newval, size_t newlen, void **context) | 2717 | void __user *newval, size_t newlen) |
| 2730 | { | 2718 | { |
| 2731 | return -ENOSYS; | 2719 | return -ENOSYS; |
| 2732 | } | 2720 | } |
| 2733 | 2721 | ||
| 2734 | int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, | 2722 | int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, |
| 2735 | void __user *oldval, size_t __user *oldlenp, | 2723 | void __user *oldval, size_t __user *oldlenp, |
| 2736 | void __user *newval, size_t newlen, void **context) | 2724 | void __user *newval, size_t newlen) |
| 2737 | { | 2725 | { |
| 2738 | return -ENOSYS; | 2726 | return -ENOSYS; |
| 2739 | } | 2727 | } |
| 2740 | 2728 | ||
| 2729 | static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, | ||
| 2730 | void __user *oldval, size_t __user *oldlenp, | ||
| 2731 | void __user *newval, size_t newlen) | ||
| 2732 | { | ||
| 2733 | return -ENOSYS; | ||
| 2734 | } | ||
| 2735 | static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, | ||
| 2736 | void __user *oldval, size_t __user *oldlenp, | ||
| 2737 | void __user *newval, size_t newlen) | ||
| 2738 | { | ||
| 2739 | return -ENOSYS; | ||
| 2740 | } | ||
| 2741 | #endif /* CONFIG_SYSCTL_SYSCALL */ | 2741 | #endif /* CONFIG_SYSCTL_SYSCALL */ |
| 2742 | 2742 | ||
| 2743 | /* | 2743 | /* |
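The new get_uts()/get_ipc() helpers above turn a ctl_table .data pointer, which still points into init_uts_ns or init_ipc_ns, into a pointer to the same field of the current task's namespace by preserving the byte offset. A self-contained userspace sketch of that rebasing idiom (the struct and field names here are invented for illustration) is:

	/* Standalone sketch of the pointer-rebasing idiom used by get_uts()/get_ipc():
	 * table->data points at a field of the init namespace; the same byte offset
	 * is applied to the current task's namespace instance. Struct names and
	 * fields below are stand-ins, not the kernel's. */
	#include <stddef.h>
	#include <stdio.h>

	struct uts_ns { char sysname[16]; char nodename[16]; };

	static struct uts_ns init_ns = { "Linux", "init-node" };
	static struct uts_ns task_ns = { "Linux", "container-node" };

	/* Rebase a pointer to a field of init_ns onto another namespace instance. */
	static void *rebase(void *field_in_init, struct uts_ns *ns)
	{
		size_t offset = (char *)field_in_init - (char *)&init_ns;
		return (char *)ns + offset;
	}

	int main(void)
	{
		/* A sysctl table entry would carry &init_ns.nodename as .data ... */
		char *which = rebase(init_ns.nodename, &task_ns);
		/* ... but the handler reads and writes the current namespace's copy. */
		printf("%s\n", which);          /* prints "container-node" */
		return 0;
	}

The same trick lets the single kern_table[] entries serve every namespace: the table keeps pointing at the init instance for sizing, while the handlers redirect reads and writes at run time.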
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 74eca5939bd9..22504afc0d34 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -156,7 +156,7 @@ int clocksource_register(struct clocksource *c) | |||
| 156 | /* check if clocksource is already registered */ | 156 | /* check if clocksource is already registered */ |
| 157 | if (is_registered_source(c)) { | 157 | if (is_registered_source(c)) { |
| 158 | printk("register_clocksource: Cannot register %s. " | 158 | printk("register_clocksource: Cannot register %s. " |
| 159 | "Already registered!", c->name); | 159 | "Already registered!", c->name); |
| 160 | ret = -EBUSY; | 160 | ret = -EBUSY; |
| 161 | } else { | 161 | } else { |
| 162 | /* register it */ | 162 | /* register it */ |
| @@ -186,6 +186,7 @@ void clocksource_reselect(void) | |||
| 186 | } | 186 | } |
| 187 | EXPORT_SYMBOL(clocksource_reselect); | 187 | EXPORT_SYMBOL(clocksource_reselect); |
| 188 | 188 | ||
| 189 | #ifdef CONFIG_SYSFS | ||
| 189 | /** | 190 | /** |
| 190 | * sysfs_show_current_clocksources - sysfs interface for current clocksource | 191 | * sysfs_show_current_clocksources - sysfs interface for current clocksource |
| 191 | * @dev: unused | 192 | * @dev: unused |
| @@ -275,10 +276,10 @@ sysfs_show_available_clocksources(struct sys_device *dev, char *buf) | |||
| 275 | * Sysfs setup bits: | 276 | * Sysfs setup bits: |
| 276 | */ | 277 | */ |
| 277 | static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources, | 278 | static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources, |
| 278 | sysfs_override_clocksource); | 279 | sysfs_override_clocksource); |
| 279 | 280 | ||
| 280 | static SYSDEV_ATTR(available_clocksource, 0600, | 281 | static SYSDEV_ATTR(available_clocksource, 0600, |
| 281 | sysfs_show_available_clocksources, NULL); | 282 | sysfs_show_available_clocksources, NULL); |
| 282 | 283 | ||
| 283 | static struct sysdev_class clocksource_sysclass = { | 284 | static struct sysdev_class clocksource_sysclass = { |
| 284 | set_kset_name("clocksource"), | 285 | set_kset_name("clocksource"), |
| @@ -307,6 +308,7 @@ static int __init init_clocksource_sysfs(void) | |||
| 307 | } | 308 | } |
| 308 | 309 | ||
| 309 | device_initcall(init_clocksource_sysfs); | 310 | device_initcall(init_clocksource_sysfs); |
| 311 | #endif /* CONFIG_SYSFS */ | ||
| 310 | 312 | ||
| 311 | /** | 313 | /** |
| 312 | * boot_override_clocksource - boot clock override | 314 | * boot_override_clocksource - boot clock override |
diff --git a/kernel/timer.c b/kernel/timer.c index c1c7fbcffec1..feddf817baa5 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -80,6 +80,138 @@ tvec_base_t boot_tvec_bases; | |||
| 80 | EXPORT_SYMBOL(boot_tvec_bases); | 80 | EXPORT_SYMBOL(boot_tvec_bases); |
| 81 | static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; | 81 | static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; |
| 82 | 82 | ||
| 83 | /** | ||
| 84 | * __round_jiffies - function to round jiffies to a full second | ||
| 85 | * @j: the time in (absolute) jiffies that should be rounded | ||
| 86 | * @cpu: the processor number on which the timeout will happen | ||
| 87 | * | ||
| 88 | * __round_jiffies rounds an absolute time in the future (in jiffies) | ||
| 89 | * up or down to (approximately) full seconds. This is useful for timers | ||
| 90 | * for which the exact time they fire does not matter too much, as long as | ||
| 91 | * they fire approximately every X seconds. | ||
| 92 | * | ||
| 93 | * By rounding these timers to whole seconds, all such timers will fire | ||
| 94 | * at the same time, rather than at various times spread out. The goal | ||
| 95 | * of this is to have the CPU wake up less, which saves power. | ||
| 96 | * | ||
| 97 | * The exact rounding is skewed for each processor to avoid all | ||
| 98 | * processors firing at the exact same time, which could lead | ||
| 99 | * to lock contention or spurious cache line bouncing. | ||
| 100 | * | ||
| 101 | * The return value is the rounded version of the "j" parameter. | ||
| 102 | */ | ||
| 103 | unsigned long __round_jiffies(unsigned long j, int cpu) | ||
| 104 | { | ||
| 105 | int rem; | ||
| 106 | unsigned long original = j; | ||
| 107 | |||
| 108 | /* | ||
| 109 | * We don't want all cpus firing their timers at once hitting the | ||
| 110 | * same lock or cachelines, so we skew each extra cpu with an extra | ||
| 111 | * 3 jiffies. This 3 jiffies came originally from the mm/ code which | ||
| 112 | * already did this. | ||
| 113 | * The skew is done by adding 3*cpunr, then round, then subtract this | ||
| 114 | * extra offset again. | ||
| 115 | */ | ||
| 116 | j += cpu * 3; | ||
| 117 | |||
| 118 | rem = j % HZ; | ||
| 119 | |||
| 120 | /* | ||
| 121 | * If the target jiffie is just after a whole second (which can happen | ||
| 122 | * due to delays of the timer irq, long irq off times etc etc) then | ||
| 123 | * we should round down to the whole second, not up. Use 1/4th second | ||
| 124 | * as cutoff for this rounding as an extreme upper bound for this. | ||
| 125 | */ | ||
| 126 | if (rem < HZ/4) /* round down */ | ||
| 127 | j = j - rem; | ||
| 128 | else /* round up */ | ||
| 129 | j = j - rem + HZ; | ||
| 130 | |||
| 131 | /* now that we have rounded, subtract the extra skew again */ | ||
| 132 | j -= cpu * 3; | ||
| 133 | |||
| 134 | if (j <= jiffies) /* rounding ate our timeout entirely; */ | ||
| 135 | return original; | ||
| 136 | return j; | ||
| 137 | } | ||
| 138 | EXPORT_SYMBOL_GPL(__round_jiffies); | ||
| 139 | |||
| 140 | /** | ||
| 141 | * __round_jiffies_relative - function to round jiffies to a full second | ||
| 142 | * @j: the time in (relative) jiffies that should be rounded | ||
| 143 | * @cpu: the processor number on which the timeout will happen | ||
| 144 | * | ||
| 145 | * __round_jiffies_relative rounds a time delta in the future (in jiffies) | ||
| 146 | * up or down to (approximately) full seconds. This is useful for timers | ||
| 147 | * for which the exact time they fire does not matter too much, as long as | ||
| 148 | * they fire approximately every X seconds. | ||
| 149 | * | ||
| 150 | * By rounding these timers to whole seconds, all such timers will fire | ||
| 151 | * at the same time, rather than at various times spread out. The goal | ||
| 152 | * of this is to have the CPU wake up less, which saves power. | ||
| 153 | * | ||
| 154 | * The exact rounding is skewed for each processor to avoid all | ||
| 155 | * processors firing at the exact same time, which could lead | ||
| 156 | * to lock contention or spurious cache line bouncing. | ||
| 157 | * | ||
| 158 | * The return value is the rounded version of the "j" parameter. | ||
| 159 | */ | ||
| 160 | unsigned long __round_jiffies_relative(unsigned long j, int cpu) | ||
| 161 | { | ||
| 162 | /* | ||
| 163 | * In theory the following code can skip a jiffy in case jiffies | ||
| 164 | * increments right between the addition and the later subtraction. | ||
| 165 | * However since the entire point of this function is to use approximate | ||
| 166 | * timeouts, it's entirely ok to not handle that. | ||
| 167 | */ | ||
| 168 | return __round_jiffies(j + jiffies, cpu) - jiffies; | ||
| 169 | } | ||
| 170 | EXPORT_SYMBOL_GPL(__round_jiffies_relative); | ||
| 171 | |||
| 172 | /** | ||
| 173 | * round_jiffies - function to round jiffies to a full second | ||
| 174 | * @j: the time in (absolute) jiffies that should be rounded | ||
| 175 | * | ||
| 176 | * round_jiffies rounds an absolute time in the future (in jiffies) | ||
| 177 | * up or down to (approximately) full seconds. This is useful for timers | ||
| 178 | * for which the exact time they fire does not matter too much, as long as | ||
| 179 | * they fire approximately every X seconds. | ||
| 180 | * | ||
| 181 | * By rounding these timers to whole seconds, all such timers will fire | ||
| 182 | * at the same time, rather than at various times spread out. The goal | ||
| 183 | * of this is to have the CPU wake up less, which saves power. | ||
| 184 | * | ||
| 185 | * The return value is the rounded version of the "j" parameter. | ||
| 186 | */ | ||
| 187 | unsigned long round_jiffies(unsigned long j) | ||
| 188 | { | ||
| 189 | return __round_jiffies(j, raw_smp_processor_id()); | ||
| 190 | } | ||
| 191 | EXPORT_SYMBOL_GPL(round_jiffies); | ||
| 192 | |||
| 193 | /** | ||
| 194 | * round_jiffies_relative - function to round jiffies to a full second | ||
| 195 | * @j: the time in (relative) jiffies that should be rounded | ||
| 196 | * | ||
| 197 | * round_jiffies_relative rounds a time delta in the future (in jiffies) | ||
| 198 | * up or down to (approximately) full seconds. This is useful for timers | ||
| 199 | * for which the exact time they fire does not matter too much, as long as | ||
| 200 | * they fire approximately every X seconds. | ||
| 201 | * | ||
| 202 | * By rounding these timers to whole seconds, all such timers will fire | ||
| 203 | * at the same time, rather than at various times spread out. The goal | ||
| 204 | * of this is to have the CPU wake up less, which saves power. | ||
| 205 | * | ||
| 206 | * The return value is the rounded version of the "j" parameter. | ||
| 207 | */ | ||
| 208 | unsigned long round_jiffies_relative(unsigned long j) | ||
| 209 | { | ||
| 210 | return __round_jiffies_relative(j, raw_smp_processor_id()); | ||
| 211 | } | ||
| 212 | EXPORT_SYMBOL_GPL(round_jiffies_relative); | ||
| 213 | |||
| 214 | |||
| 83 | static inline void set_running_timer(tvec_base_t *base, | 215 | static inline void set_running_timer(tvec_base_t *base, |
| 84 | struct timer_list *timer) | 216 | struct timer_list *timer) |
| 85 | { | 217 | { |
| @@ -714,7 +846,7 @@ static int change_clocksource(void) | |||
| 714 | clock = new; | 846 | clock = new; |
| 715 | clock->cycle_last = now; | 847 | clock->cycle_last = now; |
| 716 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | 848 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", |
| 717 | clock->name); | 849 | clock->name); |
| 718 | return 1; | 850 | return 1; |
| 719 | } else if (clock->update_callback) { | 851 | } else if (clock->update_callback) { |
| 720 | return clock->update_callback(); | 852 | return clock->update_callback(); |
| @@ -722,7 +854,10 @@ static int change_clocksource(void) | |||
| 722 | return 0; | 854 | return 0; |
| 723 | } | 855 | } |
| 724 | #else | 856 | #else |
| 725 | #define change_clocksource() (0) | 857 | static inline int change_clocksource(void) |
| 858 | { | ||
| 859 | return 0; | ||
| 860 | } | ||
| 726 | #endif | 861 | #endif |
| 727 | 862 | ||
| 728 | /** | 863 | /** |
| @@ -820,7 +955,8 @@ device_initcall(timekeeping_init_device); | |||
| 820 | * If the error is already larger, we look ahead even further | 955 | * If the error is already larger, we look ahead even further |
| 821 | * to compensate for late or lost adjustments. | 956 | * to compensate for late or lost adjustments. |
| 822 | */ | 957 | */ |
| 823 | static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 *offset) | 958 | static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, |
| 959 | s64 *offset) | ||
| 824 | { | 960 | { |
| 825 | s64 tick_error, i; | 961 | s64 tick_error, i; |
| 826 | u32 look_ahead, adj; | 962 | u32 look_ahead, adj; |
| @@ -844,7 +980,8 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 * | |||
| 844 | * Now calculate the error in (1 << look_ahead) ticks, but first | 980 | * Now calculate the error in (1 << look_ahead) ticks, but first |
| 845 | * remove the single look ahead already included in the error. | 981 | * remove the single look ahead already included in the error. |
| 846 | */ | 982 | */ |
| 847 | tick_error = current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1); | 983 | tick_error = current_tick_length() >> |
| 984 | (TICK_LENGTH_SHIFT - clock->shift + 1); | ||
| 848 | tick_error -= clock->xtime_interval >> 1; | 985 | tick_error -= clock->xtime_interval >> 1; |
| 849 | error = ((error - tick_error) >> look_ahead) + tick_error; | 986 | error = ((error - tick_error) >> look_ahead) + tick_error; |
| 850 | 987 | ||
| @@ -896,7 +1033,8 @@ static void clocksource_adjust(struct clocksource *clock, s64 offset) | |||
| 896 | clock->mult += adj; | 1033 | clock->mult += adj; |
| 897 | clock->xtime_interval += interval; | 1034 | clock->xtime_interval += interval; |
| 898 | clock->xtime_nsec -= offset; | 1035 | clock->xtime_nsec -= offset; |
| 899 | clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift); | 1036 | clock->error -= (interval - offset) << |
| 1037 | (TICK_LENGTH_SHIFT - clock->shift); | ||
| 900 | } | 1038 | } |
| 901 | 1039 | ||
| 902 | /** | 1040 | /** |
| @@ -1008,11 +1146,15 @@ static inline void calc_load(unsigned long ticks) | |||
| 1008 | unsigned long active_tasks; /* fixed-point */ | 1146 | unsigned long active_tasks; /* fixed-point */ |
| 1009 | static int count = LOAD_FREQ; | 1147 | static int count = LOAD_FREQ; |
| 1010 | 1148 | ||
| 1011 | active_tasks = count_active_tasks(); | 1149 | count -= ticks; |
| 1012 | for (count -= ticks; count < 0; count += LOAD_FREQ) { | 1150 | if (unlikely(count < 0)) { |
| 1013 | CALC_LOAD(avenrun[0], EXP_1, active_tasks); | 1151 | active_tasks = count_active_tasks(); |
| 1014 | CALC_LOAD(avenrun[1], EXP_5, active_tasks); | 1152 | do { |
| 1015 | CALC_LOAD(avenrun[2], EXP_15, active_tasks); | 1153 | CALC_LOAD(avenrun[0], EXP_1, active_tasks); |
| 1154 | CALC_LOAD(avenrun[1], EXP_5, active_tasks); | ||
| 1155 | CALC_LOAD(avenrun[2], EXP_15, active_tasks); | ||
| 1156 | count += LOAD_FREQ; | ||
| 1157 | } while (count < 0); | ||
| 1016 | } | 1158 | } |
| 1017 | } | 1159 | } |
| 1018 | 1160 | ||
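The timer.c hunk adds the round_jiffies() family, whose kernel-doc above spells out the algorithm: add a 3*cpu skew, round to the nearest HZ boundary (rounding down only when less than a quarter second past one), then remove the skew; the calc_load() hunk likewise defers count_active_tasks() until a full LOAD_FREQ interval has actually elapsed. A small userspace walk-through of the rounding arithmetic (assuming HZ = 1000 and skipping the final jiffies-overrun check, which needs the live counter) behaves like this:

	/* Userspace walk-through of the __round_jiffies() arithmetic above,
	 * assuming HZ = 1000 and ignoring the "rounding ate our timeout" check
	 * (which needs the live jiffies counter). Purely illustrative. */
	#include <stdio.h>

	#define HZ 1000

	static unsigned long round_to_second(unsigned long j, int cpu)
	{
		unsigned long rem;

		j += cpu * 3;           /* per-CPU skew so CPUs don't all fire together */
		rem = j % HZ;
		if (rem < HZ / 4)       /* just past a second boundary: round down */
			j -= rem;
		else                    /* otherwise round up to the next second */
			j = j - rem + HZ;
		return j - cpu * 3;     /* remove the skew again */
	}

	int main(void)
	{
		/* 5120 jiffies on CPU 0 rounds down to 5000; 5300 rounds up to 6000. */
		printf("%lu %lu\n", round_to_second(5120, 0), round_to_second(5300, 0));
		/* On CPU 2 the skew shifts the boundary by 6 jiffies before rounding. */
		printf("%lu\n", round_to_second(4996, 2));
		return 0;
	}

A caller would typically arm a housekeeping timer with something like mod_timer(&t, round_jiffies(jiffies + 5 * HZ)) so that many such timers expire together on second boundaries and the CPU wakes less often.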
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 96f77013d3f0..baacc3691415 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
| @@ -96,6 +96,15 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) | |||
| 96 | stats->write_char = p->wchar; | 96 | stats->write_char = p->wchar; |
| 97 | stats->read_syscalls = p->syscr; | 97 | stats->read_syscalls = p->syscr; |
| 98 | stats->write_syscalls = p->syscw; | 98 | stats->write_syscalls = p->syscw; |
| 99 | #ifdef CONFIG_TASK_IO_ACCOUNTING | ||
| 100 | stats->read_bytes = p->ioac.read_bytes; | ||
| 101 | stats->write_bytes = p->ioac.write_bytes; | ||
| 102 | stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes; | ||
| 103 | #else | ||
| 104 | stats->read_bytes = 0; | ||
| 105 | stats->write_bytes = 0; | ||
| 106 | stats->cancelled_write_bytes = 0; | ||
| 107 | #endif | ||
| 99 | } | 108 | } |
| 100 | #undef KB | 109 | #undef KB |
| 101 | #undef MB | 110 | #undef MB |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6b186750e9be..db49886bfae1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -85,22 +85,19 @@ static inline int is_single_threaded(struct workqueue_struct *wq) | |||
| 85 | return list_empty(&wq->list); | 85 | return list_empty(&wq->list); |
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | /* | ||
| 89 | * Set the workqueue on which a work item is to be run | ||
| 90 | * - Must *only* be called if the pending flag is set | ||
| 91 | */ | ||
| 88 | static inline void set_wq_data(struct work_struct *work, void *wq) | 92 | static inline void set_wq_data(struct work_struct *work, void *wq) |
| 89 | { | 93 | { |
| 90 | unsigned long new, old, res; | 94 | unsigned long new; |
| 95 | |||
| 96 | BUG_ON(!work_pending(work)); | ||
| 91 | 97 | ||
| 92 | /* assume the pending flag is already set and that the task has already | ||
| 93 | * been queued on this workqueue */ | ||
| 94 | new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING); | 98 | new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING); |
| 95 | res = work->management; | 99 | new |= work->management & WORK_STRUCT_FLAG_MASK; |
| 96 | if (res != new) { | 100 | work->management = new; |
| 97 | do { | ||
| 98 | old = res; | ||
| 99 | new = (unsigned long) wq; | ||
| 100 | new |= (old & WORK_STRUCT_FLAG_MASK); | ||
| 101 | res = cmpxchg(&work->management, old, new); | ||
| 102 | } while (res != old); | ||
| 103 | } | ||
| 104 | } | 101 | } |
| 105 | 102 | ||
| 106 | static inline void *get_wq_data(struct work_struct *work) | 103 | static inline void *get_wq_data(struct work_struct *work) |
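The simplified set_wq_data() above drops the cmpxchg() loop: the new BUG_ON() documents that the caller already owns the WORK_STRUCT_PENDING bit, so nothing else can be writing work->management concurrently and a plain store is enough. The underlying idiom is packing the workqueue pointer and a few flag bits into one word, using the low bits freed up by pointer alignment. A standalone sketch (bit positions and the mask below are assumptions for illustration, not the kernel's definitions) follows:

	/* Sketch of the pointer-plus-flags packing that set_wq_data() relies on:
	 * the low bits of an aligned pointer are free, so flag bits and the
	 * workqueue pointer share one word. Bit positions and the mask are
	 * assumed here for illustration. */
	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	#define WORK_PENDING_BIT   0UL
	#define WORK_FLAG_MASK     3UL          /* low two bits reserved for flags */

	struct work { uintptr_t management; };

	static void set_wq_data(struct work *w, void *wq)
	{
		uintptr_t new;

		/* Caller must already own the pending bit (no concurrent writers),
		 * so a plain read-modify-write suffices - no cmpxchg loop needed. */
		assert(w->management & (1UL << WORK_PENDING_BIT));

		new  = (uintptr_t)wq | (1UL << WORK_PENDING_BIT);
		new |= w->management & WORK_FLAG_MASK;  /* keep the other flag bits */
		w->management = new;
	}

	static void *get_wq_data(struct work *w)
	{
		return (void *)(w->management & ~WORK_FLAG_MASK);
	}

	int main(void)
	{
		static long queue;                      /* stands in for a workqueue */
		struct work w = { .management = 1UL << WORK_PENDING_BIT };

		set_wq_data(&w, &queue);
		printf("%d\n", get_wq_data(&w) == (void *)&queue);      /* prints 1 */
		return 0;
	}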
