author     Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>  2008-02-17 21:51:42 -0500
committer  Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>  2008-02-17 21:51:42 -0500
commit     c58310bf4933986513020fa90b4190c7492995ae (patch)
tree       143f2c7578d02ebef5db8fc57ae69e951ae0e2ee /kernel
parent     269cdfaf769f5cd831284cc831790c7c5038040f (diff)
parent     1309d4e68497184d2fd87e892ddf14076c2bda98 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus
Diffstat (limited to 'kernel')
73 files changed, 3711 insertions, 2034 deletions
diff --git a/kernel/.gitignore b/kernel/.gitignore
index f2ab70073bd4..ab4f1090f437 100644
--- a/kernel/.gitignore
+++ b/kernel/.gitignore
@@ -3,3 +3,4 @@
3 | # | 3 | # |
4 | config_data.h | 4 | config_data.h |
5 | config_data.gz | 5 | config_data.gz |
6 | timeconst.h | ||
diff --git a/kernel/Makefile b/kernel/Makefile
index db9af707ff5b..6c584c55a6e9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -4,12 +4,12 @@
4 | 4 | ||
5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | 5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ |
6 | exit.o itimer.o time.o softirq.o resource.o \ | 6 | exit.o itimer.o time.o softirq.o resource.o \ |
7 | sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ | 7 | sysctl.o capability.o ptrace.o timer.o user.o \ |
8 | signal.o sys.o kmod.o workqueue.o pid.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o \ |
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o \ |
12 | utsname.o notifier.o ksysfs.o | 12 | notifier.o ksysfs.o pm_qos_params.o |
13 | 13 | ||
14 | obj-$(CONFIG_SYSCTL) += sysctl_check.o | 14 | obj-$(CONFIG_SYSCTL) += sysctl_check.o |
15 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 15 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
@@ -42,7 +42,11 @@ obj-$(CONFIG_CGROUPS) += cgroup.o
42 | obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o | 42 | obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o |
43 | obj-$(CONFIG_CPUSETS) += cpuset.o | 43 | obj-$(CONFIG_CPUSETS) += cpuset.o |
44 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o | 44 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o |
45 | obj-$(CONFIG_UTS_NS) += utsname.o | ||
46 | obj-$(CONFIG_USER_NS) += user_namespace.o | ||
47 | obj-$(CONFIG_PID_NS) += pid_namespace.o | ||
45 | obj-$(CONFIG_IKCONFIG) += configs.o | 48 | obj-$(CONFIG_IKCONFIG) += configs.o |
49 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o | ||
46 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o | 50 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o |
47 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o | 51 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o |
48 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o | 52 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o |
@@ -87,3 +91,11 @@ quiet_cmd_ikconfiggz = IKCFG $@
87 | targets += config_data.h | 91 | targets += config_data.h |
88 | $(obj)/config_data.h: $(obj)/config_data.gz FORCE | 92 | $(obj)/config_data.h: $(obj)/config_data.gz FORCE |
89 | $(call if_changed,ikconfiggz) | 93 | $(call if_changed,ikconfiggz) |
94 | |||
95 | $(obj)/time.o: $(obj)/timeconst.h | ||
96 | |||
97 | quiet_cmd_timeconst = TIMEC $@ | ||
98 | cmd_timeconst = $(PERL) $< $(CONFIG_HZ) > $@ | ||
99 | targets += timeconst.h | ||
100 | $(obj)/timeconst.h: $(src)/timeconst.pl FORCE | ||
101 | $(call if_changed,timeconst) | ||
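The two rules added above generate kernel/timeconst.h from timeconst.pl for the configured CONFIG_HZ and make kernel/time.o depend on it, so jiffies/millisecond conversions can use precomputed multiply-and-shift constants instead of runtime division. The sketch below only illustrates that multiply/shift idea; the macro names are invented here and are not the ones the script emits.

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Illustration only: for HZ=250 one jiffy is exactly 4 ms, and the
     * generated header encodes the general case as a fixed-point multiply
     * plus shift so no division is needed at run time.
     */
    #define HZ               250
    #define HZ_TO_MSEC_MUL   ((uint64_t)4 << 30)   /* 4.0 in Q30 fixed point */
    #define HZ_TO_MSEC_SHIFT 30

    static uint32_t jiffies_to_msecs_fast(uint32_t j)
    {
        return (uint32_t)(((uint64_t)j * HZ_TO_MSEC_MUL) >> HZ_TO_MSEC_SHIFT);
    }

    int main(void)
    {
        printf("%u\n", (unsigned)jiffies_to_msecs_fast(HZ));  /* one second -> 1000 */
        return 0;
    }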
diff --git a/kernel/audit.c b/kernel/audit.c
index c8555b180213..2eeea9a14240 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1312,26 +1312,26 @@ void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
1312 | 1312 | ||
1313 | /* This is a helper-function to print the escaped d_path */ | 1313 | /* This is a helper-function to print the escaped d_path */ |
1314 | void audit_log_d_path(struct audit_buffer *ab, const char *prefix, | 1314 | void audit_log_d_path(struct audit_buffer *ab, const char *prefix, |
1315 | struct dentry *dentry, struct vfsmount *vfsmnt) | 1315 | struct path *path) |
1316 | { | 1316 | { |
1317 | char *p, *path; | 1317 | char *p, *pathname; |
1318 | 1318 | ||
1319 | if (prefix) | 1319 | if (prefix) |
1320 | audit_log_format(ab, " %s", prefix); | 1320 | audit_log_format(ab, " %s", prefix); |
1321 | 1321 | ||
1322 | /* We will allow 11 spaces for ' (deleted)' to be appended */ | 1322 | /* We will allow 11 spaces for ' (deleted)' to be appended */ |
1323 | path = kmalloc(PATH_MAX+11, ab->gfp_mask); | 1323 | pathname = kmalloc(PATH_MAX+11, ab->gfp_mask); |
1324 | if (!path) { | 1324 | if (!pathname) { |
1325 | audit_log_format(ab, "<no memory>"); | 1325 | audit_log_format(ab, "<no memory>"); |
1326 | return; | 1326 | return; |
1327 | } | 1327 | } |
1328 | p = d_path(dentry, vfsmnt, path, PATH_MAX+11); | 1328 | p = d_path(path, pathname, PATH_MAX+11); |
1329 | if (IS_ERR(p)) { /* Should never happen since we send PATH_MAX */ | 1329 | if (IS_ERR(p)) { /* Should never happen since we send PATH_MAX */ |
1330 | /* FIXME: can we save some information here? */ | 1330 | /* FIXME: can we save some information here? */ |
1331 | audit_log_format(ab, "<too long>"); | 1331 | audit_log_format(ab, "<too long>"); |
1332 | } else | 1332 | } else |
1333 | audit_log_untrustedstring(ab, p); | 1333 | audit_log_untrustedstring(ab, p); |
1334 | kfree(path); | 1334 | kfree(pathname); |
1335 | } | 1335 | } |
1336 | 1336 | ||
1337 | /** | 1337 | /** |
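With the conversion above, audit_log_d_path() takes a single struct path and d_path() takes that pointer plus a scratch buffer, so the dentry/vfsmount pair no longer travels separately. A minimal sketch of the new calling convention, assuming the 2.6.25-era prototypes shown in the hunk; log_path() is a made-up helper, not an audit API.

    #include <linux/dcache.h>   /* d_path() */
    #include <linux/err.h>      /* IS_ERR() */
    #include <linux/kernel.h>   /* printk() */
    #include <linux/limits.h>   /* PATH_MAX */
    #include <linux/path.h>     /* struct path */
    #include <linux/slab.h>     /* kmalloc()/kfree() */

    /* Resolve a struct path to a string the way the new audit_log_d_path()
     * does; error handling is trimmed for brevity. */
    static void log_path(struct path *path)
    {
        char *buf, *name;

        buf = kmalloc(PATH_MAX + 11, GFP_KERNEL);  /* room for " (deleted)" */
        if (!buf)
            return;
        name = d_path(path, buf, PATH_MAX + 11);   /* one argument instead of dentry + mnt */
        if (!IS_ERR(name))
            printk(KERN_INFO "path: %s\n", name);
        kfree(buf);
    }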
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f4fcf58f20f8..9ef5e0aacc3c 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -549,8 +549,8 @@ void audit_trim_trees(void)
549 | if (err) | 549 | if (err) |
550 | goto skip_it; | 550 | goto skip_it; |
551 | 551 | ||
552 | root_mnt = collect_mounts(nd.mnt, nd.dentry); | 552 | root_mnt = collect_mounts(nd.path.mnt, nd.path.dentry); |
553 | path_release(&nd); | 553 | path_put(&nd.path); |
554 | if (!root_mnt) | 554 | if (!root_mnt) |
555 | goto skip_it; | 555 | goto skip_it; |
556 | 556 | ||
@@ -583,17 +583,17 @@ skip_it:
583 | static int is_under(struct vfsmount *mnt, struct dentry *dentry, | 583 | static int is_under(struct vfsmount *mnt, struct dentry *dentry, |
584 | struct nameidata *nd) | 584 | struct nameidata *nd) |
585 | { | 585 | { |
586 | if (mnt != nd->mnt) { | 586 | if (mnt != nd->path.mnt) { |
587 | for (;;) { | 587 | for (;;) { |
588 | if (mnt->mnt_parent == mnt) | 588 | if (mnt->mnt_parent == mnt) |
589 | return 0; | 589 | return 0; |
590 | if (mnt->mnt_parent == nd->mnt) | 590 | if (mnt->mnt_parent == nd->path.mnt) |
591 | break; | 591 | break; |
592 | mnt = mnt->mnt_parent; | 592 | mnt = mnt->mnt_parent; |
593 | } | 593 | } |
594 | dentry = mnt->mnt_mountpoint; | 594 | dentry = mnt->mnt_mountpoint; |
595 | } | 595 | } |
596 | return is_subdir(dentry, nd->dentry); | 596 | return is_subdir(dentry, nd->path.dentry); |
597 | } | 597 | } |
598 | 598 | ||
599 | int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op) | 599 | int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op) |
@@ -641,8 +641,8 @@ int audit_add_tree_rule(struct audit_krule *rule)
641 | err = path_lookup(tree->pathname, 0, &nd); | 641 | err = path_lookup(tree->pathname, 0, &nd); |
642 | if (err) | 642 | if (err) |
643 | goto Err; | 643 | goto Err; |
644 | mnt = collect_mounts(nd.mnt, nd.dentry); | 644 | mnt = collect_mounts(nd.path.mnt, nd.path.dentry); |
645 | path_release(&nd); | 645 | path_put(&nd.path); |
646 | if (!mnt) { | 646 | if (!mnt) { |
647 | err = -ENOMEM; | 647 | err = -ENOMEM; |
648 | goto Err; | 648 | goto Err; |
@@ -701,8 +701,8 @@ int audit_tag_tree(char *old, char *new)
701 | err = path_lookup(new, 0, &nd); | 701 | err = path_lookup(new, 0, &nd); |
702 | if (err) | 702 | if (err) |
703 | return err; | 703 | return err; |
704 | tagged = collect_mounts(nd.mnt, nd.dentry); | 704 | tagged = collect_mounts(nd.path.mnt, nd.path.dentry); |
705 | path_release(&nd); | 705 | path_put(&nd.path); |
706 | if (!tagged) | 706 | if (!tagged) |
707 | return -ENOMEM; | 707 | return -ENOMEM; |
708 | 708 | ||
@@ -711,9 +711,9 @@ int audit_tag_tree(char *old, char *new)
711 | drop_collected_mounts(tagged); | 711 | drop_collected_mounts(tagged); |
712 | return err; | 712 | return err; |
713 | } | 713 | } |
714 | mnt = mntget(nd.mnt); | 714 | mnt = mntget(nd.path.mnt); |
715 | dentry = dget(nd.dentry); | 715 | dentry = dget(nd.path.dentry); |
716 | path_release(&nd); | 716 | path_put(&nd.path); |
717 | 717 | ||
718 | if (dentry == tagged->mnt_root && dentry == mnt->mnt_root) | 718 | if (dentry == tagged->mnt_root && dentry == mnt->mnt_root) |
719 | follow_up(&mnt, &dentry); | 719 | follow_up(&mnt, &dentry); |
@@ -744,13 +744,13 @@ int audit_tag_tree(char *old, char *new)
744 | spin_lock(&vfsmount_lock); | 744 | spin_lock(&vfsmount_lock); |
745 | if (!is_under(mnt, dentry, &nd)) { | 745 | if (!is_under(mnt, dentry, &nd)) { |
746 | spin_unlock(&vfsmount_lock); | 746 | spin_unlock(&vfsmount_lock); |
747 | path_release(&nd); | 747 | path_put(&nd.path); |
748 | put_tree(tree); | 748 | put_tree(tree); |
749 | mutex_lock(&audit_filter_mutex); | 749 | mutex_lock(&audit_filter_mutex); |
750 | continue; | 750 | continue; |
751 | } | 751 | } |
752 | spin_unlock(&vfsmount_lock); | 752 | spin_unlock(&vfsmount_lock); |
753 | path_release(&nd); | 753 | path_put(&nd.path); |
754 | 754 | ||
755 | list_for_each_entry(p, &list, mnt_list) { | 755 | list_for_each_entry(p, &list, mnt_list) { |
756 | failed = tag_chunk(p->mnt_root->d_inode, tree); | 756 | failed = tag_chunk(p->mnt_root->d_inode, tree); |
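Each lookup in this file now reads its result out of nd.path and drops both the mount and the dentry with a single path_put(&nd.path), where the old code called path_release(&nd). A sketch of that pattern, assuming the post-conversion nameidata layout; lookup_and_use() is illustrative only.

    #include <linux/namei.h>    /* path_lookup(), struct nameidata */
    #include <linux/path.h>     /* struct path, path_put() */

    static int lookup_and_use(const char *pathname)
    {
        struct nameidata nd;
        int err;

        err = path_lookup(pathname, 0, &nd);
        if (err)
            return err;

        /* use nd.path.mnt / nd.path.dentry here, e.g. collect_mounts() */

        path_put(&nd.path);      /* replaces path_release(&nd) */
        return 0;
    }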
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 6f19fd477aac..2f2914b7cc30 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -169,8 +169,8 @@ static struct audit_parent *audit_init_parent(struct nameidata *ndp)
169 | inotify_init_watch(&parent->wdata); | 169 | inotify_init_watch(&parent->wdata); |
170 | /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */ | 170 | /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */ |
171 | get_inotify_watch(&parent->wdata); | 171 | get_inotify_watch(&parent->wdata); |
172 | wd = inotify_add_watch(audit_ih, &parent->wdata, ndp->dentry->d_inode, | 172 | wd = inotify_add_watch(audit_ih, &parent->wdata, |
173 | AUDIT_IN_WATCH); | 173 | ndp->path.dentry->d_inode, AUDIT_IN_WATCH); |
174 | if (wd < 0) { | 174 | if (wd < 0) { |
175 | audit_free_parent(&parent->wdata); | 175 | audit_free_parent(&parent->wdata); |
176 | return ERR_PTR(wd); | 176 | return ERR_PTR(wd); |
@@ -1161,11 +1161,11 @@ static int audit_get_nd(char *path, struct nameidata **ndp,
1161 | static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw) | 1161 | static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw) |
1162 | { | 1162 | { |
1163 | if (ndp) { | 1163 | if (ndp) { |
1164 | path_release(ndp); | 1164 | path_put(&ndp->path); |
1165 | kfree(ndp); | 1165 | kfree(ndp); |
1166 | } | 1166 | } |
1167 | if (ndw) { | 1167 | if (ndw) { |
1168 | path_release(ndw); | 1168 | path_put(&ndw->path); |
1169 | kfree(ndw); | 1169 | kfree(ndw); |
1170 | } | 1170 | } |
1171 | } | 1171 | } |
@@ -1214,8 +1214,8 @@ static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
1214 | 1214 | ||
1215 | /* update watch filter fields */ | 1215 | /* update watch filter fields */ |
1216 | if (ndw) { | 1216 | if (ndw) { |
1217 | watch->dev = ndw->dentry->d_inode->i_sb->s_dev; | 1217 | watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev; |
1218 | watch->ino = ndw->dentry->d_inode->i_ino; | 1218 | watch->ino = ndw->path.dentry->d_inode->i_ino; |
1219 | } | 1219 | } |
1220 | 1220 | ||
1221 | /* The audit_filter_mutex must not be held during inotify calls because | 1221 | /* The audit_filter_mutex must not be held during inotify calls because |
@@ -1225,7 +1225,8 @@ static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
1225 | */ | 1225 | */ |
1226 | mutex_unlock(&audit_filter_mutex); | 1226 | mutex_unlock(&audit_filter_mutex); |
1227 | 1227 | ||
1228 | if (inotify_find_watch(audit_ih, ndp->dentry->d_inode, &i_watch) < 0) { | 1228 | if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode, |
1229 | &i_watch) < 0) { | ||
1229 | parent = audit_init_parent(ndp); | 1230 | parent = audit_init_parent(ndp); |
1230 | if (IS_ERR(parent)) { | 1231 | if (IS_ERR(parent)) { |
1231 | /* caller expects mutex locked */ | 1232 | /* caller expects mutex locked */ |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1c06ecf38d7b..ac6d9b23b018 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -208,8 +208,7 @@ struct audit_context {
208 | int name_count; | 208 | int name_count; |
209 | struct audit_names names[AUDIT_NAMES]; | 209 | struct audit_names names[AUDIT_NAMES]; |
210 | char * filterkey; /* key for rule that triggered record */ | 210 | char * filterkey; /* key for rule that triggered record */ |
211 | struct dentry * pwd; | 211 | struct path pwd; |
212 | struct vfsmount * pwdmnt; | ||
213 | struct audit_context *previous; /* For nested syscalls */ | 212 | struct audit_context *previous; /* For nested syscalls */ |
214 | struct audit_aux_data *aux; | 213 | struct audit_aux_data *aux; |
215 | struct audit_aux_data *aux_pids; | 214 | struct audit_aux_data *aux_pids; |
@@ -786,12 +785,9 @@ static inline void audit_free_names(struct audit_context *context)
786 | __putname(context->names[i].name); | 785 | __putname(context->names[i].name); |
787 | } | 786 | } |
788 | context->name_count = 0; | 787 | context->name_count = 0; |
789 | if (context->pwd) | 788 | path_put(&context->pwd); |
790 | dput(context->pwd); | 789 | context->pwd.dentry = NULL; |
791 | if (context->pwdmnt) | 790 | context->pwd.mnt = NULL; |
792 | mntput(context->pwdmnt); | ||
793 | context->pwd = NULL; | ||
794 | context->pwdmnt = NULL; | ||
795 | } | 791 | } |
796 | 792 | ||
797 | static inline void audit_free_aux(struct audit_context *context) | 793 | static inline void audit_free_aux(struct audit_context *context) |
@@ -930,8 +926,7 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
930 | if ((vma->vm_flags & VM_EXECUTABLE) && | 926 | if ((vma->vm_flags & VM_EXECUTABLE) && |
931 | vma->vm_file) { | 927 | vma->vm_file) { |
932 | audit_log_d_path(ab, "exe=", | 928 | audit_log_d_path(ab, "exe=", |
933 | vma->vm_file->f_path.dentry, | 929 | &vma->vm_file->f_path); |
934 | vma->vm_file->f_path.mnt); | ||
935 | break; | 930 | break; |
936 | } | 931 | } |
937 | vma = vma->vm_next; | 932 | vma = vma->vm_next; |
@@ -1341,10 +1336,10 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
1341 | context->target_sid, context->target_comm)) | 1336 | context->target_sid, context->target_comm)) |
1342 | call_panic = 1; | 1337 | call_panic = 1; |
1343 | 1338 | ||
1344 | if (context->pwd && context->pwdmnt) { | 1339 | if (context->pwd.dentry && context->pwd.mnt) { |
1345 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); | 1340 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); |
1346 | if (ab) { | 1341 | if (ab) { |
1347 | audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); | 1342 | audit_log_d_path(ab, "cwd=", &context->pwd); |
1348 | audit_log_end(ab); | 1343 | audit_log_end(ab); |
1349 | } | 1344 | } |
1350 | } | 1345 | } |
@@ -1367,8 +1362,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
1367 | case 0: | 1362 | case 0: |
1368 | /* name was specified as a relative path and the | 1363 | /* name was specified as a relative path and the |
1369 | * directory component is the cwd */ | 1364 | * directory component is the cwd */ |
1370 | audit_log_d_path(ab, " name=", context->pwd, | 1365 | audit_log_d_path(ab, " name=", &context->pwd); |
1371 | context->pwdmnt); | ||
1372 | break; | 1366 | break; |
1373 | default: | 1367 | default: |
1374 | /* log the name's directory component */ | 1368 | /* log the name's directory component */ |
@@ -1695,10 +1689,10 @@ void __audit_getname(const char *name)
1695 | context->names[context->name_count].ino = (unsigned long)-1; | 1689 | context->names[context->name_count].ino = (unsigned long)-1; |
1696 | context->names[context->name_count].osid = 0; | 1690 | context->names[context->name_count].osid = 0; |
1697 | ++context->name_count; | 1691 | ++context->name_count; |
1698 | if (!context->pwd) { | 1692 | if (!context->pwd.dentry) { |
1699 | read_lock(¤t->fs->lock); | 1693 | read_lock(¤t->fs->lock); |
1700 | context->pwd = dget(current->fs->pwd); | 1694 | context->pwd = current->fs->pwd; |
1701 | context->pwdmnt = mntget(current->fs->pwdmnt); | 1695 | path_get(¤t->fs->pwd); |
1702 | read_unlock(¤t->fs->lock); | 1696 | read_unlock(¤t->fs->lock); |
1703 | } | 1697 | } |
1704 | 1698 | ||
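The audit context's separate pwd dentry and vfsmount collapse into one struct path above: the reference is taken with path_get() when the cwd is captured and dropped with a single path_put() on free. A sketch of that ownership pattern under the same assumptions as the hunks; struct ctx and the helper names are invented for illustration.

    #include <linux/fs_struct.h>   /* current->fs */
    #include <linux/path.h>        /* struct path, path_get(), path_put() */
    #include <linux/sched.h>       /* current */

    struct ctx {
        struct path pwd;           /* replaces separate dentry + vfsmount fields */
    };

    /* Capture the caller's cwd once, pinning mount and dentry together. */
    static void ctx_capture_pwd(struct ctx *c)
    {
        read_lock(&current->fs->lock);
        c->pwd = current->fs->pwd;      /* struct copy of {mnt, dentry} */
        path_get(&current->fs->pwd);    /* one call instead of dget() + mntget() */
        read_unlock(&current->fs->lock);
    }

    static void ctx_release_pwd(struct ctx *c)
    {
        path_put(&c->pwd);              /* one call instead of dput() + mntput() */
        c->pwd.dentry = NULL;
        c->pwd.mnt = NULL;
    }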
diff --git a/kernel/capability.c b/kernel/capability.c
index efbd9cdce132..39e8193b41ea 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -22,6 +22,37 @@
22 | static DEFINE_SPINLOCK(task_capability_lock); | 22 | static DEFINE_SPINLOCK(task_capability_lock); |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * Leveraged for setting/resetting capabilities | ||
26 | */ | ||
27 | |||
28 | const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; | ||
29 | const kernel_cap_t __cap_full_set = CAP_FULL_SET; | ||
30 | const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET; | ||
31 | |||
32 | EXPORT_SYMBOL(__cap_empty_set); | ||
33 | EXPORT_SYMBOL(__cap_full_set); | ||
34 | EXPORT_SYMBOL(__cap_init_eff_set); | ||
35 | |||
36 | /* | ||
37 | * More recent versions of libcap are available from: | ||
38 | * | ||
39 | * http://www.kernel.org/pub/linux/libs/security/linux-privs/ | ||
40 | */ | ||
41 | |||
42 | static void warn_legacy_capability_use(void) | ||
43 | { | ||
44 | static int warned; | ||
45 | if (!warned) { | ||
46 | char name[sizeof(current->comm)]; | ||
47 | |||
48 | printk(KERN_INFO "warning: `%s' uses 32-bit capabilities" | ||
49 | " (legacy support in use)\n", | ||
50 | get_task_comm(name, current)); | ||
51 | warned = 1; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | /* | ||
25 | * For sys_getproccap() and sys_setproccap(), any of the three | 56 | * For sys_getproccap() and sys_setproccap(), any of the three |
26 | * capability set pointers may be NULL -- indicating that that set is | 57 | * capability set pointers may be NULL -- indicating that that set is |
27 | * uninteresting and/or not to be changed. | 58 | * uninteresting and/or not to be changed. |
@@ -42,12 +73,21 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
42 | pid_t pid; | 73 | pid_t pid; |
43 | __u32 version; | 74 | __u32 version; |
44 | struct task_struct *target; | 75 | struct task_struct *target; |
45 | struct __user_cap_data_struct data; | 76 | unsigned tocopy; |
77 | kernel_cap_t pE, pI, pP; | ||
46 | 78 | ||
47 | if (get_user(version, &header->version)) | 79 | if (get_user(version, &header->version)) |
48 | return -EFAULT; | 80 | return -EFAULT; |
49 | 81 | ||
50 | if (version != _LINUX_CAPABILITY_VERSION) { | 82 | switch (version) { |
83 | case _LINUX_CAPABILITY_VERSION_1: | ||
84 | warn_legacy_capability_use(); | ||
85 | tocopy = _LINUX_CAPABILITY_U32S_1; | ||
86 | break; | ||
87 | case _LINUX_CAPABILITY_VERSION_2: | ||
88 | tocopy = _LINUX_CAPABILITY_U32S_2; | ||
89 | break; | ||
90 | default: | ||
51 | if (put_user(_LINUX_CAPABILITY_VERSION, &header->version)) | 91 | if (put_user(_LINUX_CAPABILITY_VERSION, &header->version)) |
52 | return -EFAULT; | 92 | return -EFAULT; |
53 | return -EINVAL; | 93 | return -EINVAL; |
@@ -71,14 +111,47 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
71 | } else | 111 | } else |
72 | target = current; | 112 | target = current; |
73 | 113 | ||
74 | ret = security_capget(target, &data.effective, &data.inheritable, &data.permitted); | 114 | ret = security_capget(target, &pE, &pI, &pP); |
75 | 115 | ||
76 | out: | 116 | out: |
77 | read_unlock(&tasklist_lock); | 117 | read_unlock(&tasklist_lock); |
78 | spin_unlock(&task_capability_lock); | 118 | spin_unlock(&task_capability_lock); |
79 | 119 | ||
80 | if (!ret && copy_to_user(dataptr, &data, sizeof data)) | 120 | if (!ret) { |
81 | return -EFAULT; | 121 | struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S]; |
122 | unsigned i; | ||
123 | |||
124 | for (i = 0; i < tocopy; i++) { | ||
125 | kdata[i].effective = pE.cap[i]; | ||
126 | kdata[i].permitted = pP.cap[i]; | ||
127 | kdata[i].inheritable = pI.cap[i]; | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Note, in the case, tocopy < _LINUX_CAPABILITY_U32S, | ||
132 | * we silently drop the upper capabilities here. This | ||
133 | * has the effect of making older libcap | ||
134 | * implementations implicitly drop upper capability | ||
135 | * bits when they perform a: capget/modify/capset | ||
136 | * sequence. | ||
137 | * | ||
138 | * This behavior is considered fail-safe | ||
139 | * behavior. Upgrading the application to a newer | ||
140 | * version of libcap will enable access to the newer | ||
141 | * capabilities. | ||
142 | * | ||
143 | * An alternative would be to return an error here | ||
144 | * (-ERANGE), but that causes legacy applications to | ||
145 | * unexpectidly fail; the capget/modify/capset aborts | ||
146 | * before modification is attempted and the application | ||
147 | * fails. | ||
148 | */ | ||
149 | |||
150 | if (copy_to_user(dataptr, kdata, tocopy | ||
151 | * sizeof(struct __user_cap_data_struct))) { | ||
152 | return -EFAULT; | ||
153 | } | ||
154 | } | ||
82 | 155 | ||
83 | return ret; | 156 | return ret; |
84 | } | 157 | } |
@@ -167,6 +240,8 @@ static inline int cap_set_all(kernel_cap_t *effective,
167 | */ | 240 | */ |
168 | asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) | 241 | asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) |
169 | { | 242 | { |
243 | struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S]; | ||
244 | unsigned i, tocopy; | ||
170 | kernel_cap_t inheritable, permitted, effective; | 245 | kernel_cap_t inheritable, permitted, effective; |
171 | __u32 version; | 246 | __u32 version; |
172 | struct task_struct *target; | 247 | struct task_struct *target; |
@@ -176,7 +251,15 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
176 | if (get_user(version, &header->version)) | 251 | if (get_user(version, &header->version)) |
177 | return -EFAULT; | 252 | return -EFAULT; |
178 | 253 | ||
179 | if (version != _LINUX_CAPABILITY_VERSION) { | 254 | switch (version) { |
255 | case _LINUX_CAPABILITY_VERSION_1: | ||
256 | warn_legacy_capability_use(); | ||
257 | tocopy = _LINUX_CAPABILITY_U32S_1; | ||
258 | break; | ||
259 | case _LINUX_CAPABILITY_VERSION_2: | ||
260 | tocopy = _LINUX_CAPABILITY_U32S_2; | ||
261 | break; | ||
262 | default: | ||
180 | if (put_user(_LINUX_CAPABILITY_VERSION, &header->version)) | 263 | if (put_user(_LINUX_CAPABILITY_VERSION, &header->version)) |
181 | return -EFAULT; | 264 | return -EFAULT; |
182 | return -EINVAL; | 265 | return -EINVAL; |
@@ -188,10 +271,22 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
188 | if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP)) | 271 | if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP)) |
189 | return -EPERM; | 272 | return -EPERM; |
190 | 273 | ||
191 | if (copy_from_user(&effective, &data->effective, sizeof(effective)) || | 274 | if (copy_from_user(&kdata, data, tocopy |
192 | copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) || | 275 | * sizeof(struct __user_cap_data_struct))) { |
193 | copy_from_user(&permitted, &data->permitted, sizeof(permitted))) | ||
194 | return -EFAULT; | 276 | return -EFAULT; |
277 | } | ||
278 | |||
279 | for (i = 0; i < tocopy; i++) { | ||
280 | effective.cap[i] = kdata[i].effective; | ||
281 | permitted.cap[i] = kdata[i].permitted; | ||
282 | inheritable.cap[i] = kdata[i].inheritable; | ||
283 | } | ||
284 | while (i < _LINUX_CAPABILITY_U32S) { | ||
285 | effective.cap[i] = 0; | ||
286 | permitted.cap[i] = 0; | ||
287 | inheritable.cap[i] = 0; | ||
288 | i++; | ||
289 | } | ||
195 | 290 | ||
196 | spin_lock(&task_capability_lock); | 291 | spin_lock(&task_capability_lock); |
197 | read_lock(&tasklist_lock); | 292 | read_lock(&tasklist_lock); |
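The reworked capget()/capset() switch on the user header version to decide how many 32-bit words of each capability set to copy (tocopy), zero-filling the words a legacy single-word libcap never supplies and silently dropping the upper words on the way back out. A small standalone sketch of that marshalling step, with plain structs standing in for the kernel and user types.

    #include <stdint.h>
    #include <stdio.h>

    #define CAP_U32S 2   /* mirrors _LINUX_CAPABILITY_U32S_2: two 32-bit words per set */

    struct cap_data { uint32_t effective, permitted, inheritable; };
    struct cap_set  { uint32_t cap[CAP_U32S]; };

    /* Sketch of the capset() copy-in step: take 'tocopy' words from the
     * caller, then zero-fill whatever a v1 (single-word) caller omitted. */
    static void unpack_caps(const struct cap_data *kdata, unsigned tocopy,
                            struct cap_set *eff, struct cap_set *perm,
                            struct cap_set *inh)
    {
        unsigned i;

        for (i = 0; i < tocopy; i++) {
            eff->cap[i]  = kdata[i].effective;
            perm->cap[i] = kdata[i].permitted;
            inh->cap[i]  = kdata[i].inheritable;
        }
        for (; i < CAP_U32S; i++)
            eff->cap[i] = perm->cap[i] = inh->cap[i] = 0;
    }

    int main(void)
    {
        struct cap_data legacy = { 0x1, 0x1, 0x0 };   /* one word, as from v1 libcap */
        struct cap_set eff, perm, inh;

        unpack_caps(&legacy, 1, &eff, &perm, &inh);
        printf("effective = %08x %08x\n", (unsigned)eff.cap[1], (unsigned)eff.cap[0]);
        return 0;
    }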
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1a3c23936d43..4766bb65e4d9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -141,7 +141,7 @@ enum {
141 | ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ | 141 | ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ |
142 | }; | 142 | }; |
143 | 143 | ||
144 | inline int cgroup_is_releasable(const struct cgroup *cgrp) | 144 | static int cgroup_is_releasable(const struct cgroup *cgrp) |
145 | { | 145 | { |
146 | const int bits = | 146 | const int bits = |
147 | (1 << CGRP_RELEASABLE) | | 147 | (1 << CGRP_RELEASABLE) | |
@@ -149,7 +149,7 @@ inline int cgroup_is_releasable(const struct cgroup *cgrp)
149 | return (cgrp->flags & bits) == bits; | 149 | return (cgrp->flags & bits) == bits; |
150 | } | 150 | } |
151 | 151 | ||
152 | inline int notify_on_release(const struct cgroup *cgrp) | 152 | static int notify_on_release(const struct cgroup *cgrp) |
153 | { | 153 | { |
154 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 154 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
155 | } | 155 | } |
@@ -489,7 +489,7 @@ static struct css_set *find_css_set(
489 | * Any task can increment and decrement the count field without lock. | 489 | * Any task can increment and decrement the count field without lock. |
490 | * So in general, code holding cgroup_mutex can't rely on the count | 490 | * So in general, code holding cgroup_mutex can't rely on the count |
491 | * field not changing. However, if the count goes to zero, then only | 491 | * field not changing. However, if the count goes to zero, then only |
492 | * attach_task() can increment it again. Because a count of zero | 492 | * cgroup_attach_task() can increment it again. Because a count of zero |
493 | * means that no tasks are currently attached, therefore there is no | 493 | * means that no tasks are currently attached, therefore there is no |
494 | * way a task attached to that cgroup can fork (the other way to | 494 | * way a task attached to that cgroup can fork (the other way to |
495 | * increment the count). So code holding cgroup_mutex can safely | 495 | * increment the count). So code holding cgroup_mutex can safely |
@@ -520,17 +520,17 @@ static struct css_set *find_css_set(
520 | * The task_lock() exception | 520 | * The task_lock() exception |
521 | * | 521 | * |
522 | * The need for this exception arises from the action of | 522 | * The need for this exception arises from the action of |
523 | * attach_task(), which overwrites one tasks cgroup pointer with | 523 | * cgroup_attach_task(), which overwrites one tasks cgroup pointer with |
524 | * another. It does so using cgroup_mutexe, however there are | 524 | * another. It does so using cgroup_mutexe, however there are |
525 | * several performance critical places that need to reference | 525 | * several performance critical places that need to reference |
526 | * task->cgroup without the expense of grabbing a system global | 526 | * task->cgroup without the expense of grabbing a system global |
527 | * mutex. Therefore except as noted below, when dereferencing or, as | 527 | * mutex. Therefore except as noted below, when dereferencing or, as |
528 | * in attach_task(), modifying a task'ss cgroup pointer we use | 528 | * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use |
529 | * task_lock(), which acts on a spinlock (task->alloc_lock) already in | 529 | * task_lock(), which acts on a spinlock (task->alloc_lock) already in |
530 | * the task_struct routinely used for such matters. | 530 | * the task_struct routinely used for such matters. |
531 | * | 531 | * |
532 | * P.S. One more locking exception. RCU is used to guard the | 532 | * P.S. One more locking exception. RCU is used to guard the |
533 | * update of a tasks cgroup pointer by attach_task() | 533 | * update of a tasks cgroup pointer by cgroup_attach_task() |
534 | */ | 534 | */ |
535 | 535 | ||
536 | /** | 536 | /** |
@@ -586,11 +586,27 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
586 | return inode; | 586 | return inode; |
587 | } | 587 | } |
588 | 588 | ||
589 | /* | ||
590 | * Call subsys's pre_destroy handler. | ||
591 | * This is called before css refcnt check. | ||
592 | */ | ||
593 | |||
594 | static void cgroup_call_pre_destroy(struct cgroup *cgrp) | ||
595 | { | ||
596 | struct cgroup_subsys *ss; | ||
597 | for_each_subsys(cgrp->root, ss) | ||
598 | if (ss->pre_destroy && cgrp->subsys[ss->subsys_id]) | ||
599 | ss->pre_destroy(ss, cgrp); | ||
600 | return; | ||
601 | } | ||
602 | |||
603 | |||
589 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 604 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) |
590 | { | 605 | { |
591 | /* is dentry a directory ? if so, kfree() associated cgroup */ | 606 | /* is dentry a directory ? if so, kfree() associated cgroup */ |
592 | if (S_ISDIR(inode->i_mode)) { | 607 | if (S_ISDIR(inode->i_mode)) { |
593 | struct cgroup *cgrp = dentry->d_fsdata; | 608 | struct cgroup *cgrp = dentry->d_fsdata; |
609 | struct cgroup_subsys *ss; | ||
594 | BUG_ON(!(cgroup_is_removed(cgrp))); | 610 | BUG_ON(!(cgroup_is_removed(cgrp))); |
595 | /* It's possible for external users to be holding css | 611 | /* It's possible for external users to be holding css |
596 | * reference counts on a cgroup; css_put() needs to | 612 | * reference counts on a cgroup; css_put() needs to |
@@ -599,6 +615,23 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
599 | * queue the cgroup to be handled by the release | 615 | * queue the cgroup to be handled by the release |
600 | * agent */ | 616 | * agent */ |
601 | synchronize_rcu(); | 617 | synchronize_rcu(); |
618 | |||
619 | mutex_lock(&cgroup_mutex); | ||
620 | /* | ||
621 | * Release the subsystem state objects. | ||
622 | */ | ||
623 | for_each_subsys(cgrp->root, ss) { | ||
624 | if (cgrp->subsys[ss->subsys_id]) | ||
625 | ss->destroy(ss, cgrp); | ||
626 | } | ||
627 | |||
628 | cgrp->root->number_of_cgroups--; | ||
629 | mutex_unlock(&cgroup_mutex); | ||
630 | |||
631 | /* Drop the active superblock reference that we took when we | ||
632 | * created the cgroup */ | ||
633 | deactivate_super(cgrp->root->sb); | ||
634 | |||
602 | kfree(cgrp); | 635 | kfree(cgrp); |
603 | } | 636 | } |
604 | iput(inode); | 637 | iput(inode); |
@@ -1161,7 +1194,7 @@ static void get_first_subsys(const struct cgroup *cgrp,
1161 | * Call holding cgroup_mutex. May take task_lock of | 1194 | * Call holding cgroup_mutex. May take task_lock of |
1162 | * the task 'pid' during call. | 1195 | * the task 'pid' during call. |
1163 | */ | 1196 | */ |
1164 | static int attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 1197 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) |
1165 | { | 1198 | { |
1166 | int retval = 0; | 1199 | int retval = 0; |
1167 | struct cgroup_subsys *ss; | 1200 | struct cgroup_subsys *ss; |
@@ -1181,9 +1214,8 @@ static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1181 | for_each_subsys(root, ss) { | 1214 | for_each_subsys(root, ss) { |
1182 | if (ss->can_attach) { | 1215 | if (ss->can_attach) { |
1183 | retval = ss->can_attach(ss, cgrp, tsk); | 1216 | retval = ss->can_attach(ss, cgrp, tsk); |
1184 | if (retval) { | 1217 | if (retval) |
1185 | return retval; | 1218 | return retval; |
1186 | } | ||
1187 | } | 1219 | } |
1188 | } | 1220 | } |
1189 | 1221 | ||
@@ -1192,9 +1224,8 @@ static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1192 | * based on its final set of cgroups | 1224 | * based on its final set of cgroups |
1193 | */ | 1225 | */ |
1194 | newcg = find_css_set(cg, cgrp); | 1226 | newcg = find_css_set(cg, cgrp); |
1195 | if (!newcg) { | 1227 | if (!newcg) |
1196 | return -ENOMEM; | 1228 | return -ENOMEM; |
1197 | } | ||
1198 | 1229 | ||
1199 | task_lock(tsk); | 1230 | task_lock(tsk); |
1200 | if (tsk->flags & PF_EXITING) { | 1231 | if (tsk->flags & PF_EXITING) { |
@@ -1214,9 +1245,8 @@ static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1214 | write_unlock(&css_set_lock); | 1245 | write_unlock(&css_set_lock); |
1215 | 1246 | ||
1216 | for_each_subsys(root, ss) { | 1247 | for_each_subsys(root, ss) { |
1217 | if (ss->attach) { | 1248 | if (ss->attach) |
1218 | ss->attach(ss, cgrp, oldcgrp, tsk); | 1249 | ss->attach(ss, cgrp, oldcgrp, tsk); |
1219 | } | ||
1220 | } | 1250 | } |
1221 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | 1251 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); |
1222 | synchronize_rcu(); | 1252 | synchronize_rcu(); |
@@ -1239,7 +1269,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
1239 | 1269 | ||
1240 | if (pid) { | 1270 | if (pid) { |
1241 | rcu_read_lock(); | 1271 | rcu_read_lock(); |
1242 | tsk = find_task_by_pid(pid); | 1272 | tsk = find_task_by_vpid(pid); |
1243 | if (!tsk || tsk->flags & PF_EXITING) { | 1273 | if (!tsk || tsk->flags & PF_EXITING) { |
1244 | rcu_read_unlock(); | 1274 | rcu_read_unlock(); |
1245 | return -ESRCH; | 1275 | return -ESRCH; |
@@ -1257,7 +1287,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
1257 | get_task_struct(tsk); | 1287 | get_task_struct(tsk); |
1258 | } | 1288 | } |
1259 | 1289 | ||
1260 | ret = attach_task(cgrp, tsk); | 1290 | ret = cgroup_attach_task(cgrp, tsk); |
1261 | put_task_struct(tsk); | 1291 | put_task_struct(tsk); |
1262 | return ret; | 1292 | return ret; |
1263 | } | 1293 | } |
@@ -1329,9 +1359,14 @@ static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
1329 | goto out1; | 1359 | goto out1; |
1330 | } | 1360 | } |
1331 | buffer[nbytes] = 0; /* nul-terminate */ | 1361 | buffer[nbytes] = 0; /* nul-terminate */ |
1362 | strstrip(buffer); /* strip -just- trailing whitespace */ | ||
1332 | 1363 | ||
1333 | mutex_lock(&cgroup_mutex); | 1364 | mutex_lock(&cgroup_mutex); |
1334 | 1365 | ||
1366 | /* | ||
1367 | * This was already checked for in cgroup_file_write(), but | ||
1368 | * check again now we're holding cgroup_mutex. | ||
1369 | */ | ||
1335 | if (cgroup_is_removed(cgrp)) { | 1370 | if (cgroup_is_removed(cgrp)) { |
1336 | retval = -ENODEV; | 1371 | retval = -ENODEV; |
1337 | goto out2; | 1372 | goto out2; |
@@ -1349,24 +1384,9 @@ static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
1349 | clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 1384 | clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
1350 | break; | 1385 | break; |
1351 | case FILE_RELEASE_AGENT: | 1386 | case FILE_RELEASE_AGENT: |
1352 | { | 1387 | BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); |
1353 | struct cgroupfs_root *root = cgrp->root; | 1388 | strcpy(cgrp->root->release_agent_path, buffer); |
1354 | /* Strip trailing newline */ | ||
1355 | if (nbytes && (buffer[nbytes-1] == '\n')) { | ||
1356 | buffer[nbytes-1] = 0; | ||
1357 | } | ||
1358 | if (nbytes < sizeof(root->release_agent_path)) { | ||
1359 | /* We never write anything other than '\0' | ||
1360 | * into the last char of release_agent_path, | ||
1361 | * so it always remains a NUL-terminated | ||
1362 | * string */ | ||
1363 | strncpy(root->release_agent_path, buffer, nbytes); | ||
1364 | root->release_agent_path[nbytes] = 0; | ||
1365 | } else { | ||
1366 | retval = -ENOSPC; | ||
1367 | } | ||
1368 | break; | 1389 | break; |
1369 | } | ||
1370 | default: | 1390 | default: |
1371 | retval = -EINVAL; | 1391 | retval = -EINVAL; |
1372 | goto out2; | 1392 | goto out2; |
@@ -1387,7 +1407,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1387 | struct cftype *cft = __d_cft(file->f_dentry); | 1407 | struct cftype *cft = __d_cft(file->f_dentry); |
1388 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 1408 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
1389 | 1409 | ||
1390 | if (!cft) | 1410 | if (!cft || cgroup_is_removed(cgrp)) |
1391 | return -ENODEV; | 1411 | return -ENODEV; |
1392 | if (cft->write) | 1412 | if (cft->write) |
1393 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); | 1413 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); |
@@ -1457,7 +1477,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
1457 | struct cftype *cft = __d_cft(file->f_dentry); | 1477 | struct cftype *cft = __d_cft(file->f_dentry); |
1458 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 1478 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
1459 | 1479 | ||
1460 | if (!cft) | 1480 | if (!cft || cgroup_is_removed(cgrp)) |
1461 | return -ENODEV; | 1481 | return -ENODEV; |
1462 | 1482 | ||
1463 | if (cft->read) | 1483 | if (cft->read) |
@@ -1675,6 +1695,29 @@ static void cgroup_advance_iter(struct cgroup *cgrp,
1675 | it->task = cg->tasks.next; | 1695 | it->task = cg->tasks.next; |
1676 | } | 1696 | } |
1677 | 1697 | ||
1698 | /* | ||
1699 | * To reduce the fork() overhead for systems that are not actually | ||
1700 | * using their cgroups capability, we don't maintain the lists running | ||
1701 | * through each css_set to its tasks until we see the list actually | ||
1702 | * used - in other words after the first call to cgroup_iter_start(). | ||
1703 | * | ||
1704 | * The tasklist_lock is not held here, as do_each_thread() and | ||
1705 | * while_each_thread() are protected by RCU. | ||
1706 | */ | ||
1707 | void cgroup_enable_task_cg_lists(void) | ||
1708 | { | ||
1709 | struct task_struct *p, *g; | ||
1710 | write_lock(&css_set_lock); | ||
1711 | use_task_css_set_links = 1; | ||
1712 | do_each_thread(g, p) { | ||
1713 | task_lock(p); | ||
1714 | if (list_empty(&p->cg_list)) | ||
1715 | list_add(&p->cg_list, &p->cgroups->tasks); | ||
1716 | task_unlock(p); | ||
1717 | } while_each_thread(g, p); | ||
1718 | write_unlock(&css_set_lock); | ||
1719 | } | ||
1720 | |||
1678 | void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) | 1721 | void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) |
1679 | { | 1722 | { |
1680 | /* | 1723 | /* |
@@ -1682,18 +1725,9 @@ void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
1682 | * we need to enable the list linking each css_set to its | 1725 | * we need to enable the list linking each css_set to its |
1683 | * tasks, and fix up all existing tasks. | 1726 | * tasks, and fix up all existing tasks. |
1684 | */ | 1727 | */ |
1685 | if (!use_task_css_set_links) { | 1728 | if (!use_task_css_set_links) |
1686 | struct task_struct *p, *g; | 1729 | cgroup_enable_task_cg_lists(); |
1687 | write_lock(&css_set_lock); | 1730 | |
1688 | use_task_css_set_links = 1; | ||
1689 | do_each_thread(g, p) { | ||
1690 | task_lock(p); | ||
1691 | if (list_empty(&p->cg_list)) | ||
1692 | list_add(&p->cg_list, &p->cgroups->tasks); | ||
1693 | task_unlock(p); | ||
1694 | } while_each_thread(g, p); | ||
1695 | write_unlock(&css_set_lock); | ||
1696 | } | ||
1697 | read_lock(&css_set_lock); | 1731 | read_lock(&css_set_lock); |
1698 | it->cg_link = &cgrp->css_sets; | 1732 | it->cg_link = &cgrp->css_sets; |
1699 | cgroup_advance_iter(cgrp, it); | 1733 | cgroup_advance_iter(cgrp, it); |
@@ -1726,6 +1760,166 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
1726 | read_unlock(&css_set_lock); | 1760 | read_unlock(&css_set_lock); |
1727 | } | 1761 | } |
1728 | 1762 | ||
1763 | static inline int started_after_time(struct task_struct *t1, | ||
1764 | struct timespec *time, | ||
1765 | struct task_struct *t2) | ||
1766 | { | ||
1767 | int start_diff = timespec_compare(&t1->start_time, time); | ||
1768 | if (start_diff > 0) { | ||
1769 | return 1; | ||
1770 | } else if (start_diff < 0) { | ||
1771 | return 0; | ||
1772 | } else { | ||
1773 | /* | ||
1774 | * Arbitrarily, if two processes started at the same | ||
1775 | * time, we'll say that the lower pointer value | ||
1776 | * started first. Note that t2 may have exited by now | ||
1777 | * so this may not be a valid pointer any longer, but | ||
1778 | * that's fine - it still serves to distinguish | ||
1779 | * between two tasks started (effectively) simultaneously. | ||
1780 | */ | ||
1781 | return t1 > t2; | ||
1782 | } | ||
1783 | } | ||
1784 | |||
1785 | /* | ||
1786 | * This function is a callback from heap_insert() and is used to order | ||
1787 | * the heap. | ||
1788 | * In this case we order the heap in descending task start time. | ||
1789 | */ | ||
1790 | static inline int started_after(void *p1, void *p2) | ||
1791 | { | ||
1792 | struct task_struct *t1 = p1; | ||
1793 | struct task_struct *t2 = p2; | ||
1794 | return started_after_time(t1, &t2->start_time, t2); | ||
1795 | } | ||
1796 | |||
1797 | /** | ||
1798 | * cgroup_scan_tasks - iterate though all the tasks in a cgroup | ||
1799 | * @scan: struct cgroup_scanner containing arguments for the scan | ||
1800 | * | ||
1801 | * Arguments include pointers to callback functions test_task() and | ||
1802 | * process_task(). | ||
1803 | * Iterate through all the tasks in a cgroup, calling test_task() for each, | ||
1804 | * and if it returns true, call process_task() for it also. | ||
1805 | * The test_task pointer may be NULL, meaning always true (select all tasks). | ||
1806 | * Effectively duplicates cgroup_iter_{start,next,end}() | ||
1807 | * but does not lock css_set_lock for the call to process_task(). | ||
1808 | * The struct cgroup_scanner may be embedded in any structure of the caller's | ||
1809 | * creation. | ||
1810 | * It is guaranteed that process_task() will act on every task that | ||
1811 | * is a member of the cgroup for the duration of this call. This | ||
1812 | * function may or may not call process_task() for tasks that exit | ||
1813 | * or move to a different cgroup during the call, or are forked or | ||
1814 | * move into the cgroup during the call. | ||
1815 | * | ||
1816 | * Note that test_task() may be called with locks held, and may in some | ||
1817 | * situations be called multiple times for the same task, so it should | ||
1818 | * be cheap. | ||
1819 | * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been | ||
1820 | * pre-allocated and will be used for heap operations (and its "gt" member will | ||
1821 | * be overwritten), else a temporary heap will be used (allocation of which | ||
1822 | * may cause this function to fail). | ||
1823 | */ | ||
1824 | int cgroup_scan_tasks(struct cgroup_scanner *scan) | ||
1825 | { | ||
1826 | int retval, i; | ||
1827 | struct cgroup_iter it; | ||
1828 | struct task_struct *p, *dropped; | ||
1829 | /* Never dereference latest_task, since it's not refcounted */ | ||
1830 | struct task_struct *latest_task = NULL; | ||
1831 | struct ptr_heap tmp_heap; | ||
1832 | struct ptr_heap *heap; | ||
1833 | struct timespec latest_time = { 0, 0 }; | ||
1834 | |||
1835 | if (scan->heap) { | ||
1836 | /* The caller supplied our heap and pre-allocated its memory */ | ||
1837 | heap = scan->heap; | ||
1838 | heap->gt = &started_after; | ||
1839 | } else { | ||
1840 | /* We need to allocate our own heap memory */ | ||
1841 | heap = &tmp_heap; | ||
1842 | retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after); | ||
1843 | if (retval) | ||
1844 | /* cannot allocate the heap */ | ||
1845 | return retval; | ||
1846 | } | ||
1847 | |||
1848 | again: | ||
1849 | /* | ||
1850 | * Scan tasks in the cgroup, using the scanner's "test_task" callback | ||
1851 | * to determine which are of interest, and using the scanner's | ||
1852 | * "process_task" callback to process any of them that need an update. | ||
1853 | * Since we don't want to hold any locks during the task updates, | ||
1854 | * gather tasks to be processed in a heap structure. | ||
1855 | * The heap is sorted by descending task start time. | ||
1856 | * If the statically-sized heap fills up, we overflow tasks that | ||
1857 | * started later, and in future iterations only consider tasks that | ||
1858 | * started after the latest task in the previous pass. This | ||
1859 | * guarantees forward progress and that we don't miss any tasks. | ||
1860 | */ | ||
1861 | heap->size = 0; | ||
1862 | cgroup_iter_start(scan->cg, &it); | ||
1863 | while ((p = cgroup_iter_next(scan->cg, &it))) { | ||
1864 | /* | ||
1865 | * Only affect tasks that qualify per the caller's callback, | ||
1866 | * if he provided one | ||
1867 | */ | ||
1868 | if (scan->test_task && !scan->test_task(p, scan)) | ||
1869 | continue; | ||
1870 | /* | ||
1871 | * Only process tasks that started after the last task | ||
1872 | * we processed | ||
1873 | */ | ||
1874 | if (!started_after_time(p, &latest_time, latest_task)) | ||
1875 | continue; | ||
1876 | dropped = heap_insert(heap, p); | ||
1877 | if (dropped == NULL) { | ||
1878 | /* | ||
1879 | * The new task was inserted; the heap wasn't | ||
1880 | * previously full | ||
1881 | */ | ||
1882 | get_task_struct(p); | ||
1883 | } else if (dropped != p) { | ||
1884 | /* | ||
1885 | * The new task was inserted, and pushed out a | ||
1886 | * different task | ||
1887 | */ | ||
1888 | get_task_struct(p); | ||
1889 | put_task_struct(dropped); | ||
1890 | } | ||
1891 | /* | ||
1892 | * Else the new task was newer than anything already in | ||
1893 | * the heap and wasn't inserted | ||
1894 | */ | ||
1895 | } | ||
1896 | cgroup_iter_end(scan->cg, &it); | ||
1897 | |||
1898 | if (heap->size) { | ||
1899 | for (i = 0; i < heap->size; i++) { | ||
1900 | struct task_struct *p = heap->ptrs[i]; | ||
1901 | if (i == 0) { | ||
1902 | latest_time = p->start_time; | ||
1903 | latest_task = p; | ||
1904 | } | ||
1905 | /* Process the task per the caller's callback */ | ||
1906 | scan->process_task(p, scan); | ||
1907 | put_task_struct(p); | ||
1908 | } | ||
1909 | /* | ||
1910 | * If we had to process any tasks at all, scan again | ||
1911 | * in case some of them were in the middle of forking | ||
1912 | * children that didn't get processed. | ||
1913 | * Not the most efficient way to do it, but it avoids | ||
1914 | * having to take callback_mutex in the fork path | ||
1915 | */ | ||
1916 | goto again; | ||
1917 | } | ||
1918 | if (heap == &tmp_heap) | ||
1919 | heap_free(&tmp_heap); | ||
1920 | return 0; | ||
1921 | } | ||
1922 | |||
1729 | /* | 1923 | /* |
1730 | * Stuff for reading the 'tasks' file. | 1924 | * Stuff for reading the 'tasks' file. |
1731 | * | 1925 | * |
@@ -1761,7 +1955,7 @@ static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
1761 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 1955 | while ((tsk = cgroup_iter_next(cgrp, &it))) { |
1762 | if (unlikely(n == npids)) | 1956 | if (unlikely(n == npids)) |
1763 | break; | 1957 | break; |
1764 | pidarray[n++] = task_pid_nr(tsk); | 1958 | pidarray[n++] = task_pid_vnr(tsk); |
1765 | } | 1959 | } |
1766 | cgroup_iter_end(cgrp, &it); | 1960 | cgroup_iter_end(cgrp, &it); |
1767 | return n; | 1961 | return n; |
@@ -2126,9 +2320,8 @@ static inline int cgroup_has_css_refs(struct cgroup *cgrp)
2126 | * matter, since it can only happen if the cgroup | 2320 | * matter, since it can only happen if the cgroup |
2127 | * has been deleted and hence no longer needs the | 2321 | * has been deleted and hence no longer needs the |
2128 | * release agent to be called anyway. */ | 2322 | * release agent to be called anyway. */ |
2129 | if (css && atomic_read(&css->refcnt)) { | 2323 | if (css && atomic_read(&css->refcnt)) |
2130 | return 1; | 2324 | return 1; |
2131 | } | ||
2132 | } | 2325 | } |
2133 | return 0; | 2326 | return 0; |
2134 | } | 2327 | } |
@@ -2138,7 +2331,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2138 | struct cgroup *cgrp = dentry->d_fsdata; | 2331 | struct cgroup *cgrp = dentry->d_fsdata; |
2139 | struct dentry *d; | 2332 | struct dentry *d; |
2140 | struct cgroup *parent; | 2333 | struct cgroup *parent; |
2141 | struct cgroup_subsys *ss; | ||
2142 | struct super_block *sb; | 2334 | struct super_block *sb; |
2143 | struct cgroupfs_root *root; | 2335 | struct cgroupfs_root *root; |
2144 | 2336 | ||
@@ -2157,17 +2349,19 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2157 | parent = cgrp->parent; | 2349 | parent = cgrp->parent; |
2158 | root = cgrp->root; | 2350 | root = cgrp->root; |
2159 | sb = root->sb; | 2351 | sb = root->sb; |
2352 | /* | ||
2353 | * Call pre_destroy handlers of subsys | ||
2354 | */ | ||
2355 | cgroup_call_pre_destroy(cgrp); | ||
2356 | /* | ||
2357 | * Notify subsyses that rmdir() request comes. | ||
2358 | */ | ||
2160 | 2359 | ||
2161 | if (cgroup_has_css_refs(cgrp)) { | 2360 | if (cgroup_has_css_refs(cgrp)) { |
2162 | mutex_unlock(&cgroup_mutex); | 2361 | mutex_unlock(&cgroup_mutex); |
2163 | return -EBUSY; | 2362 | return -EBUSY; |
2164 | } | 2363 | } |
2165 | 2364 | ||
2166 | for_each_subsys(root, ss) { | ||
2167 | if (cgrp->subsys[ss->subsys_id]) | ||
2168 | ss->destroy(ss, cgrp); | ||
2169 | } | ||
2170 | |||
2171 | spin_lock(&release_list_lock); | 2365 | spin_lock(&release_list_lock); |
2172 | set_bit(CGRP_REMOVED, &cgrp->flags); | 2366 | set_bit(CGRP_REMOVED, &cgrp->flags); |
2173 | if (!list_empty(&cgrp->release_list)) | 2367 | if (!list_empty(&cgrp->release_list)) |
@@ -2182,15 +2376,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2182 | 2376 | ||
2183 | cgroup_d_remove_dir(d); | 2377 | cgroup_d_remove_dir(d); |
2184 | dput(d); | 2378 | dput(d); |
2185 | root->number_of_cgroups--; | ||
2186 | 2379 | ||
2187 | set_bit(CGRP_RELEASABLE, &parent->flags); | 2380 | set_bit(CGRP_RELEASABLE, &parent->flags); |
2188 | check_for_release(parent); | 2381 | check_for_release(parent); |
2189 | 2382 | ||
2190 | mutex_unlock(&cgroup_mutex); | 2383 | mutex_unlock(&cgroup_mutex); |
2191 | /* Drop the active superblock reference that we took when we | ||
2192 | * created the cgroup */ | ||
2193 | deactivate_super(sb); | ||
2194 | return 0; | 2384 | return 0; |
2195 | } | 2385 | } |
2196 | 2386 | ||
@@ -2324,7 +2514,7 @@ out:
2324 | * - Used for /proc/<pid>/cgroup. | 2514 | * - Used for /proc/<pid>/cgroup. |
2325 | * - No need to task_lock(tsk) on this tsk->cgroup reference, as it | 2515 | * - No need to task_lock(tsk) on this tsk->cgroup reference, as it |
2326 | * doesn't really matter if tsk->cgroup changes after we read it, | 2516 | * doesn't really matter if tsk->cgroup changes after we read it, |
2327 | * and we take cgroup_mutex, keeping attach_task() from changing it | 2517 | * and we take cgroup_mutex, keeping cgroup_attach_task() from changing it |
2328 | * anyway. No need to check that tsk->cgroup != NULL, thanks to | 2518 | * anyway. No need to check that tsk->cgroup != NULL, thanks to |
2329 | * the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks | 2519 | * the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks |
2330 | * cgroup to top_cgroup. | 2520 | * cgroup to top_cgroup. |
@@ -2435,7 +2625,7 @@ static struct file_operations proc_cgroupstats_operations = {
2435 | * A pointer to the shared css_set was automatically copied in | 2625 | * A pointer to the shared css_set was automatically copied in |
2436 | * fork.c by dup_task_struct(). However, we ignore that copy, since | 2626 | * fork.c by dup_task_struct(). However, we ignore that copy, since |
2437 | * it was not made under the protection of RCU or cgroup_mutex, so | 2627 | * it was not made under the protection of RCU or cgroup_mutex, so |
2438 | * might no longer be a valid cgroup pointer. attach_task() might | 2628 | * might no longer be a valid cgroup pointer. cgroup_attach_task() might |
2439 | * have already changed current->cgroups, allowing the previously | 2629 | * have already changed current->cgroups, allowing the previously |
2440 | * referenced cgroup group to be removed and freed. | 2630 | * referenced cgroup group to be removed and freed. |
2441 | * | 2631 | * |
@@ -2514,8 +2704,8 @@ void cgroup_post_fork(struct task_struct *child)
2514 | * attach us to a different cgroup, decrementing the count on | 2704 | * attach us to a different cgroup, decrementing the count on |
2515 | * the first cgroup that we never incremented. But in this case, | 2705 | * the first cgroup that we never incremented. But in this case, |
2516 | * top_cgroup isn't going away, and either task has PF_EXITING set, | 2706 | * top_cgroup isn't going away, and either task has PF_EXITING set, |
2517 | * which wards off any attach_task() attempts, or task is a failed | 2707 | * which wards off any cgroup_attach_task() attempts, or task is a failed |
2518 | * fork, never visible to attach_task. | 2708 | * fork, never visible to cgroup_attach_task. |
2519 | * | 2709 | * |
2520 | */ | 2710 | */ |
2521 | void cgroup_exit(struct task_struct *tsk, int run_callbacks) | 2711 | void cgroup_exit(struct task_struct *tsk, int run_callbacks) |
@@ -2655,7 +2845,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
2655 | } | 2845 | } |
2656 | 2846 | ||
2657 | /* All seems fine. Finish by moving the task into the new cgroup */ | 2847 | /* All seems fine. Finish by moving the task into the new cgroup */ |
2658 | ret = attach_task(child, tsk); | 2848 | ret = cgroup_attach_task(child, tsk); |
2659 | mutex_unlock(&cgroup_mutex); | 2849 | mutex_unlock(&cgroup_mutex); |
2660 | 2850 | ||
2661 | out_release: | 2851 | out_release: |
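cgroup_scan_tasks() above batches candidate tasks into a bounded priority heap ordered by start time so that no cgroup locks are held while process_task() runs; when the heap overflows, the next pass considers only tasks that started after the newest one already handled, which guarantees forward progress without missing anyone. The standalone sketch below models that "bounded batch, resume after the latest timestamp" loop with integers in place of tasks and a plain array scan in place of the prio heap.

    #include <stdio.h>

    #define BATCH_CAP 4

    /* Each pass keeps at most BATCH_CAP of the oldest unprocessed items,
     * then resumes after the newest one handled so far, so every item is
     * processed exactly once (start times are assumed distinct). */
    static void scan(const int *start_times, int n)
    {
        int latest_done = -1;

        for (;;) {
            int batch[BATCH_CAP], size = 0, i, j;

            for (i = 0; i < n; i++) {
                int t = start_times[i];
                if (t <= latest_done)
                    continue;               /* already processed */
                if (size < BATCH_CAP) {
                    batch[size++] = t;
                } else {
                    int newest = 0;         /* overflow: drop the newest entry */
                    for (j = 1; j < BATCH_CAP; j++)
                        if (batch[j] > batch[newest])
                            newest = j;
                    if (t < batch[newest])
                        batch[newest] = t;
                }
            }
            if (!size)
                break;
            for (i = 0; i < size; i++) {
                printf("process %d\n", batch[i]);
                if (batch[i] > latest_done)
                    latest_done = batch[i];
            }
        }
    }

    int main(void)
    {
        int times[] = { 5, 1, 9, 3, 7, 2, 8, 6, 4 };
        scan(times, (int)(sizeof(times) / sizeof(times[0])));
        return 0;
    }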
diff --git a/kernel/compat.c b/kernel/compat.c
index 42a1ed4b61b1..5f0e201bcfd3 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -40,10 +40,35 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user
40 | __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; | 40 | __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; |
41 | } | 41 | } |
42 | 42 | ||
43 | static long compat_nanosleep_restart(struct restart_block *restart) | ||
44 | { | ||
45 | struct compat_timespec __user *rmtp; | ||
46 | struct timespec rmt; | ||
47 | mm_segment_t oldfs; | ||
48 | long ret; | ||
49 | |||
50 | rmtp = (struct compat_timespec __user *)(restart->arg1); | ||
51 | restart->arg1 = (unsigned long)&rmt; | ||
52 | oldfs = get_fs(); | ||
53 | set_fs(KERNEL_DS); | ||
54 | ret = hrtimer_nanosleep_restart(restart); | ||
55 | set_fs(oldfs); | ||
56 | |||
57 | if (ret) { | ||
58 | restart->arg1 = (unsigned long)rmtp; | ||
59 | |||
60 | if (rmtp && put_compat_timespec(&rmt, rmtp)) | ||
61 | return -EFAULT; | ||
62 | } | ||
63 | |||
64 | return ret; | ||
65 | } | ||
66 | |||
43 | asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp, | 67 | asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp, |
44 | struct compat_timespec __user *rmtp) | 68 | struct compat_timespec __user *rmtp) |
45 | { | 69 | { |
46 | struct timespec tu, rmt; | 70 | struct timespec tu, rmt; |
71 | mm_segment_t oldfs; | ||
47 | long ret; | 72 | long ret; |
48 | 73 | ||
49 | if (get_compat_timespec(&tu, rqtp)) | 74 | if (get_compat_timespec(&tu, rqtp)) |
@@ -52,11 +77,21 @@ asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
52 | if (!timespec_valid(&tu)) | 77 | if (!timespec_valid(&tu)) |
53 | return -EINVAL; | 78 | return -EINVAL; |
54 | 79 | ||
55 | ret = hrtimer_nanosleep(&tu, rmtp ? &rmt : NULL, HRTIMER_MODE_REL, | 80 | oldfs = get_fs(); |
56 | CLOCK_MONOTONIC); | 81 | set_fs(KERNEL_DS); |
82 | ret = hrtimer_nanosleep(&tu, | ||
83 | rmtp ? (struct timespec __user *)&rmt : NULL, | ||
84 | HRTIMER_MODE_REL, CLOCK_MONOTONIC); | ||
85 | set_fs(oldfs); | ||
86 | |||
87 | if (ret) { | ||
88 | struct restart_block *restart | ||
89 | = ¤t_thread_info()->restart_block; | ||
90 | |||
91 | restart->fn = compat_nanosleep_restart; | ||
92 | restart->arg1 = (unsigned long)rmtp; | ||
57 | 93 | ||
58 | if (ret && rmtp) { | 94 | if (rmtp && put_compat_timespec(&rmt, rmtp)) |
59 | if (put_compat_timespec(&rmt, rmtp)) | ||
60 | return -EFAULT; | 95 | return -EFAULT; |
61 | } | 96 | } |
62 | 97 | ||
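compat_sys_nanosleep() now reuses the native hrtimer_nanosleep() by handing it an on-stack kernel timespec while the user address-space limit is temporarily lifted with set_fs(KERNEL_DS); the remaining time is then converted to the 32-bit compat layout, and compat_nanosleep_restart() is installed so a signal-interrupted sleep restarts through the same conversion. A minimal sketch of that get_fs()/set_fs() bracket, assuming the 2.6.25-era mm_segment_t API; sleep_kernel_ts() is a made-up helper.

    #include <linux/hrtimer.h>    /* hrtimer_nanosleep(), HRTIMER_MODE_REL */
    #include <linux/time.h>       /* struct timespec, CLOCK_MONOTONIC */
    #include <linux/uaccess.h>    /* get_fs()/set_fs(), KERNEL_DS */

    /* Call a helper that expects a __user pointer with a kernel buffer by
     * widening the segment limit around the call; always restore it. */
    static long sleep_kernel_ts(struct timespec *req, struct timespec *rem)
    {
        mm_segment_t oldfs = get_fs();
        long ret;

        set_fs(KERNEL_DS);        /* let copy_to_user() accept kernel pointers */
        ret = hrtimer_nanosleep(req, (struct timespec __user *)rem,
                                HRTIMER_MODE_REL, CLOCK_MONOTONIC);
        set_fs(oldfs);
        return ret;
    }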
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e0d3a4f56ecb..2eff3f63abed 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -389,7 +389,7 @@ int disable_nonboot_cpus(void)
389 | return error; | 389 | return error; |
390 | } | 390 | } |
391 | 391 | ||
392 | void enable_nonboot_cpus(void) | 392 | void __ref enable_nonboot_cpus(void) |
393 | { | 393 | { |
394 | int cpu, error; | 394 | int cpu, error; |
395 | 395 | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index cfaf6419d817..3e296ed81d4d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <linux/mount.h> | 38 | #include <linux/mount.h> |
39 | #include <linux/namei.h> | 39 | #include <linux/namei.h> |
40 | #include <linux/pagemap.h> | 40 | #include <linux/pagemap.h> |
41 | #include <linux/prio_heap.h> | ||
42 | #include <linux/proc_fs.h> | 41 | #include <linux/proc_fs.h> |
43 | #include <linux/rcupdate.h> | 42 | #include <linux/rcupdate.h> |
44 | #include <linux/sched.h> | 43 | #include <linux/sched.h> |
@@ -56,6 +55,8 @@ | |||
56 | #include <asm/atomic.h> | 55 | #include <asm/atomic.h> |
57 | #include <linux/mutex.h> | 56 | #include <linux/mutex.h> |
58 | #include <linux/kfifo.h> | 57 | #include <linux/kfifo.h> |
58 | #include <linux/workqueue.h> | ||
59 | #include <linux/cgroup.h> | ||
59 | 60 | ||
60 | /* | 61 | /* |
61 | * Tracks how many cpusets are currently defined in system. | 62 | * Tracks how many cpusets are currently defined in system. |
@@ -64,7 +65,7 @@ | |||
64 | */ | 65 | */ |
65 | int number_of_cpusets __read_mostly; | 66 | int number_of_cpusets __read_mostly; |
66 | 67 | ||
67 | /* Retrieve the cpuset from a cgroup */ | 68 | /* Forward declare cgroup structures */ |
68 | struct cgroup_subsys cpuset_subsys; | 69 | struct cgroup_subsys cpuset_subsys; |
69 | struct cpuset; | 70 | struct cpuset; |
70 | 71 | ||
@@ -96,6 +97,9 @@ struct cpuset { | |||
96 | 97 | ||
97 | /* partition number for rebuild_sched_domains() */ | 98 | /* partition number for rebuild_sched_domains() */ |
98 | int pn; | 99 | int pn; |
100 | |||
101 | /* used for walking a cpuset hierarchy */ ||
102 | struct list_head stack_list; | ||
99 | }; | 103 | }; |
100 | 104 | ||
101 | /* Retrieve the cpuset for a cgroup */ | 105 | /* Retrieve the cpuset for a cgroup */ |
@@ -111,7 +115,10 @@ static inline struct cpuset *task_cs(struct task_struct *task) | |||
111 | return container_of(task_subsys_state(task, cpuset_subsys_id), | 115 | return container_of(task_subsys_state(task, cpuset_subsys_id), |
112 | struct cpuset, css); | 116 | struct cpuset, css); |
113 | } | 117 | } |
114 | 118 | struct cpuset_hotplug_scanner { | |
119 | struct cgroup_scanner scan; | ||
120 | struct cgroup *to; | ||
121 | }; | ||
115 | 122 | ||
116 | /* bits in struct cpuset flags field */ | 123 | /* bits in struct cpuset flags field */ |
117 | typedef enum { | 124 | typedef enum { |
@@ -160,17 +167,17 @@ static inline int is_spread_slab(const struct cpuset *cs) | |||
160 | * number, and avoid having to lock and reload mems_allowed unless | 167 | * number, and avoid having to lock and reload mems_allowed unless |
161 | * the cpuset they're using changes generation. | 168 | * the cpuset they're using changes generation. |
162 | * | 169 | * |
163 | * A single, global generation is needed because attach_task() could | 170 | * A single, global generation is needed because cpuset_attach_task() could |
164 | * reattach a task to a different cpuset, which must not have its | 171 | * reattach a task to a different cpuset, which must not have its |
165 | * generation numbers aliased with those of that task's previous cpuset. | 172 | * generation numbers aliased with those of that task's previous cpuset. |
166 | * | 173 | * |
167 | * Generations are needed for mems_allowed because one task cannot | 174 | * Generations are needed for mems_allowed because one task cannot |
168 | * modify anothers memory placement. So we must enable every task, | 175 | * modify another's memory placement. So we must enable every task, |
169 | * on every visit to __alloc_pages(), to efficiently check whether | 176 | * on every visit to __alloc_pages(), to efficiently check whether |
170 | * its current->cpuset->mems_allowed has changed, requiring an update | 177 | * its current->cpuset->mems_allowed has changed, requiring an update |
171 | * of its current->mems_allowed. | 178 | * of its current->mems_allowed. |
172 | * | 179 | * |
173 | * Since cpuset_mems_generation is guarded by manage_mutex, | 180 | * Since writes to cpuset_mems_generation are guarded by the cgroup lock |
174 | * there is no need to mark it atomic. | 181 | * there is no need to mark it atomic. |
175 | */ | 182 | */ |
176 | static int cpuset_mems_generation; | 183 | static int cpuset_mems_generation; |
@@ -182,17 +189,20 @@ static struct cpuset top_cpuset = { | |||
182 | }; | 189 | }; |
183 | 190 | ||
184 | /* | 191 | /* |
185 | * We have two global cpuset mutexes below. They can nest. | 192 | * There are two global mutexes guarding cpuset structures. The first |
186 | * It is ok to first take manage_mutex, then nest callback_mutex. We also | 193 | * is the main control groups cgroup_mutex, accessed via |
187 | * require taking task_lock() when dereferencing a tasks cpuset pointer. | 194 | * cgroup_lock()/cgroup_unlock(). The second is the cpuset-specific |
188 | * See "The task_lock() exception", at the end of this comment. | 195 | * callback_mutex, below. They can nest. It is ok to first take |
196 | * cgroup_mutex, then nest callback_mutex. We also require taking | ||
197 | * task_lock() when dereferencing a task's cpuset pointer. See "The | ||
198 | * task_lock() exception", at the end of this comment. | ||
189 | * | 199 | * |
190 | * A task must hold both mutexes to modify cpusets. If a task | 200 | * A task must hold both mutexes to modify cpusets. If a task |
191 | * holds manage_mutex, then it blocks others wanting that mutex, | 201 | * holds cgroup_mutex, then it blocks others wanting that mutex, |
192 | * ensuring that it is the only task able to also acquire callback_mutex | 202 | * ensuring that it is the only task able to also acquire callback_mutex |
193 | * and be able to modify cpusets. It can perform various checks on | 203 | * and be able to modify cpusets. It can perform various checks on |
194 | * the cpuset structure first, knowing nothing will change. It can | 204 | * the cpuset structure first, knowing nothing will change. It can |
195 | * also allocate memory while just holding manage_mutex. While it is | 205 | * also allocate memory while just holding cgroup_mutex. While it is |
196 | * performing these checks, various callback routines can briefly | 206 | * performing these checks, various callback routines can briefly |
197 | * acquire callback_mutex to query cpusets. Once it is ready to make | 207 | * acquire callback_mutex to query cpusets. Once it is ready to make |
198 | * the changes, it takes callback_mutex, blocking everyone else. | 208 | * the changes, it takes callback_mutex, blocking everyone else. |
@@ -208,60 +218,16 @@ static struct cpuset top_cpuset = { | |||
208 | * The task_struct fields mems_allowed and mems_generation may only | 218 | * The task_struct fields mems_allowed and mems_generation may only |
209 | * be accessed in the context of that task, so require no locks. | 219 | * be accessed in the context of that task, so require no locks. |
210 | * | 220 | * |
211 | * Any task can increment and decrement the count field without lock. | ||
212 | * So in general, code holding manage_mutex or callback_mutex can't rely | ||
213 | * on the count field not changing. However, if the count goes to | ||
214 | * zero, then only attach_task(), which holds both mutexes, can | ||
215 | * increment it again. Because a count of zero means that no tasks | ||
216 | * are currently attached, therefore there is no way a task attached | ||
217 | * to that cpuset can fork (the other way to increment the count). | ||
218 | * So code holding manage_mutex or callback_mutex can safely assume that | ||
219 | * if the count is zero, it will stay zero. Similarly, if a task | ||
220 | * holds manage_mutex or callback_mutex on a cpuset with zero count, it | ||
221 | * knows that the cpuset won't be removed, as cpuset_rmdir() needs | ||
222 | * both of those mutexes. | ||
223 | * | ||
224 | * The cpuset_common_file_write handler for operations that modify | 221 | * The cpuset_common_file_write handler for operations that modify |
225 | * the cpuset hierarchy holds manage_mutex across the entire operation, | 222 | * the cpuset hierarchy holds cgroup_mutex across the entire operation, |
226 | * single threading all such cpuset modifications across the system. | 223 | * single threading all such cpuset modifications across the system. |
227 | * | 224 | * |
228 | * The cpuset_common_file_read() handlers only hold callback_mutex across | 225 | * The cpuset_common_file_read() handlers only hold callback_mutex across |
229 | * small pieces of code, such as when reading out possibly multi-word | 226 | * small pieces of code, such as when reading out possibly multi-word |
230 | * cpumasks and nodemasks. | 227 | * cpumasks and nodemasks. |
231 | * | 228 | * |
232 | * The fork and exit callbacks cpuset_fork() and cpuset_exit(), don't | 229 | * Accessing a task's cpuset should be done in accordance with the |
233 | * (usually) take either mutex. These are the two most performance | 230 | * guidelines for accessing subsystem state in kernel/cgroup.c |
234 | * critical pieces of code here. The exception occurs on cpuset_exit(), | ||
235 | * when a task in a notify_on_release cpuset exits. Then manage_mutex | ||
236 | * is taken, and if the cpuset count is zero, a usermode call made | ||
237 | * to /sbin/cpuset_release_agent with the name of the cpuset (path | ||
238 | * relative to the root of cpuset file system) as the argument. | ||
239 | * | ||
240 | * A cpuset can only be deleted if both its 'count' of using tasks | ||
241 | * is zero, and its list of 'children' cpusets is empty. Since all | ||
242 | * tasks in the system use _some_ cpuset, and since there is always at | ||
243 | * least one task in the system (init), therefore, top_cpuset | ||
244 | * always has either children cpusets and/or using tasks. So we don't | ||
245 | * need a special hack to ensure that top_cpuset cannot be deleted. | ||
246 | * | ||
247 | * The above "Tale of Two Semaphores" would be complete, but for: | ||
248 | * | ||
249 | * The task_lock() exception | ||
250 | * | ||
251 | * The need for this exception arises from the action of attach_task(), | ||
252 | * which overwrites one tasks cpuset pointer with another. It does | ||
253 | * so using both mutexes, however there are several performance | ||
254 | * critical places that need to reference task->cpuset without the | ||
255 | * expense of grabbing a system global mutex. Therefore except as | ||
256 | * noted below, when dereferencing or, as in attach_task(), modifying | ||
257 | * a tasks cpuset pointer we use task_lock(), which acts on a spinlock | ||
258 | * (task->alloc_lock) already in the task_struct routinely used for | ||
259 | * such matters. | ||
260 | * | ||
261 | * P.S. One more locking exception. RCU is used to guard the | ||
262 | * update of a tasks cpuset pointer by attach_task() and the | ||
263 | * access of task->cpuset->mems_generation via that pointer in | ||
264 | * the routine cpuset_update_task_memory_state(). | ||
265 | */ | 231 | */ |
266 | 232 | ||
267 | static DEFINE_MUTEX(callback_mutex); | 233 | static DEFINE_MUTEX(callback_mutex); |
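
The rewritten comment above describes the two-level locking scheme: cgroup_mutex (via cgroup_lock()/cgroup_unlock()) serialises all cpuset modifications, while callback_mutex is held only around the actual mask updates so readers are blocked as briefly as possible. A minimal sketch of a writer following that convention, assuming the cgroup_lock() wrappers and the callback_mutex defined here (illustrative, not a real cpuset entry point):

static void cpuset_writer_sketch(struct cpuset *cs, cpumask_t newmask)
{
	cgroup_lock();			/* single-threads every cpuset modification */

	/* validate the change, allocate memory, etc. while only
	 * cgroup_mutex is held -- readers are still unblocked here */

	mutex_lock(&callback_mutex);	/* now exclude the readers, briefly */
	cs->cpus_allowed = newmask;
	mutex_unlock(&callback_mutex);

	cgroup_unlock();
}
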
@@ -354,15 +320,14 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | |||
354 | * Do not call this routine if in_interrupt(). | 320 | * Do not call this routine if in_interrupt(). |
355 | * | 321 | * |
356 | * Call without callback_mutex or task_lock() held. May be | 322 | * Call without callback_mutex or task_lock() held. May be |
357 | * called with or without manage_mutex held. Thanks in part to | 323 | * called with or without cgroup_mutex held. Thanks in part to |
358 | * 'the_top_cpuset_hack', the tasks cpuset pointer will never | 324 | * 'the_top_cpuset_hack', the task's cpuset pointer will never |
359 | * be NULL. This routine also might acquire callback_mutex and | 325 | * be NULL. This routine also might acquire callback_mutex and |
360 | * current->mm->mmap_sem during call. | 326 | * current->mm->mmap_sem during call. |
361 | * | 327 | * |
362 | * Reading current->cpuset->mems_generation doesn't need task_lock | 328 | * Reading current->cpuset->mems_generation doesn't need task_lock |
363 | * to guard the current->cpuset dereference, because it is guarded | 329 | * to guard the current->cpuset dereference, because it is guarded |
364 | * from concurrent freeing of current->cpuset by attach_task(), | 330 | * from concurrent freeing of current->cpuset using RCU. |
365 | * using RCU. | ||
366 | * | 331 | * |
367 | * The rcu_dereference() is technically probably not needed, | 332 | * The rcu_dereference() is technically probably not needed, |
368 | * as I don't actually mind if I see a new cpuset pointer but | 333 | * as I don't actually mind if I see a new cpuset pointer but |
@@ -424,7 +389,7 @@ void cpuset_update_task_memory_state(void) | |||
424 | * | 389 | * |
425 | * One cpuset is a subset of another if all its allowed CPUs and | 390 | * One cpuset is a subset of another if all its allowed CPUs and |
426 | * Memory Nodes are a subset of the other, and its exclusive flags | 391 | * Memory Nodes are a subset of the other, and its exclusive flags |
427 | * are only set if the other's are set. Call holding manage_mutex. | 392 | * are only set if the other's are set. Call holding cgroup_mutex. |
428 | */ | 393 | */ |
429 | 394 | ||
430 | static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) | 395 | static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) |
@@ -442,7 +407,7 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) | |||
442 | * If we replaced the flag and mask values of the current cpuset | 407 | * If we replaced the flag and mask values of the current cpuset |
443 | * (cur) with those values in the trial cpuset (trial), would | 408 | * (cur) with those values in the trial cpuset (trial), would |
444 | * our various subset and exclusive rules still be valid? Presumes | 409 | * our various subset and exclusive rules still be valid? Presumes |
445 | * manage_mutex held. | 410 | * cgroup_mutex held. |
446 | * | 411 | * |
447 | * 'cur' is the address of an actual, in-use cpuset. Operations | 412 | * 'cur' is the address of an actual, in-use cpuset. Operations |
448 | * such as list traversal that depend on the actual address of the | 413 | * such as list traversal that depend on the actual address of the |
@@ -476,7 +441,10 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
476 | if (!is_cpuset_subset(trial, par)) | 441 | if (!is_cpuset_subset(trial, par)) |
477 | return -EACCES; | 442 | return -EACCES; |
478 | 443 | ||
479 | /* If either I or some sibling (!= me) is exclusive, we can't overlap */ | 444 | /* |
445 | * If either I or some sibling (!= me) is exclusive, we can't | ||
446 | * overlap | ||
447 | */ | ||
480 | list_for_each_entry(cont, &par->css.cgroup->children, sibling) { | 448 | list_for_each_entry(cont, &par->css.cgroup->children, sibling) { |
481 | c = cgroup_cs(cont); | 449 | c = cgroup_cs(cont); |
482 | if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && | 450 | if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && |
@@ -732,22 +700,50 @@ static inline int started_after(void *p1, void *p2) | |||
732 | return started_after_time(t1, &t2->start_time, t2); | 700 | return started_after_time(t1, &t2->start_time, t2); |
733 | } | 701 | } |
734 | 702 | ||
735 | /* | 703 | /** |
736 | * Call with manage_mutex held. May take callback_mutex during call. | 704 | * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's |
705 | * @tsk: task to test | ||
706 | * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner | ||
707 | * | ||
708 | * Call with cgroup_mutex held. May take callback_mutex during call. | ||
709 | * Called for each task in a cgroup by cgroup_scan_tasks(). | ||
710 | * Return nonzero if this task's cpus_allowed mask should be changed (in other ||
711 | * words, if its mask is not equal to its cpuset's mask). | ||
712 | */ | ||
713 | int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | ||
714 | { | ||
715 | return !cpus_equal(tsk->cpus_allowed, | ||
716 | (cgroup_cs(scan->cg))->cpus_allowed); | ||
717 | } | ||
718 | |||
719 | /** | ||
720 | * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's | ||
721 | * @tsk: task to test | ||
722 | * @scan: struct cgroup_scanner containing the cgroup of the task | ||
723 | * | ||
724 | * Called by cgroup_scan_tasks() for each task in a cgroup whose | ||
725 | * cpus_allowed mask needs to be changed. | ||
726 | * | ||
727 | * We don't need to re-check for the cgroup/cpuset membership, since we're | ||
728 | * holding cgroup_lock() at this point. | ||
737 | */ | 729 | */ |
730 | void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | ||
731 | { | ||
732 | set_cpus_allowed(tsk, (cgroup_cs(scan->cg))->cpus_allowed); | ||
733 | } | ||
738 | 734 | ||
735 | /** | ||
736 | * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it | ||
737 | * @cs: the cpuset to consider | ||
738 | * @buf: buffer of cpu numbers written to this cpuset | ||
739 | */ | ||
739 | static int update_cpumask(struct cpuset *cs, char *buf) | 740 | static int update_cpumask(struct cpuset *cs, char *buf) |
740 | { | 741 | { |
741 | struct cpuset trialcs; | 742 | struct cpuset trialcs; |
742 | int retval, i; | 743 | struct cgroup_scanner scan; |
743 | int is_load_balanced; | ||
744 | struct cgroup_iter it; | ||
745 | struct cgroup *cgrp = cs->css.cgroup; | ||
746 | struct task_struct *p, *dropped; | ||
747 | /* Never dereference latest_task, since it's not refcounted */ | ||
748 | struct task_struct *latest_task = NULL; | ||
749 | struct ptr_heap heap; | 744 | struct ptr_heap heap; |
750 | struct timespec latest_time = { 0, 0 }; | 745 | int retval; |
746 | int is_load_balanced; | ||
751 | 747 | ||
752 | /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ | 748 | /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ |
753 | if (cs == &top_cpuset) | 749 | if (cs == &top_cpuset) |
@@ -756,7 +752,7 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
756 | trialcs = *cs; | 752 | trialcs = *cs; |
757 | 753 | ||
758 | /* | 754 | /* |
759 | * An empty cpus_allowed is ok iff there are no tasks in the cpuset. | 755 | * An empty cpus_allowed is ok only if the cpuset has no tasks. |
760 | * Since cpulist_parse() fails on an empty mask, we special case | 756 | * Since cpulist_parse() fails on an empty mask, we special case |
761 | * that parsing. The validate_change() call ensures that cpusets | 757 | * that parsing. The validate_change() call ensures that cpusets |
762 | * with tasks have cpus. | 758 | * with tasks have cpus. |
@@ -777,6 +773,7 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
777 | /* Nothing to do if the cpus didn't change */ | 773 | /* Nothing to do if the cpus didn't change */ |
778 | if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) | 774 | if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) |
779 | return 0; | 775 | return 0; |
776 | |||
780 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after); | 777 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after); |
781 | if (retval) | 778 | if (retval) |
782 | return retval; | 779 | return retval; |
@@ -787,62 +784,19 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
787 | cs->cpus_allowed = trialcs.cpus_allowed; | 784 | cs->cpus_allowed = trialcs.cpus_allowed; |
788 | mutex_unlock(&callback_mutex); | 785 | mutex_unlock(&callback_mutex); |
789 | 786 | ||
790 | again: | ||
791 | /* | 787 | /* |
792 | * Scan tasks in the cpuset, and update the cpumasks of any | 788 | * Scan tasks in the cpuset, and update the cpumasks of any |
793 | * that need an update. Since we can't call set_cpus_allowed() | 789 | * that need an update. |
794 | * while holding tasklist_lock, gather tasks to be processed | ||
795 | * in a heap structure. If the statically-sized heap fills up, | ||
796 | * overflow tasks that started later, and in future iterations | ||
797 | * only consider tasks that started after the latest task in | ||
798 | * the previous pass. This guarantees forward progress and | ||
799 | * that we don't miss any tasks | ||
800 | */ | 790 | */ |
801 | heap.size = 0; | 791 | scan.cg = cs->css.cgroup; |
802 | cgroup_iter_start(cgrp, &it); | 792 | scan.test_task = cpuset_test_cpumask; |
803 | while ((p = cgroup_iter_next(cgrp, &it))) { | 793 | scan.process_task = cpuset_change_cpumask; |
804 | /* Only affect tasks that don't have the right cpus_allowed */ | 794 | scan.heap = &heap; |
805 | if (cpus_equal(p->cpus_allowed, cs->cpus_allowed)) | 795 | cgroup_scan_tasks(&scan); |
806 | continue; | ||
807 | /* | ||
808 | * Only process tasks that started after the last task | ||
809 | * we processed | ||
810 | */ | ||
811 | if (!started_after_time(p, &latest_time, latest_task)) | ||
812 | continue; | ||
813 | dropped = heap_insert(&heap, p); | ||
814 | if (dropped == NULL) { | ||
815 | get_task_struct(p); | ||
816 | } else if (dropped != p) { | ||
817 | get_task_struct(p); | ||
818 | put_task_struct(dropped); | ||
819 | } | ||
820 | } | ||
821 | cgroup_iter_end(cgrp, &it); | ||
822 | if (heap.size) { | ||
823 | for (i = 0; i < heap.size; i++) { | ||
824 | struct task_struct *p = heap.ptrs[i]; | ||
825 | if (i == 0) { | ||
826 | latest_time = p->start_time; | ||
827 | latest_task = p; | ||
828 | } | ||
829 | set_cpus_allowed(p, cs->cpus_allowed); | ||
830 | put_task_struct(p); | ||
831 | } | ||
832 | /* | ||
833 | * If we had to process any tasks at all, scan again | ||
834 | * in case some of them were in the middle of forking | ||
835 | * children that didn't notice the new cpumask | ||
836 | * restriction. Not the most efficient way to do it, | ||
837 | * but it avoids having to take callback_mutex in the | ||
838 | * fork path | ||
839 | */ | ||
840 | goto again; | ||
841 | } | ||
842 | heap_free(&heap); | 796 | heap_free(&heap); |
797 | |||
843 | if (is_load_balanced) | 798 | if (is_load_balanced) |
844 | rebuild_sched_domains(); | 799 | rebuild_sched_domains(); |
845 | |||
846 | return 0; | 800 | return 0; |
847 | } | 801 | } |
848 | 802 | ||
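
update_cpumask() above drops its hand-rolled heap-and-retry loop in favour of the generic cgroup_scan_tasks() helper: the caller fills in a struct cgroup_scanner with a predicate (test_task), a per-task action (process_task) and scratch heap, and the cgroup core walks the tasks. A minimal sketch of that wiring using the two cpuset callbacks added above; it assumes cgroup_mutex is held and the ptr_heap was set up with heap_init(), exactly as in the hunk:

static void rescan_cpuset_cpumasks_sketch(struct cpuset *cs, struct ptr_heap *heap)
{
	struct cgroup_scanner scan;

	scan.cg = cs->css.cgroup;		   /* cgroup whose tasks are walked */
	scan.test_task = cpuset_test_cpumask;	   /* NULL would select every task */
	scan.process_task = cpuset_change_cpumask; /* applied to each matching task */
	scan.heap = heap;			   /* scratch space for the walk */
	cgroup_scan_tasks(&scan);
}
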
@@ -854,11 +808,11 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
854 | * Temporarily set task's mems_allowed to target nodes of migration, | 808 | * Temporarily set task's mems_allowed to target nodes of migration, |
855 | * so that the migration code can allocate pages on these nodes. | 809 | * so that the migration code can allocate pages on these nodes. |
856 | * | 810 | * |
857 | * Call holding manage_mutex, so our current->cpuset won't change | 811 | * Call holding cgroup_mutex, so current's cpuset won't change |
858 | * during this call, as manage_mutex holds off any attach_task() | 812 | * during this call, as manage_mutex holds off any cpuset_attach() |
859 | * calls. Therefore we don't need to take task_lock around the | 813 | * calls. Therefore we don't need to take task_lock around the |
860 | * call to guarantee_online_mems(), as we know no one is changing | 814 | * call to guarantee_online_mems(), as we know no one is changing |
861 | * our tasks cpuset. | 815 | * our task's cpuset. |
862 | * | 816 | * |
863 | * Hold callback_mutex around the two modifications of our tasks | 817 | * Hold callback_mutex around the two modifications of our tasks |
864 | * mems_allowed to synchronize with cpuset_mems_allowed(). | 818 | * mems_allowed to synchronize with cpuset_mems_allowed(). |
@@ -903,7 +857,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | |||
903 | * the cpuset is marked 'memory_migrate', migrate the tasks | 857 | * the cpuset is marked 'memory_migrate', migrate the tasks |
904 | * pages to the new memory. | 858 | * pages to the new memory. |
905 | * | 859 | * |
906 | * Call with manage_mutex held. May take callback_mutex during call. | 860 | * Call with cgroup_mutex held. May take callback_mutex during call. |
907 | * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, | 861 | * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, |
908 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind | 862 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind |
909 | * their mempolicies to the cpusets new mems_allowed. | 863 | * their mempolicies to the cpusets new mems_allowed. |
@@ -1016,7 +970,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
1016 | * tasklist_lock. Forks can happen again now - the mpol_copy() | 970 | * tasklist_lock. Forks can happen again now - the mpol_copy() |
1017 | * cpuset_being_rebound check will catch such forks, and rebind | 971 | * cpuset_being_rebound check will catch such forks, and rebind |
1018 | * their vma mempolicies too. Because we still hold the global | 972 | * their vma mempolicies too. Because we still hold the global |
1019 | * cpuset manage_mutex, we know that no other rebind effort will | 973 | * cgroup_mutex, we know that no other rebind effort will |
1020 | * be contending for the global variable cpuset_being_rebound. | 974 | * be contending for the global variable cpuset_being_rebound. |
1021 | * It's ok if we rebind the same mm twice; mpol_rebind_mm() | 975 | * It's ok if we rebind the same mm twice; mpol_rebind_mm() |
1022 | * is idempotent. Also migrate pages in each mm to new nodes. | 976 | * is idempotent. Also migrate pages in each mm to new nodes. |
@@ -1031,7 +985,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
1031 | mmput(mm); | 985 | mmput(mm); |
1032 | } | 986 | } |
1033 | 987 | ||
1034 | /* We're done rebinding vma's to this cpusets new mems_allowed. */ | 988 | /* We're done rebinding vmas to this cpuset's new mems_allowed. */ |
1035 | kfree(mmarray); | 989 | kfree(mmarray); |
1036 | cpuset_being_rebound = NULL; | 990 | cpuset_being_rebound = NULL; |
1037 | retval = 0; | 991 | retval = 0; |
@@ -1045,7 +999,7 @@ int current_cpuset_is_being_rebound(void) | |||
1045 | } | 999 | } |
1046 | 1000 | ||
1047 | /* | 1001 | /* |
1048 | * Call with manage_mutex held. | 1002 | * Call with cgroup_mutex held. |
1049 | */ | 1003 | */ |
1050 | 1004 | ||
1051 | static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) | 1005 | static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) |
@@ -1066,7 +1020,7 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) | |||
1066 | * cs: the cpuset to update | 1020 | * cs: the cpuset to update |
1067 | * buf: the buffer where we read the 0 or 1 | 1021 | * buf: the buffer where we read the 0 or 1 |
1068 | * | 1022 | * |
1069 | * Call with manage_mutex held. | 1023 | * Call with cgroup_mutex held. |
1070 | */ | 1024 | */ |
1071 | 1025 | ||
1072 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) | 1026 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) |
@@ -1200,6 +1154,7 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1200 | return val; | 1154 | return val; |
1201 | } | 1155 | } |
1202 | 1156 | ||
1157 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | ||
1203 | static int cpuset_can_attach(struct cgroup_subsys *ss, | 1158 | static int cpuset_can_attach(struct cgroup_subsys *ss, |
1204 | struct cgroup *cont, struct task_struct *tsk) | 1159 | struct cgroup *cont, struct task_struct *tsk) |
1205 | { | 1160 | { |
@@ -1547,7 +1502,8 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1547 | * If this becomes a problem for some users who wish to | 1502 | * If this becomes a problem for some users who wish to |
1548 | * allow that scenario, then cpuset_post_clone() could be | 1503 | * allow that scenario, then cpuset_post_clone() could be |
1549 | * changed to grant parent->cpus_allowed-sibling_cpus_exclusive | 1504 | * changed to grant parent->cpus_allowed-sibling_cpus_exclusive |
1550 | * (and likewise for mems) to the new cgroup. | 1505 | * (and likewise for mems) to the new cgroup. Called with cgroup_mutex |
1506 | * held. | ||
1551 | */ | 1507 | */ |
1552 | static void cpuset_post_clone(struct cgroup_subsys *ss, | 1508 | static void cpuset_post_clone(struct cgroup_subsys *ss, |
1553 | struct cgroup *cgroup) | 1509 | struct cgroup *cgroup) |
@@ -1571,11 +1527,8 @@ static void cpuset_post_clone(struct cgroup_subsys *ss, | |||
1571 | 1527 | ||
1572 | /* | 1528 | /* |
1573 | * cpuset_create - create a cpuset | 1529 | * cpuset_create - create a cpuset |
1574 | * parent: cpuset that will be parent of the new cpuset. | 1530 | * ss: cpuset cgroup subsystem |
1575 | * name: name of the new cpuset. Will be strcpy'ed. | 1531 | * cont: control group that the new cpuset will be part of |
1576 | * mode: mode to set on new inode | ||
1577 | * | ||
1578 | * Must be called with the mutex on the parent inode held | ||
1579 | */ | 1532 | */ |
1580 | 1533 | ||
1581 | static struct cgroup_subsys_state *cpuset_create( | 1534 | static struct cgroup_subsys_state *cpuset_create( |
@@ -1687,53 +1640,140 @@ int __init cpuset_init(void) | |||
1687 | return 0; | 1640 | return 0; |
1688 | } | 1641 | } |
1689 | 1642 | ||
1643 | /** | ||
1644 | * cpuset_do_move_task - move a given task to another cpuset | ||
1645 | * @tsk: pointer to task_struct the task to move | ||
1646 | * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner | ||
1647 | * | ||
1648 | * Called by cgroup_scan_tasks() for each task in a cgroup. | ||
1649 | * Return nonzero to stop the walk through the tasks. | ||
1650 | */ | ||
1651 | void cpuset_do_move_task(struct task_struct *tsk, struct cgroup_scanner *scan) | ||
1652 | { | ||
1653 | struct cpuset_hotplug_scanner *chsp; | ||
1654 | |||
1655 | chsp = container_of(scan, struct cpuset_hotplug_scanner, scan); | ||
1656 | cgroup_attach_task(chsp->to, tsk); | ||
1657 | } | ||
1658 | |||
1659 | /** | ||
1660 | * move_member_tasks_to_cpuset - move tasks from one cpuset to another | ||
1661 | * @from: cpuset in which the tasks currently reside | ||
1662 | * @to: cpuset to which the tasks will be moved | ||
1663 | * | ||
1664 | * Called with cgroup_mutex held | ||
1665 | * callback_mutex must not be held, as cpuset_attach() will take it. | ||
1666 | * | ||
1667 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, | ||
1668 | * calling callback functions for each. | ||
1669 | */ | ||
1670 | static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to) | ||
1671 | { | ||
1672 | struct cpuset_hotplug_scanner scan; | ||
1673 | |||
1674 | scan.scan.cg = from->css.cgroup; | ||
1675 | scan.scan.test_task = NULL; /* select all tasks in cgroup */ | ||
1676 | scan.scan.process_task = cpuset_do_move_task; | ||
1677 | scan.scan.heap = NULL; | ||
1678 | scan.to = to->css.cgroup; | ||
1679 | |||
1680 | if (cgroup_scan_tasks((struct cgroup_scanner *)&scan)) | ||
1681 | printk(KERN_ERR "move_member_tasks_to_cpuset: " | ||
1682 | "cgroup_scan_tasks failed\n"); | ||
1683 | } | ||
1684 | |||
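
cpuset_do_move_task() above only receives the generic struct cgroup_scanner pointer, yet it needs the destination cgroup; that is why struct cpuset_hotplug_scanner embeds the scanner and the callback recovers the wrapper with container_of(). A small sketch of the same embedding pattern with hypothetical names (hotplug_move_sketch, move_one_task_sketch), to make the layout requirement explicit:

struct hotplug_move_sketch {
	struct cgroup_scanner scan;	/* must be embedded by value, not a pointer */
	struct cgroup *to;		/* extra state private to the cpuset code */
};

static void move_one_task_sketch(struct task_struct *tsk,
				 struct cgroup_scanner *scan)
{
	struct hotplug_move_sketch *s =
			container_of(scan, struct hotplug_move_sketch, scan);

	cgroup_attach_task(s->to, tsk);	/* cgroup_mutex is already held by the walk */
}
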
1690 | /* | 1685 | /* |
1691 | * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs | 1686 | * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs |
1692 | * or memory nodes, we need to walk over the cpuset hierarchy, | 1687 | * or memory nodes, we need to walk over the cpuset hierarchy, |
1693 | * removing that CPU or node from all cpusets. If this removes the | 1688 | * removing that CPU or node from all cpusets. If this removes the |
1694 | * last CPU or node from a cpuset, then the guarantee_online_cpus() | 1689 | * last CPU or node from a cpuset, then move the tasks in the empty |
1695 | * or guarantee_online_mems() code will use that emptied cpusets | 1690 | * cpuset to its next-highest non-empty parent. |
1696 | * parent online CPUs or nodes. Cpusets that were already empty of | 1691 | * |
1697 | * CPUs or nodes are left empty. | 1692 | * Called with cgroup_mutex held |
1693 | * callback_mutex must not be held, as cpuset_attach() will take it. | ||
1694 | */ | ||
1695 | static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | ||
1696 | { | ||
1697 | struct cpuset *parent; | ||
1698 | |||
1699 | /* | ||
1700 | * The cgroup's css_sets list is in use if there are tasks | ||
1701 | * in the cpuset; the list is empty if there are none; | ||
1702 | * the cs->css.refcnt seems always 0. | ||
1703 | */ | ||
1704 | if (list_empty(&cs->css.cgroup->css_sets)) | ||
1705 | return; | ||
1706 | |||
1707 | /* | ||
1708 | * Find its next-highest non-empty parent, (top cpuset | ||
1709 | * has online cpus, so can't be empty). | ||
1710 | */ | ||
1711 | parent = cs->parent; | ||
1712 | while (cpus_empty(parent->cpus_allowed) || | ||
1713 | nodes_empty(parent->mems_allowed)) | ||
1714 | parent = parent->parent; | ||
1715 | |||
1716 | move_member_tasks_to_cpuset(cs, parent); | ||
1717 | } | ||
1718 | |||
1719 | /* | ||
1720 | * Walk the specified cpuset subtree and look for empty cpusets. | ||
1721 | * The tasks of such cpuset must be moved to a parent cpuset. | ||
1698 | * | 1722 | * |
1699 | * This routine is intentionally inefficient in a couple of regards. | 1723 | * Called with cgroup_mutex held. We take callback_mutex to modify |
1700 | * It will check all cpusets in a subtree even if the top cpuset of | 1724 | * cpus_allowed and mems_allowed. |
1701 | * the subtree has no offline CPUs or nodes. It checks both CPUs and | ||
1702 | * nodes, even though the caller could have been coded to know that | ||
1703 | * only one of CPUs or nodes needed to be checked on a given call. | ||
1704 | * This was done to minimize text size rather than cpu cycles. | ||
1705 | * | 1725 | * |
1706 | * Call with both manage_mutex and callback_mutex held. | 1726 | * This walk processes the tree from top to bottom, completing one layer |
1727 | * before dropping down to the next. It always processes a node before | ||
1728 | * any of its children. | ||
1707 | * | 1729 | * |
1708 | * Recursive, on depth of cpuset subtree. | 1730 | * For now, since we lack memory hot unplug, we'll never see a cpuset |
1731 | * that has tasks along with an empty 'mems'. But if we did see such | ||
1732 | * a cpuset, we'd handle it just like we do if its 'cpus' was empty. | ||
1709 | */ | 1733 | */ |
1710 | 1734 | static void scan_for_empty_cpusets(const struct cpuset *root) | |
1711 | static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur) | ||
1712 | { | 1735 | { |
1736 | struct cpuset *cp; /* scans cpusets being updated */ | ||
1737 | struct cpuset *child; /* scans child cpusets of cp */ | ||
1738 | struct list_head queue; | ||
1713 | struct cgroup *cont; | 1739 | struct cgroup *cont; |
1714 | struct cpuset *c; | ||
1715 | 1740 | ||
1716 | /* Each of our child cpusets mems must be online */ | 1741 | INIT_LIST_HEAD(&queue); |
1717 | list_for_each_entry(cont, &cur->css.cgroup->children, sibling) { | 1742 | |
1718 | c = cgroup_cs(cont); | 1743 | list_add_tail((struct list_head *)&root->stack_list, &queue); |
1719 | guarantee_online_cpus_mems_in_subtree(c); | 1744 | |
1720 | if (!cpus_empty(c->cpus_allowed)) | 1745 | while (!list_empty(&queue)) { |
1721 | guarantee_online_cpus(c, &c->cpus_allowed); | 1746 | cp = container_of(queue.next, struct cpuset, stack_list); |
1722 | if (!nodes_empty(c->mems_allowed)) | 1747 | list_del(queue.next); |
1723 | guarantee_online_mems(c, &c->mems_allowed); | 1748 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { |
1749 | child = cgroup_cs(cont); | ||
1750 | list_add_tail(&child->stack_list, &queue); | ||
1751 | } | ||
1752 | cont = cp->css.cgroup; | ||
1753 | |||
1754 | /* Continue past cpusets with all cpus, mems online */ | ||
1755 | if (cpus_subset(cp->cpus_allowed, cpu_online_map) && | ||
1756 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | ||
1757 | continue; | ||
1758 | |||
1759 | /* Remove offline cpus and mems from this cpuset. */ | ||
1760 | mutex_lock(&callback_mutex); | ||
1761 | cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); | ||
1762 | nodes_and(cp->mems_allowed, cp->mems_allowed, | ||
1763 | node_states[N_HIGH_MEMORY]); | ||
1764 | mutex_unlock(&callback_mutex); | ||
1765 | |||
1766 | /* Move tasks from the empty cpuset to a parent */ | ||
1767 | if (cpus_empty(cp->cpus_allowed) || | ||
1768 | nodes_empty(cp->mems_allowed)) | ||
1769 | remove_tasks_in_empty_cpuset(cp); | ||
1724 | } | 1770 | } |
1725 | } | 1771 | } |
1726 | 1772 | ||
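
scan_for_empty_cpusets() above replaces the earlier recursive subtree walk with an iterative, breadth-first traversal: each cpuset is threaded onto a local queue through its new stack_list member, so the walk needs no recursion and always processes a parent before any of its children. A stripped-down sketch of just the traversal, with the hotplug-specific work reduced to a comment; it assumes cgroup_mutex is held, which is what makes reusing the embedded stack_list safe:

static void walk_cpuset_tree_sketch(struct cpuset *root)
{
	struct list_head queue;
	struct cpuset *cp, *child;
	struct cgroup *cont;

	INIT_LIST_HEAD(&queue);
	list_add_tail(&root->stack_list, &queue);

	while (!list_empty(&queue)) {
		cp = container_of(queue.next, struct cpuset, stack_list);
		list_del(queue.next);			/* pop the head of the queue */

		/* push the children; they are visited after this level */
		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
			child = cgroup_cs(cont);
			list_add_tail(&child->stack_list, &queue);
		}

		/* per-cpuset work goes here: trim offline cpus/mems under
		 * callback_mutex, then evacuate tasks if cp became empty */
	}
}
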
1727 | /* | 1773 | /* |
1728 | * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track | 1774 | * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track |
1729 | * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to | 1775 | * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to |
1730 | * track what's online after any CPU or memory node hotplug or unplug | 1776 | * track what's online after any CPU or memory node hotplug or unplug event. |
1731 | * event. | ||
1732 | * | ||
1733 | * To ensure that we don't remove a CPU or node from the top cpuset | ||
1734 | * that is currently in use by a child cpuset (which would violate | ||
1735 | * the rule that cpusets must be subsets of their parent), we first | ||
1736 | * call the recursive routine guarantee_online_cpus_mems_in_subtree(). | ||
1737 | * | 1777 | * |
1738 | * Since there are two callers of this routine, one for CPU hotplug | 1778 | * Since there are two callers of this routine, one for CPU hotplug |
1739 | * events and one for memory node hotplug events, we could have coded | 1779 | * events and one for memory node hotplug events, we could have coded |
@@ -1744,13 +1784,11 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur) | |||
1744 | static void common_cpu_mem_hotplug_unplug(void) | 1784 | static void common_cpu_mem_hotplug_unplug(void) |
1745 | { | 1785 | { |
1746 | cgroup_lock(); | 1786 | cgroup_lock(); |
1747 | mutex_lock(&callback_mutex); | ||
1748 | 1787 | ||
1749 | guarantee_online_cpus_mems_in_subtree(&top_cpuset); | ||
1750 | top_cpuset.cpus_allowed = cpu_online_map; | 1788 | top_cpuset.cpus_allowed = cpu_online_map; |
1751 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 1789 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
1790 | scan_for_empty_cpusets(&top_cpuset); | ||
1752 | 1791 | ||
1753 | mutex_unlock(&callback_mutex); | ||
1754 | cgroup_unlock(); | 1792 | cgroup_unlock(); |
1755 | } | 1793 | } |
1756 | 1794 | ||
@@ -1826,7 +1864,7 @@ cpumask_t cpuset_cpus_allowed(struct task_struct *tsk) | |||
1826 | 1864 | ||
1827 | /** | 1865 | /** |
1828 | * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. | 1866 | * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. |
1829 | * Must be called with callback_mutex held. | 1867 | * Must be called with callback_mutex held. |
1830 | **/ | 1868 | **/ |
1831 | cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk) | 1869 | cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk) |
1832 | { | 1870 | { |
@@ -2163,10 +2201,8 @@ void __cpuset_memory_pressure_bump(void) | |||
2163 | * - Used for /proc/<pid>/cpuset. | 2201 | * - Used for /proc/<pid>/cpuset. |
2164 | * - No need to task_lock(tsk) on this tsk->cpuset reference, as it | 2202 | * - No need to task_lock(tsk) on this tsk->cpuset reference, as it |
2165 | * doesn't really matter if tsk->cpuset changes after we read it, | 2203 | * doesn't really matter if tsk->cpuset changes after we read it, |
2166 | * and we take manage_mutex, keeping attach_task() from changing it | 2204 | * and we take cgroup_mutex, keeping cpuset_attach() from changing it |
2167 | * anyway. No need to check that tsk->cpuset != NULL, thanks to | 2205 | * anyway. |
2168 | * the_top_cpuset_hack in cpuset_exit(), which sets an exiting tasks | ||
2169 | * cpuset to top_cpuset. | ||
2170 | */ | 2206 | */ |
2171 | static int proc_cpuset_show(struct seq_file *m, void *unused_v) | 2207 | static int proc_cpuset_show(struct seq_file *m, void *unused_v) |
2172 | { | 2208 | { |
@@ -2219,13 +2255,14 @@ const struct file_operations proc_cpuset_operations = { | |||
2219 | #endif /* CONFIG_PROC_PID_CPUSET */ | 2255 | #endif /* CONFIG_PROC_PID_CPUSET */ |
2220 | 2256 | ||
2221 | /* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */ | 2257 | /* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */ |
2222 | char *cpuset_task_status_allowed(struct task_struct *task, char *buffer) | 2258 | void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) |
2223 | { | 2259 | { |
2224 | buffer += sprintf(buffer, "Cpus_allowed:\t"); | 2260 | seq_printf(m, "Cpus_allowed:\t"); |
2225 | buffer += cpumask_scnprintf(buffer, PAGE_SIZE, task->cpus_allowed); | 2261 | m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count, |
2226 | buffer += sprintf(buffer, "\n"); | 2262 | task->cpus_allowed); |
2227 | buffer += sprintf(buffer, "Mems_allowed:\t"); | 2263 | seq_printf(m, "\n"); |
2228 | buffer += nodemask_scnprintf(buffer, PAGE_SIZE, task->mems_allowed); | 2264 | seq_printf(m, "Mems_allowed:\t"); |
2229 | buffer += sprintf(buffer, "\n"); | 2265 | m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count, |
2230 | return buffer; | 2266 | task->mems_allowed); |
2267 | seq_printf(m, "\n"); | ||
2231 | } | 2268 | } |
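
The final cpuset.c hunk converts the /proc/<pid>/status printing from a raw buffer to a seq_file, but cpumask_scnprintf() and nodemask_scnprintf() still only know how to write into a plain buffer, so the code appends into m->buf at m->count and bumps the count by hand. A minimal sketch of that mixed pattern for one mask, assuming the seq_file fields used above:

static void seq_print_cpumask_sketch(struct seq_file *m, cpumask_t mask)
{
	seq_printf(m, "Cpus_allowed:\t");
	/* append straight into the seq_file buffer and account for the bytes */
	m->count += cpumask_scnprintf(m->buf + m->count,
				      m->size - m->count, mask);
	seq_printf(m, "\n");
}
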
diff --git a/kernel/exit.c b/kernel/exit.c index 9e459fefda77..506a957b665a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -293,26 +293,27 @@ static void reparent_to_kthreadd(void) | |||
293 | switch_uid(INIT_USER); | 293 | switch_uid(INIT_USER); |
294 | } | 294 | } |
295 | 295 | ||
296 | void __set_special_pids(pid_t session, pid_t pgrp) | 296 | void __set_special_pids(struct pid *pid) |
297 | { | 297 | { |
298 | struct task_struct *curr = current->group_leader; | 298 | struct task_struct *curr = current->group_leader; |
299 | pid_t nr = pid_nr(pid); | ||
299 | 300 | ||
300 | if (task_session_nr(curr) != session) { | 301 | if (task_session(curr) != pid) { |
301 | detach_pid(curr, PIDTYPE_SID); | 302 | detach_pid(curr, PIDTYPE_SID); |
302 | set_task_session(curr, session); | 303 | attach_pid(curr, PIDTYPE_SID, pid); |
303 | attach_pid(curr, PIDTYPE_SID, find_pid(session)); | 304 | set_task_session(curr, nr); |
304 | } | 305 | } |
305 | if (task_pgrp_nr(curr) != pgrp) { | 306 | if (task_pgrp(curr) != pid) { |
306 | detach_pid(curr, PIDTYPE_PGID); | 307 | detach_pid(curr, PIDTYPE_PGID); |
307 | set_task_pgrp(curr, pgrp); | 308 | attach_pid(curr, PIDTYPE_PGID, pid); |
308 | attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp)); | 309 | set_task_pgrp(curr, nr); |
309 | } | 310 | } |
310 | } | 311 | } |
311 | 312 | ||
312 | static void set_special_pids(pid_t session, pid_t pgrp) | 313 | static void set_special_pids(struct pid *pid) |
313 | { | 314 | { |
314 | write_lock_irq(&tasklist_lock); | 315 | write_lock_irq(&tasklist_lock); |
315 | __set_special_pids(session, pgrp); | 316 | __set_special_pids(pid); |
316 | write_unlock_irq(&tasklist_lock); | 317 | write_unlock_irq(&tasklist_lock); |
317 | } | 318 | } |
318 | 319 | ||
@@ -383,7 +384,11 @@ void daemonize(const char *name, ...) | |||
383 | */ | 384 | */ |
384 | current->flags |= PF_NOFREEZE; | 385 | current->flags |= PF_NOFREEZE; |
385 | 386 | ||
386 | set_special_pids(1, 1); | 387 | if (current->nsproxy != &init_nsproxy) { |
388 | get_nsproxy(&init_nsproxy); | ||
389 | switch_task_namespaces(current, &init_nsproxy); | ||
390 | } | ||
391 | set_special_pids(&init_struct_pid); | ||
387 | proc_clear_tty(current); | 392 | proc_clear_tty(current); |
388 | 393 | ||
389 | /* Block and flush all signals */ | 394 | /* Block and flush all signals */ |
@@ -398,11 +403,6 @@ void daemonize(const char *name, ...) | |||
398 | current->fs = fs; | 403 | current->fs = fs; |
399 | atomic_inc(&fs->count); | 404 | atomic_inc(&fs->count); |
400 | 405 | ||
401 | if (current->nsproxy != init_task.nsproxy) { | ||
402 | get_nsproxy(init_task.nsproxy); | ||
403 | switch_task_namespaces(current, init_task.nsproxy); | ||
404 | } | ||
405 | |||
406 | exit_files(current); | 406 | exit_files(current); |
407 | current->files = init_task.files; | 407 | current->files = init_task.files; |
408 | atomic_inc(¤t->files->count); | 408 | atomic_inc(¤t->files->count); |
@@ -458,7 +458,7 @@ struct files_struct *get_files_struct(struct task_struct *task) | |||
458 | return files; | 458 | return files; |
459 | } | 459 | } |
460 | 460 | ||
461 | void fastcall put_files_struct(struct files_struct *files) | 461 | void put_files_struct(struct files_struct *files) |
462 | { | 462 | { |
463 | struct fdtable *fdt; | 463 | struct fdtable *fdt; |
464 | 464 | ||
@@ -512,14 +512,10 @@ static void __put_fs_struct(struct fs_struct *fs) | |||
512 | { | 512 | { |
513 | /* No need to hold fs->lock if we are killing it */ | 513 | /* No need to hold fs->lock if we are killing it */ |
514 | if (atomic_dec_and_test(&fs->count)) { | 514 | if (atomic_dec_and_test(&fs->count)) { |
515 | dput(fs->root); | 515 | path_put(&fs->root); |
516 | mntput(fs->rootmnt); | 516 | path_put(&fs->pwd); |
517 | dput(fs->pwd); | 517 | if (fs->altroot.dentry) |
518 | mntput(fs->pwdmnt); | 518 | path_put(&fs->altroot); |
519 | if (fs->altroot) { | ||
520 | dput(fs->altroot); | ||
521 | mntput(fs->altrootmnt); | ||
522 | } | ||
523 | kmem_cache_free(fs_cachep, fs); | 519 | kmem_cache_free(fs_cachep, fs); |
524 | } | 520 | } |
525 | } | 521 | } |
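
__put_fs_struct() above is part of the struct path conversion: fs->root, fs->pwd and fs->altroot now bundle the vfsmount and dentry together, so a single path_put() replaces each dput()/mntput() pair (with altroot's presence checked via its dentry member). As a hedged illustration of the symmetric side of that API, assuming the same struct fs_struct layout and the path_get() helper that pairs with path_put():

static void borrow_fs_root_sketch(struct fs_struct *fs)
{
	struct path root;

	read_lock(&fs->lock);
	root = fs->root;	/* struct path: {mnt, dentry} copied together */
	path_get(&root);	/* one call pins both the mount and the dentry */
	read_unlock(&fs->lock);

	/* ... use root.mnt / root.dentry ... */

	path_put(&root);	/* drops the mntput() and dput() in one go */
}
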
@@ -745,24 +741,6 @@ static void exit_notify(struct task_struct *tsk) | |||
745 | struct task_struct *t; | 741 | struct task_struct *t; |
746 | struct pid *pgrp; | 742 | struct pid *pgrp; |
747 | 743 | ||
748 | if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT) | ||
749 | && !thread_group_empty(tsk)) { | ||
750 | /* | ||
751 | * This occurs when there was a race between our exit | ||
752 | * syscall and a group signal choosing us as the one to | ||
753 | * wake up. It could be that we are the only thread | ||
754 | * alerted to check for pending signals, but another thread | ||
755 | * should be woken now to take the signal since we will not. | ||
756 | * Now we'll wake all the threads in the group just to make | ||
757 | * sure someone gets all the pending signals. | ||
758 | */ | ||
759 | spin_lock_irq(&tsk->sighand->siglock); | ||
760 | for (t = next_thread(tsk); t != tsk; t = next_thread(t)) | ||
761 | if (!signal_pending(t) && !(t->flags & PF_EXITING)) | ||
762 | recalc_sigpending_and_wake(t); | ||
763 | spin_unlock_irq(&tsk->sighand->siglock); | ||
764 | } | ||
765 | |||
766 | /* | 744 | /* |
767 | * This does two things: | 745 | * This does two things: |
768 | * | 746 | * |
@@ -905,7 +883,7 @@ static inline void exit_child_reaper(struct task_struct *tsk) | |||
905 | zap_pid_ns_processes(tsk->nsproxy->pid_ns); | 883 | zap_pid_ns_processes(tsk->nsproxy->pid_ns); |
906 | } | 884 | } |
907 | 885 | ||
908 | fastcall NORET_TYPE void do_exit(long code) | 886 | NORET_TYPE void do_exit(long code) |
909 | { | 887 | { |
910 | struct task_struct *tsk = current; | 888 | struct task_struct *tsk = current; |
911 | int group_dead; | 889 | int group_dead; |
@@ -947,7 +925,7 @@ fastcall NORET_TYPE void do_exit(long code) | |||
947 | schedule(); | 925 | schedule(); |
948 | } | 926 | } |
949 | 927 | ||
950 | tsk->flags |= PF_EXITING; | 928 | exit_signals(tsk); /* sets PF_EXITING */ |
951 | /* | 929 | /* |
952 | * tsk->flags are checked in the futex code to protect against | 930 | * tsk->flags are checked in the futex code to protect against |
953 | * an exiting task cleaning up the robust pi futexes. | 931 | * an exiting task cleaning up the robust pi futexes. |
@@ -1083,11 +1061,12 @@ do_group_exit(int exit_code) | |||
1083 | struct signal_struct *const sig = current->signal; | 1061 | struct signal_struct *const sig = current->signal; |
1084 | struct sighand_struct *const sighand = current->sighand; | 1062 | struct sighand_struct *const sighand = current->sighand; |
1085 | spin_lock_irq(&sighand->siglock); | 1063 | spin_lock_irq(&sighand->siglock); |
1086 | if (sig->flags & SIGNAL_GROUP_EXIT) | 1064 | if (signal_group_exit(sig)) |
1087 | /* Another thread got here before we took the lock. */ | 1065 | /* Another thread got here before we took the lock. */ |
1088 | exit_code = sig->group_exit_code; | 1066 | exit_code = sig->group_exit_code; |
1089 | else { | 1067 | else { |
1090 | sig->group_exit_code = exit_code; | 1068 | sig->group_exit_code = exit_code; |
1069 | sig->flags = SIGNAL_GROUP_EXIT; | ||
1091 | zap_other_threads(current); | 1070 | zap_other_threads(current); |
1092 | } | 1071 | } |
1093 | spin_unlock_irq(&sighand->siglock); | 1072 | spin_unlock_irq(&sighand->siglock); |
@@ -1107,20 +1086,23 @@ asmlinkage void sys_exit_group(int error_code) | |||
1107 | do_group_exit((error_code & 0xff) << 8); | 1086 | do_group_exit((error_code & 0xff) << 8); |
1108 | } | 1087 | } |
1109 | 1088 | ||
1110 | static int eligible_child(pid_t pid, int options, struct task_struct *p) | 1089 | static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) |
1090 | { | ||
1091 | struct pid *pid = NULL; | ||
1092 | if (type == PIDTYPE_PID) | ||
1093 | pid = task->pids[type].pid; | ||
1094 | else if (type < PIDTYPE_MAX) | ||
1095 | pid = task->group_leader->pids[type].pid; | ||
1096 | return pid; | ||
1097 | } | ||
1098 | |||
1099 | static int eligible_child(enum pid_type type, struct pid *pid, int options, | ||
1100 | struct task_struct *p) | ||
1111 | { | 1101 | { |
1112 | int err; | 1102 | int err; |
1113 | struct pid_namespace *ns; | ||
1114 | 1103 | ||
1115 | ns = current->nsproxy->pid_ns; | 1104 | if (type < PIDTYPE_MAX) { |
1116 | if (pid > 0) { | 1105 | if (task_pid_type(p, type) != pid) |
1117 | if (task_pid_nr_ns(p, ns) != pid) | ||
1118 | return 0; | ||
1119 | } else if (!pid) { | ||
1120 | if (task_pgrp_nr_ns(p, ns) != task_pgrp_vnr(current)) | ||
1121 | return 0; | ||
1122 | } else if (pid != -1) { | ||
1123 | if (task_pgrp_nr_ns(p, ns) != -pid) | ||
1124 | return 0; | 1106 | return 0; |
1125 | } | 1107 | } |
1126 | 1108 | ||
@@ -1139,18 +1121,16 @@ static int eligible_child(pid_t pid, int options, struct task_struct *p) | |||
1139 | if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) | 1121 | if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) |
1140 | && !(options & __WALL)) | 1122 | && !(options & __WALL)) |
1141 | return 0; | 1123 | return 0; |
1142 | /* | ||
1143 | * Do not consider thread group leaders that are | ||
1144 | * in a non-empty thread group: | ||
1145 | */ | ||
1146 | if (delay_group_leader(p)) | ||
1147 | return 2; | ||
1148 | 1124 | ||
1149 | err = security_task_wait(p); | 1125 | err = security_task_wait(p); |
1150 | if (err) | 1126 | if (likely(!err)) |
1151 | return err; | 1127 | return 1; |
1152 | 1128 | ||
1153 | return 1; | 1129 | if (type != PIDTYPE_PID) |
1130 | return 0; | ||
1131 | /* This child was explicitly requested, abort */ | ||
1132 | read_unlock(&tasklist_lock); | ||
1133 | return err; | ||
1154 | } | 1134 | } |
1155 | 1135 | ||
1156 | static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, | 1136 | static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, |
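
eligible_child() above now matches children against an (enum pid_type, struct pid *) pair instead of decoding a signed pid_t: PIDTYPE_PID selects one specific child, PIDTYPE_PGID a process group, and a type of PIDTYPE_MAX with a NULL pid means "any child". The translation from the classic waitpid() argument is done by the syscall entry points outside this hunk; the sketch below shows one plausible mapping using helpers (find_get_pid(), get_pid(), task_pgrp()) that exist in this kernel, and is illustrative rather than a copy of that code:

static struct pid *decode_wait_target_sketch(pid_t upid, enum pid_type *type)
{
	struct pid *pid;

	if (upid == -1) {			/* wait for any child */
		*type = PIDTYPE_MAX;
		pid = NULL;
	} else if (upid < 0) {			/* wait for process group -upid */
		*type = PIDTYPE_PGID;
		pid = find_get_pid(-upid);
	} else if (upid == 0) {			/* wait for our own process group */
		*type = PIDTYPE_PGID;
		pid = get_pid(task_pgrp(current));
	} else {				/* wait for the one child 'upid' */
		*type = PIDTYPE_PID;
		pid = find_get_pid(upid);
	}
	return pid;				/* caller drops it with put_pid() */
}
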
@@ -1190,20 +1170,13 @@ static int wait_task_zombie(struct task_struct *p, int noreap, | |||
1190 | { | 1170 | { |
1191 | unsigned long state; | 1171 | unsigned long state; |
1192 | int retval, status, traced; | 1172 | int retval, status, traced; |
1193 | struct pid_namespace *ns; | 1173 | pid_t pid = task_pid_vnr(p); |
1194 | |||
1195 | ns = current->nsproxy->pid_ns; | ||
1196 | 1174 | ||
1197 | if (unlikely(noreap)) { | 1175 | if (unlikely(noreap)) { |
1198 | pid_t pid = task_pid_nr_ns(p, ns); | ||
1199 | uid_t uid = p->uid; | 1176 | uid_t uid = p->uid; |
1200 | int exit_code = p->exit_code; | 1177 | int exit_code = p->exit_code; |
1201 | int why, status; | 1178 | int why, status; |
1202 | 1179 | ||
1203 | if (unlikely(p->exit_state != EXIT_ZOMBIE)) | ||
1204 | return 0; | ||
1205 | if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) | ||
1206 | return 0; | ||
1207 | get_task_struct(p); | 1180 | get_task_struct(p); |
1208 | read_unlock(&tasklist_lock); | 1181 | read_unlock(&tasklist_lock); |
1209 | if ((exit_code & 0x7f) == 0) { | 1182 | if ((exit_code & 0x7f) == 0) { |
@@ -1314,11 +1287,11 @@ static int wait_task_zombie(struct task_struct *p, int noreap, | |||
1314 | retval = put_user(status, &infop->si_status); | 1287 | retval = put_user(status, &infop->si_status); |
1315 | } | 1288 | } |
1316 | if (!retval && infop) | 1289 | if (!retval && infop) |
1317 | retval = put_user(task_pid_nr_ns(p, ns), &infop->si_pid); | 1290 | retval = put_user(pid, &infop->si_pid); |
1318 | if (!retval && infop) | 1291 | if (!retval && infop) |
1319 | retval = put_user(p->uid, &infop->si_uid); | 1292 | retval = put_user(p->uid, &infop->si_uid); |
1320 | if (!retval) | 1293 | if (!retval) |
1321 | retval = task_pid_nr_ns(p, ns); | 1294 | retval = pid; |
1322 | 1295 | ||
1323 | if (traced) { | 1296 | if (traced) { |
1324 | write_lock_irq(&tasklist_lock); | 1297 | write_lock_irq(&tasklist_lock); |
@@ -1350,21 +1323,38 @@ static int wait_task_zombie(struct task_struct *p, int noreap, | |||
1350 | * the lock and this task is uninteresting. If we return nonzero, we have | 1323 | * the lock and this task is uninteresting. If we return nonzero, we have |
1351 | * released the lock and the system call should return. | 1324 | * released the lock and the system call should return. |
1352 | */ | 1325 | */ |
1353 | static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, | 1326 | static int wait_task_stopped(struct task_struct *p, |
1354 | int noreap, struct siginfo __user *infop, | 1327 | int noreap, struct siginfo __user *infop, |
1355 | int __user *stat_addr, struct rusage __user *ru) | 1328 | int __user *stat_addr, struct rusage __user *ru) |
1356 | { | 1329 | { |
1357 | int retval, exit_code; | 1330 | int retval, exit_code, why; |
1331 | uid_t uid = 0; /* unneeded, required by compiler */ | ||
1358 | pid_t pid; | 1332 | pid_t pid; |
1359 | 1333 | ||
1360 | if (!p->exit_code) | 1334 | exit_code = 0; |
1361 | return 0; | 1335 | spin_lock_irq(&p->sighand->siglock); |
1362 | if (delayed_group_leader && !(p->ptrace & PT_PTRACED) && | 1336 | |
1363 | p->signal->group_stop_count > 0) | 1337 | if (unlikely(!task_is_stopped_or_traced(p))) |
1338 | goto unlock_sig; | ||
1339 | |||
1340 | if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0) | ||
1364 | /* | 1341 | /* |
1365 | * A group stop is in progress and this is the group leader. | 1342 | * A group stop is in progress and this is the group leader. |
1366 | * We won't report until all threads have stopped. | 1343 | * We won't report until all threads have stopped. |
1367 | */ | 1344 | */ |
1345 | goto unlock_sig; | ||
1346 | |||
1347 | exit_code = p->exit_code; | ||
1348 | if (!exit_code) | ||
1349 | goto unlock_sig; | ||
1350 | |||
1351 | if (!noreap) | ||
1352 | p->exit_code = 0; | ||
1353 | |||
1354 | uid = p->uid; | ||
1355 | unlock_sig: | ||
1356 | spin_unlock_irq(&p->sighand->siglock); | ||
1357 | if (!exit_code) | ||
1368 | return 0; | 1358 | return 0; |
1369 | 1359 | ||
1370 | /* | 1360 | /* |
@@ -1374,65 +1364,15 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, | |||
1374 | * keep holding onto the tasklist_lock while we call getrusage and | 1364 | * keep holding onto the tasklist_lock while we call getrusage and |
1375 | * possibly take page faults for user memory. | 1365 | * possibly take page faults for user memory. |
1376 | */ | 1366 | */ |
1377 | pid = task_pid_nr_ns(p, current->nsproxy->pid_ns); | ||
1378 | get_task_struct(p); | 1367 | get_task_struct(p); |
1368 | pid = task_pid_vnr(p); | ||
1369 | why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; | ||
1379 | read_unlock(&tasklist_lock); | 1370 | read_unlock(&tasklist_lock); |
1380 | 1371 | ||
1381 | if (unlikely(noreap)) { | 1372 | if (unlikely(noreap)) |
1382 | uid_t uid = p->uid; | ||
1383 | int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; | ||
1384 | |||
1385 | exit_code = p->exit_code; | ||
1386 | if (unlikely(!exit_code) || unlikely(p->exit_state)) | ||
1387 | goto bail_ref; | ||
1388 | return wait_noreap_copyout(p, pid, uid, | 1373 | return wait_noreap_copyout(p, pid, uid, |
1389 | why, exit_code, | 1374 | why, exit_code, |
1390 | infop, ru); | 1375 | infop, ru); |
1391 | } | ||
1392 | |||
1393 | write_lock_irq(&tasklist_lock); | ||
1394 | |||
1395 | /* | ||
1396 | * This uses xchg to be atomic with the thread resuming and setting | ||
1397 | * it. It must also be done with the write lock held to prevent a | ||
1398 | * race with the EXIT_ZOMBIE case. | ||
1399 | */ | ||
1400 | exit_code = xchg(&p->exit_code, 0); | ||
1401 | if (unlikely(p->exit_state)) { | ||
1402 | /* | ||
1403 | * The task resumed and then died. Let the next iteration | ||
1404 | * catch it in EXIT_ZOMBIE. Note that exit_code might | ||
1405 | * already be zero here if it resumed and did _exit(0). | ||
1406 | * The task itself is dead and won't touch exit_code again; | ||
1407 | * other processors in this function are locked out. | ||
1408 | */ | ||
1409 | p->exit_code = exit_code; | ||
1410 | exit_code = 0; | ||
1411 | } | ||
1412 | if (unlikely(exit_code == 0)) { | ||
1413 | /* | ||
1414 | * Another thread in this function got to it first, or it | ||
1415 | * resumed, or it resumed and then died. | ||
1416 | */ | ||
1417 | write_unlock_irq(&tasklist_lock); | ||
1418 | bail_ref: | ||
1419 | put_task_struct(p); | ||
1420 | /* | ||
1421 | * We are returning to the wait loop without having successfully | ||
1422 | * removed the process and having released the lock. We cannot | ||
1423 | * continue, since the "p" task pointer is potentially stale. | ||
1424 | * | ||
1425 | * Return -EAGAIN, and do_wait() will restart the loop from the | ||
1426 | * beginning. Do _not_ re-acquire the lock. | ||
1427 | */ | ||
1428 | return -EAGAIN; | ||
1429 | } | ||
1430 | |||
1431 | /* move to end of parent's list to avoid starvation */ | ||
1432 | remove_parent(p); | ||
1433 | add_parent(p); | ||
1434 | |||
1435 | write_unlock_irq(&tasklist_lock); | ||
1436 | 1376 | ||
1437 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; | 1377 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; |
1438 | if (!retval && stat_addr) | 1378 | if (!retval && stat_addr) |
@@ -1442,15 +1382,13 @@ bail_ref: | |||
1442 | if (!retval && infop) | 1382 | if (!retval && infop) |
1443 | retval = put_user(0, &infop->si_errno); | 1383 | retval = put_user(0, &infop->si_errno); |
1444 | if (!retval && infop) | 1384 | if (!retval && infop) |
1445 | retval = put_user((short)((p->ptrace & PT_PTRACED) | 1385 | retval = put_user(why, &infop->si_code); |
1446 | ? CLD_TRAPPED : CLD_STOPPED), | ||
1447 | &infop->si_code); | ||
1448 | if (!retval && infop) | 1386 | if (!retval && infop) |
1449 | retval = put_user(exit_code, &infop->si_status); | 1387 | retval = put_user(exit_code, &infop->si_status); |
1450 | if (!retval && infop) | 1388 | if (!retval && infop) |
1451 | retval = put_user(pid, &infop->si_pid); | 1389 | retval = put_user(pid, &infop->si_pid); |
1452 | if (!retval && infop) | 1390 | if (!retval && infop) |
1453 | retval = put_user(p->uid, &infop->si_uid); | 1391 | retval = put_user(uid, &infop->si_uid); |
1454 | if (!retval) | 1392 | if (!retval) |
1455 | retval = pid; | 1393 | retval = pid; |
1456 | put_task_struct(p); | 1394 | put_task_struct(p); |
@@ -1472,7 +1410,6 @@ static int wait_task_continued(struct task_struct *p, int noreap, | |||
1472 | int retval; | 1410 | int retval; |
1473 | pid_t pid; | 1411 | pid_t pid; |
1474 | uid_t uid; | 1412 | uid_t uid; |
1475 | struct pid_namespace *ns; | ||
1476 | 1413 | ||
1477 | if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) | 1414 | if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) |
1478 | return 0; | 1415 | return 0; |
@@ -1487,8 +1424,7 @@ static int wait_task_continued(struct task_struct *p, int noreap, | |||
1487 | p->signal->flags &= ~SIGNAL_STOP_CONTINUED; | 1424 | p->signal->flags &= ~SIGNAL_STOP_CONTINUED; |
1488 | spin_unlock_irq(&p->sighand->siglock); | 1425 | spin_unlock_irq(&p->sighand->siglock); |
1489 | 1426 | ||
1490 | ns = current->nsproxy->pid_ns; | 1427 | pid = task_pid_vnr(p); |
1491 | pid = task_pid_nr_ns(p, ns); | ||
1492 | uid = p->uid; | 1428 | uid = p->uid; |
1493 | get_task_struct(p); | 1429 | get_task_struct(p); |
1494 | read_unlock(&tasklist_lock); | 1430 | read_unlock(&tasklist_lock); |
@@ -1499,7 +1435,7 @@ static int wait_task_continued(struct task_struct *p, int noreap, | |||
1499 | if (!retval && stat_addr) | 1435 | if (!retval && stat_addr) |
1500 | retval = put_user(0xffff, stat_addr); | 1436 | retval = put_user(0xffff, stat_addr); |
1501 | if (!retval) | 1437 | if (!retval) |
1502 | retval = task_pid_nr_ns(p, ns); | 1438 | retval = pid; |
1503 | } else { | 1439 | } else { |
1504 | retval = wait_noreap_copyout(p, pid, uid, | 1440 | retval = wait_noreap_copyout(p, pid, uid, |
1505 | CLD_CONTINUED, SIGCONT, | 1441 | CLD_CONTINUED, SIGCONT, |
@@ -1510,103 +1446,63 @@ static int wait_task_continued(struct task_struct *p, int noreap, | |||
1510 | return retval; | 1446 | return retval; |
1511 | } | 1447 | } |
1512 | 1448 | ||
1513 | 1449 | static long do_wait(enum pid_type type, struct pid *pid, int options, | |
1514 | static inline int my_ptrace_child(struct task_struct *p) | 1450 | struct siginfo __user *infop, int __user *stat_addr, |
1515 | { | 1451 | struct rusage __user *ru) |
1516 | if (!(p->ptrace & PT_PTRACED)) | ||
1517 | return 0; | ||
1518 | if (!(p->ptrace & PT_ATTACHED)) | ||
1519 | return 1; | ||
1520 | /* | ||
1521 | * This child was PTRACE_ATTACH'd. We should be seeing it only if | ||
1522 | * we are the attacher. If we are the real parent, this is a race | ||
1523 | * inside ptrace_attach. It is waiting for the tasklist_lock, | ||
1524 | * which we have to switch the parent links, but has already set | ||
1525 | * the flags in p->ptrace. | ||
1526 | */ | ||
1527 | return (p->parent != p->real_parent); | ||
1528 | } | ||
1529 | |||
1530 | static long do_wait(pid_t pid, int options, struct siginfo __user *infop, | ||
1531 | int __user *stat_addr, struct rusage __user *ru) | ||
1532 | { | 1452 | { |
1533 | DECLARE_WAITQUEUE(wait, current); | 1453 | DECLARE_WAITQUEUE(wait, current); |
1534 | struct task_struct *tsk; | 1454 | struct task_struct *tsk; |
1535 | int flag, retval; | 1455 | int flag, retval; |
1536 | int allowed, denied; | ||
1537 | 1456 | ||
1538 | add_wait_queue(¤t->signal->wait_chldexit,&wait); | 1457 | add_wait_queue(¤t->signal->wait_chldexit,&wait); |
1539 | repeat: | 1458 | repeat: |
1459 | /* If there is nothing that can match our criteria, just get out */ | ||
1460 | retval = -ECHILD; | ||
1461 | if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) | ||
1462 | goto end; | ||
1463 | |||
1540 | /* | 1464 | /* |
1541 | * We will set this flag if we see any child that might later | 1465 | * We will set this flag if we see any child that might later |
1542 | * match our criteria, even if we are not able to reap it yet. | 1466 | * match our criteria, even if we are not able to reap it yet. |
1543 | */ | 1467 | */ |
1544 | flag = 0; | 1468 | flag = retval = 0; |
1545 | allowed = denied = 0; | ||
1546 | current->state = TASK_INTERRUPTIBLE; | 1469 | current->state = TASK_INTERRUPTIBLE; |
1547 | read_lock(&tasklist_lock); | 1470 | read_lock(&tasklist_lock); |
1548 | tsk = current; | 1471 | tsk = current; |
1549 | do { | 1472 | do { |
1550 | struct task_struct *p; | 1473 | struct task_struct *p; |
1551 | int ret; | ||
1552 | 1474 | ||
1553 | list_for_each_entry(p, &tsk->children, sibling) { | 1475 | list_for_each_entry(p, &tsk->children, sibling) { |
1554 | ret = eligible_child(pid, options, p); | 1476 | int ret = eligible_child(type, pid, options, p); |
1555 | if (!ret) | 1477 | if (!ret) |
1556 | continue; | 1478 | continue; |
1557 | 1479 | ||
1558 | if (unlikely(ret < 0)) { | 1480 | if (unlikely(ret < 0)) { |
1559 | denied = ret; | 1481 | retval = ret; |
1560 | continue; | 1482 | } else if (task_is_stopped_or_traced(p)) { |
1561 | } | ||
1562 | allowed = 1; | ||
1563 | |||
1564 | if (task_is_stopped_or_traced(p)) { | ||
1565 | /* | 1483 | /* |
1566 | * It's stopped now, so it might later | 1484 | * It's stopped now, so it might later |
1567 | * continue, exit, or stop again. | 1485 | * continue, exit, or stop again. |
1568 | * | ||
1569 | * When we hit the race with PTRACE_ATTACH, we | ||
1570 | * will not report this child. But the race | ||
1571 | * means it has not yet been moved to our | ||
1572 | * ptrace_children list, so we need to set the | ||
1573 | * flag here to avoid a spurious ECHILD when | ||
1574 | * the race happens with the only child. | ||
1575 | */ | 1486 | */ |
1576 | flag = 1; | 1487 | flag = 1; |
1488 | if (!(p->ptrace & PT_PTRACED) && | ||
1489 | !(options & WUNTRACED)) | ||
1490 | continue; | ||
1577 | 1491 | ||
1578 | if (!my_ptrace_child(p)) { | 1492 | retval = wait_task_stopped(p, |
1579 | if (task_is_traced(p)) | ||
1580 | continue; | ||
1581 | if (!(options & WUNTRACED)) | ||
1582 | continue; | ||
1583 | } | ||
1584 | |||
1585 | retval = wait_task_stopped(p, ret == 2, | ||
1586 | (options & WNOWAIT), infop, | 1493 | (options & WNOWAIT), infop, |
1587 | stat_addr, ru); | 1494 | stat_addr, ru); |
1588 | if (retval == -EAGAIN) | 1495 | } else if (p->exit_state == EXIT_ZOMBIE && |
1589 | goto repeat; | 1496 | !delay_group_leader(p)) { |
1590 | if (retval != 0) /* He released the lock. */ | ||
1591 | goto end; | ||
1592 | } else if (p->exit_state == EXIT_DEAD) { | ||
1593 | continue; | ||
1594 | } else if (p->exit_state == EXIT_ZOMBIE) { | ||
1595 | /* | 1497 | /* |
1596 | * Eligible but we cannot release it yet: | 1498 | * We don't reap group leaders with subthreads. |
1597 | */ | 1499 | */ |
1598 | if (ret == 2) | ||
1599 | goto check_continued; | ||
1600 | if (!likely(options & WEXITED)) | 1500 | if (!likely(options & WEXITED)) |
1601 | continue; | 1501 | continue; |
1602 | retval = wait_task_zombie(p, | 1502 | retval = wait_task_zombie(p, |
1603 | (options & WNOWAIT), infop, | 1503 | (options & WNOWAIT), infop, |
1604 | stat_addr, ru); | 1504 | stat_addr, ru); |
1605 | /* He released the lock. */ | 1505 | } else if (p->exit_state != EXIT_DEAD) { |
1606 | if (retval != 0) | ||
1607 | goto end; | ||
1608 | } else { | ||
1609 | check_continued: | ||
1610 | /* | 1506 | /* |
1611 | * It's running now, so it might later | 1507 | * It's running now, so it might later |
1612 | * exit, stop, or stop and then continue. | 1508 | * exit, stop, or stop and then continue. |
@@ -1617,17 +1513,20 @@ check_continued: | |||
1617 | retval = wait_task_continued(p, | 1513 | retval = wait_task_continued(p, |
1618 | (options & WNOWAIT), infop, | 1514 | (options & WNOWAIT), infop, |
1619 | stat_addr, ru); | 1515 | stat_addr, ru); |
1620 | if (retval != 0) /* He released the lock. */ | ||
1621 | goto end; | ||
1622 | } | 1516 | } |
1517 | if (retval != 0) /* tasklist_lock released */ | ||
1518 | goto end; | ||
1623 | } | 1519 | } |
1624 | if (!flag) { | 1520 | if (!flag) { |
1625 | list_for_each_entry(p, &tsk->ptrace_children, | 1521 | list_for_each_entry(p, &tsk->ptrace_children, |
1626 | ptrace_list) { | 1522 | ptrace_list) { |
1627 | if (!eligible_child(pid, options, p)) | 1523 | flag = eligible_child(type, pid, options, p); |
1524 | if (!flag) | ||
1628 | continue; | 1525 | continue; |
1629 | flag = 1; | 1526 | if (likely(flag > 0)) |
1630 | break; | 1527 | break; |
1528 | retval = flag; | ||
1529 | goto end; | ||
1631 | } | 1530 | } |
1632 | } | 1531 | } |
1633 | if (options & __WNOTHREAD) | 1532 | if (options & __WNOTHREAD) |
@@ -1635,10 +1534,9 @@ check_continued: | |||
1635 | tsk = next_thread(tsk); | 1534 | tsk = next_thread(tsk); |
1636 | BUG_ON(tsk->signal != current->signal); | 1535 | BUG_ON(tsk->signal != current->signal); |
1637 | } while (tsk != current); | 1536 | } while (tsk != current); |
1638 | |||
1639 | read_unlock(&tasklist_lock); | 1537 | read_unlock(&tasklist_lock); |
1538 | |||
1640 | if (flag) { | 1539 | if (flag) { |
1641 | retval = 0; | ||
1642 | if (options & WNOHANG) | 1540 | if (options & WNOHANG) |
1643 | goto end; | 1541 | goto end; |
1644 | retval = -ERESTARTSYS; | 1542 | retval = -ERESTARTSYS; |
@@ -1648,14 +1546,12 @@ check_continued: | |||
1648 | goto repeat; | 1546 | goto repeat; |
1649 | } | 1547 | } |
1650 | retval = -ECHILD; | 1548 | retval = -ECHILD; |
1651 | if (unlikely(denied) && !allowed) | ||
1652 | retval = denied; | ||
1653 | end: | 1549 | end: |
1654 | current->state = TASK_RUNNING; | 1550 | current->state = TASK_RUNNING; |
1655 | remove_wait_queue(¤t->signal->wait_chldexit,&wait); | 1551 | remove_wait_queue(¤t->signal->wait_chldexit,&wait); |
1656 | if (infop) { | 1552 | if (infop) { |
1657 | if (retval > 0) | 1553 | if (retval > 0) |
1658 | retval = 0; | 1554 | retval = 0; |
1659 | else { | 1555 | else { |
1660 | /* | 1556 | /* |
1661 | * For a WNOHANG return, clear out all the fields | 1557 | * For a WNOHANG return, clear out all the fields |
@@ -1679,10 +1575,12 @@ end: | |||
1679 | return retval; | 1575 | return retval; |
1680 | } | 1576 | } |
1681 | 1577 | ||
1682 | asmlinkage long sys_waitid(int which, pid_t pid, | 1578 | asmlinkage long sys_waitid(int which, pid_t upid, |
1683 | struct siginfo __user *infop, int options, | 1579 | struct siginfo __user *infop, int options, |
1684 | struct rusage __user *ru) | 1580 | struct rusage __user *ru) |
1685 | { | 1581 | { |
1582 | struct pid *pid = NULL; | ||
1583 | enum pid_type type; | ||
1686 | long ret; | 1584 | long ret; |
1687 | 1585 | ||
1688 | if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED)) | 1586 | if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED)) |
@@ -1692,37 +1590,58 @@ asmlinkage long sys_waitid(int which, pid_t pid, | |||
1692 | 1590 | ||
1693 | switch (which) { | 1591 | switch (which) { |
1694 | case P_ALL: | 1592 | case P_ALL: |
1695 | pid = -1; | 1593 | type = PIDTYPE_MAX; |
1696 | break; | 1594 | break; |
1697 | case P_PID: | 1595 | case P_PID: |
1698 | if (pid <= 0) | 1596 | type = PIDTYPE_PID; |
1597 | if (upid <= 0) | ||
1699 | return -EINVAL; | 1598 | return -EINVAL; |
1700 | break; | 1599 | break; |
1701 | case P_PGID: | 1600 | case P_PGID: |
1702 | if (pid <= 0) | 1601 | type = PIDTYPE_PGID; |
1602 | if (upid <= 0) | ||
1703 | return -EINVAL; | 1603 | return -EINVAL; |
1704 | pid = -pid; | ||
1705 | break; | 1604 | break; |
1706 | default: | 1605 | default: |
1707 | return -EINVAL; | 1606 | return -EINVAL; |
1708 | } | 1607 | } |
1709 | 1608 | ||
1710 | ret = do_wait(pid, options, infop, NULL, ru); | 1609 | if (type < PIDTYPE_MAX) |
1610 | pid = find_get_pid(upid); | ||
1611 | ret = do_wait(type, pid, options, infop, NULL, ru); | ||
1612 | put_pid(pid); | ||
1711 | 1613 | ||
1712 | /* avoid REGPARM breakage on x86: */ | 1614 | /* avoid REGPARM breakage on x86: */ |
1713 | prevent_tail_call(ret); | 1615 | prevent_tail_call(ret); |
1714 | return ret; | 1616 | return ret; |
1715 | } | 1617 | } |
1716 | 1618 | ||
1717 | asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, | 1619 | asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr, |
1718 | int options, struct rusage __user *ru) | 1620 | int options, struct rusage __user *ru) |
1719 | { | 1621 | { |
1622 | struct pid *pid = NULL; | ||
1623 | enum pid_type type; | ||
1720 | long ret; | 1624 | long ret; |
1721 | 1625 | ||
1722 | if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| | 1626 | if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| |
1723 | __WNOTHREAD|__WCLONE|__WALL)) | 1627 | __WNOTHREAD|__WCLONE|__WALL)) |
1724 | return -EINVAL; | 1628 | return -EINVAL; |
1725 | ret = do_wait(pid, options | WEXITED, NULL, stat_addr, ru); | 1629 | |
1630 | if (upid == -1) | ||
1631 | type = PIDTYPE_MAX; | ||
1632 | else if (upid < 0) { | ||
1633 | type = PIDTYPE_PGID; | ||
1634 | pid = find_get_pid(-upid); | ||
1635 | } else if (upid == 0) { | ||
1636 | type = PIDTYPE_PGID; | ||
1637 | pid = get_pid(task_pgrp(current)); | ||
1638 | } else /* upid > 0 */ { | ||
1639 | type = PIDTYPE_PID; | ||
1640 | pid = find_get_pid(upid); | ||
1641 | } | ||
1642 | |||
1643 | ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru); | ||
1644 | put_pid(pid); | ||
1726 | 1645 | ||
1727 | /* avoid REGPARM breakage on x86: */ | 1646 | /* avoid REGPARM breakage on x86: */ |
1728 | prevent_tail_call(ret); | 1647 | prevent_tail_call(ret); |
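The exit.c hunks above replace the old signed-pid convention inside do_wait() with an explicit (pid type, struct pid) pair, and sys_wait4()/sys_waitid() now do the decoding up front. As a reading aid, here is a minimal user-space sketch of that decoding; the enum and function names are illustrative stand-ins, not kernel identifiers.

	#include <stdio.h>

	/* Illustrative stand-ins for PIDTYPE_MAX / PIDTYPE_PGID / PIDTYPE_PID. */
	enum wait_type { WAIT_ANY, WAIT_OWN_PGRP, WAIT_PGRP, WAIT_ONE_PID };

	/* Mirrors how sys_wait4() now classifies its pid argument before
	 * handing a (type, struct pid *) pair to do_wait(). */
	static enum wait_type classify_wait_target(long upid, long *id)
	{
		if (upid == -1)
			return WAIT_ANY;	/* any child */
		if (upid < 0) {
			*id = -upid;		/* any child in process group -upid */
			return WAIT_PGRP;
		}
		if (upid == 0)
			return WAIT_OWN_PGRP;	/* any child in the caller's own group */
		*id = upid;			/* exactly the child with this pid */
		return WAIT_ONE_PID;
	}

	int main(void)
	{
		long id = 0;
		enum wait_type t = classify_wait_target(-5, &id);

		printf("type=%d id=%ld\n", (int)t, id);	/* WAIT_PGRP, id == 5 */
		return 0;
	}

The kernel version additionally pins the target with find_get_pid()/get_pid() and drops the reference with put_pid() once do_wait() returns.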
diff --git a/kernel/fork.c b/kernel/fork.c index 05e0b6f4365b..dd249c37b3a3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/ptrace.h> | 40 | #include <linux/ptrace.h> |
41 | #include <linux/mount.h> | 41 | #include <linux/mount.h> |
42 | #include <linux/audit.h> | 42 | #include <linux/audit.h> |
43 | #include <linux/memcontrol.h> | ||
43 | #include <linux/profile.h> | 44 | #include <linux/profile.h> |
44 | #include <linux/rmap.h> | 45 | #include <linux/rmap.h> |
45 | #include <linux/acct.h> | 46 | #include <linux/acct.h> |
@@ -325,7 +326,7 @@ static inline int mm_alloc_pgd(struct mm_struct * mm) | |||
325 | 326 | ||
326 | static inline void mm_free_pgd(struct mm_struct * mm) | 327 | static inline void mm_free_pgd(struct mm_struct * mm) |
327 | { | 328 | { |
328 | pgd_free(mm->pgd); | 329 | pgd_free(mm, mm->pgd); |
329 | } | 330 | } |
330 | #else | 331 | #else |
331 | #define dup_mmap(mm, oldmm) (0) | 332 | #define dup_mmap(mm, oldmm) (0) |
@@ -340,7 +341,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); | |||
340 | 341 | ||
341 | #include <linux/init_task.h> | 342 | #include <linux/init_task.h> |
342 | 343 | ||
343 | static struct mm_struct * mm_init(struct mm_struct * mm) | 344 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) |
344 | { | 345 | { |
345 | atomic_set(&mm->mm_users, 1); | 346 | atomic_set(&mm->mm_users, 1); |
346 | atomic_set(&mm->mm_count, 1); | 347 | atomic_set(&mm->mm_count, 1); |
@@ -357,11 +358,14 @@ static struct mm_struct * mm_init(struct mm_struct * mm) | |||
357 | mm->ioctx_list = NULL; | 358 | mm->ioctx_list = NULL; |
358 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 359 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
359 | mm->cached_hole_size = ~0UL; | 360 | mm->cached_hole_size = ~0UL; |
361 | mm_init_cgroup(mm, p); | ||
360 | 362 | ||
361 | if (likely(!mm_alloc_pgd(mm))) { | 363 | if (likely(!mm_alloc_pgd(mm))) { |
362 | mm->def_flags = 0; | 364 | mm->def_flags = 0; |
363 | return mm; | 365 | return mm; |
364 | } | 366 | } |
367 | |||
368 | mm_free_cgroup(mm); | ||
365 | free_mm(mm); | 369 | free_mm(mm); |
366 | return NULL; | 370 | return NULL; |
367 | } | 371 | } |
@@ -376,7 +380,7 @@ struct mm_struct * mm_alloc(void) | |||
376 | mm = allocate_mm(); | 380 | mm = allocate_mm(); |
377 | if (mm) { | 381 | if (mm) { |
378 | memset(mm, 0, sizeof(*mm)); | 382 | memset(mm, 0, sizeof(*mm)); |
379 | mm = mm_init(mm); | 383 | mm = mm_init(mm, current); |
380 | } | 384 | } |
381 | return mm; | 385 | return mm; |
382 | } | 386 | } |
@@ -386,10 +390,11 @@ struct mm_struct * mm_alloc(void) | |||
386 | * is dropped: either by a lazy thread or by | 390 | * is dropped: either by a lazy thread or by |
387 | * mmput. Free the page directory and the mm. | 391 | * mmput. Free the page directory and the mm. |
388 | */ | 392 | */ |
389 | void fastcall __mmdrop(struct mm_struct *mm) | 393 | void __mmdrop(struct mm_struct *mm) |
390 | { | 394 | { |
391 | BUG_ON(mm == &init_mm); | 395 | BUG_ON(mm == &init_mm); |
392 | mm_free_pgd(mm); | 396 | mm_free_pgd(mm); |
397 | mm_free_cgroup(mm); | ||
393 | destroy_context(mm); | 398 | destroy_context(mm); |
394 | free_mm(mm); | 399 | free_mm(mm); |
395 | } | 400 | } |
@@ -511,7 +516,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) | |||
511 | mm->token_priority = 0; | 516 | mm->token_priority = 0; |
512 | mm->last_interval = 0; | 517 | mm->last_interval = 0; |
513 | 518 | ||
514 | if (!mm_init(mm)) | 519 | if (!mm_init(mm, tsk)) |
515 | goto fail_nomem; | 520 | goto fail_nomem; |
516 | 521 | ||
517 | if (init_new_context(tsk, mm)) | 522 | if (init_new_context(tsk, mm)) |
@@ -595,16 +600,16 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old) | |||
595 | rwlock_init(&fs->lock); | 600 | rwlock_init(&fs->lock); |
596 | fs->umask = old->umask; | 601 | fs->umask = old->umask; |
597 | read_lock(&old->lock); | 602 | read_lock(&old->lock); |
598 | fs->rootmnt = mntget(old->rootmnt); | 603 | fs->root = old->root; |
599 | fs->root = dget(old->root); | 604 | path_get(&old->root); |
600 | fs->pwdmnt = mntget(old->pwdmnt); | 605 | fs->pwd = old->pwd; |
601 | fs->pwd = dget(old->pwd); | 606 | path_get(&old->pwd); |
602 | if (old->altroot) { | 607 | if (old->altroot.dentry) { |
603 | fs->altrootmnt = mntget(old->altrootmnt); | 608 | fs->altroot = old->altroot; |
604 | fs->altroot = dget(old->altroot); | 609 | path_get(&old->altroot); |
605 | } else { | 610 | } else { |
606 | fs->altrootmnt = NULL; | 611 | fs->altroot.mnt = NULL; |
607 | fs->altroot = NULL; | 612 | fs->altroot.dentry = NULL; |
608 | } | 613 | } |
609 | read_unlock(&old->lock); | 614 | read_unlock(&old->lock); |
610 | } | 615 | } |
@@ -904,7 +909,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
904 | hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 909 | hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
905 | sig->it_real_incr.tv64 = 0; | 910 | sig->it_real_incr.tv64 = 0; |
906 | sig->real_timer.function = it_real_fn; | 911 | sig->real_timer.function = it_real_fn; |
907 | sig->tsk = tsk; | ||
908 | 912 | ||
909 | sig->it_virt_expires = cputime_zero; | 913 | sig->it_virt_expires = cputime_zero; |
910 | sig->it_virt_incr = cputime_zero; | 914 | sig->it_virt_incr = cputime_zero; |
@@ -1118,6 +1122,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1118 | #ifdef CONFIG_SECURITY | 1122 | #ifdef CONFIG_SECURITY |
1119 | p->security = NULL; | 1123 | p->security = NULL; |
1120 | #endif | 1124 | #endif |
1125 | p->cap_bset = current->cap_bset; | ||
1121 | p->io_context = NULL; | 1126 | p->io_context = NULL; |
1122 | p->audit_context = NULL; | 1127 | p->audit_context = NULL; |
1123 | cgroup_fork(p); | 1128 | cgroup_fork(p); |
@@ -1332,6 +1337,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1332 | if (clone_flags & CLONE_NEWPID) | 1337 | if (clone_flags & CLONE_NEWPID) |
1333 | p->nsproxy->pid_ns->child_reaper = p; | 1338 | p->nsproxy->pid_ns->child_reaper = p; |
1334 | 1339 | ||
1340 | p->signal->leader_pid = pid; | ||
1335 | p->signal->tty = current->signal->tty; | 1341 | p->signal->tty = current->signal->tty; |
1336 | set_task_pgrp(p, task_pgrp_nr(current)); | 1342 | set_task_pgrp(p, task_pgrp_nr(current)); |
1337 | set_task_session(p, task_session_nr(current)); | 1343 | set_task_session(p, task_session_nr(current)); |
@@ -1398,7 +1404,7 @@ fork_out: | |||
1398 | return ERR_PTR(retval); | 1404 | return ERR_PTR(retval); |
1399 | } | 1405 | } |
1400 | 1406 | ||
1401 | noinline struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) | 1407 | noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs) |
1402 | { | 1408 | { |
1403 | memset(regs, 0, sizeof(struct pt_regs)); | 1409 | memset(regs, 0, sizeof(struct pt_regs)); |
1404 | return regs; | 1410 | return regs; |
@@ -1450,6 +1456,23 @@ long do_fork(unsigned long clone_flags, | |||
1450 | int trace = 0; | 1456 | int trace = 0; |
1451 | long nr; | 1457 | long nr; |
1452 | 1458 | ||
1459 | /* | ||
1460 | * We hope to recycle these flags after 2.6.26 | ||
1461 | */ | ||
1462 | if (unlikely(clone_flags & CLONE_STOPPED)) { | ||
1463 | static int __read_mostly count = 100; | ||
1464 | |||
1465 | if (count > 0 && printk_ratelimit()) { | ||
1466 | char comm[TASK_COMM_LEN]; | ||
1467 | |||
1468 | count--; | ||
1469 | printk(KERN_INFO "fork(): process `%s' used deprecated " | ||
1470 | "clone flags 0x%lx\n", | ||
1471 | get_task_comm(comm, current), | ||
1472 | clone_flags & CLONE_STOPPED); | ||
1473 | } | ||
1474 | } | ||
1475 | |||
1453 | if (unlikely(current->ptrace)) { | 1476 | if (unlikely(current->ptrace)) { |
1454 | trace = fork_traceflag (clone_flags); | 1477 | trace = fork_traceflag (clone_flags); |
1455 | if (trace) | 1478 | if (trace) |
@@ -1465,13 +1488,7 @@ long do_fork(unsigned long clone_flags, | |||
1465 | if (!IS_ERR(p)) { | 1488 | if (!IS_ERR(p)) { |
1466 | struct completion vfork; | 1489 | struct completion vfork; |
1467 | 1490 | ||
1468 | /* | 1491 | nr = task_pid_vnr(p); |
1469 | * this is enough to call pid_nr_ns here, but this if | ||
1470 | * improves optimisation of regular fork() | ||
1471 | */ | ||
1472 | nr = (clone_flags & CLONE_NEWPID) ? | ||
1473 | task_pid_nr_ns(p, current->nsproxy->pid_ns) : | ||
1474 | task_pid_vnr(p); | ||
1475 | 1492 | ||
1476 | if (clone_flags & CLONE_PARENT_SETTID) | 1493 | if (clone_flags & CLONE_PARENT_SETTID) |
1477 | put_user(nr, parent_tidptr); | 1494 | put_user(nr, parent_tidptr); |
@@ -1492,7 +1509,7 @@ long do_fork(unsigned long clone_flags, | |||
1492 | if (!(clone_flags & CLONE_STOPPED)) | 1509 | if (!(clone_flags & CLONE_STOPPED)) |
1493 | wake_up_new_task(p, clone_flags); | 1510 | wake_up_new_task(p, clone_flags); |
1494 | else | 1511 | else |
1495 | p->state = TASK_STOPPED; | 1512 | __set_task_state(p, TASK_STOPPED); |
1496 | 1513 | ||
1497 | if (unlikely (trace)) { | 1514 | if (unlikely (trace)) { |
1498 | current->ptrace_message = nr; | 1515 | current->ptrace_message = nr; |
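The CLONE_STOPPED warning added to do_fork() combines a hard cap on the total number of messages with printk_ratelimit(), so a misbehaving application cannot flood the log. A stripped-down user-space sketch of the same pattern; ratelimit() here is a stand-in for printk_ratelimit(), not a real API.

	#include <stdio.h>
	#include <time.h>

	/* Stand-in for printk_ratelimit(): allow at most one message per second. */
	static int ratelimit(void)
	{
		static time_t last;
		time_t now = time(NULL);

		if (now == last)
			return 0;
		last = now;
		return 1;
	}

	static void warn_deprecated(const char *what)
	{
		static int budget = 100;	/* never print more than 100 warnings total */

		if (budget > 0 && ratelimit()) {
			budget--;
			fprintf(stderr, "deprecated feature used: %s\n", what);
		}
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++)
			warn_deprecated("CLONE_STOPPED");	/* only one line is printed */
		return 0;
	}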
diff --git a/kernel/futex.c b/kernel/futex.c index a6baaec44b8f..221f2128a437 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -2116,7 +2116,7 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, | |||
2116 | 2116 | ||
2117 | t = timespec_to_ktime(ts); | 2117 | t = timespec_to_ktime(ts); |
2118 | if (cmd == FUTEX_WAIT) | 2118 | if (cmd == FUTEX_WAIT) |
2119 | t = ktime_add(ktime_get(), t); | 2119 | t = ktime_add_safe(ktime_get(), t); |
2120 | tp = &t; | 2120 | tp = &t; |
2121 | } | 2121 | } |
2122 | /* | 2122 | /* |
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 133d558db452..7d5e4b016f39 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
@@ -176,7 +176,7 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, | |||
176 | 176 | ||
177 | t = timespec_to_ktime(ts); | 177 | t = timespec_to_ktime(ts); |
178 | if (cmd == FUTEX_WAIT) | 178 | if (cmd == FUTEX_WAIT) |
179 | t = ktime_add(ktime_get(), t); | 179 | t = ktime_add_safe(ktime_get(), t); |
180 | tp = &t; | 180 | tp = &t; |
181 | } | 181 | } |
182 | if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) | 182 | if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 1069998fe25f..98bee013f71f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -306,7 +306,7 @@ EXPORT_SYMBOL_GPL(ktime_sub_ns); | |||
306 | /* | 306 | /* |
307 | * Divide a ktime value by a nanosecond value | 307 | * Divide a ktime value by a nanosecond value |
308 | */ | 308 | */ |
309 | unsigned long ktime_divns(const ktime_t kt, s64 div) | 309 | u64 ktime_divns(const ktime_t kt, s64 div) |
310 | { | 310 | { |
311 | u64 dclc, inc, dns; | 311 | u64 dclc, inc, dns; |
312 | int sft = 0; | 312 | int sft = 0; |
@@ -321,11 +321,28 @@ unsigned long ktime_divns(const ktime_t kt, s64 div) | |||
321 | dclc >>= sft; | 321 | dclc >>= sft; |
322 | do_div(dclc, (unsigned long) div); | 322 | do_div(dclc, (unsigned long) div); |
323 | 323 | ||
324 | return (unsigned long) dclc; | 324 | return dclc; |
325 | } | 325 | } |
326 | #endif /* BITS_PER_LONG >= 64 */ | 326 | #endif /* BITS_PER_LONG >= 64 */ |
327 | 327 | ||
328 | /* | 328 | /* |
329 | * Add two ktime values and do a safety check for overflow: | ||
330 | */ | ||
331 | ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) | ||
332 | { | ||
333 | ktime_t res = ktime_add(lhs, rhs); | ||
334 | |||
335 | /* | ||
336 | * We use KTIME_SEC_MAX here, the maximum timeout which we can | ||
337 | * return to user space in a timespec: | ||
338 | */ | ||
339 | if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64) | ||
340 | res = ktime_set(KTIME_SEC_MAX, 0); | ||
341 | |||
342 | return res; | ||
343 | } | ||
344 | |||
345 | /* | ||
329 | * Check, whether the timer is on the callback pending list | 346 | * Check, whether the timer is on the callback pending list |
330 | */ | 347 | */ |
331 | static inline int hrtimer_cb_pending(const struct hrtimer *timer) | 348 | static inline int hrtimer_cb_pending(const struct hrtimer *timer) |
@@ -425,6 +442,8 @@ static int hrtimer_reprogram(struct hrtimer *timer, | |||
425 | ktime_t expires = ktime_sub(timer->expires, base->offset); | 442 | ktime_t expires = ktime_sub(timer->expires, base->offset); |
426 | int res; | 443 | int res; |
427 | 444 | ||
445 | WARN_ON_ONCE(timer->expires.tv64 < 0); | ||
446 | |||
428 | /* | 447 | /* |
429 | * When the callback is running, we do not reprogram the clock event | 448 | * When the callback is running, we do not reprogram the clock event |
430 | * device. The timer callback is either running on a different CPU or | 449 | * device. The timer callback is either running on a different CPU or |
@@ -435,6 +454,15 @@ static int hrtimer_reprogram(struct hrtimer *timer, | |||
435 | if (hrtimer_callback_running(timer)) | 454 | if (hrtimer_callback_running(timer)) |
436 | return 0; | 455 | return 0; |
437 | 456 | ||
457 | /* | ||
458 | * CLOCK_REALTIME timer might be requested with an absolute | ||
459 | * expiry time which is less than base->offset. Nothing wrong | ||
460 | * with that; just avoid calling into the tick code, which | ||
461 | * now has objections against negative expiry values. | ||
462 | */ | ||
463 | if (expires.tv64 < 0) | ||
464 | return -ETIME; | ||
465 | |||
438 | if (expires.tv64 >= expires_next->tv64) | 466 | if (expires.tv64 >= expires_next->tv64) |
439 | return 0; | 467 | return 0; |
440 | 468 | ||
@@ -656,10 +684,9 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) | |||
656 | * Forward the timer expiry so it will expire in the future. | 684 | * Forward the timer expiry so it will expire in the future. |
657 | * Returns the number of overruns. | 685 | * Returns the number of overruns. |
658 | */ | 686 | */ |
659 | unsigned long | 687 | u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) |
660 | hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) | ||
661 | { | 688 | { |
662 | unsigned long orun = 1; | 689 | u64 orun = 1; |
663 | ktime_t delta; | 690 | ktime_t delta; |
664 | 691 | ||
665 | delta = ktime_sub(now, timer->expires); | 692 | delta = ktime_sub(now, timer->expires); |
@@ -683,13 +710,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) | |||
683 | */ | 710 | */ |
684 | orun++; | 711 | orun++; |
685 | } | 712 | } |
686 | timer->expires = ktime_add(timer->expires, interval); | 713 | timer->expires = ktime_add_safe(timer->expires, interval); |
687 | /* | ||
688 | * Make sure, that the result did not wrap with a very large | ||
689 | * interval. | ||
690 | */ | ||
691 | if (timer->expires.tv64 < 0) | ||
692 | timer->expires = ktime_set(KTIME_SEC_MAX, 0); | ||
693 | 714 | ||
694 | return orun; | 715 | return orun; |
695 | } | 716 | } |
@@ -840,7 +861,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
840 | new_base = switch_hrtimer_base(timer, base); | 861 | new_base = switch_hrtimer_base(timer, base); |
841 | 862 | ||
842 | if (mode == HRTIMER_MODE_REL) { | 863 | if (mode == HRTIMER_MODE_REL) { |
843 | tim = ktime_add(tim, new_base->get_time()); | 864 | tim = ktime_add_safe(tim, new_base->get_time()); |
844 | /* | 865 | /* |
845 | * CONFIG_TIME_LOW_RES is a temporary way for architectures | 866 | * CONFIG_TIME_LOW_RES is a temporary way for architectures |
846 | * to signal that they simply return xtime in | 867 | * to signal that they simply return xtime in |
@@ -849,16 +870,8 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
849 | * timeouts. This will go away with the GTOD framework. | 870 | * timeouts. This will go away with the GTOD framework. |
850 | */ | 871 | */ |
851 | #ifdef CONFIG_TIME_LOW_RES | 872 | #ifdef CONFIG_TIME_LOW_RES |
852 | tim = ktime_add(tim, base->resolution); | 873 | tim = ktime_add_safe(tim, base->resolution); |
853 | #endif | 874 | #endif |
854 | /* | ||
855 | * Careful here: User space might have asked for a | ||
856 | * very long sleep, so the add above might result in a | ||
857 | * negative number, which enqueues the timer in front | ||
858 | * of the queue. | ||
859 | */ | ||
860 | if (tim.tv64 < 0) | ||
861 | tim.tv64 = KTIME_MAX; | ||
862 | } | 875 | } |
863 | timer->expires = tim; | 876 | timer->expires = tim; |
864 | 877 | ||
@@ -1320,13 +1333,26 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod | |||
1320 | return t->task == NULL; | 1333 | return t->task == NULL; |
1321 | } | 1334 | } |
1322 | 1335 | ||
1336 | static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp) | ||
1337 | { | ||
1338 | struct timespec rmt; | ||
1339 | ktime_t rem; | ||
1340 | |||
1341 | rem = ktime_sub(timer->expires, timer->base->get_time()); | ||
1342 | if (rem.tv64 <= 0) | ||
1343 | return 0; | ||
1344 | rmt = ktime_to_timespec(rem); | ||
1345 | |||
1346 | if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) | ||
1347 | return -EFAULT; | ||
1348 | |||
1349 | return 1; | ||
1350 | } | ||
1351 | |||
1323 | long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | 1352 | long __sched hrtimer_nanosleep_restart(struct restart_block *restart) |
1324 | { | 1353 | { |
1325 | struct hrtimer_sleeper t; | 1354 | struct hrtimer_sleeper t; |
1326 | struct timespec *rmtp; | 1355 | struct timespec __user *rmtp; |
1327 | ktime_t time; | ||
1328 | |||
1329 | restart->fn = do_no_restart_syscall; | ||
1330 | 1356 | ||
1331 | hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS); | 1357 | hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS); |
1332 | t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; | 1358 | t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; |
@@ -1334,26 +1360,22 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |||
1334 | if (do_nanosleep(&t, HRTIMER_MODE_ABS)) | 1360 | if (do_nanosleep(&t, HRTIMER_MODE_ABS)) |
1335 | return 0; | 1361 | return 0; |
1336 | 1362 | ||
1337 | rmtp = (struct timespec *)restart->arg1; | 1363 | rmtp = (struct timespec __user *)restart->arg1; |
1338 | if (rmtp) { | 1364 | if (rmtp) { |
1339 | time = ktime_sub(t.timer.expires, t.timer.base->get_time()); | 1365 | int ret = update_rmtp(&t.timer, rmtp); |
1340 | if (time.tv64 <= 0) | 1366 | if (ret <= 0) |
1341 | return 0; | 1367 | return ret; |
1342 | *rmtp = ktime_to_timespec(time); | ||
1343 | } | 1368 | } |
1344 | 1369 | ||
1345 | restart->fn = hrtimer_nanosleep_restart; | ||
1346 | |||
1347 | /* The other values in restart are already filled in */ | 1370 | /* The other values in restart are already filled in */ |
1348 | return -ERESTART_RESTARTBLOCK; | 1371 | return -ERESTART_RESTARTBLOCK; |
1349 | } | 1372 | } |
1350 | 1373 | ||
1351 | long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp, | 1374 | long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, |
1352 | const enum hrtimer_mode mode, const clockid_t clockid) | 1375 | const enum hrtimer_mode mode, const clockid_t clockid) |
1353 | { | 1376 | { |
1354 | struct restart_block *restart; | 1377 | struct restart_block *restart; |
1355 | struct hrtimer_sleeper t; | 1378 | struct hrtimer_sleeper t; |
1356 | ktime_t rem; | ||
1357 | 1379 | ||
1358 | hrtimer_init(&t.timer, clockid, mode); | 1380 | hrtimer_init(&t.timer, clockid, mode); |
1359 | t.timer.expires = timespec_to_ktime(*rqtp); | 1381 | t.timer.expires = timespec_to_ktime(*rqtp); |
@@ -1365,10 +1387,9 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp, | |||
1365 | return -ERESTARTNOHAND; | 1387 | return -ERESTARTNOHAND; |
1366 | 1388 | ||
1367 | if (rmtp) { | 1389 | if (rmtp) { |
1368 | rem = ktime_sub(t.timer.expires, t.timer.base->get_time()); | 1390 | int ret = update_rmtp(&t.timer, rmtp); |
1369 | if (rem.tv64 <= 0) | 1391 | if (ret <= 0) |
1370 | return 0; | 1392 | return ret; |
1371 | *rmtp = ktime_to_timespec(rem); | ||
1372 | } | 1393 | } |
1373 | 1394 | ||
1374 | restart = ¤t_thread_info()->restart_block; | 1395 | restart = ¤t_thread_info()->restart_block; |
@@ -1384,8 +1405,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp, | |||
1384 | asmlinkage long | 1405 | asmlinkage long |
1385 | sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) | 1406 | sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) |
1386 | { | 1407 | { |
1387 | struct timespec tu, rmt; | 1408 | struct timespec tu; |
1388 | int ret; | ||
1389 | 1409 | ||
1390 | if (copy_from_user(&tu, rqtp, sizeof(tu))) | 1410 | if (copy_from_user(&tu, rqtp, sizeof(tu))) |
1391 | return -EFAULT; | 1411 | return -EFAULT; |
@@ -1393,15 +1413,7 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) | |||
1393 | if (!timespec_valid(&tu)) | 1413 | if (!timespec_valid(&tu)) |
1394 | return -EINVAL; | 1414 | return -EINVAL; |
1395 | 1415 | ||
1396 | ret = hrtimer_nanosleep(&tu, rmtp ? &rmt : NULL, HRTIMER_MODE_REL, | 1416 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); |
1397 | CLOCK_MONOTONIC); | ||
1398 | |||
1399 | if (ret && rmtp) { | ||
1400 | if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) | ||
1401 | return -EFAULT; | ||
1402 | } | ||
1403 | |||
1404 | return ret; | ||
1405 | } | 1417 | } |
1406 | 1418 | ||
1407 | /* | 1419 | /* |
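Most of the hrtimer churn above funnels overflow handling into the new ktime_add_safe() helper, which saturates instead of wrapping when user space asks for an absurdly large timeout. The same idea in a self-contained sketch, with the ktime representation simplified to a signed 64-bit nanosecond count; KTIME_SEC_MAX below is a simplified stand-in for the kernel constant.

	#include <stdint.h>
	#include <stdio.h>

	#define NSEC_PER_SEC	1000000000LL
	#define KTIME_SEC_MAX	(INT64_MAX / NSEC_PER_SEC)	/* simplified stand-in */

	/* Saturating add: if the sum wraps (goes negative or ends up smaller than
	 * either operand), clamp it to KTIME_SEC_MAX seconds. The add is done on
	 * unsigned values to avoid signed-overflow undefined behaviour in plain C. */
	static int64_t ktime_add_safe_ns(int64_t lhs, int64_t rhs)
	{
		int64_t res = (int64_t)((uint64_t)lhs + (uint64_t)rhs);

		if (res < 0 || res < lhs || res < rhs)
			res = KTIME_SEC_MAX * NSEC_PER_SEC;
		return res;
	}

	int main(void)
	{
		int64_t now = 1000 * NSEC_PER_SEC;	/* pretend "now" */
		int64_t huge = INT64_MAX - 10;		/* absurd relative timeout */

		printf("%lld\n", (long long)ktime_add_safe_ns(now, huge));
		return 0;
	}

With every caller clamped this way, hrtimer_reprogram() can simply WARN on a negative expiry instead of each call site open-coding the check.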
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 44019ce30a14..cc54c6276356 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -286,7 +286,7 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq) | |||
286 | * Note: The caller is expected to handle the ack, clear, mask and | 286 | * Note: The caller is expected to handle the ack, clear, mask and |
287 | * unmask issues if necessary. | 287 | * unmask issues if necessary. |
288 | */ | 288 | */ |
289 | void fastcall | 289 | void |
290 | handle_simple_irq(unsigned int irq, struct irq_desc *desc) | 290 | handle_simple_irq(unsigned int irq, struct irq_desc *desc) |
291 | { | 291 | { |
292 | struct irqaction *action; | 292 | struct irqaction *action; |
@@ -327,7 +327,7 @@ out_unlock: | |||
327 | * it after the associated handler has acknowledged the device, so the | 327 | * it after the associated handler has acknowledged the device, so the |
328 | * interrupt line is back to inactive. | 328 | * interrupt line is back to inactive. |
329 | */ | 329 | */ |
330 | void fastcall | 330 | void |
331 | handle_level_irq(unsigned int irq, struct irq_desc *desc) | 331 | handle_level_irq(unsigned int irq, struct irq_desc *desc) |
332 | { | 332 | { |
333 | unsigned int cpu = smp_processor_id(); | 333 | unsigned int cpu = smp_processor_id(); |
@@ -375,7 +375,7 @@ out_unlock: | |||
375 | * for modern forms of interrupt handlers, which handle the flow | 375 | * for modern forms of interrupt handlers, which handle the flow |
376 | * details in hardware, transparently. | 376 | * details in hardware, transparently. |
377 | */ | 377 | */ |
378 | void fastcall | 378 | void |
379 | handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | 379 | handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) |
380 | { | 380 | { |
381 | unsigned int cpu = smp_processor_id(); | 381 | unsigned int cpu = smp_processor_id(); |
@@ -434,7 +434,7 @@ out: | |||
434 | * the handler was running. If all pending interrupts are handled, the | 434 | * the handler was running. If all pending interrupts are handled, the |
435 | * loop is left. | 435 | * loop is left. |
436 | */ | 436 | */ |
437 | void fastcall | 437 | void |
438 | handle_edge_irq(unsigned int irq, struct irq_desc *desc) | 438 | handle_edge_irq(unsigned int irq, struct irq_desc *desc) |
439 | { | 439 | { |
440 | const unsigned int cpu = smp_processor_id(); | 440 | const unsigned int cpu = smp_processor_id(); |
@@ -505,7 +505,7 @@ out_unlock: | |||
505 | * | 505 | * |
506 | * Per CPU interrupts on SMP machines without locking requirements | 506 | * Per CPU interrupts on SMP machines without locking requirements |
507 | */ | 507 | */ |
508 | void fastcall | 508 | void |
509 | handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | 509 | handle_percpu_irq(unsigned int irq, struct irq_desc *desc) |
510 | { | 510 | { |
511 | irqreturn_t action_ret; | 511 | irqreturn_t action_ret; |
@@ -589,3 +589,39 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, | |||
589 | set_irq_chip(irq, chip); | 589 | set_irq_chip(irq, chip); |
590 | __set_irq_handler(irq, handle, 0, name); | 590 | __set_irq_handler(irq, handle, 0, name); |
591 | } | 591 | } |
592 | |||
593 | void __init set_irq_noprobe(unsigned int irq) | ||
594 | { | ||
595 | struct irq_desc *desc; | ||
596 | unsigned long flags; | ||
597 | |||
598 | if (irq >= NR_IRQS) { | ||
599 | printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq); | ||
600 | |||
601 | return; | ||
602 | } | ||
603 | |||
604 | desc = irq_desc + irq; | ||
605 | |||
606 | spin_lock_irqsave(&desc->lock, flags); | ||
607 | desc->status |= IRQ_NOPROBE; | ||
608 | spin_unlock_irqrestore(&desc->lock, flags); | ||
609 | } | ||
610 | |||
611 | void __init set_irq_probe(unsigned int irq) | ||
612 | { | ||
613 | struct irq_desc *desc; | ||
614 | unsigned long flags; | ||
615 | |||
616 | if (irq >= NR_IRQS) { | ||
617 | printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq); | ||
618 | |||
619 | return; | ||
620 | } | ||
621 | |||
622 | desc = irq_desc + irq; | ||
623 | |||
624 | spin_lock_irqsave(&desc->lock, flags); | ||
625 | desc->status &= ~IRQ_NOPROBE; | ||
626 | spin_unlock_irqrestore(&desc->lock, flags); | ||
627 | } | ||
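The new set_irq_noprobe()/set_irq_probe() helpers are intended for architecture or platform setup code that wants to control which lines the IRQ autoprobe logic (probe_irq_on()) may toggle. A hedged usage sketch; the IRQ numbers and the function name are invented for illustration.

	/* Illustrative only: keep autoprobing away from lines that must not be
	 * toggled, and explicitly re-allow it on one that may be. */
	static void __init board_mark_irqs(void)
	{
		set_irq_noprobe(16);	/* hypothetical timer line  */
		set_irq_noprobe(17);	/* hypothetical serial line */
		set_irq_probe(20);	/* probing explicitly allowed here */
	}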
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index dc335ad27525..5fa6198e9139 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -25,7 +25,7 @@ | |||
25 | * | 25 | * |
26 | * Handles spurious and unhandled IRQ's. It also prints a debugmessage. | 26 | * Handles spurious and unhandled IRQ's. It also prints a debugmessage. |
27 | */ | 27 | */ |
28 | void fastcall | 28 | void |
29 | handle_bad_irq(unsigned int irq, struct irq_desc *desc) | 29 | handle_bad_irq(unsigned int irq, struct irq_desc *desc) |
30 | { | 30 | { |
31 | print_irq_desc(irq, desc); | 31 | print_irq_desc(irq, desc); |
@@ -163,7 +163,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) | |||
163 | * This is the original x86 implementation which is used for every | 163 | * This is the original x86 implementation which is used for every |
164 | * interrupt type. | 164 | * interrupt type. |
165 | */ | 165 | */ |
166 | fastcall unsigned int __do_IRQ(unsigned int irq) | 166 | unsigned int __do_IRQ(unsigned int irq) |
167 | { | 167 | { |
168 | struct irq_desc *desc = irq_desc + irq; | 168 | struct irq_desc *desc = irq_desc + irq; |
169 | struct irqaction *action; | 169 | struct irqaction *action; |
diff --git a/kernel/itimer.c b/kernel/itimer.c index 2fab344dbf56..ab982747d9bd 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c | |||
@@ -132,7 +132,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) | |||
132 | struct signal_struct *sig = | 132 | struct signal_struct *sig = |
133 | container_of(timer, struct signal_struct, real_timer); | 133 | container_of(timer, struct signal_struct, real_timer); |
134 | 134 | ||
135 | send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk); | 135 | kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid); |
136 | 136 | ||
137 | return HRTIMER_NORESTART; | 137 | return HRTIMER_NORESTART; |
138 | } | 138 | } |
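For context on the it_real_fn() change: this is the timer behind setitimer(ITIMER_REAL), and the SIGALRM it raises is now addressed via the group leader's struct pid rather than a cached task pointer. A small user-space program that exercises exactly this delivery path:

	#include <signal.h>
	#include <stdio.h>
	#include <sys/time.h>
	#include <unistd.h>

	static volatile sig_atomic_t fired;

	static void on_alarm(int sig)
	{
		(void)sig;
		fired = 1;		/* delivered via the kernel's it_real_fn() path */
	}

	int main(void)
	{
		struct itimerval it = { .it_value = { .tv_sec = 1 } };

		signal(SIGALRM, on_alarm);
		setitimer(ITIMER_REAL, &it, NULL);
		pause();				/* wait for SIGALRM */
		printf("SIGALRM delivered: %d\n", (int)fired);
		return 0;
	}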
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 7dadc71ce516..f091d13def00 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -53,14 +53,6 @@ static inline int is_kernel_inittext(unsigned long addr) | |||
53 | return 0; | 53 | return 0; |
54 | } | 54 | } |
55 | 55 | ||
56 | static inline int is_kernel_extratext(unsigned long addr) | ||
57 | { | ||
58 | if (addr >= (unsigned long)_sextratext | ||
59 | && addr <= (unsigned long)_eextratext) | ||
60 | return 1; | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | static inline int is_kernel_text(unsigned long addr) | 56 | static inline int is_kernel_text(unsigned long addr) |
65 | { | 57 | { |
66 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) | 58 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) |
@@ -80,8 +72,7 @@ static int is_ksym_addr(unsigned long addr) | |||
80 | if (all_var) | 72 | if (all_var) |
81 | return is_kernel(addr); | 73 | return is_kernel(addr); |
82 | 74 | ||
83 | return is_kernel_text(addr) || is_kernel_inittext(addr) || | 75 | return is_kernel_text(addr) || is_kernel_inittext(addr); |
84 | is_kernel_extratext(addr); | ||
85 | } | 76 | } |
86 | 77 | ||
87 | /* expand a compressed symbol data into the resulting uncompressed string, | 78 | /* expand a compressed symbol data into the resulting uncompressed string, |
diff --git a/kernel/kexec.c b/kernel/kexec.c index 9a26eec9eb04..06a0e2775651 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -1361,8 +1361,8 @@ unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void) | |||
1361 | 1361 | ||
1362 | static int __init crash_save_vmcoreinfo_init(void) | 1362 | static int __init crash_save_vmcoreinfo_init(void) |
1363 | { | 1363 | { |
1364 | vmcoreinfo_append_str("OSRELEASE=%s\n", init_uts_ns.name.release); | 1364 | VMCOREINFO_OSRELEASE(init_uts_ns.name.release); |
1365 | vmcoreinfo_append_str("PAGESIZE=%ld\n", PAGE_SIZE); | 1365 | VMCOREINFO_PAGESIZE(PAGE_SIZE); |
1366 | 1366 | ||
1367 | VMCOREINFO_SYMBOL(init_uts_ns); | 1367 | VMCOREINFO_SYMBOL(init_uts_ns); |
1368 | VMCOREINFO_SYMBOL(node_online_map); | 1368 | VMCOREINFO_SYMBOL(node_online_map); |
@@ -1376,15 +1376,15 @@ static int __init crash_save_vmcoreinfo_init(void) | |||
1376 | #ifdef CONFIG_SPARSEMEM | 1376 | #ifdef CONFIG_SPARSEMEM |
1377 | VMCOREINFO_SYMBOL(mem_section); | 1377 | VMCOREINFO_SYMBOL(mem_section); |
1378 | VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); | 1378 | VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); |
1379 | VMCOREINFO_SIZE(mem_section); | 1379 | VMCOREINFO_STRUCT_SIZE(mem_section); |
1380 | VMCOREINFO_OFFSET(mem_section, section_mem_map); | 1380 | VMCOREINFO_OFFSET(mem_section, section_mem_map); |
1381 | #endif | 1381 | #endif |
1382 | VMCOREINFO_SIZE(page); | 1382 | VMCOREINFO_STRUCT_SIZE(page); |
1383 | VMCOREINFO_SIZE(pglist_data); | 1383 | VMCOREINFO_STRUCT_SIZE(pglist_data); |
1384 | VMCOREINFO_SIZE(zone); | 1384 | VMCOREINFO_STRUCT_SIZE(zone); |
1385 | VMCOREINFO_SIZE(free_area); | 1385 | VMCOREINFO_STRUCT_SIZE(free_area); |
1386 | VMCOREINFO_SIZE(list_head); | 1386 | VMCOREINFO_STRUCT_SIZE(list_head); |
1387 | VMCOREINFO_TYPEDEF_SIZE(nodemask_t); | 1387 | VMCOREINFO_SIZE(nodemask_t); |
1388 | VMCOREINFO_OFFSET(page, flags); | 1388 | VMCOREINFO_OFFSET(page, flags); |
1389 | VMCOREINFO_OFFSET(page, _count); | 1389 | VMCOREINFO_OFFSET(page, _count); |
1390 | VMCOREINFO_OFFSET(page, mapping); | 1390 | VMCOREINFO_OFFSET(page, mapping); |
diff --git a/kernel/kmod.c b/kernel/kmod.c index bb7df2a28bd7..22be3ff3f363 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -173,10 +173,7 @@ static int ____call_usermodehelper(void *data) | |||
173 | */ | 173 | */ |
174 | set_user_nice(current, 0); | 174 | set_user_nice(current, 0); |
175 | 175 | ||
176 | retval = -EPERM; | 176 | retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp); |
177 | if (current->fs->root) | ||
178 | retval = kernel_execve(sub_info->path, | ||
179 | sub_info->argv, sub_info->envp); | ||
180 | 177 | ||
181 | /* Exec failed? */ | 178 | /* Exec failed? */ |
182 | sub_info->retval = retval; | 179 | sub_info->retval = retval; |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d0493eafea3e..7a86e6432338 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -699,6 +699,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
699 | struct kretprobe_instance, uflist); | 699 | struct kretprobe_instance, uflist); |
700 | ri->rp = rp; | 700 | ri->rp = rp; |
701 | ri->task = current; | 701 | ri->task = current; |
702 | |||
703 | if (rp->entry_handler && rp->entry_handler(ri, regs)) { | ||
704 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
705 | return 0; | ||
706 | } | ||
707 | |||
702 | arch_prepare_kretprobe(ri, regs); | 708 | arch_prepare_kretprobe(ri, regs); |
703 | 709 | ||
704 | /* XXX(hch): why is there no hlist_move_head? */ | 710 | /* XXX(hch): why is there no hlist_move_head? */ |
@@ -745,7 +751,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
745 | INIT_HLIST_HEAD(&rp->used_instances); | 751 | INIT_HLIST_HEAD(&rp->used_instances); |
746 | INIT_HLIST_HEAD(&rp->free_instances); | 752 | INIT_HLIST_HEAD(&rp->free_instances); |
747 | for (i = 0; i < rp->maxactive; i++) { | 753 | for (i = 0; i < rp->maxactive; i++) { |
748 | inst = kmalloc(sizeof(struct kretprobe_instance), GFP_KERNEL); | 754 | inst = kmalloc(sizeof(struct kretprobe_instance) + |
755 | rp->data_size, GFP_KERNEL); | ||
749 | if (inst == NULL) { | 756 | if (inst == NULL) { |
750 | free_rp_inst(rp); | 757 | free_rp_inst(rp); |
751 | return -ENOMEM; | 758 | return -ENOMEM; |
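The kprobes hunks add two things to kretprobes: an optional entry_handler that can veto the return probe for a particular call, and a per-instance data area sized by data_size. A minimal module sketch of how the two combine to time a function; it assumes the companion header changes from this series (the entry_handler and data_size fields plus the trailing data area in struct kretprobe_instance), and the probed symbol name is only an example.

	#include <linux/module.h>
	#include <linux/kprobes.h>
	#include <linux/ktime.h>
	#include <linux/hrtimer.h>

	/* Per-instance scratch space, sized via .data_size below. */
	struct my_data {
		ktime_t entry_stamp;
	};

	/* Called on function entry; a nonzero return tells the kretprobe core
	 * to skip installing a return probe for this particular call. */
	static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
	{
		struct my_data *data = (struct my_data *)ri->data;

		data->entry_stamp = ktime_get();
		return 0;
	}

	/* Called when the probed function returns. */
	static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
	{
		struct my_data *data = (struct my_data *)ri->data;
		s64 delta = ktime_to_ns(ktime_sub(ktime_get(), data->entry_stamp));

		printk(KERN_INFO "probed call took %lld ns\n", (long long)delta);
		return 0;
	}

	static struct kretprobe my_kretprobe = {
		.handler	= ret_handler,
		.entry_handler	= entry_handler,
		.data_size	= sizeof(struct my_data),
		.maxactive	= 20,
		.kp.symbol_name	= "do_fork",	/* illustrative target */
	};

	static int __init probe_init(void)
	{
		return register_kretprobe(&my_kretprobe);
	}

	static void __exit probe_exit(void)
	{
		unregister_kretprobe(&my_kretprobe);
	}

	module_init(probe_init);
	module_exit(probe_exit);
	MODULE_LICENSE("GPL");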
diff --git a/kernel/latency.c b/kernel/latency.c deleted file mode 100644 index e63fcacb61a7..000000000000 --- a/kernel/latency.c +++ /dev/null | |||
@@ -1,280 +0,0 @@ | |||
1 | /* | ||
2 | * latency.c: Explicit system-wide latency-expectation infrastructure | ||
3 | * | ||
4 | * The purpose of this infrastructure is to allow device drivers to set | ||
5 | * latency constraint they have and to collect and summarize these | ||
6 | * expectations globally. The accumulated result can then be used by | ||
7 | * power management and similar users to make decisions that have | ||
8 | * tradeoffs with a latency component. | ||
9 | * | ||
10 | * An example user of this are the x86 C-states; each higher C state saves | ||
11 | * more power, but has a higher exit latency. For the idle loop power | ||
12 | * code to make a good decision which C-state to use, information about | ||
13 | * acceptable latencies is required. | ||
14 | * | ||
15 | * An example announcer of latency is an audio driver that knows it | ||
16 | * will get an interrupt when the hardware has 200 usec of samples | ||
17 | * left in the DMA buffer; in that case the driver can set a latency | ||
18 | * constraint of, say, 150 usec. | ||
19 | * | ||
20 | * Multiple drivers can each announce their maximum accepted latency, | ||
21 | * to keep these apart, a string-based identifier is used. | ||
22 | * | ||
23 | * | ||
24 | * (C) Copyright 2006 Intel Corporation | ||
25 | * Author: Arjan van de Ven <arjan@linux.intel.com> | ||
26 | * | ||
27 | * This program is free software; you can redistribute it and/or | ||
28 | * modify it under the terms of the GNU General Public License | ||
29 | * as published by the Free Software Foundation; version 2 | ||
30 | * of the License. | ||
31 | */ | ||
32 | |||
33 | #include <linux/latency.h> | ||
34 | #include <linux/list.h> | ||
35 | #include <linux/spinlock.h> | ||
36 | #include <linux/slab.h> | ||
37 | #include <linux/module.h> | ||
38 | #include <linux/notifier.h> | ||
39 | #include <linux/jiffies.h> | ||
40 | #include <asm/atomic.h> | ||
41 | |||
42 | struct latency_info { | ||
43 | struct list_head list; | ||
44 | int usecs; | ||
45 | char *identifier; | ||
46 | }; | ||
47 | |||
48 | /* | ||
49 | * locking rule: all modifications to current_max_latency and | ||
50 | * latency_list need to be done while holding the latency_lock. | ||
51 | * latency_lock needs to be taken _irqsave. | ||
52 | */ | ||
53 | static atomic_t current_max_latency; | ||
54 | static DEFINE_SPINLOCK(latency_lock); | ||
55 | |||
56 | static LIST_HEAD(latency_list); | ||
57 | static BLOCKING_NOTIFIER_HEAD(latency_notifier); | ||
58 | |||
59 | /* | ||
60 | * This function returns the maximum latency allowed, which | ||
61 | * happens to be the minimum of all maximum latencies on the | ||
62 | * list. | ||
63 | */ | ||
64 | static int __find_max_latency(void) | ||
65 | { | ||
66 | int min = INFINITE_LATENCY; | ||
67 | struct latency_info *info; | ||
68 | |||
69 | list_for_each_entry(info, &latency_list, list) { | ||
70 | if (info->usecs < min) | ||
71 | min = info->usecs; | ||
72 | } | ||
73 | return min; | ||
74 | } | ||
75 | |||
76 | /** | ||
77 | * set_acceptable_latency - sets the maximum latency acceptable | ||
78 | * @identifier: string that identifies this driver | ||
79 | * @usecs: maximum acceptable latency for this driver | ||
80 | * | ||
81 | * This function informs the kernel that this device(driver) | ||
82 | * can accept at most usecs latency. This setting is used for | ||
83 | * power management and similar tradeoffs. | ||
84 | * | ||
85 | * This function sleeps and can only be called from process | ||
86 | * context. | ||
87 | * Calling this function with an existing identifier is valid | ||
88 | * and will cause the existing latency setting to be changed. | ||
89 | */ | ||
90 | void set_acceptable_latency(char *identifier, int usecs) | ||
91 | { | ||
92 | struct latency_info *info, *iter; | ||
93 | unsigned long flags; | ||
94 | int found_old = 0; | ||
95 | |||
96 | info = kzalloc(sizeof(struct latency_info), GFP_KERNEL); | ||
97 | if (!info) | ||
98 | return; | ||
99 | info->usecs = usecs; | ||
100 | info->identifier = kstrdup(identifier, GFP_KERNEL); | ||
101 | if (!info->identifier) | ||
102 | goto free_info; | ||
103 | |||
104 | spin_lock_irqsave(&latency_lock, flags); | ||
105 | list_for_each_entry(iter, &latency_list, list) { | ||
106 | if (strcmp(iter->identifier, identifier)==0) { | ||
107 | found_old = 1; | ||
108 | iter->usecs = usecs; | ||
109 | break; | ||
110 | } | ||
111 | } | ||
112 | if (!found_old) | ||
113 | list_add(&info->list, &latency_list); | ||
114 | |||
115 | if (usecs < atomic_read(¤t_max_latency)) | ||
116 | atomic_set(¤t_max_latency, usecs); | ||
117 | |||
118 | spin_unlock_irqrestore(&latency_lock, flags); | ||
119 | |||
120 | blocking_notifier_call_chain(&latency_notifier, | ||
121 | atomic_read(¤t_max_latency), NULL); | ||
122 | |||
123 | /* | ||
124 | * if we inserted the new one, we're done; otherwise there was | ||
125 | * an existing one so we need to free the redundant data | ||
126 | */ | ||
127 | if (!found_old) | ||
128 | return; | ||
129 | |||
130 | kfree(info->identifier); | ||
131 | free_info: | ||
132 | kfree(info); | ||
133 | } | ||
134 | EXPORT_SYMBOL_GPL(set_acceptable_latency); | ||
135 | |||
136 | /** | ||
137 | * modify_acceptable_latency - changes the maximum latency acceptable | ||
138 | * @identifier: string that identifies this driver | ||
139 | * @usecs: maximum acceptable latency for this driver | ||
140 | * | ||
141 | * This function informs the kernel that this device(driver) | ||
142 | * can accept at most usecs latency. This setting is used for | ||
143 | * power management and similar tradeoffs. | ||
144 | * | ||
145 | * This function does not sleep and can be called in any context. | ||
146 | * Trying to use a non-existing identifier silently gets ignored. | ||
147 | * | ||
148 | * Due to the atomic nature of this function, the modified latency | ||
149 | * value will only be used for future decisions; past decisions | ||
150 | * can still lead to longer latencies in the near future. | ||
151 | */ | ||
152 | void modify_acceptable_latency(char *identifier, int usecs) | ||
153 | { | ||
154 | struct latency_info *iter; | ||
155 | unsigned long flags; | ||
156 | |||
157 | spin_lock_irqsave(&latency_lock, flags); | ||
158 | list_for_each_entry(iter, &latency_list, list) { | ||
159 | if (strcmp(iter->identifier, identifier) == 0) { | ||
160 | iter->usecs = usecs; | ||
161 | break; | ||
162 | } | ||
163 | } | ||
164 | if (usecs < atomic_read(¤t_max_latency)) | ||
165 | atomic_set(¤t_max_latency, usecs); | ||
166 | spin_unlock_irqrestore(&latency_lock, flags); | ||
167 | } | ||
168 | EXPORT_SYMBOL_GPL(modify_acceptable_latency); | ||
169 | |||
170 | /** | ||
171 | * remove_acceptable_latency - removes the maximum latency acceptable | ||
172 | * @identifier: string that identifies this driver | ||
173 | * | ||
174 | * This function removes a previously set maximum latency setting | ||
175 | * for the driver and frees up any resources associated with the | ||
176 | * bookkeeping needed for this. | ||
177 | * | ||
178 | * This function does not sleep and can be called in any context. | ||
179 | * Trying to use a non-existing identifier silently gets ignored. | ||
180 | */ | ||
181 | void remove_acceptable_latency(char *identifier) | ||
182 | { | ||
183 | unsigned long flags; | ||
184 | int newmax = 0; | ||
185 | struct latency_info *iter, *temp; | ||
186 | |||
187 | spin_lock_irqsave(&latency_lock, flags); | ||
188 | |||
189 | list_for_each_entry_safe(iter, temp, &latency_list, list) { | ||
190 | if (strcmp(iter->identifier, identifier) == 0) { | ||
191 | list_del(&iter->list); | ||
192 | newmax = iter->usecs; | ||
193 | kfree(iter->identifier); | ||
194 | kfree(iter); | ||
195 | break; | ||
196 | } | ||
197 | } | ||
198 | |||
199 | /* If we just deleted the system wide value, we need to | ||
200 | * recalculate with a full search | ||
201 | */ | ||
202 | if (newmax == atomic_read(¤t_max_latency)) { | ||
203 | newmax = __find_max_latency(); | ||
204 | atomic_set(¤t_max_latency, newmax); | ||
205 | } | ||
206 | spin_unlock_irqrestore(&latency_lock, flags); | ||
207 | } | ||
208 | EXPORT_SYMBOL_GPL(remove_acceptable_latency); | ||
209 | |||
210 | /** | ||
211 | * system_latency_constraint - queries the system wide latency maximum | ||
212 | * | ||
213 | * This function returns the system wide maximum latency in | ||
214 | * microseconds. | ||
215 | * | ||
216 | * This function does not sleep and can be called in any context. | ||
217 | */ | ||
218 | int system_latency_constraint(void) | ||
219 | { | ||
220 | return atomic_read(¤t_max_latency); | ||
221 | } | ||
222 | EXPORT_SYMBOL_GPL(system_latency_constraint); | ||
223 | |||
224 | /** | ||
225 | * synchronize_acceptable_latency - recalculates all latency decisions | ||
226 | * | ||
227 | * This function will cause a callback to various kernel pieces that | ||
228 | * will make those pieces rethink their latency decisions. This implies | ||
229 | * that if there are overlong latencies in hardware state already, those | ||
230 | * latencies are incurred right away. When this call completes, no overlong | ||
231 | * latency decisions should remain active. | ||
232 | * | ||
233 | * The typical use case is after a modify_acceptable_latency() call, | ||
234 | * which in itself is non-blocking and non-synchronizing. | ||
235 | * | ||
236 | * This function blocks and should not be called with locks held. | ||
237 | */ | ||
238 | |||
239 | void synchronize_acceptable_latency(void) | ||
240 | { | ||
241 | blocking_notifier_call_chain(&latency_notifier, | ||
242 | atomic_read(¤t_max_latency), NULL); | ||
243 | } | ||
244 | EXPORT_SYMBOL_GPL(synchronize_acceptable_latency); | ||
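For reference, a minimal sketch of how a driver would use the interface above, assuming the declarations come from <linux/latency.h>; the "mydrv" identifier and the microsecond values are made up:

        #include <linux/latency.h>

        static int mydrv_start(void)
        {
                /* This driver tolerates at most 50 usecs of wakeup latency. */
                set_acceptable_latency("mydrv", 50);
                return 0;
        }

        static void mydrv_relax(void)
        {
                /* Loosen the constraint, then force listeners to re-evaluate now. */
                modify_acceptable_latency("mydrv", 500);
                synchronize_acceptable_latency();
        }

        static void mydrv_stop(void)
        {
                remove_acceptable_latency("mydrv");
        }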
245 | |||
246 | /* | ||
247 | * Latency notifier: this notifier gets called when a non-atomic new | ||
248 | * latency value gets set. The expectation of the caller of the | ||
249 | * non-atomic set is that when the call returns, future latencies | ||
250 | * are within bounds, so the functions on the notifier list are | ||
251 | * expected to take the overlong latencies immediately, inside the | ||
252 | * callback, and not make an overlong latency decision anymore. | ||
253 | * | ||
254 | * The callback gets called when the new latency value is made | ||
255 | * active so system_latency_constraint() returns the new latency. | ||
256 | */ | ||
257 | int register_latency_notifier(struct notifier_block * nb) | ||
258 | { | ||
259 | return blocking_notifier_chain_register(&latency_notifier, nb); | ||
260 | } | ||
261 | EXPORT_SYMBOL_GPL(register_latency_notifier); | ||
262 | |||
263 | int unregister_latency_notifier(struct notifier_block * nb) | ||
264 | { | ||
265 | return blocking_notifier_chain_unregister(&latency_notifier, nb); | ||
266 | } | ||
267 | EXPORT_SYMBOL_GPL(unregister_latency_notifier); | ||
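On the consumer side, code that wants to react when the system-wide constraint changes can hook the chain above; a sketch with hypothetical names (per the call sites above, the notifier value is the new constraint in microseconds):

        #include <linux/notifier.h>

        static int mydrv_latency_event(struct notifier_block *nb,
                                       unsigned long new_max_usecs, void *unused)
        {
                /* Re-evaluate hardware power states against the new bound. */
                return NOTIFY_OK;
        }

        static struct notifier_block mydrv_latency_nb = {
                .notifier_call = mydrv_latency_event,
        };

        /* register_latency_notifier(&mydrv_latency_nb) on setup,
         * unregister_latency_notifier(&mydrv_latency_nb) on teardown. */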
268 | |||
269 | static __init int latency_init(void) | ||
270 | { | ||
271 | atomic_set(¤t_max_latency, INFINITE_LATENCY); | ||
272 | /* | ||
273 | * by default we don't want latencies longer than 2 ticks, | ||
274 | * since that would cause lost ticks | ||
275 | */ | ||
276 | set_acceptable_latency("kernel", 2*1000000/HZ); | ||
277 | return 0; | ||
278 | } | ||
279 | |||
280 | module_init(latency_init); | ||
diff --git a/kernel/marker.c b/kernel/marker.c index 5323cfaedbce..c4c2cd8b61f5 100644 --- a/kernel/marker.c +++ b/kernel/marker.c | |||
@@ -27,35 +27,42 @@ | |||
27 | extern struct marker __start___markers[]; | 27 | extern struct marker __start___markers[]; |
28 | extern struct marker __stop___markers[]; | 28 | extern struct marker __stop___markers[]; |
29 | 29 | ||
30 | /* Set to 1 to enable marker debug output */ | ||
31 | const int marker_debug; | ||
32 | |||
30 | /* | 33 | /* |
31 | * markers_mutex nests inside module_mutex. Markers mutex protects the builtin | 34 | * markers_mutex nests inside module_mutex. Markers mutex protects the builtin |
32 | * and module markers, the hash table and deferred_sync. | 35 | * and module markers and the hash table. |
33 | */ | 36 | */ |
34 | static DEFINE_MUTEX(markers_mutex); | 37 | static DEFINE_MUTEX(markers_mutex); |
35 | 38 | ||
36 | /* | 39 | /* |
37 | * Marker deferred synchronization. | ||
38 | * Upon marker probe_unregister, we delay call to synchronize_sched() to | ||
39 | * accelerate mass unregistration (only when there is no more reference to a | ||
40 | * given module do we call synchronize_sched()). However, we need to make sure | ||
41 | * every critical region has ended before we re-arm a marker that has been | ||
42 | * unregistered and then registered back with a different probe data. | ||
43 | */ | ||
44 | static int deferred_sync; | ||
45 | |||
46 | /* | ||
47 | * Marker hash table, containing the active markers. | 40 | * Marker hash table, containing the active markers. |
48 | * Protected by module_mutex. | 41 | * Protected by module_mutex. |
49 | */ | 42 | */ |
50 | #define MARKER_HASH_BITS 6 | 43 | #define MARKER_HASH_BITS 6 |
51 | #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) | 44 | #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) |
52 | 45 | ||
46 | /* | ||
47 | * Note about RCU : | ||
48 | * It is used to make sure every handler has finished using its private data | ||
49 | * between two consecutive operations (add or remove) on a given marker. It is | ||
50 | * also used to delay freeing the multi-probe array until a quiescent state | ||
51 | * is reached. | ||
52 | * Marker entry modifications are protected by markers_mutex. | ||
53 | */ | ||
53 | struct marker_entry { | 54 | struct marker_entry { |
54 | struct hlist_node hlist; | 55 | struct hlist_node hlist; |
55 | char *format; | 56 | char *format; |
56 | marker_probe_func *probe; | 57 | void (*call)(const struct marker *mdata, /* Probe wrapper */ |
57 | void *private; | 58 | void *call_private, const char *fmt, ...); |
59 | struct marker_probe_closure single; | ||
60 | struct marker_probe_closure *multi; | ||
58 | int refcount; /* Number of times armed. 0 if disarmed. */ | 61 | int refcount; /* Number of times armed. 0 if disarmed. */ |
62 | struct rcu_head rcu; | ||
63 | void *oldptr; | ||
64 | char rcu_pending:1; | ||
65 | char ptype:1; | ||
59 | char name[0]; /* Contains name'\0'format'\0' */ | 66 | char name[0]; /* Contains name'\0'format'\0' */ |
60 | }; | 67 | }; |
61 | 68 | ||
@@ -63,7 +70,8 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE]; | |||
63 | 70 | ||
64 | /** | 71 | /** |
65 | * __mark_empty_function - Empty probe callback | 72 | * __mark_empty_function - Empty probe callback |
66 | * @mdata: pointer of type const struct marker | 73 | * @probe_private: probe private data |
74 | * @call_private: call site private data | ||
67 | * @fmt: format string | 75 | * @fmt: format string |
68 | * @...: variable argument list | 76 | * @...: variable argument list |
69 | * | 77 | * |
@@ -72,13 +80,267 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE]; | |||
72 | * though the function pointer change and the marker enabling are two distinct | 80 | * though the function pointer change and the marker enabling are two distinct |
73 | * operations that modifies the execution flow of preemptible code. | 81 | * operations that modifies the execution flow of preemptible code. |
74 | */ | 82 | */ |
75 | void __mark_empty_function(const struct marker *mdata, void *private, | 83 | void __mark_empty_function(void *probe_private, void *call_private, |
76 | const char *fmt, ...) | 84 | const char *fmt, va_list *args) |
77 | { | 85 | { |
78 | } | 86 | } |
79 | EXPORT_SYMBOL_GPL(__mark_empty_function); | 87 | EXPORT_SYMBOL_GPL(__mark_empty_function); |
80 | 88 | ||
81 | /* | 89 | /* |
90 | * marker_probe_cb Callback that prepares the variable argument list for probes. | ||
91 | * @mdata: pointer of type struct marker | ||
92 | * @call_private: caller site private data | ||
93 | * @fmt: format string | ||
94 | * @...: Variable argument list. | ||
95 | * | ||
96 | * Since we do not use "typical" pointer based RCU in the 1 argument case, we | ||
97 | * need to put a full smp_rmb() in this branch. This is why we do not use | ||
98 | * rcu_dereference() for the pointer read. | ||
99 | */ | ||
100 | void marker_probe_cb(const struct marker *mdata, void *call_private, | ||
101 | const char *fmt, ...) | ||
102 | { | ||
103 | va_list args; | ||
104 | char ptype; | ||
105 | |||
106 | /* | ||
107 | * Disable preemption to make sure the callbacks can be torn down | ||
108 | * correctly when they live in modules, and to ensure RCU read | ||
109 | * coherency. | ||
110 | */ | ||
111 | preempt_disable(); | ||
112 | ptype = ACCESS_ONCE(mdata->ptype); | ||
113 | if (likely(!ptype)) { | ||
114 | marker_probe_func *func; | ||
115 | /* Must read the ptype before ptr. They are not data dependent, | ||
116 | * so we put an explicit smp_rmb() here. */ | ||
117 | smp_rmb(); | ||
118 | func = ACCESS_ONCE(mdata->single.func); | ||
119 | /* Must read the ptr before private data. They are not data | ||
120 | * dependent, so we put an explicit smp_rmb() here. */ | ||
121 | smp_rmb(); | ||
122 | va_start(args, fmt); | ||
123 | func(mdata->single.probe_private, call_private, fmt, &args); | ||
124 | va_end(args); | ||
125 | } else { | ||
126 | struct marker_probe_closure *multi; | ||
127 | int i; | ||
128 | /* | ||
129 | * multi points to an array, therefore accessing the array | ||
130 | * depends on reading multi. However, even in this case, | ||
131 | * we must ensure that the pointer is read _before_ the array | ||
132 | * data. Same as rcu_dereference, but we need a full smp_rmb() | ||
133 | * in the fast path, so put the explicit barrier here. | ||
134 | */ | ||
135 | smp_read_barrier_depends(); | ||
136 | multi = ACCESS_ONCE(mdata->multi); | ||
137 | for (i = 0; multi[i].func; i++) { | ||
138 | va_start(args, fmt); | ||
139 | multi[i].func(multi[i].probe_private, call_private, fmt, | ||
140 | &args); | ||
141 | va_end(args); | ||
142 | } | ||
143 | } | ||
144 | preempt_enable(); | ||
145 | } | ||
146 | EXPORT_SYMBOL_GPL(marker_probe_cb); | ||
147 | |||
148 | /* | ||
149 | * marker_probe_cb_noarg Callback that does not prepare the variable argument list. | ||
150 | * @mdata: pointer of type struct marker | ||
151 | * @call_private: caller site private data | ||
152 | * @fmt: format string | ||
153 | * @...: Variable argument list. | ||
154 | * | ||
155 | * Should be connected to markers "MARK_NOARGS". | ||
156 | */ | ||
157 | void marker_probe_cb_noarg(const struct marker *mdata, | ||
158 | void *call_private, const char *fmt, ...) | ||
159 | { | ||
160 | va_list args; /* not initialized */ | ||
161 | char ptype; | ||
162 | |||
163 | preempt_disable(); | ||
164 | ptype = ACCESS_ONCE(mdata->ptype); | ||
165 | if (likely(!ptype)) { | ||
166 | marker_probe_func *func; | ||
167 | /* Must read the ptype before ptr. They are not data dependent, | ||
168 | * so we put an explicit smp_rmb() here. */ | ||
169 | smp_rmb(); | ||
170 | func = ACCESS_ONCE(mdata->single.func); | ||
171 | /* Must read the ptr before private data. They are not data | ||
172 | * dependent, so we put an explicit smp_rmb() here. */ | ||
173 | smp_rmb(); | ||
174 | func(mdata->single.probe_private, call_private, fmt, &args); | ||
175 | } else { | ||
176 | struct marker_probe_closure *multi; | ||
177 | int i; | ||
178 | /* | ||
179 | * multi points to an array, therefore accessing the array | ||
180 | * depends on reading multi. However, even in this case, | ||
181 | * we must ensure that the pointer is read _before_ the array | ||
182 | * data. Same as rcu_dereference, but we need a full smp_rmb() | ||
183 | * in the fast path, so put the explicit barrier here. | ||
184 | */ | ||
185 | smp_read_barrier_depends(); | ||
186 | multi = ACCESS_ONCE(mdata->multi); | ||
187 | for (i = 0; multi[i].func; i++) | ||
188 | multi[i].func(multi[i].probe_private, call_private, fmt, | ||
189 | &args); | ||
190 | } | ||
191 | preempt_enable(); | ||
192 | } | ||
193 | EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); | ||
194 | |||
195 | static void free_old_closure(struct rcu_head *head) | ||
196 | { | ||
197 | struct marker_entry *entry = container_of(head, | ||
198 | struct marker_entry, rcu); | ||
199 | kfree(entry->oldptr); | ||
200 | /* Make sure we free the data before setting the pending flag to 0 */ | ||
201 | smp_wmb(); | ||
202 | entry->rcu_pending = 0; | ||
203 | } | ||
204 | |||
205 | static void debug_print_probes(struct marker_entry *entry) | ||
206 | { | ||
207 | int i; | ||
208 | |||
209 | if (!marker_debug) | ||
210 | return; | ||
211 | |||
212 | if (!entry->ptype) { | ||
213 | printk(KERN_DEBUG "Single probe : %p %p\n", | ||
214 | entry->single.func, | ||
215 | entry->single.probe_private); | ||
216 | } else { | ||
217 | for (i = 0; entry->multi[i].func; i++) | ||
218 | printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, | ||
219 | entry->multi[i].func, | ||
220 | entry->multi[i].probe_private); | ||
221 | } | ||
222 | } | ||
223 | |||
224 | static struct marker_probe_closure * | ||
225 | marker_entry_add_probe(struct marker_entry *entry, | ||
226 | marker_probe_func *probe, void *probe_private) | ||
227 | { | ||
228 | int nr_probes = 0; | ||
229 | struct marker_probe_closure *old, *new; | ||
230 | |||
231 | WARN_ON(!probe); | ||
232 | |||
233 | debug_print_probes(entry); | ||
234 | old = entry->multi; | ||
235 | if (!entry->ptype) { | ||
236 | if (entry->single.func == probe && | ||
237 | entry->single.probe_private == probe_private) | ||
238 | return ERR_PTR(-EBUSY); | ||
239 | if (entry->single.func == __mark_empty_function) { | ||
240 | /* 0 -> 1 probes */ | ||
241 | entry->single.func = probe; | ||
242 | entry->single.probe_private = probe_private; | ||
243 | entry->refcount = 1; | ||
244 | entry->ptype = 0; | ||
245 | debug_print_probes(entry); | ||
246 | return NULL; | ||
247 | } else { | ||
248 | /* 1 -> 2 probes */ | ||
249 | nr_probes = 1; | ||
250 | old = NULL; | ||
251 | } | ||
252 | } else { | ||
253 | /* (N -> N+1), (N != 0, 1) probes */ | ||
254 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) | ||
255 | if (old[nr_probes].func == probe | ||
256 | && old[nr_probes].probe_private | ||
257 | == probe_private) | ||
258 | return ERR_PTR(-EBUSY); | ||
259 | } | ||
260 | /* + 2 : one for new probe, one for NULL func */ | ||
261 | new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), | ||
262 | GFP_KERNEL); | ||
263 | if (new == NULL) | ||
264 | return ERR_PTR(-ENOMEM); | ||
265 | if (!old) | ||
266 | new[0] = entry->single; | ||
267 | else | ||
268 | memcpy(new, old, | ||
269 | nr_probes * sizeof(struct marker_probe_closure)); | ||
270 | new[nr_probes].func = probe; | ||
271 | new[nr_probes].probe_private = probe_private; | ||
272 | entry->refcount = nr_probes + 1; | ||
273 | entry->multi = new; | ||
274 | entry->ptype = 1; | ||
275 | debug_print_probes(entry); | ||
276 | return old; | ||
277 | } | ||
278 | |||
279 | static struct marker_probe_closure * | ||
280 | marker_entry_remove_probe(struct marker_entry *entry, | ||
281 | marker_probe_func *probe, void *probe_private) | ||
282 | { | ||
283 | int nr_probes = 0, nr_del = 0, i; | ||
284 | struct marker_probe_closure *old, *new; | ||
285 | |||
286 | old = entry->multi; | ||
287 | |||
288 | debug_print_probes(entry); | ||
289 | if (!entry->ptype) { | ||
290 | /* 0 -> N is an error */ | ||
291 | WARN_ON(entry->single.func == __mark_empty_function); | ||
292 | /* 1 -> 0 probes */ | ||
293 | WARN_ON(probe && entry->single.func != probe); | ||
294 | WARN_ON(entry->single.probe_private != probe_private); | ||
295 | entry->single.func = __mark_empty_function; | ||
296 | entry->refcount = 0; | ||
297 | entry->ptype = 0; | ||
298 | debug_print_probes(entry); | ||
299 | return NULL; | ||
300 | } else { | ||
301 | /* (N -> M), (N > 1, M >= 0) probes */ | ||
302 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) { | ||
303 | if ((!probe || old[nr_probes].func == probe) | ||
304 | && old[nr_probes].probe_private | ||
305 | == probe_private) | ||
306 | nr_del++; | ||
307 | } | ||
308 | } | ||
309 | |||
310 | if (nr_probes - nr_del == 0) { | ||
311 | /* N -> 0, (N > 1) */ | ||
312 | entry->single.func = __mark_empty_function; | ||
313 | entry->refcount = 0; | ||
314 | entry->ptype = 0; | ||
315 | } else if (nr_probes - nr_del == 1) { | ||
316 | /* N -> 1, (N > 1) */ | ||
317 | for (i = 0; old[i].func; i++) | ||
318 | if ((probe && old[i].func != probe) || | ||
319 | old[i].probe_private != probe_private) | ||
320 | entry->single = old[i]; | ||
321 | entry->refcount = 1; | ||
322 | entry->ptype = 0; | ||
323 | } else { | ||
324 | int j = 0; | ||
325 | /* N -> M, (N > 1, M > 1) */ | ||
326 | /* + 1 for NULL */ | ||
327 | new = kzalloc((nr_probes - nr_del + 1) | ||
328 | * sizeof(struct marker_probe_closure), GFP_KERNEL); | ||
329 | if (new == NULL) | ||
330 | return ERR_PTR(-ENOMEM); | ||
331 | for (i = 0; old[i].func; i++) | ||
332 | if ((probe && old[i].func != probe) || | ||
333 | old[i].probe_private != probe_private) | ||
334 | new[j++] = old[i]; | ||
335 | entry->refcount = nr_probes - nr_del; | ||
336 | entry->ptype = 1; | ||
337 | entry->multi = new; | ||
338 | } | ||
339 | debug_print_probes(entry); | ||
340 | return old; | ||
341 | } | ||
342 | |||
343 | /* | ||
82 | * Get marker if the marker is present in the marker hash table. | 344 | * Get marker if the marker is present in the marker hash table. |
83 | * Must be called with markers_mutex held. | 345 | * Must be called with markers_mutex held. |
84 | * Returns NULL if not present. | 346 | * Returns NULL if not present. |
@@ -102,8 +364,7 @@ static struct marker_entry *get_marker(const char *name) | |||
102 | * Add the marker to the marker hash table. Must be called with markers_mutex | 364 | * Add the marker to the marker hash table. Must be called with markers_mutex |
103 | * held. | 365 | * held. |
104 | */ | 366 | */ |
105 | static int add_marker(const char *name, const char *format, | 367 | static struct marker_entry *add_marker(const char *name, const char *format) |
106 | marker_probe_func *probe, void *private) | ||
107 | { | 368 | { |
108 | struct hlist_head *head; | 369 | struct hlist_head *head; |
109 | struct hlist_node *node; | 370 | struct hlist_node *node; |
@@ -118,9 +379,8 @@ static int add_marker(const char *name, const char *format, | |||
118 | hlist_for_each_entry(e, node, head, hlist) { | 379 | hlist_for_each_entry(e, node, head, hlist) { |
119 | if (!strcmp(name, e->name)) { | 380 | if (!strcmp(name, e->name)) { |
120 | printk(KERN_NOTICE | 381 | printk(KERN_NOTICE |
121 | "Marker %s busy, probe %p already installed\n", | 382 | "Marker %s busy\n", name); |
122 | name, e->probe); | 383 | return ERR_PTR(-EBUSY); /* Already there */ |
123 | return -EBUSY; /* Already there */ | ||
124 | } | 384 | } |
125 | } | 385 | } |
126 | /* | 386 | /* |
@@ -130,34 +390,42 @@ static int add_marker(const char *name, const char *format, | |||
130 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, | 390 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, |
131 | GFP_KERNEL); | 391 | GFP_KERNEL); |
132 | if (!e) | 392 | if (!e) |
133 | return -ENOMEM; | 393 | return ERR_PTR(-ENOMEM); |
134 | memcpy(&e->name[0], name, name_len); | 394 | memcpy(&e->name[0], name, name_len); |
135 | if (format) { | 395 | if (format) { |
136 | e->format = &e->name[name_len]; | 396 | e->format = &e->name[name_len]; |
137 | memcpy(e->format, format, format_len); | 397 | memcpy(e->format, format, format_len); |
398 | if (strcmp(e->format, MARK_NOARGS) == 0) | ||
399 | e->call = marker_probe_cb_noarg; | ||
400 | else | ||
401 | e->call = marker_probe_cb; | ||
138 | trace_mark(core_marker_format, "name %s format %s", | 402 | trace_mark(core_marker_format, "name %s format %s", |
139 | e->name, e->format); | 403 | e->name, e->format); |
140 | } else | 404 | } else { |
141 | e->format = NULL; | 405 | e->format = NULL; |
142 | e->probe = probe; | 406 | e->call = marker_probe_cb; |
143 | e->private = private; | 407 | } |
408 | e->single.func = __mark_empty_function; | ||
409 | e->single.probe_private = NULL; | ||
410 | e->multi = NULL; | ||
411 | e->ptype = 0; | ||
144 | e->refcount = 0; | 412 | e->refcount = 0; |
413 | e->rcu_pending = 0; | ||
145 | hlist_add_head(&e->hlist, head); | 414 | hlist_add_head(&e->hlist, head); |
146 | return 0; | 415 | return e; |
147 | } | 416 | } |
148 | 417 | ||
149 | /* | 418 | /* |
150 | * Remove the marker from the marker hash table. Must be called with mutex_lock | 419 | * Remove the marker from the marker hash table. Must be called with mutex_lock |
151 | * held. | 420 | * held. |
152 | */ | 421 | */ |
153 | static void *remove_marker(const char *name) | 422 | static int remove_marker(const char *name) |
154 | { | 423 | { |
155 | struct hlist_head *head; | 424 | struct hlist_head *head; |
156 | struct hlist_node *node; | 425 | struct hlist_node *node; |
157 | struct marker_entry *e; | 426 | struct marker_entry *e; |
158 | int found = 0; | 427 | int found = 0; |
159 | size_t len = strlen(name) + 1; | 428 | size_t len = strlen(name) + 1; |
160 | void *private = NULL; | ||
161 | u32 hash = jhash(name, len-1, 0); | 429 | u32 hash = jhash(name, len-1, 0); |
162 | 430 | ||
163 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | 431 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; |
@@ -167,12 +435,16 @@ static void *remove_marker(const char *name) | |||
167 | break; | 435 | break; |
168 | } | 436 | } |
169 | } | 437 | } |
170 | if (found) { | 438 | if (!found) |
171 | private = e->private; | 439 | return -ENOENT; |
172 | hlist_del(&e->hlist); | 440 | if (e->single.func != __mark_empty_function) |
173 | kfree(e); | 441 | return -EBUSY; |
174 | } | 442 | hlist_del(&e->hlist); |
175 | return private; | 443 | /* Make sure the call_rcu has been executed */ |
444 | if (e->rcu_pending) | ||
445 | rcu_barrier(); | ||
446 | kfree(e); | ||
447 | return 0; | ||
176 | } | 448 | } |
177 | 449 | ||
178 | /* | 450 | /* |
@@ -184,6 +456,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format) | |||
184 | size_t name_len = strlen((*entry)->name) + 1; | 456 | size_t name_len = strlen((*entry)->name) + 1; |
185 | size_t format_len = strlen(format) + 1; | 457 | size_t format_len = strlen(format) + 1; |
186 | 458 | ||
459 | |||
187 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, | 460 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, |
188 | GFP_KERNEL); | 461 | GFP_KERNEL); |
189 | if (!e) | 462 | if (!e) |
@@ -191,11 +464,20 @@ static int marker_set_format(struct marker_entry **entry, const char *format) | |||
191 | memcpy(&e->name[0], (*entry)->name, name_len); | 464 | memcpy(&e->name[0], (*entry)->name, name_len); |
192 | e->format = &e->name[name_len]; | 465 | e->format = &e->name[name_len]; |
193 | memcpy(e->format, format, format_len); | 466 | memcpy(e->format, format, format_len); |
194 | e->probe = (*entry)->probe; | 467 | if (strcmp(e->format, MARK_NOARGS) == 0) |
195 | e->private = (*entry)->private; | 468 | e->call = marker_probe_cb_noarg; |
469 | else | ||
470 | e->call = marker_probe_cb; | ||
471 | e->single = (*entry)->single; | ||
472 | e->multi = (*entry)->multi; | ||
473 | e->ptype = (*entry)->ptype; | ||
196 | e->refcount = (*entry)->refcount; | 474 | e->refcount = (*entry)->refcount; |
475 | e->rcu_pending = 0; | ||
197 | hlist_add_before(&e->hlist, &(*entry)->hlist); | 476 | hlist_add_before(&e->hlist, &(*entry)->hlist); |
198 | hlist_del(&(*entry)->hlist); | 477 | hlist_del(&(*entry)->hlist); |
478 | /* Make sure the call_rcu has been executed */ | ||
479 | if ((*entry)->rcu_pending) | ||
480 | rcu_barrier(); | ||
199 | kfree(*entry); | 481 | kfree(*entry); |
200 | *entry = e; | 482 | *entry = e; |
201 | trace_mark(core_marker_format, "name %s format %s", | 483 | trace_mark(core_marker_format, "name %s format %s", |
@@ -206,7 +488,8 @@ static int marker_set_format(struct marker_entry **entry, const char *format) | |||
206 | /* | 488 | /* |
207 | * Sets the probe callback corresponding to one marker. | 489 | * Sets the probe callback corresponding to one marker. |
208 | */ | 490 | */ |
209 | static int set_marker(struct marker_entry **entry, struct marker *elem) | 491 | static int set_marker(struct marker_entry **entry, struct marker *elem, |
492 | int active) | ||
210 | { | 493 | { |
211 | int ret; | 494 | int ret; |
212 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); | 495 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); |
@@ -226,9 +509,43 @@ static int set_marker(struct marker_entry **entry, struct marker *elem) | |||
226 | if (ret) | 509 | if (ret) |
227 | return ret; | 510 | return ret; |
228 | } | 511 | } |
229 | elem->call = (*entry)->probe; | 512 | |
230 | elem->private = (*entry)->private; | 513 | /* |
231 | elem->state = 1; | 514 | * probe_cb setup (statically known) is done here. It is |
515 | * asynchronous with the rest of execution, therefore we only | ||
516 | * pass from a "safe" callback (with argument) to an "unsafe" | ||
517 | * callback (does not set arguments). | ||
518 | */ | ||
519 | elem->call = (*entry)->call; | ||
520 | /* | ||
521 | * Sanity check : | ||
522 | * We only update the single probe private data when the ptr is | ||
523 | * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) | ||
524 | */ | ||
525 | WARN_ON(elem->single.func != __mark_empty_function | ||
526 | && elem->single.probe_private | ||
527 | != (*entry)->single.probe_private && | ||
528 | !elem->ptype); | ||
529 | elem->single.probe_private = (*entry)->single.probe_private; | ||
530 | /* | ||
531 | * Make sure the private data is valid when we update the | ||
532 | * single probe ptr. | ||
533 | */ | ||
534 | smp_wmb(); | ||
535 | elem->single.func = (*entry)->single.func; | ||
536 | /* | ||
537 | * We also make sure that the new probe callbacks array is consistent | ||
538 | * before setting a pointer to it. | ||
539 | */ | ||
540 | rcu_assign_pointer(elem->multi, (*entry)->multi); | ||
541 | /* | ||
542 | * Update the function or multi probe array pointer before setting the | ||
543 | * ptype. | ||
544 | */ | ||
545 | smp_wmb(); | ||
546 | elem->ptype = (*entry)->ptype; | ||
547 | elem->state = active; | ||
548 | |||
232 | return 0; | 549 | return 0; |
233 | } | 550 | } |
234 | 551 | ||
@@ -240,8 +557,12 @@ static int set_marker(struct marker_entry **entry, struct marker *elem) | |||
240 | */ | 557 | */ |
241 | static void disable_marker(struct marker *elem) | 558 | static void disable_marker(struct marker *elem) |
242 | { | 559 | { |
560 | /* leave "call" as is. It is known statically. */ | ||
243 | elem->state = 0; | 561 | elem->state = 0; |
244 | elem->call = __mark_empty_function; | 562 | elem->single.func = __mark_empty_function; |
563 | /* Update the function before setting the ptype */ | ||
564 | smp_wmb(); | ||
565 | elem->ptype = 0; /* single probe */ | ||
245 | /* | 566 | /* |
246 | * Leave the private data and id there, because removal is racy and | 567 | * Leave the private data and id there, because removal is racy and |
247 | * should be done only after a synchronize_sched(). These are never used | 568 | * should be done only after a synchronize_sched(). These are never used |
@@ -253,14 +574,11 @@ static void disable_marker(struct marker *elem) | |||
253 | * marker_update_probe_range - Update a probe range | 574 | * marker_update_probe_range - Update a probe range |
254 | * @begin: beginning of the range | 575 | * @begin: beginning of the range |
255 | * @end: end of the range | 576 | * @end: end of the range |
256 | * @probe_module: module address of the probe being updated | ||
257 | * @refcount: number of references left to the given probe_module (out) | ||
258 | * | 577 | * |
259 | * Updates the probe callback corresponding to a range of markers. | 578 | * Updates the probe callback corresponding to a range of markers. |
260 | */ | 579 | */ |
261 | void marker_update_probe_range(struct marker *begin, | 580 | void marker_update_probe_range(struct marker *begin, |
262 | struct marker *end, struct module *probe_module, | 581 | struct marker *end) |
263 | int *refcount) | ||
264 | { | 582 | { |
265 | struct marker *iter; | 583 | struct marker *iter; |
266 | struct marker_entry *mark_entry; | 584 | struct marker_entry *mark_entry; |
@@ -268,15 +586,12 @@ void marker_update_probe_range(struct marker *begin, | |||
268 | mutex_lock(&markers_mutex); | 586 | mutex_lock(&markers_mutex); |
269 | for (iter = begin; iter < end; iter++) { | 587 | for (iter = begin; iter < end; iter++) { |
270 | mark_entry = get_marker(iter->name); | 588 | mark_entry = get_marker(iter->name); |
271 | if (mark_entry && mark_entry->refcount) { | 589 | if (mark_entry) { |
272 | set_marker(&mark_entry, iter); | 590 | set_marker(&mark_entry, iter, |
591 | !!mark_entry->refcount); | ||
273 | /* | 592 | /* |
274 | * ignore error, continue | 593 | * ignore error, continue |
275 | */ | 594 | */ |
276 | if (probe_module) | ||
277 | if (probe_module == | ||
278 | __module_text_address((unsigned long)mark_entry->probe)) | ||
279 | (*refcount)++; | ||
280 | } else { | 595 | } else { |
281 | disable_marker(iter); | 596 | disable_marker(iter); |
282 | } | 597 | } |
@@ -289,20 +604,27 @@ void marker_update_probe_range(struct marker *begin, | |||
289 | * Issues a synchronize_sched() when no reference to the module passed | 604 | * Issues a synchronize_sched() when no reference to the module passed |
290 | * as parameter is found in the probes so the probe module can be | 605 | * as parameter is found in the probes so the probe module can be |
291 | * safely unloaded from now on. | 606 | * safely unloaded from now on. |
607 | * | ||
608 | * The internal callback is only changed before the first probe is connected. | ||
609 | * Single-probe private data can only be changed on 0 -> 1 and 2 -> 1 | ||
610 | * transitions. All other transitions leave the old private data valid. | ||
611 | * This makes the non-atomic callback/private data updates safe. | ||
612 | * | ||
613 | * "special case" updates : | ||
614 | * 0 -> 1 callback | ||
615 | * 1 -> 0 callback | ||
616 | * 1 -> 2 callbacks | ||
617 | * 2 -> 1 callbacks | ||
618 | * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. | ||
619 | * Side effect: marker_set_format may delete the marker entry (creating a | ||
620 | * replacement). | ||
292 | */ | 621 | */ |
293 | static void marker_update_probes(struct module *probe_module) | 622 | static void marker_update_probes(void) |
294 | { | 623 | { |
295 | int refcount = 0; | ||
296 | |||
297 | /* Core kernel markers */ | 624 | /* Core kernel markers */ |
298 | marker_update_probe_range(__start___markers, | 625 | marker_update_probe_range(__start___markers, __stop___markers); |
299 | __stop___markers, probe_module, &refcount); | ||
300 | /* Markers in modules. */ | 626 | /* Markers in modules. */ |
301 | module_update_markers(probe_module, &refcount); | 627 | module_update_markers(); |
302 | if (probe_module && refcount == 0) { | ||
303 | synchronize_sched(); | ||
304 | deferred_sync = 0; | ||
305 | } | ||
306 | } | 628 | } |
307 | 629 | ||
308 | /** | 630 | /** |
@@ -310,33 +632,49 @@ static void marker_update_probes(struct module *probe_module) | |||
310 | * @name: marker name | 632 | * @name: marker name |
311 | * @format: format string | 633 | * @format: format string |
312 | * @probe: probe handler | 634 | * @probe: probe handler |
313 | * @private: probe private data | 635 | * @probe_private: probe private data |
314 | * | 636 | * |
315 | * private data must be a valid allocated memory address, or NULL. | 637 | * private data must be a valid allocated memory address, or NULL. |
316 | * Returns 0 if ok, error value on error. | 638 | * Returns 0 if ok, error value on error. |
639 | * The probe address must at least be aligned on the architecture pointer size. | ||
317 | */ | 640 | */ |
318 | int marker_probe_register(const char *name, const char *format, | 641 | int marker_probe_register(const char *name, const char *format, |
319 | marker_probe_func *probe, void *private) | 642 | marker_probe_func *probe, void *probe_private) |
320 | { | 643 | { |
321 | struct marker_entry *entry; | 644 | struct marker_entry *entry; |
322 | int ret = 0; | 645 | int ret = 0; |
646 | struct marker_probe_closure *old; | ||
323 | 647 | ||
324 | mutex_lock(&markers_mutex); | 648 | mutex_lock(&markers_mutex); |
325 | entry = get_marker(name); | 649 | entry = get_marker(name); |
326 | if (entry && entry->refcount) { | 650 | if (!entry) { |
327 | ret = -EBUSY; | 651 | entry = add_marker(name, format); |
328 | goto end; | 652 | if (IS_ERR(entry)) { |
329 | } | 653 | ret = PTR_ERR(entry); |
330 | if (deferred_sync) { | 654 | goto end; |
331 | synchronize_sched(); | 655 | } |
332 | deferred_sync = 0; | ||
333 | } | 656 | } |
334 | ret = add_marker(name, format, probe, private); | 657 | /* |
335 | if (ret) | 658 | * If we detect that a call_rcu is pending for this marker, |
659 | * make sure it's executed now. | ||
660 | */ | ||
661 | if (entry->rcu_pending) | ||
662 | rcu_barrier(); | ||
663 | old = marker_entry_add_probe(entry, probe, probe_private); | ||
664 | if (IS_ERR(old)) { | ||
665 | ret = PTR_ERR(old); | ||
336 | goto end; | 666 | goto end; |
667 | } | ||
337 | mutex_unlock(&markers_mutex); | 668 | mutex_unlock(&markers_mutex); |
338 | marker_update_probes(NULL); | 669 | marker_update_probes(); /* may update entry */ |
339 | return ret; | 670 | mutex_lock(&markers_mutex); |
671 | entry = get_marker(name); | ||
672 | WARN_ON(!entry); | ||
673 | entry->oldptr = old; | ||
674 | entry->rcu_pending = 1; | ||
675 | /* write rcu_pending before calling the RCU callback */ | ||
676 | smp_wmb(); | ||
677 | call_rcu(&entry->rcu, free_old_closure); | ||
340 | end: | 678 | end: |
341 | mutex_unlock(&markers_mutex); | 679 | mutex_unlock(&markers_mutex); |
342 | return ret; | 680 | return ret; |
@@ -346,171 +684,166 @@ EXPORT_SYMBOL_GPL(marker_probe_register); | |||
346 | /** | 684 | /** |
347 | * marker_probe_unregister - Disconnect a probe from a marker | 685 | * marker_probe_unregister - Disconnect a probe from a marker |
348 | * @name: marker name | 686 | * @name: marker name |
687 | * @probe: probe function pointer | ||
688 | * @probe_private: probe private data | ||
349 | * | 689 | * |
350 | * Returns the private data given to marker_probe_register, or an ERR_PTR(). | 690 | * Returns the private data given to marker_probe_register, or an ERR_PTR(). |
691 | * We do not need to call a synchronize_sched to make sure the probes have | ||
692 | * finished running before doing a module unload, because the module unload | ||
693 | * itself uses stop_machine(), which ensures that every preempt-disabled section | ||
694 | * has finished. | ||
351 | */ | 695 | */ |
352 | void *marker_probe_unregister(const char *name) | 696 | int marker_probe_unregister(const char *name, |
697 | marker_probe_func *probe, void *probe_private) | ||
353 | { | 698 | { |
354 | struct module *probe_module; | ||
355 | struct marker_entry *entry; | 699 | struct marker_entry *entry; |
356 | void *private; | 700 | struct marker_probe_closure *old; |
701 | int ret = 0; | ||
357 | 702 | ||
358 | mutex_lock(&markers_mutex); | 703 | mutex_lock(&markers_mutex); |
359 | entry = get_marker(name); | 704 | entry = get_marker(name); |
360 | if (!entry) { | 705 | if (!entry) { |
361 | private = ERR_PTR(-ENOENT); | 706 | ret = -ENOENT; |
362 | goto end; | 707 | goto end; |
363 | } | 708 | } |
364 | entry->refcount = 0; | 709 | if (entry->rcu_pending) |
365 | /* In what module is the probe handler ? */ | 710 | rcu_barrier(); |
366 | probe_module = __module_text_address((unsigned long)entry->probe); | 711 | old = marker_entry_remove_probe(entry, probe, probe_private); |
367 | private = remove_marker(name); | ||
368 | deferred_sync = 1; | ||
369 | mutex_unlock(&markers_mutex); | 712 | mutex_unlock(&markers_mutex); |
370 | marker_update_probes(probe_module); | 713 | marker_update_probes(); /* may update entry */ |
371 | return private; | 714 | mutex_lock(&markers_mutex); |
715 | entry = get_marker(name); | ||
716 | entry->oldptr = old; | ||
717 | entry->rcu_pending = 1; | ||
718 | /* write rcu_pending before calling the RCU callback */ | ||
719 | smp_wmb(); | ||
720 | call_rcu(&entry->rcu, free_old_closure); | ||
721 | remove_marker(name); /* Ignore busy error message */ | ||
372 | end: | 722 | end: |
373 | mutex_unlock(&markers_mutex); | 723 | mutex_unlock(&markers_mutex); |
374 | return private; | 724 | return ret; |
375 | } | 725 | } |
376 | EXPORT_SYMBOL_GPL(marker_probe_unregister); | 726 | EXPORT_SYMBOL_GPL(marker_probe_unregister); |
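A minimal sketch of a probe written against the updated register/unregister pair, matching the marker_probe_func signature used by __mark_empty_function above (assumes <linux/marker.h>; the marker name, format string and function names are hypothetical):

        static void my_probe(void *probe_private, void *call_private,
                             const char *fmt, va_list *args)
        {
                /* Consume the marker arguments through *args, as described by fmt. */
        }

        static int my_probe_attach(void)
        {
                return marker_probe_register("subsys_event", "value %d",
                                             my_probe, NULL);
        }

        static void my_probe_detach(void)
        {
                marker_probe_unregister("subsys_event", my_probe, NULL);
        }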
377 | 727 | ||
378 | /** | 728 | static struct marker_entry * |
379 | * marker_probe_unregister_private_data - Disconnect a probe from a marker | 729 | get_marker_from_private_data(marker_probe_func *probe, void *probe_private) |
380 | * @private: probe private data | ||
381 | * | ||
382 | * Unregister a marker by providing the registered private data. | ||
383 | * Returns the private data given to marker_probe_register, or an ERR_PTR(). | ||
384 | */ | ||
385 | void *marker_probe_unregister_private_data(void *private) | ||
386 | { | 730 | { |
387 | struct module *probe_module; | ||
388 | struct hlist_head *head; | ||
389 | struct hlist_node *node; | ||
390 | struct marker_entry *entry; | 731 | struct marker_entry *entry; |
391 | int found = 0; | ||
392 | unsigned int i; | 732 | unsigned int i; |
733 | struct hlist_head *head; | ||
734 | struct hlist_node *node; | ||
393 | 735 | ||
394 | mutex_lock(&markers_mutex); | ||
395 | for (i = 0; i < MARKER_TABLE_SIZE; i++) { | 736 | for (i = 0; i < MARKER_TABLE_SIZE; i++) { |
396 | head = &marker_table[i]; | 737 | head = &marker_table[i]; |
397 | hlist_for_each_entry(entry, node, head, hlist) { | 738 | hlist_for_each_entry(entry, node, head, hlist) { |
398 | if (entry->private == private) { | 739 | if (!entry->ptype) { |
399 | found = 1; | 740 | if (entry->single.func == probe |
400 | goto iter_end; | 741 | && entry->single.probe_private |
742 | == probe_private) | ||
743 | return entry; | ||
744 | } else { | ||
745 | struct marker_probe_closure *closure; | ||
746 | closure = entry->multi; | ||
747 | for (i = 0; closure[i].func; i++) { | ||
748 | if (closure[i].func == probe && | ||
749 | closure[i].probe_private | ||
750 | == probe_private) | ||
751 | return entry; | ||
752 | } | ||
401 | } | 753 | } |
402 | } | 754 | } |
403 | } | 755 | } |
404 | iter_end: | 756 | return NULL; |
405 | if (!found) { | ||
406 | private = ERR_PTR(-ENOENT); | ||
407 | goto end; | ||
408 | } | ||
409 | entry->refcount = 0; | ||
410 | /* In what module is the probe handler ? */ | ||
411 | probe_module = __module_text_address((unsigned long)entry->probe); | ||
412 | private = remove_marker(entry->name); | ||
413 | deferred_sync = 1; | ||
414 | mutex_unlock(&markers_mutex); | ||
415 | marker_update_probes(probe_module); | ||
416 | return private; | ||
417 | end: | ||
418 | mutex_unlock(&markers_mutex); | ||
419 | return private; | ||
420 | } | 757 | } |
421 | EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); | ||
422 | 758 | ||
423 | /** | 759 | /** |
424 | * marker_arm - Arm a marker | 760 | * marker_probe_unregister_private_data - Disconnect a probe from a marker |
425 | * @name: marker name | 761 | * @probe: probe function |
762 | * @probe_private: probe private data | ||
426 | * | 763 | * |
427 | * Activate a marker. It keeps a reference count of the number of | 764 | * Unregister a probe by providing the registered private data. |
428 | * arming/disarming done. | 765 | * Only removes the first marker found in the hash table. |
429 | * Returns 0 if ok, error value on error. | 766 | * Return 0 on success or error value. |
767 | * We do not need to call a synchronize_sched to make sure the probes have | ||
768 | * finished running before doing a module unload, because the module unload | ||
769 | * itself uses stop_machine(), which ensures that every preempt-disabled section | ||
770 | * has finished. | ||
430 | */ | 771 | */ |
431 | int marker_arm(const char *name) | 772 | int marker_probe_unregister_private_data(marker_probe_func *probe, |
773 | void *probe_private) | ||
432 | { | 774 | { |
433 | struct marker_entry *entry; | 775 | struct marker_entry *entry; |
434 | int ret = 0; | 776 | int ret = 0; |
777 | struct marker_probe_closure *old; | ||
435 | 778 | ||
436 | mutex_lock(&markers_mutex); | 779 | mutex_lock(&markers_mutex); |
437 | entry = get_marker(name); | 780 | entry = get_marker_from_private_data(probe, probe_private); |
438 | if (!entry) { | 781 | if (!entry) { |
439 | ret = -ENOENT; | 782 | ret = -ENOENT; |
440 | goto end; | 783 | goto end; |
441 | } | 784 | } |
442 | /* | 785 | if (entry->rcu_pending) |
443 | * Only need to update probes when refcount passes from 0 to 1. | 786 | rcu_barrier(); |
444 | */ | 787 | old = marker_entry_remove_probe(entry, NULL, probe_private); |
445 | if (entry->refcount++) | ||
446 | goto end; | ||
447 | end: | ||
448 | mutex_unlock(&markers_mutex); | 788 | mutex_unlock(&markers_mutex); |
449 | marker_update_probes(NULL); | 789 | marker_update_probes(); /* may update entry */ |
450 | return ret; | ||
451 | } | ||
452 | EXPORT_SYMBOL_GPL(marker_arm); | ||
453 | |||
454 | /** | ||
455 | * marker_disarm - Disarm a marker | ||
456 | * @name: marker name | ||
457 | * | ||
458 | * Disarm a marker. It keeps a reference count of the number of arming/disarming | ||
459 | * done. | ||
460 | * Returns 0 if ok, error value on error. | ||
461 | */ | ||
462 | int marker_disarm(const char *name) | ||
463 | { | ||
464 | struct marker_entry *entry; | ||
465 | int ret = 0; | ||
466 | |||
467 | mutex_lock(&markers_mutex); | 790 | mutex_lock(&markers_mutex); |
468 | entry = get_marker(name); | 791 | entry = get_marker_from_private_data(probe, probe_private); |
469 | if (!entry) { | 792 | WARN_ON(!entry); |
470 | ret = -ENOENT; | 793 | entry->oldptr = old; |
471 | goto end; | 794 | entry->rcu_pending = 1; |
472 | } | 795 | /* write rcu_pending before calling the RCU callback */ |
473 | /* | 796 | smp_wmb(); |
474 | * Only permit decrement refcount if higher than 0. | 797 | call_rcu(&entry->rcu, free_old_closure); |
475 | * Do probe update only on 1 -> 0 transition. | 798 | remove_marker(entry->name); /* Ignore busy error message */ |
476 | */ | ||
477 | if (entry->refcount) { | ||
478 | if (--entry->refcount) | ||
479 | goto end; | ||
480 | } else { | ||
481 | ret = -EPERM; | ||
482 | goto end; | ||
483 | } | ||
484 | end: | 799 | end: |
485 | mutex_unlock(&markers_mutex); | 800 | mutex_unlock(&markers_mutex); |
486 | marker_update_probes(NULL); | ||
487 | return ret; | 801 | return ret; |
488 | } | 802 | } |
489 | EXPORT_SYMBOL_GPL(marker_disarm); | 803 | EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); |
490 | 804 | ||
491 | /** | 805 | /** |
492 | * marker_get_private_data - Get a marker's probe private data | 806 | * marker_get_private_data - Get a marker's probe private data |
493 | * @name: marker name | 807 | * @name: marker name |
808 | * @probe: probe to match | ||
809 | * @num: get the nth matching probe's private data | ||
494 | * | 810 | * |
811 | * Returns the nth private data pointer (starting from 0) matching, or an | ||
812 | * ERR_PTR. | ||
495 | * Returns the private data pointer, or an ERR_PTR. | 813 | * Returns the private data pointer, or an ERR_PTR. |
496 | * The private data pointer should _only_ be dereferenced if the caller is the | 814 | * The private data pointer should _only_ be dereferenced if the caller is the |
497 | * owner of the data, or its content could vanish. This is mostly used to | 815 | * owner of the data, or its content could vanish. This is mostly used to |
498 | * confirm that a caller is the owner of a registered probe. | 816 | * confirm that a caller is the owner of a registered probe. |
499 | */ | 817 | */ |
500 | void *marker_get_private_data(const char *name) | 818 | void *marker_get_private_data(const char *name, marker_probe_func *probe, |
819 | int num) | ||
501 | { | 820 | { |
502 | struct hlist_head *head; | 821 | struct hlist_head *head; |
503 | struct hlist_node *node; | 822 | struct hlist_node *node; |
504 | struct marker_entry *e; | 823 | struct marker_entry *e; |
505 | size_t name_len = strlen(name) + 1; | 824 | size_t name_len = strlen(name) + 1; |
506 | u32 hash = jhash(name, name_len-1, 0); | 825 | u32 hash = jhash(name, name_len-1, 0); |
507 | int found = 0; | 826 | int i; |
508 | 827 | ||
509 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | 828 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; |
510 | hlist_for_each_entry(e, node, head, hlist) { | 829 | hlist_for_each_entry(e, node, head, hlist) { |
511 | if (!strcmp(name, e->name)) { | 830 | if (!strcmp(name, e->name)) { |
512 | found = 1; | 831 | if (!e->ptype) { |
513 | return e->private; | 832 | if (num == 0 && e->single.func == probe) |
833 | return e->single.probe_private; | ||
834 | else | ||
835 | break; | ||
836 | } else { | ||
837 | struct marker_probe_closure *closure; | ||
838 | int match = 0; | ||
839 | closure = e->multi; | ||
840 | for (i = 0; closure[i].func; i++) { | ||
841 | if (closure[i].func != probe) | ||
842 | continue; | ||
843 | if (match++ == num) | ||
844 | return closure[i].probe_private; | ||
845 | } | ||
846 | } | ||
514 | } | 847 | } |
515 | } | 848 | } |
516 | return ERR_PTR(-ENOENT); | 849 | return ERR_PTR(-ENOENT); |
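With the extended lookup above, a caller can confirm it owns a registered probe before trusting the private data; a sketch reusing the hypothetical names from the previous example (num counts matches from 0):

        static bool my_probe_owns(void *expected)
        {
                void *data = marker_get_private_data("subsys_event", my_probe, 0);

                if (IS_ERR(data))
                        return false;
                return data == expected;
        }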
diff --git a/kernel/module.c b/kernel/module.c index bd60278ee703..92595bad3812 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <asm/semaphore.h> | 46 | #include <asm/semaphore.h> |
47 | #include <asm/cacheflush.h> | 47 | #include <asm/cacheflush.h> |
48 | #include <linux/license.h> | 48 | #include <linux/license.h> |
49 | #include <asm/sections.h> | ||
49 | 50 | ||
50 | #if 0 | 51 | #if 0 |
51 | #define DEBUGP printk | 52 | #define DEBUGP printk |
@@ -290,7 +291,7 @@ static unsigned long __find_symbol(const char *name, | |||
290 | } | 291 | } |
291 | } | 292 | } |
292 | DEBUGP("Failed to find symbol %s\n", name); | 293 | DEBUGP("Failed to find symbol %s\n", name); |
293 | return 0; | 294 | return -ENOENT; |
294 | } | 295 | } |
295 | 296 | ||
296 | /* Search for module by name: must hold module_mutex. */ | 297 | /* Search for module by name: must hold module_mutex. */ |
@@ -343,9 +344,6 @@ static inline unsigned int block_size(int val) | |||
343 | return val; | 344 | return val; |
344 | } | 345 | } |
345 | 346 | ||
346 | /* Created by linker magic */ | ||
347 | extern char __per_cpu_start[], __per_cpu_end[]; | ||
348 | |||
349 | static void *percpu_modalloc(unsigned long size, unsigned long align, | 347 | static void *percpu_modalloc(unsigned long size, unsigned long align, |
350 | const char *name) | 348 | const char *name) |
351 | { | 349 | { |
@@ -783,7 +781,7 @@ void __symbol_put(const char *symbol) | |||
783 | const unsigned long *crc; | 781 | const unsigned long *crc; |
784 | 782 | ||
785 | preempt_disable(); | 783 | preempt_disable(); |
786 | if (!__find_symbol(symbol, &owner, &crc, 1)) | 784 | if (IS_ERR_VALUE(__find_symbol(symbol, &owner, &crc, 1))) |
787 | BUG(); | 785 | BUG(); |
788 | module_put(owner); | 786 | module_put(owner); |
789 | preempt_enable(); | 787 | preempt_enable(); |
@@ -929,7 +927,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, | |||
929 | const unsigned long *crc; | 927 | const unsigned long *crc; |
930 | struct module *owner; | 928 | struct module *owner; |
931 | 929 | ||
932 | if (!__find_symbol("struct_module", &owner, &crc, 1)) | 930 | if (IS_ERR_VALUE(__find_symbol("struct_module", |
931 | &owner, &crc, 1))) | ||
933 | BUG(); | 932 | BUG(); |
934 | return check_version(sechdrs, versindex, "struct_module", mod, | 933 | return check_version(sechdrs, versindex, "struct_module", mod, |
935 | crc); | 934 | crc); |
@@ -978,12 +977,12 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, | |||
978 | 977 | ||
979 | ret = __find_symbol(name, &owner, &crc, | 978 | ret = __find_symbol(name, &owner, &crc, |
980 | !(mod->taints & TAINT_PROPRIETARY_MODULE)); | 979 | !(mod->taints & TAINT_PROPRIETARY_MODULE)); |
981 | if (ret) { | 980 | if (!IS_ERR_VALUE(ret)) { |
982 | /* use_module can fail due to OOM, | 981 | /* use_module can fail due to OOM, |
983 | or module initialization or unloading */ | 982 | or module initialization or unloading */ |
984 | if (!check_version(sechdrs, versindex, name, mod, crc) || | 983 | if (!check_version(sechdrs, versindex, name, mod, crc) || |
985 | !use_module(mod, owner)) | 984 | !use_module(mod, owner)) |
986 | ret = 0; | 985 | ret = -EINVAL; |
987 | } | 986 | } |
988 | return ret; | 987 | return ret; |
989 | } | 988 | } |
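The __find_symbol() change above replaces the "0 means not found" convention with an errno encoded in the returned address, so its callers inside module.c move from truthiness tests to IS_ERR_VALUE(); a sketch of the before/after pattern with illustrative variable names:

        unsigned long addr;

        /* Old convention: 0 meant failure, ambiguous for a symbol at address 0. */
        addr = __find_symbol(name, &owner, &crc, gplok);
        if (!addr)
                goto not_found;

        /* New convention: failure is -ENOENT, detected with IS_ERR_VALUE(). */
        addr = __find_symbol(name, &owner, &crc, gplok);
        if (IS_ERR_VALUE(addr))
                goto not_found;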
@@ -1371,7 +1370,9 @@ void *__symbol_get(const char *symbol) | |||
1371 | 1370 | ||
1372 | preempt_disable(); | 1371 | preempt_disable(); |
1373 | value = __find_symbol(symbol, &owner, &crc, 1); | 1372 | value = __find_symbol(symbol, &owner, &crc, 1); |
1374 | if (value && strong_try_module_get(owner) != 0) | 1373 | if (IS_ERR_VALUE(value)) |
1374 | value = 0; | ||
1375 | else if (strong_try_module_get(owner)) | ||
1375 | value = 0; | 1376 | value = 0; |
1376 | preempt_enable(); | 1377 | preempt_enable(); |
1377 | 1378 | ||
@@ -1391,14 +1392,16 @@ static int verify_export_symbols(struct module *mod) | |||
1391 | const unsigned long *crc; | 1392 | const unsigned long *crc; |
1392 | 1393 | ||
1393 | for (i = 0; i < mod->num_syms; i++) | 1394 | for (i = 0; i < mod->num_syms; i++) |
1394 | if (__find_symbol(mod->syms[i].name, &owner, &crc, 1)) { | 1395 | if (!IS_ERR_VALUE(__find_symbol(mod->syms[i].name, |
1396 | &owner, &crc, 1))) { | ||
1395 | name = mod->syms[i].name; | 1397 | name = mod->syms[i].name; |
1396 | ret = -ENOEXEC; | 1398 | ret = -ENOEXEC; |
1397 | goto dup; | 1399 | goto dup; |
1398 | } | 1400 | } |
1399 | 1401 | ||
1400 | for (i = 0; i < mod->num_gpl_syms; i++) | 1402 | for (i = 0; i < mod->num_gpl_syms; i++) |
1401 | if (__find_symbol(mod->gpl_syms[i].name, &owner, &crc, 1)) { | 1403 | if (!IS_ERR_VALUE(__find_symbol(mod->gpl_syms[i].name, |
1404 | &owner, &crc, 1))) { | ||
1402 | name = mod->gpl_syms[i].name; | 1405 | name = mod->gpl_syms[i].name; |
1403 | ret = -ENOEXEC; | 1406 | ret = -ENOEXEC; |
1404 | goto dup; | 1407 | goto dup; |
@@ -1448,7 +1451,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, | |||
1448 | strtab + sym[i].st_name, mod); | 1451 | strtab + sym[i].st_name, mod); |
1449 | 1452 | ||
1450 | /* Ok if resolved. */ | 1453 | /* Ok if resolved. */ |
1451 | if (sym[i].st_value != 0) | 1454 | if (!IS_ERR_VALUE(sym[i].st_value)) |
1452 | break; | 1455 | break; |
1453 | /* Ok if weak. */ | 1456 | /* Ok if weak. */ |
1454 | if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) | 1457 | if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) |
@@ -2035,7 +2038,7 @@ static struct module *load_module(void __user *umod, | |||
2035 | #ifdef CONFIG_MARKERS | 2038 | #ifdef CONFIG_MARKERS |
2036 | if (!mod->taints) | 2039 | if (!mod->taints) |
2037 | marker_update_probe_range(mod->markers, | 2040 | marker_update_probe_range(mod->markers, |
2038 | mod->markers + mod->num_markers, NULL, NULL); | 2041 | mod->markers + mod->num_markers); |
2039 | #endif | 2042 | #endif |
2040 | err = module_finalize(hdr, sechdrs, mod); | 2043 | err = module_finalize(hdr, sechdrs, mod); |
2041 | if (err < 0) | 2044 | if (err < 0) |
@@ -2250,7 +2253,7 @@ static const char *get_ksymbol(struct module *mod, | |||
2250 | 2253 | ||
2251 | /* For kallsyms to ask for address resolution. NULL means not found. Careful | 2254 | /* For kallsyms to ask for address resolution. NULL means not found. Careful |
2252 | * not to lock to avoid deadlock on oopses, simply disable preemption. */ | 2255 | * not to lock to avoid deadlock on oopses, simply disable preemption. */ |
2253 | char *module_address_lookup(unsigned long addr, | 2256 | const char *module_address_lookup(unsigned long addr, |
2254 | unsigned long *size, | 2257 | unsigned long *size, |
2255 | unsigned long *offset, | 2258 | unsigned long *offset, |
2256 | char **modname, | 2259 | char **modname, |
@@ -2275,7 +2278,7 @@ char *module_address_lookup(unsigned long addr, | |||
2275 | ret = namebuf; | 2278 | ret = namebuf; |
2276 | } | 2279 | } |
2277 | preempt_enable(); | 2280 | preempt_enable(); |
2278 | return (char *)ret; | 2281 | return ret; |
2279 | } | 2282 | } |
2280 | 2283 | ||
2281 | int lookup_module_symbol_name(unsigned long addr, char *symname) | 2284 | int lookup_module_symbol_name(unsigned long addr, char *symname) |
@@ -2561,7 +2564,7 @@ EXPORT_SYMBOL(struct_module); | |||
2561 | #endif | 2564 | #endif |
2562 | 2565 | ||
2563 | #ifdef CONFIG_MARKERS | 2566 | #ifdef CONFIG_MARKERS |
2564 | void module_update_markers(struct module *probe_module, int *refcount) | 2567 | void module_update_markers(void) |
2565 | { | 2568 | { |
2566 | struct module *mod; | 2569 | struct module *mod; |
2567 | 2570 | ||
@@ -2569,8 +2572,7 @@ void module_update_markers(struct module *probe_module, int *refcount) | |||
2569 | list_for_each_entry(mod, &modules, list) | 2572 | list_for_each_entry(mod, &modules, list) |
2570 | if (!mod->taints) | 2573 | if (!mod->taints) |
2571 | marker_update_probe_range(mod->markers, | 2574 | marker_update_probe_range(mod->markers, |
2572 | mod->markers + mod->num_markers, | 2575 | mod->markers + mod->num_markers); |
2573 | probe_module, refcount); | ||
2574 | mutex_unlock(&module_mutex); | 2576 | mutex_unlock(&module_mutex); |
2575 | } | 2577 | } |
2576 | #endif | 2578 | #endif |
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index d17436cdea1b..3aaa06c561de 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c | |||
@@ -107,7 +107,7 @@ void debug_mutex_init(struct mutex *lock, const char *name, | |||
107 | * use of the mutex is forbidden. The mutex must not be locked when | 107 | * use of the mutex is forbidden. The mutex must not be locked when |
108 | * this function is called. | 108 | * this function is called. |
109 | */ | 109 | */ |
110 | void fastcall mutex_destroy(struct mutex *lock) | 110 | void mutex_destroy(struct mutex *lock) |
111 | { | 111 | { |
112 | DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)); | 112 | DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)); |
113 | lock->magic = NULL; | 113 | lock->magic = NULL; |
diff --git a/kernel/mutex.c b/kernel/mutex.c index d9ec9b666250..d046a345d365 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -58,7 +58,7 @@ EXPORT_SYMBOL(__mutex_init); | |||
58 | * We also put the fastpath first in the kernel image, to make sure the | 58 | * We also put the fastpath first in the kernel image, to make sure the |
59 | * branch is predicted by the CPU as default-untaken. | 59 | * branch is predicted by the CPU as default-untaken. |
60 | */ | 60 | */ |
61 | static void fastcall noinline __sched | 61 | static void noinline __sched |
62 | __mutex_lock_slowpath(atomic_t *lock_count); | 62 | __mutex_lock_slowpath(atomic_t *lock_count); |
63 | 63 | ||
64 | /*** | 64 | /*** |
@@ -82,7 +82,7 @@ __mutex_lock_slowpath(atomic_t *lock_count); | |||
82 | * | 82 | * |
83 | * This function is similar to (but not equivalent to) down(). | 83 | * This function is similar to (but not equivalent to) down(). |
84 | */ | 84 | */ |
85 | void inline fastcall __sched mutex_lock(struct mutex *lock) | 85 | void inline __sched mutex_lock(struct mutex *lock) |
86 | { | 86 | { |
87 | might_sleep(); | 87 | might_sleep(); |
88 | /* | 88 | /* |
@@ -95,8 +95,7 @@ void inline fastcall __sched mutex_lock(struct mutex *lock) | |||
95 | EXPORT_SYMBOL(mutex_lock); | 95 | EXPORT_SYMBOL(mutex_lock); |
96 | #endif | 96 | #endif |
97 | 97 | ||
98 | static void fastcall noinline __sched | 98 | static noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); |
99 | __mutex_unlock_slowpath(atomic_t *lock_count); | ||
100 | 99 | ||
101 | /*** | 100 | /*** |
102 | * mutex_unlock - release the mutex | 101 | * mutex_unlock - release the mutex |
@@ -109,7 +108,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count); | |||
109 | * | 108 | * |
110 | * This function is similar to (but not equivalent to) up(). | 109 | * This function is similar to (but not equivalent to) up(). |
111 | */ | 110 | */ |
112 | void fastcall __sched mutex_unlock(struct mutex *lock) | 111 | void __sched mutex_unlock(struct mutex *lock) |
113 | { | 112 | { |
114 | /* | 113 | /* |
115 | * The unlocking fastpath is the 0->1 transition from 'locked' | 114 | * The unlocking fastpath is the 0->1 transition from 'locked' |
@@ -234,7 +233,7 @@ EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); | |||
234 | /* | 233 | /* |
235 | * Release the lock, slowpath: | 234 | * Release the lock, slowpath: |
236 | */ | 235 | */ |
237 | static fastcall inline void | 236 | static inline void |
238 | __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | 237 | __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) |
239 | { | 238 | { |
240 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 239 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
@@ -271,7 +270,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | |||
271 | /* | 270 | /* |
272 | * Release the lock, slowpath: | 271 | * Release the lock, slowpath: |
273 | */ | 272 | */ |
274 | static fastcall noinline void | 273 | static noinline void |
275 | __mutex_unlock_slowpath(atomic_t *lock_count) | 274 | __mutex_unlock_slowpath(atomic_t *lock_count) |
276 | { | 275 | { |
277 | __mutex_unlock_common_slowpath(lock_count, 1); | 276 | __mutex_unlock_common_slowpath(lock_count, 1); |
@@ -282,10 +281,10 @@ __mutex_unlock_slowpath(atomic_t *lock_count) | |||
282 | * Here come the less common (and hence less performance-critical) APIs: | 281 | * Here come the less common (and hence less performance-critical) APIs: |
283 | * mutex_lock_interruptible() and mutex_trylock(). | 282 | * mutex_lock_interruptible() and mutex_trylock(). |
284 | */ | 283 | */ |
285 | static int fastcall noinline __sched | 284 | static noinline int __sched |
286 | __mutex_lock_killable_slowpath(atomic_t *lock_count); | 285 | __mutex_lock_killable_slowpath(atomic_t *lock_count); |
287 | 286 | ||
288 | static noinline int fastcall __sched | 287 | static noinline int __sched |
289 | __mutex_lock_interruptible_slowpath(atomic_t *lock_count); | 288 | __mutex_lock_interruptible_slowpath(atomic_t *lock_count); |
290 | 289 | ||
291 | /*** | 290 | /*** |
@@ -299,7 +298,7 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count); | |||
299 | * | 298 | * |
300 | * This function is similar to (but not equivalent to) down_interruptible(). | 299 | * This function is similar to (but not equivalent to) down_interruptible(). |
301 | */ | 300 | */ |
302 | int fastcall __sched mutex_lock_interruptible(struct mutex *lock) | 301 | int __sched mutex_lock_interruptible(struct mutex *lock) |
303 | { | 302 | { |
304 | might_sleep(); | 303 | might_sleep(); |
305 | return __mutex_fastpath_lock_retval | 304 | return __mutex_fastpath_lock_retval |
@@ -308,7 +307,7 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock) | |||
308 | 307 | ||
309 | EXPORT_SYMBOL(mutex_lock_interruptible); | 308 | EXPORT_SYMBOL(mutex_lock_interruptible); |
310 | 309 | ||
311 | int fastcall __sched mutex_lock_killable(struct mutex *lock) | 310 | int __sched mutex_lock_killable(struct mutex *lock) |
312 | { | 311 | { |
313 | might_sleep(); | 312 | might_sleep(); |
314 | return __mutex_fastpath_lock_retval | 313 | return __mutex_fastpath_lock_retval |
@@ -316,7 +315,7 @@ int fastcall __sched mutex_lock_killable(struct mutex *lock) | |||
316 | } | 315 | } |
317 | EXPORT_SYMBOL(mutex_lock_killable); | 316 | EXPORT_SYMBOL(mutex_lock_killable); |
318 | 317 | ||
319 | static void fastcall noinline __sched | 318 | static noinline void __sched |
320 | __mutex_lock_slowpath(atomic_t *lock_count) | 319 | __mutex_lock_slowpath(atomic_t *lock_count) |
321 | { | 320 | { |
322 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 321 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
@@ -324,7 +323,7 @@ __mutex_lock_slowpath(atomic_t *lock_count) | |||
324 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); | 323 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); |
325 | } | 324 | } |
326 | 325 | ||
327 | static int fastcall noinline __sched | 326 | static noinline int __sched |
328 | __mutex_lock_killable_slowpath(atomic_t *lock_count) | 327 | __mutex_lock_killable_slowpath(atomic_t *lock_count) |
329 | { | 328 | { |
330 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 329 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
@@ -332,7 +331,7 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count) | |||
332 | return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_); | 331 | return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_); |
333 | } | 332 | } |
334 | 333 | ||
335 | static noinline int fastcall __sched | 334 | static noinline int __sched |
336 | __mutex_lock_interruptible_slowpath(atomic_t *lock_count) | 335 | __mutex_lock_interruptible_slowpath(atomic_t *lock_count) |
337 | { | 336 | { |
338 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 337 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
@@ -381,7 +380,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) | |||
381 | * This function must not be used in interrupt context. The | 380 | * This function must not be used in interrupt context. The |
382 | * mutex must be released by the same task that acquired it. | 381 | * mutex must be released by the same task that acquired it. |
383 | */ | 382 | */ |
384 | int fastcall __sched mutex_trylock(struct mutex *lock) | 383 | int __sched mutex_trylock(struct mutex *lock) |
385 | { | 384 | { |
386 | return __mutex_fastpath_trylock(&lock->count, | 385 | return __mutex_fastpath_trylock(&lock->count, |
387 | __mutex_trylock_slowpath); | 386 | __mutex_trylock_slowpath); |
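
The mutex.c hunks above only strip the obsolete fastcall annotation; the locking API itself is unchanged. As a reminder of how these entry points are used from the caller's side, here is a minimal, hypothetical sketch (my_lock, my_count and my_increment are illustrative names, not part of this patch):

#include <linux/mutex.h>

static DEFINE_MUTEX(my_lock);           /* statically initialized mutex */
static int my_count;

/* Bump a counter under the mutex; fail with -EINTR if a signal
 * arrives while we are still waiting for the lock. */
static int my_increment(void)
{
        if (mutex_lock_interruptible(&my_lock))
                return -EINTR;
        my_count++;
        mutex_unlock(&my_lock);
        return 0;
}
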
diff --git a/kernel/notifier.c b/kernel/notifier.c index 4253f472f060..643360d1bb14 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/notifier.h> | 4 | #include <linux/notifier.h> |
5 | #include <linux/rcupdate.h> | 5 | #include <linux/rcupdate.h> |
6 | #include <linux/vmalloc.h> | 6 | #include <linux/vmalloc.h> |
7 | #include <linux/reboot.h> | ||
7 | 8 | ||
8 | /* | 9 | /* |
9 | * Notifier list for kernel code which wants to be called | 10 | * Notifier list for kernel code which wants to be called |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 79f871bc0ef4..f5d332cf8c63 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/utsname.h> | 21 | #include <linux/utsname.h> |
22 | #include <linux/pid_namespace.h> | 22 | #include <linux/pid_namespace.h> |
23 | #include <net/net_namespace.h> | 23 | #include <net/net_namespace.h> |
24 | #include <linux/ipc_namespace.h> | ||
24 | 25 | ||
25 | static struct kmem_cache *nsproxy_cachep; | 26 | static struct kmem_cache *nsproxy_cachep; |
26 | 27 | ||
diff --git a/kernel/panic.c b/kernel/panic.c index d9e90cfe3298..24af9f8bac99 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -161,7 +161,7 @@ const char *print_tainted(void) | |||
161 | { | 161 | { |
162 | static char buf[20]; | 162 | static char buf[20]; |
163 | if (tainted) { | 163 | if (tainted) { |
164 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c", | 164 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c", |
165 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', | 165 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', |
166 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', | 166 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', |
167 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', | 167 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', |
@@ -169,7 +169,8 @@ const char *print_tainted(void) | |||
169 | tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', | 169 | tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', |
170 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', | 170 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', |
171 | tainted & TAINT_USER ? 'U' : ' ', | 171 | tainted & TAINT_USER ? 'U' : ' ', |
172 | tainted & TAINT_DIE ? 'D' : ' '); | 172 | tainted & TAINT_DIE ? 'D' : ' ', |
173 | tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' '); | ||
173 | } | 174 | } |
174 | else | 175 | else |
175 | snprintf(buf, sizeof(buf), "Not tainted"); | 176 | snprintf(buf, sizeof(buf), "Not tainted"); |
diff --git a/kernel/params.c b/kernel/params.c index 42fe5e6126c0..afc46a23eb6d 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
@@ -180,12 +180,12 @@ int parse_args(const char *name, | |||
180 | #define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \ | 180 | #define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \ |
181 | int param_set_##name(const char *val, struct kernel_param *kp) \ | 181 | int param_set_##name(const char *val, struct kernel_param *kp) \ |
182 | { \ | 182 | { \ |
183 | char *endp; \ | ||
184 | tmptype l; \ | 183 | tmptype l; \ |
184 | int ret; \ | ||
185 | \ | 185 | \ |
186 | if (!val) return -EINVAL; \ | 186 | if (!val) return -EINVAL; \ |
187 | l = strtolfn(val, &endp, 0); \ | 187 | ret = strtolfn(val, 0, &l); \ |
188 | if (endp == val || ((type)l != l)) \ | 188 | if (ret == -EINVAL || ((type)l != l)) \ |
189 | return -EINVAL; \ | 189 | return -EINVAL; \ |
190 | *((type *)kp->arg) = l; \ | 190 | *((type *)kp->arg) = l; \ |
191 | return 0; \ | 191 | return 0; \ |
@@ -195,13 +195,13 @@ int parse_args(const char *name, | |||
195 | return sprintf(buffer, format, *((type *)kp->arg)); \ | 195 | return sprintf(buffer, format, *((type *)kp->arg)); \ |
196 | } | 196 | } |
197 | 197 | ||
198 | STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, simple_strtoul); | 198 | STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, strict_strtoul); |
199 | STANDARD_PARAM_DEF(short, short, "%hi", long, simple_strtol); | 199 | STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol); |
200 | STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, simple_strtoul); | 200 | STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, strict_strtoul); |
201 | STANDARD_PARAM_DEF(int, int, "%i", long, simple_strtol); | 201 | STANDARD_PARAM_DEF(int, int, "%i", long, strict_strtol); |
202 | STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, simple_strtoul); | 202 | STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul); |
203 | STANDARD_PARAM_DEF(long, long, "%li", long, simple_strtol); | 203 | STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol); |
204 | STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, simple_strtoul); | 204 | STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul); |
205 | 205 | ||
206 | int param_set_charp(const char *val, struct kernel_param *kp) | 206 | int param_set_charp(const char *val, struct kernel_param *kp) |
207 | { | 207 | { |
@@ -272,7 +272,7 @@ static int param_array(const char *name, | |||
272 | unsigned int min, unsigned int max, | 272 | unsigned int min, unsigned int max, |
273 | void *elem, int elemsize, | 273 | void *elem, int elemsize, |
274 | int (*set)(const char *, struct kernel_param *kp), | 274 | int (*set)(const char *, struct kernel_param *kp), |
275 | int *num) | 275 | unsigned int *num) |
276 | { | 276 | { |
277 | int ret; | 277 | int ret; |
278 | struct kernel_param kp; | 278 | struct kernel_param kp; |
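
Because the generated param_set_*() helpers above now go through strict_strtol()/strict_strtoul(), input that is not entirely numeric is rejected with -EINVAL instead of being silently truncated at the first non-digit. A small, hypothetical module parameter illustrates the visible effect (threshold is a made-up name for this sketch):

#include <linux/module.h>
#include <linux/moduleparam.h>

/* "threshold=42" is accepted as before, but "threshold=42junk" now
 * makes param_set_uint() return -EINVAL rather than storing 42. */
static unsigned int threshold = 10;
module_param(threshold, uint, 0644);
MODULE_PARM_DESC(threshold, "example threshold (illustrative only)");
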
diff --git a/kernel/pid.c b/kernel/pid.c index f815455431bf..477691576b33 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -41,7 +41,6 @@ | |||
41 | static struct hlist_head *pid_hash; | 41 | static struct hlist_head *pid_hash; |
42 | static int pidhash_shift; | 42 | static int pidhash_shift; |
43 | struct pid init_struct_pid = INIT_STRUCT_PID; | 43 | struct pid init_struct_pid = INIT_STRUCT_PID; |
44 | static struct kmem_cache *pid_ns_cachep; | ||
45 | 44 | ||
46 | int pid_max = PID_MAX_DEFAULT; | 45 | int pid_max = PID_MAX_DEFAULT; |
47 | 46 | ||
@@ -112,7 +111,7 @@ EXPORT_SYMBOL(is_container_init); | |||
112 | 111 | ||
113 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); | 112 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); |
114 | 113 | ||
115 | static fastcall void free_pidmap(struct pid_namespace *pid_ns, int pid) | 114 | static void free_pidmap(struct pid_namespace *pid_ns, int pid) |
116 | { | 115 | { |
117 | struct pidmap *map = pid_ns->pidmap + pid / BITS_PER_PAGE; | 116 | struct pidmap *map = pid_ns->pidmap + pid / BITS_PER_PAGE; |
118 | int offset = pid & BITS_PER_PAGE_MASK; | 117 | int offset = pid & BITS_PER_PAGE_MASK; |
@@ -181,7 +180,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) | |||
181 | return -1; | 180 | return -1; |
182 | } | 181 | } |
183 | 182 | ||
184 | static int next_pidmap(struct pid_namespace *pid_ns, int last) | 183 | int next_pidmap(struct pid_namespace *pid_ns, int last) |
185 | { | 184 | { |
186 | int offset; | 185 | int offset; |
187 | struct pidmap *map, *end; | 186 | struct pidmap *map, *end; |
@@ -199,7 +198,7 @@ static int next_pidmap(struct pid_namespace *pid_ns, int last) | |||
199 | return -1; | 198 | return -1; |
200 | } | 199 | } |
201 | 200 | ||
202 | fastcall void put_pid(struct pid *pid) | 201 | void put_pid(struct pid *pid) |
203 | { | 202 | { |
204 | struct pid_namespace *ns; | 203 | struct pid_namespace *ns; |
205 | 204 | ||
@@ -221,7 +220,7 @@ static void delayed_put_pid(struct rcu_head *rhp) | |||
221 | put_pid(pid); | 220 | put_pid(pid); |
222 | } | 221 | } |
223 | 222 | ||
224 | fastcall void free_pid(struct pid *pid) | 223 | void free_pid(struct pid *pid) |
225 | { | 224 | { |
226 | /* We can be called with write_lock_irq(&tasklist_lock) held */ | 225 | /* We can be called with write_lock_irq(&tasklist_lock) held */ |
227 | int i; | 226 | int i; |
@@ -287,7 +286,7 @@ out_free: | |||
287 | goto out; | 286 | goto out; |
288 | } | 287 | } |
289 | 288 | ||
290 | struct pid * fastcall find_pid_ns(int nr, struct pid_namespace *ns) | 289 | struct pid *find_pid_ns(int nr, struct pid_namespace *ns) |
291 | { | 290 | { |
292 | struct hlist_node *elem; | 291 | struct hlist_node *elem; |
293 | struct upid *pnr; | 292 | struct upid *pnr; |
@@ -317,7 +316,7 @@ EXPORT_SYMBOL_GPL(find_pid); | |||
317 | /* | 316 | /* |
318 | * attach_pid() must be called with the tasklist_lock write-held. | 317 | * attach_pid() must be called with the tasklist_lock write-held. |
319 | */ | 318 | */ |
320 | int fastcall attach_pid(struct task_struct *task, enum pid_type type, | 319 | int attach_pid(struct task_struct *task, enum pid_type type, |
321 | struct pid *pid) | 320 | struct pid *pid) |
322 | { | 321 | { |
323 | struct pid_link *link; | 322 | struct pid_link *link; |
@@ -329,7 +328,7 @@ int fastcall attach_pid(struct task_struct *task, enum pid_type type, | |||
329 | return 0; | 328 | return 0; |
330 | } | 329 | } |
331 | 330 | ||
332 | void fastcall detach_pid(struct task_struct *task, enum pid_type type) | 331 | void detach_pid(struct task_struct *task, enum pid_type type) |
333 | { | 332 | { |
334 | struct pid_link *link; | 333 | struct pid_link *link; |
335 | struct pid *pid; | 334 | struct pid *pid; |
@@ -349,7 +348,7 @@ void fastcall detach_pid(struct task_struct *task, enum pid_type type) | |||
349 | } | 348 | } |
350 | 349 | ||
351 | /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ | 350 | /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ |
352 | void fastcall transfer_pid(struct task_struct *old, struct task_struct *new, | 351 | void transfer_pid(struct task_struct *old, struct task_struct *new, |
353 | enum pid_type type) | 352 | enum pid_type type) |
354 | { | 353 | { |
355 | new->pids[type].pid = old->pids[type].pid; | 354 | new->pids[type].pid = old->pids[type].pid; |
@@ -357,7 +356,7 @@ void fastcall transfer_pid(struct task_struct *old, struct task_struct *new, | |||
357 | old->pids[type].pid = NULL; | 356 | old->pids[type].pid = NULL; |
358 | } | 357 | } |
359 | 358 | ||
360 | struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type) | 359 | struct task_struct *pid_task(struct pid *pid, enum pid_type type) |
361 | { | 360 | { |
362 | struct task_struct *result = NULL; | 361 | struct task_struct *result = NULL; |
363 | if (pid) { | 362 | if (pid) { |
@@ -368,6 +367,7 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type) | |||
368 | } | 367 | } |
369 | return result; | 368 | return result; |
370 | } | 369 | } |
370 | EXPORT_SYMBOL(pid_task); | ||
371 | 371 | ||
372 | /* | 372 | /* |
373 | * Must be called under rcu_read_lock() or with tasklist_lock read-held. | 373 | * Must be called under rcu_read_lock() or with tasklist_lock read-held. |
@@ -408,7 +408,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type) | |||
408 | return pid; | 408 | return pid; |
409 | } | 409 | } |
410 | 410 | ||
411 | struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type) | 411 | struct task_struct *get_pid_task(struct pid *pid, enum pid_type type) |
412 | { | 412 | { |
413 | struct task_struct *result; | 413 | struct task_struct *result; |
414 | rcu_read_lock(); | 414 | rcu_read_lock(); |
@@ -443,6 +443,12 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) | |||
443 | return nr; | 443 | return nr; |
444 | } | 444 | } |
445 | 445 | ||
446 | pid_t pid_vnr(struct pid *pid) | ||
447 | { | ||
448 | return pid_nr_ns(pid, current->nsproxy->pid_ns); | ||
449 | } | ||
450 | EXPORT_SYMBOL_GPL(pid_vnr); | ||
451 | |||
446 | pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) | 452 | pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) |
447 | { | 453 | { |
448 | return pid_nr_ns(task_pid(tsk), ns); | 454 | return pid_nr_ns(task_pid(tsk), ns); |
@@ -487,180 +493,6 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) | |||
487 | } | 493 | } |
488 | EXPORT_SYMBOL_GPL(find_get_pid); | 494 | EXPORT_SYMBOL_GPL(find_get_pid); |
489 | 495 | ||
490 | struct pid_cache { | ||
491 | int nr_ids; | ||
492 | char name[16]; | ||
493 | struct kmem_cache *cachep; | ||
494 | struct list_head list; | ||
495 | }; | ||
496 | |||
497 | static LIST_HEAD(pid_caches_lh); | ||
498 | static DEFINE_MUTEX(pid_caches_mutex); | ||
499 | |||
500 | /* | ||
501 | * creates the kmem cache to allocate pids from. | ||
502 | * @nr_ids: the number of numerical ids this pid will have to carry | ||
503 | */ | ||
504 | |||
505 | static struct kmem_cache *create_pid_cachep(int nr_ids) | ||
506 | { | ||
507 | struct pid_cache *pcache; | ||
508 | struct kmem_cache *cachep; | ||
509 | |||
510 | mutex_lock(&pid_caches_mutex); | ||
511 | list_for_each_entry (pcache, &pid_caches_lh, list) | ||
512 | if (pcache->nr_ids == nr_ids) | ||
513 | goto out; | ||
514 | |||
515 | pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL); | ||
516 | if (pcache == NULL) | ||
517 | goto err_alloc; | ||
518 | |||
519 | snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); | ||
520 | cachep = kmem_cache_create(pcache->name, | ||
521 | sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), | ||
522 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
523 | if (cachep == NULL) | ||
524 | goto err_cachep; | ||
525 | |||
526 | pcache->nr_ids = nr_ids; | ||
527 | pcache->cachep = cachep; | ||
528 | list_add(&pcache->list, &pid_caches_lh); | ||
529 | out: | ||
530 | mutex_unlock(&pid_caches_mutex); | ||
531 | return pcache->cachep; | ||
532 | |||
533 | err_cachep: | ||
534 | kfree(pcache); | ||
535 | err_alloc: | ||
536 | mutex_unlock(&pid_caches_mutex); | ||
537 | return NULL; | ||
538 | } | ||
539 | |||
540 | #ifdef CONFIG_PID_NS | ||
541 | static struct pid_namespace *create_pid_namespace(int level) | ||
542 | { | ||
543 | struct pid_namespace *ns; | ||
544 | int i; | ||
545 | |||
546 | ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL); | ||
547 | if (ns == NULL) | ||
548 | goto out; | ||
549 | |||
550 | ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
551 | if (!ns->pidmap[0].page) | ||
552 | goto out_free; | ||
553 | |||
554 | ns->pid_cachep = create_pid_cachep(level + 1); | ||
555 | if (ns->pid_cachep == NULL) | ||
556 | goto out_free_map; | ||
557 | |||
558 | kref_init(&ns->kref); | ||
559 | ns->last_pid = 0; | ||
560 | ns->child_reaper = NULL; | ||
561 | ns->level = level; | ||
562 | |||
563 | set_bit(0, ns->pidmap[0].page); | ||
564 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | ||
565 | |||
566 | for (i = 1; i < PIDMAP_ENTRIES; i++) { | ||
567 | ns->pidmap[i].page = 0; | ||
568 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | ||
569 | } | ||
570 | |||
571 | return ns; | ||
572 | |||
573 | out_free_map: | ||
574 | kfree(ns->pidmap[0].page); | ||
575 | out_free: | ||
576 | kmem_cache_free(pid_ns_cachep, ns); | ||
577 | out: | ||
578 | return ERR_PTR(-ENOMEM); | ||
579 | } | ||
580 | |||
581 | static void destroy_pid_namespace(struct pid_namespace *ns) | ||
582 | { | ||
583 | int i; | ||
584 | |||
585 | for (i = 0; i < PIDMAP_ENTRIES; i++) | ||
586 | kfree(ns->pidmap[i].page); | ||
587 | kmem_cache_free(pid_ns_cachep, ns); | ||
588 | } | ||
589 | |||
590 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | ||
591 | { | ||
592 | struct pid_namespace *new_ns; | ||
593 | |||
594 | BUG_ON(!old_ns); | ||
595 | new_ns = get_pid_ns(old_ns); | ||
596 | if (!(flags & CLONE_NEWPID)) | ||
597 | goto out; | ||
598 | |||
599 | new_ns = ERR_PTR(-EINVAL); | ||
600 | if (flags & CLONE_THREAD) | ||
601 | goto out_put; | ||
602 | |||
603 | new_ns = create_pid_namespace(old_ns->level + 1); | ||
604 | if (!IS_ERR(new_ns)) | ||
605 | new_ns->parent = get_pid_ns(old_ns); | ||
606 | |||
607 | out_put: | ||
608 | put_pid_ns(old_ns); | ||
609 | out: | ||
610 | return new_ns; | ||
611 | } | ||
612 | |||
613 | void free_pid_ns(struct kref *kref) | ||
614 | { | ||
615 | struct pid_namespace *ns, *parent; | ||
616 | |||
617 | ns = container_of(kref, struct pid_namespace, kref); | ||
618 | |||
619 | parent = ns->parent; | ||
620 | destroy_pid_namespace(ns); | ||
621 | |||
622 | if (parent != NULL) | ||
623 | put_pid_ns(parent); | ||
624 | } | ||
625 | #endif /* CONFIG_PID_NS */ | ||
626 | |||
627 | void zap_pid_ns_processes(struct pid_namespace *pid_ns) | ||
628 | { | ||
629 | int nr; | ||
630 | int rc; | ||
631 | |||
632 | /* | ||
633 | * The last thread in the cgroup-init thread group is terminating. | ||
634 | * Find remaining pid_ts in the namespace, signal and wait for them | ||
635 | * to exit. | ||
636 | * | ||
637 | * Note: This signals each threads in the namespace - even those that | ||
638 | * belong to the same thread group, To avoid this, we would have | ||
639 | * to walk the entire tasklist looking a processes in this | ||
640 | * namespace, but that could be unnecessarily expensive if the | ||
641 | * pid namespace has just a few processes. Or we need to | ||
642 | * maintain a tasklist for each pid namespace. | ||
643 | * | ||
644 | */ | ||
645 | read_lock(&tasklist_lock); | ||
646 | nr = next_pidmap(pid_ns, 1); | ||
647 | while (nr > 0) { | ||
648 | kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr); | ||
649 | nr = next_pidmap(pid_ns, nr); | ||
650 | } | ||
651 | read_unlock(&tasklist_lock); | ||
652 | |||
653 | do { | ||
654 | clear_thread_flag(TIF_SIGPENDING); | ||
655 | rc = sys_wait4(-1, NULL, __WALL, NULL); | ||
656 | } while (rc != -ECHILD); | ||
657 | |||
658 | |||
659 | /* Child reaper for the pid namespace is going away */ | ||
660 | pid_ns->child_reaper = NULL; | ||
661 | return; | ||
662 | } | ||
663 | |||
664 | /* | 496 | /* |
665 | * The pid hash table is scaled according to the amount of memory in the | 497 | * The pid hash table is scaled according to the amount of memory in the |
666 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or | 498 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or |
@@ -693,9 +525,6 @@ void __init pidmap_init(void) | |||
693 | set_bit(0, init_pid_ns.pidmap[0].page); | 525 | set_bit(0, init_pid_ns.pidmap[0].page); |
694 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); | 526 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); |
695 | 527 | ||
696 | init_pid_ns.pid_cachep = create_pid_cachep(1); | 528 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, |
697 | if (init_pid_ns.pid_cachep == NULL) | 529 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); |
698 | panic("Can't create pid_1 cachep\n"); | ||
699 | |||
700 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | ||
701 | } | 530 | } |
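
The pid.c changes export pid_task() and introduce pid_vnr(), which translates a struct pid into the number the calling process would use for it. Two hypothetical helpers sketch the intended usage; the reference counting mirrors get_pid_task() above, and the function names are illustrative:

#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

/* Numeric id of @owner as the calling process sees it; 0 if the pid
 * is not visible in the caller's namespace. */
static pid_t my_owner_nr(struct pid *owner)
{
        return pid_vnr(owner);
}

/* Resolve @owner to its task and take a reference, RCU-safe. */
static struct task_struct *my_owner_task(struct pid *owner)
{
        struct task_struct *task;

        rcu_read_lock();
        task = pid_task(owner, PIDTYPE_PID);
        if (task)
                get_task_struct(task);  /* caller must put_task_struct() */
        rcu_read_unlock();
        return task;
}
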
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c new file mode 100644 index 000000000000..6d792b66d854 --- /dev/null +++ b/kernel/pid_namespace.c | |||
@@ -0,0 +1,197 @@ | |||
1 | /* | ||
2 | * Pid namespaces | ||
3 | * | ||
4 | * Authors: | ||
5 | * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc. | ||
6 | * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM | ||
7 | * Many thanks to Oleg Nesterov for comments and help | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/pid.h> | ||
12 | #include <linux/pid_namespace.h> | ||
13 | #include <linux/syscalls.h> | ||
14 | #include <linux/err.h> | ||
15 | |||
16 | #define BITS_PER_PAGE (PAGE_SIZE*8) | ||
17 | |||
18 | struct pid_cache { | ||
19 | int nr_ids; | ||
20 | char name[16]; | ||
21 | struct kmem_cache *cachep; | ||
22 | struct list_head list; | ||
23 | }; | ||
24 | |||
25 | static LIST_HEAD(pid_caches_lh); | ||
26 | static DEFINE_MUTEX(pid_caches_mutex); | ||
27 | static struct kmem_cache *pid_ns_cachep; | ||
28 | |||
29 | /* | ||
30 | * creates the kmem cache to allocate pids from. | ||
31 | * @nr_ids: the number of numerical ids this pid will have to carry | ||
32 | */ | ||
33 | |||
34 | static struct kmem_cache *create_pid_cachep(int nr_ids) | ||
35 | { | ||
36 | struct pid_cache *pcache; | ||
37 | struct kmem_cache *cachep; | ||
38 | |||
39 | mutex_lock(&pid_caches_mutex); | ||
40 | list_for_each_entry(pcache, &pid_caches_lh, list) | ||
41 | if (pcache->nr_ids == nr_ids) | ||
42 | goto out; | ||
43 | |||
44 | pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL); | ||
45 | if (pcache == NULL) | ||
46 | goto err_alloc; | ||
47 | |||
48 | snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); | ||
49 | cachep = kmem_cache_create(pcache->name, | ||
50 | sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), | ||
51 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
52 | if (cachep == NULL) | ||
53 | goto err_cachep; | ||
54 | |||
55 | pcache->nr_ids = nr_ids; | ||
56 | pcache->cachep = cachep; | ||
57 | list_add(&pcache->list, &pid_caches_lh); | ||
58 | out: | ||
59 | mutex_unlock(&pid_caches_mutex); | ||
60 | return pcache->cachep; | ||
61 | |||
62 | err_cachep: | ||
63 | kfree(pcache); | ||
64 | err_alloc: | ||
65 | mutex_unlock(&pid_caches_mutex); | ||
66 | return NULL; | ||
67 | } | ||
68 | |||
69 | static struct pid_namespace *create_pid_namespace(int level) | ||
70 | { | ||
71 | struct pid_namespace *ns; | ||
72 | int i; | ||
73 | |||
74 | ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL); | ||
75 | if (ns == NULL) | ||
76 | goto out; | ||
77 | |||
78 | ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
79 | if (!ns->pidmap[0].page) | ||
80 | goto out_free; | ||
81 | |||
82 | ns->pid_cachep = create_pid_cachep(level + 1); | ||
83 | if (ns->pid_cachep == NULL) | ||
84 | goto out_free_map; | ||
85 | |||
86 | kref_init(&ns->kref); | ||
87 | ns->last_pid = 0; | ||
88 | ns->child_reaper = NULL; | ||
89 | ns->level = level; | ||
90 | |||
91 | set_bit(0, ns->pidmap[0].page); | ||
92 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | ||
93 | |||
94 | for (i = 1; i < PIDMAP_ENTRIES; i++) { | ||
95 | ns->pidmap[i].page = 0; | ||
96 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | ||
97 | } | ||
98 | |||
99 | return ns; | ||
100 | |||
101 | out_free_map: | ||
102 | kfree(ns->pidmap[0].page); | ||
103 | out_free: | ||
104 | kmem_cache_free(pid_ns_cachep, ns); | ||
105 | out: | ||
106 | return ERR_PTR(-ENOMEM); | ||
107 | } | ||
108 | |||
109 | static void destroy_pid_namespace(struct pid_namespace *ns) | ||
110 | { | ||
111 | int i; | ||
112 | |||
113 | for (i = 0; i < PIDMAP_ENTRIES; i++) | ||
114 | kfree(ns->pidmap[i].page); | ||
115 | kmem_cache_free(pid_ns_cachep, ns); | ||
116 | } | ||
117 | |||
118 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | ||
119 | { | ||
120 | struct pid_namespace *new_ns; | ||
121 | |||
122 | BUG_ON(!old_ns); | ||
123 | new_ns = get_pid_ns(old_ns); | ||
124 | if (!(flags & CLONE_NEWPID)) | ||
125 | goto out; | ||
126 | |||
127 | new_ns = ERR_PTR(-EINVAL); | ||
128 | if (flags & CLONE_THREAD) | ||
129 | goto out_put; | ||
130 | |||
131 | new_ns = create_pid_namespace(old_ns->level + 1); | ||
132 | if (!IS_ERR(new_ns)) | ||
133 | new_ns->parent = get_pid_ns(old_ns); | ||
134 | |||
135 | out_put: | ||
136 | put_pid_ns(old_ns); | ||
137 | out: | ||
138 | return new_ns; | ||
139 | } | ||
140 | |||
141 | void free_pid_ns(struct kref *kref) | ||
142 | { | ||
143 | struct pid_namespace *ns, *parent; | ||
144 | |||
145 | ns = container_of(kref, struct pid_namespace, kref); | ||
146 | |||
147 | parent = ns->parent; | ||
148 | destroy_pid_namespace(ns); | ||
149 | |||
150 | if (parent != NULL) | ||
151 | put_pid_ns(parent); | ||
152 | } | ||
153 | |||
154 | void zap_pid_ns_processes(struct pid_namespace *pid_ns) | ||
155 | { | ||
156 | int nr; | ||
157 | int rc; | ||
158 | |||
159 | /* | ||
160 | * The last thread in the cgroup-init thread group is terminating. | ||
161 | * Find remaining pid_ts in the namespace, signal and wait for them | ||
162 | * to exit. | ||
163 | * | ||
164 | * Note: This signals each thread in the namespace - even those that | ||
165 | * belong to the same thread group. To avoid this, we would have | ||
166 | * to walk the entire tasklist looking for processes in this | ||
167 | * namespace, but that could be unnecessarily expensive if the | ||
168 | * pid namespace has just a few processes. Or we need to | ||
169 | * maintain a tasklist for each pid namespace. | ||
170 | * | ||
171 | */ | ||
172 | read_lock(&tasklist_lock); | ||
173 | nr = next_pidmap(pid_ns, 1); | ||
174 | while (nr > 0) { | ||
175 | kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr); | ||
176 | nr = next_pidmap(pid_ns, nr); | ||
177 | } | ||
178 | read_unlock(&tasklist_lock); | ||
179 | |||
180 | do { | ||
181 | clear_thread_flag(TIF_SIGPENDING); | ||
182 | rc = sys_wait4(-1, NULL, __WALL, NULL); | ||
183 | } while (rc != -ECHILD); | ||
184 | |||
185 | |||
186 | /* Child reaper for the pid namespace is going away */ | ||
187 | pid_ns->child_reaper = NULL; | ||
188 | return; | ||
189 | } | ||
190 | |||
191 | static __init int pid_namespaces_init(void) | ||
192 | { | ||
193 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | ||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | __initcall(pid_namespaces_init); | ||
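
The new file above is reached through copy_pid_ns() when a task is cloned with CLONE_NEWPID. A hypothetical userspace sketch of that trigger follows; it assumes CAP_SYS_ADMIN, an architecture with a downward-growing stack, and kernel headers that define CLONE_NEWPID:

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static char child_stack[64 * 1024];

static int child_fn(void *arg)
{
        /* Inside the new namespace the child is its init: it sees pid 1. */
        printf("child sees itself as pid %d\n", (int)getpid());
        return 0;
}

int main(void)
{
        /* Stack top is passed because the stack grows downward here. */
        pid_t pid = clone(child_fn, child_stack + sizeof(child_stack),
                          CLONE_NEWPID | SIGCHLD, NULL);

        if (pid < 0) {
                perror("clone");
                return 1;
        }
        printf("parent sees the child as pid %d\n", (int)pid);
        waitpid(pid, NULL, 0);
        return 0;
}
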
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c new file mode 100644 index 000000000000..0afe32be4c85 --- /dev/null +++ b/kernel/pm_qos_params.c | |||
@@ -0,0 +1,425 @@ | |||
1 | /* | ||
2 | * This module exposes the interface to kernel space for specifying | ||
3 | * QoS dependencies. It provides infrastructure for registration of: | ||
4 | * | ||
5 | * Dependents on a QoS value : register requirements | ||
6 | * Watchers of QoS value : get notified when target QoS value changes | ||
7 | * | ||
8 | * This QoS design is best effort based. Dependents register their QoS needs. | ||
9 | * Watchers register to keep track of the current QoS needs of the system. | ||
10 | * | ||
11 | * There are 3 basic classes of QoS parameter: latency, timeout, throughput | ||
12 | * each have defined units: | ||
13 | * latency: usec | ||
14 | * timeout: usec <-- currently not used. | ||
15 | * throughput: kbs (kilo byte / sec) | ||
16 | * | ||
17 | * There are lists of pm_qos_objects each one wrapping requirements, notifiers | ||
18 | * | ||
19 | * User mode requirements on a QOS parameter register themselves to the | ||
20 | * subsystem by opening the device node /dev/... and writing their request to | ||
21 | * the node. As long as the process holds a file handle open to the node the | ||
22 | * client continues to be accounted for. Upon file release the usermode | ||
23 | * requirement is removed and a new qos target is computed. This way the | ||
24 | * requirement that the application holds is cleaned up when it closes the | ||
25 | * file descriptor or exits, and the pm_qos_object gets a chance to clean up. | ||
26 | * | ||
27 | * mark gross mgross@linux.intel.com | ||
28 | */ | ||
29 | |||
30 | #include <linux/pm_qos_params.h> | ||
31 | #include <linux/sched.h> | ||
32 | #include <linux/spinlock.h> | ||
33 | #include <linux/slab.h> | ||
34 | #include <linux/time.h> | ||
35 | #include <linux/fs.h> | ||
36 | #include <linux/device.h> | ||
37 | #include <linux/miscdevice.h> | ||
38 | #include <linux/string.h> | ||
39 | #include <linux/platform_device.h> | ||
40 | #include <linux/init.h> | ||
41 | |||
42 | #include <linux/uaccess.h> | ||
43 | |||
44 | /* | ||
45 | * locking rule: all changes to target_value or requirements or notifiers lists | ||
46 | * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock | ||
47 | * held, taken with _irqsave. One lock to rule them all | ||
48 | */ | ||
49 | struct requirement_list { | ||
50 | struct list_head list; | ||
51 | union { | ||
52 | s32 value; | ||
53 | s32 usec; | ||
54 | s32 kbps; | ||
55 | }; | ||
56 | char *name; | ||
57 | }; | ||
58 | |||
59 | static s32 max_compare(s32 v1, s32 v2); | ||
60 | static s32 min_compare(s32 v1, s32 v2); | ||
61 | |||
62 | struct pm_qos_object { | ||
63 | struct requirement_list requirements; | ||
64 | struct blocking_notifier_head *notifiers; | ||
65 | struct miscdevice pm_qos_power_miscdev; | ||
66 | char *name; | ||
67 | s32 default_value; | ||
68 | s32 target_value; | ||
69 | s32 (*comparitor)(s32, s32); | ||
70 | }; | ||
71 | |||
72 | static struct pm_qos_object null_pm_qos; | ||
73 | static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); | ||
74 | static struct pm_qos_object cpu_dma_pm_qos = { | ||
75 | .requirements = {LIST_HEAD_INIT(cpu_dma_pm_qos.requirements.list)}, | ||
76 | .notifiers = &cpu_dma_lat_notifier, | ||
77 | .name = "cpu_dma_latency", | ||
78 | .default_value = 2000 * USEC_PER_SEC, | ||
79 | .target_value = 2000 * USEC_PER_SEC, | ||
80 | .comparitor = min_compare | ||
81 | }; | ||
82 | |||
83 | static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); | ||
84 | static struct pm_qos_object network_lat_pm_qos = { | ||
85 | .requirements = {LIST_HEAD_INIT(network_lat_pm_qos.requirements.list)}, | ||
86 | .notifiers = &network_lat_notifier, | ||
87 | .name = "network_latency", | ||
88 | .default_value = 2000 * USEC_PER_SEC, | ||
89 | .target_value = 2000 * USEC_PER_SEC, | ||
90 | .comparitor = min_compare | ||
91 | }; | ||
92 | |||
93 | |||
94 | static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); | ||
95 | static struct pm_qos_object network_throughput_pm_qos = { | ||
96 | .requirements = | ||
97 | {LIST_HEAD_INIT(network_throughput_pm_qos.requirements.list)}, | ||
98 | .notifiers = &network_throughput_notifier, | ||
99 | .name = "network_throughput", | ||
100 | .default_value = 0, | ||
101 | .target_value = 0, | ||
102 | .comparitor = max_compare | ||
103 | }; | ||
104 | |||
105 | |||
106 | static struct pm_qos_object *pm_qos_array[] = { | ||
107 | &null_pm_qos, | ||
108 | &cpu_dma_pm_qos, | ||
109 | &network_lat_pm_qos, | ||
110 | &network_throughput_pm_qos | ||
111 | }; | ||
112 | |||
113 | static DEFINE_SPINLOCK(pm_qos_lock); | ||
114 | |||
115 | static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | ||
116 | size_t count, loff_t *f_pos); | ||
117 | static int pm_qos_power_open(struct inode *inode, struct file *filp); | ||
118 | static int pm_qos_power_release(struct inode *inode, struct file *filp); | ||
119 | |||
120 | static const struct file_operations pm_qos_power_fops = { | ||
121 | .write = pm_qos_power_write, | ||
122 | .open = pm_qos_power_open, | ||
123 | .release = pm_qos_power_release, | ||
124 | }; | ||
125 | |||
126 | /* static helper functions */ | ||
127 | static s32 max_compare(s32 v1, s32 v2) | ||
128 | { | ||
129 | return max(v1, v2); | ||
130 | } | ||
131 | |||
132 | static s32 min_compare(s32 v1, s32 v2) | ||
133 | { | ||
134 | return min(v1, v2); | ||
135 | } | ||
136 | |||
137 | |||
138 | static void update_target(int target) | ||
139 | { | ||
140 | s32 extreme_value; | ||
141 | struct requirement_list *node; | ||
142 | unsigned long flags; | ||
143 | int call_notifier = 0; | ||
144 | |||
145 | spin_lock_irqsave(&pm_qos_lock, flags); | ||
146 | extreme_value = pm_qos_array[target]->default_value; | ||
147 | list_for_each_entry(node, | ||
148 | &pm_qos_array[target]->requirements.list, list) { | ||
149 | extreme_value = pm_qos_array[target]->comparitor( | ||
150 | extreme_value, node->value); | ||
151 | } | ||
152 | if (pm_qos_array[target]->target_value != extreme_value) { | ||
153 | call_notifier = 1; | ||
154 | pm_qos_array[target]->target_value = extreme_value; | ||
155 | pr_debug(KERN_ERR "new target for qos %d is %d\n", target, | ||
156 | pm_qos_array[target]->target_value); | ||
157 | } | ||
158 | spin_unlock_irqrestore(&pm_qos_lock, flags); | ||
159 | |||
160 | if (call_notifier) | ||
161 | blocking_notifier_call_chain(pm_qos_array[target]->notifiers, | ||
162 | (unsigned long) extreme_value, NULL); | ||
163 | } | ||
164 | |||
165 | static int register_pm_qos_misc(struct pm_qos_object *qos) | ||
166 | { | ||
167 | qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; | ||
168 | qos->pm_qos_power_miscdev.name = qos->name; | ||
169 | qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; | ||
170 | |||
171 | return misc_register(&qos->pm_qos_power_miscdev); | ||
172 | } | ||
173 | |||
174 | static int find_pm_qos_object_by_minor(int minor) | ||
175 | { | ||
176 | int pm_qos_class; | ||
177 | |||
178 | for (pm_qos_class = 0; | ||
179 | pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { | ||
180 | if (minor == | ||
181 | pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) | ||
182 | return pm_qos_class; | ||
183 | } | ||
184 | return -1; | ||
185 | } | ||
186 | |||
187 | /** | ||
188 | * pm_qos_requirement - returns current system wide qos expectation | ||
189 | * @pm_qos_class: identification of which qos value is requested | ||
190 | * | ||
191 | * This function returns the current target value in an atomic manner. | ||
192 | */ | ||
193 | int pm_qos_requirement(int pm_qos_class) | ||
194 | { | ||
195 | int ret_val; | ||
196 | unsigned long flags; | ||
197 | |||
198 | spin_lock_irqsave(&pm_qos_lock, flags); | ||
199 | ret_val = pm_qos_array[pm_qos_class]->target_value; | ||
200 | spin_unlock_irqrestore(&pm_qos_lock, flags); | ||
201 | |||
202 | return ret_val; | ||
203 | } | ||
204 | EXPORT_SYMBOL_GPL(pm_qos_requirement); | ||
205 | |||
206 | /** | ||
207 | * pm_qos_add_requirement - inserts new qos request into the list | ||
208 | * @pm_qos_class: identifies which list of qos requests to use | ||
209 | * @name: identifies the request | ||
210 | * @value: defines the qos request | ||
211 | * | ||
212 | * This function inserts a new entry in the pm_qos_class list of requested qos | ||
213 | * performance characteristics. It recomputes the aggregate QoS expectations for | ||
214 | * the pm_qos_class of parameters. | ||
215 | */ | ||
216 | int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value) | ||
217 | { | ||
218 | struct requirement_list *dep; | ||
219 | unsigned long flags; | ||
220 | |||
221 | dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL); | ||
222 | if (dep) { | ||
223 | if (value == PM_QOS_DEFAULT_VALUE) | ||
224 | dep->value = pm_qos_array[pm_qos_class]->default_value; | ||
225 | else | ||
226 | dep->value = value; | ||
227 | dep->name = kstrdup(name, GFP_KERNEL); | ||
228 | if (!dep->name) | ||
229 | goto cleanup; | ||
230 | |||
231 | spin_lock_irqsave(&pm_qos_lock, flags); | ||
232 | list_add(&dep->list, | ||
233 | &pm_qos_array[pm_qos_class]->requirements.list); | ||
234 | spin_unlock_irqrestore(&pm_qos_lock, flags); | ||
235 | update_target(pm_qos_class); | ||
236 | |||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | cleanup: | ||
241 | kfree(dep); | ||
242 | return -ENOMEM; | ||
243 | } | ||
244 | EXPORT_SYMBOL_GPL(pm_qos_add_requirement); | ||
245 | |||
246 | /** | ||
247 | * pm_qos_update_requirement - modifies an existing qos request | ||
248 | * @pm_qos_class: identifies which list of qos requests to use | ||
249 | * @name: identifies the request | ||
250 | * @value: defines the qos request | ||
251 | * | ||
252 | * Updates an existing qos requirement for the pm_qos_class of parameters along | ||
253 | * with updating the target pm_qos_class value. | ||
254 | * | ||
255 | * If the named request isn't in the list then no change is made. | ||
256 | */ | ||
257 | int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value) | ||
258 | { | ||
259 | unsigned long flags; | ||
260 | struct requirement_list *node; | ||
261 | int pending_update = 0; | ||
262 | |||
263 | spin_lock_irqsave(&pm_qos_lock, flags); | ||
264 | list_for_each_entry(node, | ||
265 | &pm_qos_array[pm_qos_class]->requirements.list, list) { | ||
266 | if (strcmp(node->name, name) == 0) { | ||
267 | if (new_value == PM_QOS_DEFAULT_VALUE) | ||
268 | node->value = | ||
269 | pm_qos_array[pm_qos_class]->default_value; | ||
270 | else | ||
271 | node->value = new_value; | ||
272 | pending_update = 1; | ||
273 | break; | ||
274 | } | ||
275 | } | ||
276 | spin_unlock_irqrestore(&pm_qos_lock, flags); | ||
277 | if (pending_update) | ||
278 | update_target(pm_qos_class); | ||
279 | |||
280 | return 0; | ||
281 | } | ||
282 | EXPORT_SYMBOL_GPL(pm_qos_update_requirement); | ||
283 | |||
284 | /** | ||
285 | * pm_qos_remove_requirement - modifies an existing qos request | ||
286 | * @pm_qos_class: identifies which list of qos requests to use | ||
287 | * @name: identifies the request | ||
288 | * | ||
289 | * Will remove the named qos request from the pm_qos_class list of parameters and | ||
290 | * recompute the current target value for the pm_qos_class. | ||
291 | */ | ||
292 | void pm_qos_remove_requirement(int pm_qos_class, char *name) | ||
293 | { | ||
294 | unsigned long flags; | ||
295 | struct requirement_list *node; | ||
296 | int pending_update = 0; | ||
297 | |||
298 | spin_lock_irqsave(&pm_qos_lock, flags); | ||
299 | list_for_each_entry(node, | ||
300 | &pm_qos_array[pm_qos_class]->requirements.list, list) { | ||
301 | if (strcmp(node->name, name) == 0) { | ||
302 | kfree(node->name); | ||
303 | list_del(&node->list); | ||
304 | kfree(node); | ||
305 | pending_update = 1; | ||
306 | break; | ||
307 | } | ||
308 | } | ||
309 | spin_unlock_irqrestore(&pm_qos_lock, flags); | ||
310 | if (pending_update) | ||
311 | update_target(pm_qos_class); | ||
312 | } | ||
313 | EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); | ||
314 | |||
315 | /** | ||
316 | * pm_qos_add_notifier - sets notification entry for changes to target value | ||
317 | * @pm_qos_class: identifies which qos target changes should be notified. | ||
318 | * @notifier: notifier block managed by caller. | ||
319 | * | ||
320 | * will register the notifier into a notification chain that gets called | ||
321 | * upon changes to the pm_qos_class target value. | ||
322 | */ | ||
323 | int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) | ||
324 | { | ||
325 | int retval; | ||
326 | |||
327 | retval = blocking_notifier_chain_register( | ||
328 | pm_qos_array[pm_qos_class]->notifiers, notifier); | ||
329 | |||
330 | return retval; | ||
331 | } | ||
332 | EXPORT_SYMBOL_GPL(pm_qos_add_notifier); | ||
333 | |||
334 | /** | ||
335 | * pm_qos_remove_notifier - deletes notification entry from chain. | ||
336 | * @pm_qos_class: identifies which qos target changes are notified. | ||
337 | * @notifier: notifier block to be removed. | ||
338 | * | ||
339 | * will remove the notifier from the notification chain that gets called | ||
340 | * upon changes to the pm_qos_class target value. | ||
341 | */ | ||
342 | int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) | ||
343 | { | ||
344 | int retval; | ||
345 | |||
346 | retval = blocking_notifier_chain_unregister( | ||
347 | pm_qos_array[pm_qos_class]->notifiers, notifier); | ||
348 | |||
349 | return retval; | ||
350 | } | ||
351 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); | ||
352 | |||
353 | #define PID_NAME_LEN sizeof("process_1234567890") | ||
354 | static char name[PID_NAME_LEN]; | ||
355 | |||
356 | static int pm_qos_power_open(struct inode *inode, struct file *filp) | ||
357 | { | ||
358 | int ret; | ||
359 | long pm_qos_class; | ||
360 | |||
361 | pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); | ||
362 | if (pm_qos_class >= 0) { | ||
363 | filp->private_data = (void *)pm_qos_class; | ||
364 | sprintf(name, "process_%d", current->pid); | ||
365 | ret = pm_qos_add_requirement(pm_qos_class, name, | ||
366 | PM_QOS_DEFAULT_VALUE); | ||
367 | if (ret >= 0) | ||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | return -EPERM; | ||
372 | } | ||
373 | |||
374 | static int pm_qos_power_release(struct inode *inode, struct file *filp) | ||
375 | { | ||
376 | int pm_qos_class; | ||
377 | |||
378 | pm_qos_class = (long)filp->private_data; | ||
379 | sprintf(name, "process_%d", current->pid); | ||
380 | pm_qos_remove_requirement(pm_qos_class, name); | ||
381 | |||
382 | return 0; | ||
383 | } | ||
384 | |||
385 | static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | ||
386 | size_t count, loff_t *f_pos) | ||
387 | { | ||
388 | s32 value; | ||
389 | int pm_qos_class; | ||
390 | |||
391 | pm_qos_class = (long)filp->private_data; | ||
392 | if (count != sizeof(s32)) | ||
393 | return -EINVAL; | ||
394 | if (copy_from_user(&value, buf, sizeof(s32))) | ||
395 | return -EFAULT; | ||
396 | sprintf(name, "process_%d", current->pid); | ||
397 | pm_qos_update_requirement(pm_qos_class, name, value); | ||
398 | |||
399 | return sizeof(s32); | ||
400 | } | ||
401 | |||
402 | |||
403 | static int __init pm_qos_power_init(void) | ||
404 | { | ||
405 | int ret = 0; | ||
406 | |||
407 | ret = register_pm_qos_misc(&cpu_dma_pm_qos); | ||
408 | if (ret < 0) { | ||
409 | printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n"); | ||
410 | return ret; | ||
411 | } | ||
412 | ret = register_pm_qos_misc(&network_lat_pm_qos); | ||
413 | if (ret < 0) { | ||
414 | printk(KERN_ERR "pm_qos_param: network_latency setup failed\n"); | ||
415 | return ret; | ||
416 | } | ||
417 | ret = register_pm_qos_misc(&network_throughput_pm_qos); | ||
418 | if (ret < 0) | ||
419 | printk(KERN_ERR | ||
420 | "pm_qos_param: network_throughput setup failed\n"); | ||
421 | |||
422 | return ret; | ||
423 | } | ||
424 | |||
425 | late_initcall(pm_qos_power_init); | ||
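
As a rough sketch of how a kernel-side client might use the interface added above: PM_QOS_CPU_DMA_LATENCY and PM_QOS_DEFAULT_VALUE are assumed to be provided by <linux/pm_qos_params.h>, and "mydriver" plus the two functions are illustrative only.

#include <linux/pm_qos_params.h>

/* Ask for no more than 50 usec of CPU DMA latency while a transfer
 * is in flight, then drop the requirement again. */
static int mydriver_start_transfer(void)
{
        return pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY,
                                      "mydriver", 50);
}

static void mydriver_finish_transfer(void)
{
        pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, "mydriver");
}

A user-space client would instead keep the misc device (typically /dev/cpu_dma_latency) open and write a binary s32 to it, exactly as the pm_qos_power_open/write/release handlers above implement.
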
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 0b7c82ac467e..2eae91f954ca 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -20,7 +20,7 @@ static int check_clock(const clockid_t which_clock) | |||
20 | return 0; | 20 | return 0; |
21 | 21 | ||
22 | read_lock(&tasklist_lock); | 22 | read_lock(&tasklist_lock); |
23 | p = find_task_by_pid(pid); | 23 | p = find_task_by_vpid(pid); |
24 | if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? | 24 | if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? |
25 | same_thread_group(p, current) : thread_group_leader(p))) { | 25 | same_thread_group(p, current) : thread_group_leader(p))) { |
26 | error = -EINVAL; | 26 | error = -EINVAL; |
@@ -305,7 +305,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | |||
305 | */ | 305 | */ |
306 | struct task_struct *p; | 306 | struct task_struct *p; |
307 | rcu_read_lock(); | 307 | rcu_read_lock(); |
308 | p = find_task_by_pid(pid); | 308 | p = find_task_by_vpid(pid); |
309 | if (p) { | 309 | if (p) { |
310 | if (CPUCLOCK_PERTHREAD(which_clock)) { | 310 | if (CPUCLOCK_PERTHREAD(which_clock)) { |
311 | if (same_thread_group(p, current)) { | 311 | if (same_thread_group(p, current)) { |
@@ -354,7 +354,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
354 | if (pid == 0) { | 354 | if (pid == 0) { |
355 | p = current; | 355 | p = current; |
356 | } else { | 356 | } else { |
357 | p = find_task_by_pid(pid); | 357 | p = find_task_by_vpid(pid); |
358 | if (p && !same_thread_group(p, current)) | 358 | if (p && !same_thread_group(p, current)) |
359 | p = NULL; | 359 | p = NULL; |
360 | } | 360 | } |
@@ -362,7 +362,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
362 | if (pid == 0) { | 362 | if (pid == 0) { |
363 | p = current->group_leader; | 363 | p = current->group_leader; |
364 | } else { | 364 | } else { |
365 | p = find_task_by_pid(pid); | 365 | p = find_task_by_vpid(pid); |
366 | if (p && !thread_group_leader(p)) | 366 | if (p && !thread_group_leader(p)) |
367 | p = NULL; | 367 | p = NULL; |
368 | } | 368 | } |
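
These hunks switch posix-cpu-timers from find_task_by_pid() to find_task_by_vpid(), so a numeric id coming from user space is interpreted in the caller's pid namespace. The surrounding lookup pattern, sketched as a hypothetical helper (lookup_caller_task is not a real kernel function):

#include <linux/rcupdate.h>
#include <linux/sched.h>

/* Resolve @nr as the calling task would, and pin the result. */
static struct task_struct *lookup_caller_task(pid_t nr)
{
        struct task_struct *p;

        rcu_read_lock();
        p = find_task_by_vpid(nr);      /* nr is relative to current's
                                         * pid namespace */
        if (p)
                get_task_struct(p);
        rcu_read_unlock();
        return p;               /* caller must put_task_struct() */
}
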
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 36d563fd9e3b..a9b04203a66d 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -256,8 +256,9 @@ static void schedule_next_timer(struct k_itimer *timr) | |||
256 | if (timr->it.real.interval.tv64 == 0) | 256 | if (timr->it.real.interval.tv64 == 0) |
257 | return; | 257 | return; |
258 | 258 | ||
259 | timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), | 259 | timr->it_overrun += (unsigned int) hrtimer_forward(timer, |
260 | timr->it.real.interval); | 260 | timer->base->get_time(), |
261 | timr->it.real.interval); | ||
261 | 262 | ||
262 | timr->it_overrun_last = timr->it_overrun; | 263 | timr->it_overrun_last = timr->it_overrun; |
263 | timr->it_overrun = -1; | 264 | timr->it_overrun = -1; |
@@ -386,7 +387,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) | |||
386 | now = ktime_add(now, kj); | 387 | now = ktime_add(now, kj); |
387 | } | 388 | } |
388 | #endif | 389 | #endif |
389 | timr->it_overrun += | 390 | timr->it_overrun += (unsigned int) |
390 | hrtimer_forward(timer, now, | 391 | hrtimer_forward(timer, now, |
391 | timr->it.real.interval); | 392 | timr->it.real.interval); |
392 | ret = HRTIMER_RESTART; | 393 | ret = HRTIMER_RESTART; |
@@ -403,7 +404,7 @@ static struct task_struct * good_sigevent(sigevent_t * event) | |||
403 | struct task_struct *rtn = current->group_leader; | 404 | struct task_struct *rtn = current->group_leader; |
404 | 405 | ||
405 | if ((event->sigev_notify & SIGEV_THREAD_ID ) && | 406 | if ((event->sigev_notify & SIGEV_THREAD_ID ) && |
406 | (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || | 407 | (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || |
407 | !same_thread_group(rtn, current) || | 408 | !same_thread_group(rtn, current) || |
408 | (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) | 409 | (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) |
409 | return NULL; | 410 | return NULL; |
@@ -662,7 +663,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) | |||
662 | */ | 663 | */ |
663 | if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || | 664 | if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || |
664 | (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) | 665 | (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) |
665 | timr->it_overrun += hrtimer_forward(timer, now, iv); | 666 | timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); |
666 | 667 | ||
667 | remaining = ktime_sub(timer->expires, now); | 668 | remaining = ktime_sub(timer->expires, now); |
668 | /* Return 0 only, when the timer is expired and not pending */ | 669 | /* Return 0 only, when the timer is expired and not pending */ |
@@ -766,9 +767,11 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
766 | /* SIGEV_NONE timers are not queued ! See common_timer_get */ | 767 | /* SIGEV_NONE timers are not queued ! See common_timer_get */ |
767 | if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { | 768 | if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { |
768 | /* Setup correct expiry time for relative timers */ | 769 | /* Setup correct expiry time for relative timers */ |
769 | if (mode == HRTIMER_MODE_REL) | 770 | if (mode == HRTIMER_MODE_REL) { |
770 | timer->expires = ktime_add(timer->expires, | 771 | timer->expires = |
771 | timer->base->get_time()); | 772 | ktime_add_safe(timer->expires, |
773 | timer->base->get_time()); | ||
774 | } | ||
772 | return 0; | 775 | return 0; |
773 | } | 776 | } |
774 | 777 | ||
@@ -981,20 +984,9 @@ sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp) | |||
981 | static int common_nsleep(const clockid_t which_clock, int flags, | 984 | static int common_nsleep(const clockid_t which_clock, int flags, |
982 | struct timespec *tsave, struct timespec __user *rmtp) | 985 | struct timespec *tsave, struct timespec __user *rmtp) |
983 | { | 986 | { |
984 | struct timespec rmt; | 987 | return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ? |
985 | int ret; | 988 | HRTIMER_MODE_ABS : HRTIMER_MODE_REL, |
986 | 989 | which_clock); | |
987 | ret = hrtimer_nanosleep(tsave, rmtp ? &rmt : NULL, | ||
988 | flags & TIMER_ABSTIME ? | ||
989 | HRTIMER_MODE_ABS : HRTIMER_MODE_REL, | ||
990 | which_clock); | ||
991 | |||
992 | if (ret && rmtp) { | ||
993 | if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) | ||
994 | return -EFAULT; | ||
995 | } | ||
996 | |||
997 | return ret; | ||
998 | } | 990 | } |
999 | 991 | ||
1000 | asmlinkage long | 992 | asmlinkage long |
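
With the simplified common_nsleep() above, copying the remaining time back to user space is handled inside hrtimer_nanosleep(); the user-visible contract of clock_nanosleep() is unchanged. A hypothetical user-space sketch of that contract (sleep_ms is an illustrative helper, not a libc function):

#include <errno.h>
#include <time.h>

/* Relative CLOCK_MONOTONIC sleep that resumes with the remaining
 * time whenever it is interrupted by a signal. */
static int sleep_ms(long ms)
{
        struct timespec req = { .tv_sec = ms / 1000,
                                .tv_nsec = (ms % 1000) * 1000000L };
        struct timespec rem;
        int err;

        while ((err = clock_nanosleep(CLOCK_MONOTONIC, 0, &req, &rem)) ==
               EINTR)
                req = rem;      /* kernel filled in the remaining time */
        return err;             /* 0 on success, error number otherwise */
}
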
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index ef9b802738a5..79833170bb9c 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -74,8 +74,8 @@ config PM_TRACE_RTC | |||
74 | RTC across reboots, so that you can debug a machine that just hangs | 74 | RTC across reboots, so that you can debug a machine that just hangs |
75 | during suspend (or more commonly, during resume). | 75 | during suspend (or more commonly, during resume). |
76 | 76 | ||
77 | To use this debugging feature you should attempt to suspend the machine, | 77 | To use this debugging feature you should attempt to suspend the |
78 | then reboot it, then run | 78 | machine, reboot it and then run |
79 | 79 | ||
80 | dmesg -s 1000000 | grep 'hash matches' | 80 | dmesg -s 1000000 | grep 'hash matches' |
81 | 81 | ||
@@ -123,7 +123,10 @@ config HIBERNATION | |||
123 | called "hibernation" in user interfaces. STD checkpoints the | 123 | called "hibernation" in user interfaces. STD checkpoints the |
124 | system and powers it off; and restores that checkpoint on reboot. | 124 | system and powers it off; and restores that checkpoint on reboot. |
125 | 125 | ||
126 | You can suspend your machine with 'echo disk > /sys/power/state'. | 126 | You can suspend your machine with 'echo disk > /sys/power/state' |
127 | after placing resume=/dev/swappartition on the kernel command line | ||
128 | in your bootloader's configuration file. | ||
129 | |||
127 | Alternatively, you can use the additional userland tools available | 130 | Alternatively, you can use the additional userland tools available |
128 | from <http://suspend.sf.net>. | 131 | from <http://suspend.sf.net>. |
129 | 132 | ||
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index d09da0895174..859a8e59773a 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
@@ -26,7 +26,7 @@ | |||
26 | 26 | ||
27 | 27 | ||
28 | static int noresume = 0; | 28 | static int noresume = 0; |
29 | char resume_file[256] = CONFIG_PM_STD_PARTITION; | 29 | static char resume_file[256] = CONFIG_PM_STD_PARTITION; |
30 | dev_t swsusp_resume_device; | 30 | dev_t swsusp_resume_device; |
31 | sector_t swsusp_resume_block; | 31 | sector_t swsusp_resume_block; |
32 | 32 | ||
@@ -185,7 +185,7 @@ static void platform_restore_cleanup(int platform_mode) | |||
185 | * reappears in this routine after a restore. | 185 | * reappears in this routine after a restore. |
186 | */ | 186 | */ |
187 | 187 | ||
188 | int create_image(int platform_mode) | 188 | static int create_image(int platform_mode) |
189 | { | 189 | { |
190 | int error; | 190 | int error; |
191 | 191 | ||
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f6a5df934f8d..95250d7c8d91 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -1203,7 +1203,7 @@ asmlinkage int swsusp_save(void) | |||
1203 | 1203 | ||
1204 | printk(KERN_INFO "PM: Creating hibernation image: \n"); | 1204 | printk(KERN_INFO "PM: Creating hibernation image: \n"); |
1205 | 1205 | ||
1206 | drain_local_pages(); | 1206 | drain_local_pages(NULL); |
1207 | nr_pages = count_data_pages(); | 1207 | nr_pages = count_data_pages(); |
1208 | nr_highmem = count_highmem_pages(); | 1208 | nr_highmem = count_highmem_pages(); |
1209 | printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem); | 1209 | printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem); |
@@ -1221,7 +1221,7 @@ asmlinkage int swsusp_save(void) | |||
1221 | /* During allocation of the suspend pagedir, new cold pages may appear. | 1221 | /* During allocation of the suspend pagedir, new cold pages may appear. |
1222 | * Kill them. | 1222 | * Kill them. |
1223 | */ | 1223 | */ |
1224 | drain_local_pages(); | 1224 | drain_local_pages(NULL); |
1225 | copy_data_pages(©_bm, &orig_bm); | 1225 | copy_data_pages(©_bm, &orig_bm); |
1226 | 1226 | ||
1227 | /* | 1227 | /* |
diff --git a/kernel/printk.c b/kernel/printk.c index 29ae1e99cde0..bee36100f110 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -32,7 +32,6 @@ | |||
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/bootmem.h> | 33 | #include <linux/bootmem.h> |
34 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
35 | #include <linux/jiffies.h> | ||
36 | 35 | ||
37 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
38 | 37 | ||
@@ -93,16 +92,16 @@ static int console_locked, console_suspended; | |||
93 | */ | 92 | */ |
94 | static DEFINE_SPINLOCK(logbuf_lock); | 93 | static DEFINE_SPINLOCK(logbuf_lock); |
95 | 94 | ||
96 | #define LOG_BUF_MASK (log_buf_len-1) | 95 | #define LOG_BUF_MASK (log_buf_len-1) |
97 | #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) | 96 | #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) |
98 | 97 | ||
99 | /* | 98 | /* |
100 | * The indices into log_buf are not constrained to log_buf_len - they | 99 | * The indices into log_buf are not constrained to log_buf_len - they |
101 | * must be masked before subscripting | 100 | * must be masked before subscripting |
102 | */ | 101 | */ |
103 | static unsigned long log_start; /* Index into log_buf: next char to be read by syslog() */ | 102 | static unsigned log_start; /* Index into log_buf: next char to be read by syslog() */ |
104 | static unsigned long con_start; /* Index into log_buf: next char to be sent to consoles */ | 103 | static unsigned con_start; /* Index into log_buf: next char to be sent to consoles */ |
105 | static unsigned long log_end; /* Index into log_buf: most-recently-written-char + 1 */ | 104 | static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ |
106 | 105 | ||
107 | /* | 106 | /* |
108 | * Array of consoles built from command line options (console=) | 107 | * Array of consoles built from command line options (console=) |
@@ -128,17 +127,17 @@ static int console_may_schedule; | |||
128 | static char __log_buf[__LOG_BUF_LEN]; | 127 | static char __log_buf[__LOG_BUF_LEN]; |
129 | static char *log_buf = __log_buf; | 128 | static char *log_buf = __log_buf; |
130 | static int log_buf_len = __LOG_BUF_LEN; | 129 | static int log_buf_len = __LOG_BUF_LEN; |
131 | static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */ | 130 | static unsigned logged_chars; /* Number of chars produced since last read+clear operation */ |
132 | 131 | ||
133 | static int __init log_buf_len_setup(char *str) | 132 | static int __init log_buf_len_setup(char *str) |
134 | { | 133 | { |
135 | unsigned long size = memparse(str, &str); | 134 | unsigned size = memparse(str, &str); |
136 | unsigned long flags; | 135 | unsigned long flags; |
137 | 136 | ||
138 | if (size) | 137 | if (size) |
139 | size = roundup_pow_of_two(size); | 138 | size = roundup_pow_of_two(size); |
140 | if (size > log_buf_len) { | 139 | if (size > log_buf_len) { |
141 | unsigned long start, dest_idx, offset; | 140 | unsigned start, dest_idx, offset; |
142 | char *new_log_buf; | 141 | char *new_log_buf; |
143 | 142 | ||
144 | new_log_buf = alloc_bootmem(size); | 143 | new_log_buf = alloc_bootmem(size); |
@@ -295,7 +294,7 @@ int log_buf_read(int idx) | |||
295 | */ | 294 | */ |
296 | int do_syslog(int type, char __user *buf, int len) | 295 | int do_syslog(int type, char __user *buf, int len) |
297 | { | 296 | { |
298 | unsigned long i, j, limit, count; | 297 | unsigned i, j, limit, count; |
299 | int do_clear = 0; | 298 | int do_clear = 0; |
300 | char c; | 299 | char c; |
301 | int error = 0; | 300 | int error = 0; |
@@ -436,7 +435,7 @@ asmlinkage long sys_syslog(int type, char __user *buf, int len) | |||
436 | /* | 435 | /* |
437 | * Call the console drivers on a range of log_buf | 436 | * Call the console drivers on a range of log_buf |
438 | */ | 437 | */ |
439 | static void __call_console_drivers(unsigned long start, unsigned long end) | 438 | static void __call_console_drivers(unsigned start, unsigned end) |
440 | { | 439 | { |
441 | struct console *con; | 440 | struct console *con; |
442 | 441 | ||
@@ -463,8 +462,8 @@ early_param("ignore_loglevel", ignore_loglevel_setup); | |||
463 | /* | 462 | /* |
464 | * Write out chars from start to end - 1 inclusive | 463 | * Write out chars from start to end - 1 inclusive |
465 | */ | 464 | */ |
466 | static void _call_console_drivers(unsigned long start, | 465 | static void _call_console_drivers(unsigned start, |
467 | unsigned long end, int msg_log_level) | 466 | unsigned end, int msg_log_level) |
468 | { | 467 | { |
469 | if ((msg_log_level < console_loglevel || ignore_loglevel) && | 468 | if ((msg_log_level < console_loglevel || ignore_loglevel) && |
470 | console_drivers && start != end) { | 469 | console_drivers && start != end) { |
@@ -484,12 +483,12 @@ static void _call_console_drivers(unsigned long start, | |||
484 | * log_buf[start] to log_buf[end - 1]. | 483 | * log_buf[start] to log_buf[end - 1]. |
485 | * The console_sem must be held. | 484 | * The console_sem must be held. |
486 | */ | 485 | */ |
487 | static void call_console_drivers(unsigned long start, unsigned long end) | 486 | static void call_console_drivers(unsigned start, unsigned end) |
488 | { | 487 | { |
489 | unsigned long cur_index, start_print; | 488 | unsigned cur_index, start_print; |
490 | static int msg_level = -1; | 489 | static int msg_level = -1; |
491 | 490 | ||
492 | BUG_ON(((long)(start - end)) > 0); | 491 | BUG_ON(((int)(start - end)) > 0); |
493 | 492 | ||
494 | cur_index = start; | 493 | cur_index = start; |
495 | start_print = start; | 494 | start_print = start; |
@@ -567,19 +566,6 @@ static int printk_time = 0; | |||
567 | #endif | 566 | #endif |
568 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); | 567 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); |
569 | 568 | ||
570 | static int __init printk_time_setup(char *str) | ||
571 | { | ||
572 | if (*str) | ||
573 | return 0; | ||
574 | printk_time = 1; | ||
575 | printk(KERN_NOTICE "The 'time' option is deprecated and " | ||
576 | "is scheduled for removal in early 2008\n"); | ||
577 | printk(KERN_NOTICE "Use 'printk.time=<value>' instead\n"); | ||
578 | return 1; | ||
579 | } | ||
580 | |||
581 | __setup("time", printk_time_setup); | ||
582 | |||
583 | /* Check if we have any console registered that can be called early in boot. */ | 569 | /* Check if we have any console registered that can be called early in boot. */ |
584 | static int have_callable_console(void) | 570 | static int have_callable_console(void) |
585 | { | 571 | { |
@@ -790,7 +776,7 @@ asmlinkage long sys_syslog(int type, char __user *buf, int len) | |||
790 | return -ENOSYS; | 776 | return -ENOSYS; |
791 | } | 777 | } |
792 | 778 | ||
793 | static void call_console_drivers(unsigned long start, unsigned long end) | 779 | static void call_console_drivers(unsigned start, unsigned end) |
794 | { | 780 | { |
795 | } | 781 | } |
796 | 782 | ||
@@ -983,8 +969,8 @@ void wake_up_klogd(void) | |||
983 | void release_console_sem(void) | 969 | void release_console_sem(void) |
984 | { | 970 | { |
985 | unsigned long flags; | 971 | unsigned long flags; |
986 | unsigned long _con_start, _log_end; | 972 | unsigned _con_start, _log_end; |
987 | unsigned long wake_klogd = 0; | 973 | unsigned wake_klogd = 0; |
988 | 974 | ||
989 | if (console_suspended) { | 975 | if (console_suspended) { |
990 | up(&secondary_console_sem); | 976 | up(&secondary_console_sem); |
@@ -1265,6 +1251,7 @@ void tty_write_message(struct tty_struct *tty, char *msg) | |||
1265 | return; | 1251 | return; |
1266 | } | 1252 | } |
1267 | 1253 | ||
1254 | #if defined CONFIG_PRINTK | ||
1268 | /* | 1255 | /* |
1269 | * printk rate limiting, lifted from the networking subsystem. | 1256 | * printk rate limiting, lifted from the networking subsystem. |
1270 | * | 1257 | * |
@@ -1275,7 +1262,7 @@ void tty_write_message(struct tty_struct *tty, char *msg) | |||
1275 | int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) | 1262 | int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) |
1276 | { | 1263 | { |
1277 | static DEFINE_SPINLOCK(ratelimit_lock); | 1264 | static DEFINE_SPINLOCK(ratelimit_lock); |
1278 | static unsigned long toks = 10 * 5 * HZ; | 1265 | static unsigned toks = 10 * 5 * HZ; |
1279 | static unsigned long last_msg; | 1266 | static unsigned long last_msg; |
1280 | static int missed; | 1267 | static int missed; |
1281 | unsigned long flags; | 1268 | unsigned long flags; |
@@ -1334,3 +1321,4 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies, | |||
1334 | return false; | 1321 | return false; |
1335 | } | 1322 | } |
1336 | EXPORT_SYMBOL(printk_timed_ratelimit); | 1323 | EXPORT_SYMBOL(printk_timed_ratelimit); |
1324 | #endif | ||
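
The __printk_ratelimit() code that this hunk moves under CONFIG_PRINTK is a token bucket lifted from the networking stack: callers accumulate allowance while they stay quiet and spend a fixed amount per emitted message. A minimal sketch of the same idea follows; the struct, names and numbers are illustrative, not the kernel's own.

/* Token-bucket rate limiting, the idea behind __printk_ratelimit().
 * Everything here is an illustrative sketch, not kernel source. */
struct ratelimit_sketch {
	unsigned long tokens;	/* allowance accumulated so far, in ticks */
	unsigned long last;	/* tick stamp of the previous call        */
	unsigned long interval;	/* ticks charged per emitted message      */
	unsigned long burst;	/* messages allowed back to back          */
};

static int ratelimit_allow(struct ratelimit_sketch *rl, unsigned long now)
{
	rl->tokens += now - rl->last;		/* earn tokens for elapsed time */
	rl->last = now;
	if (rl->tokens > rl->interval * rl->burst)
		rl->tokens = rl->interval * rl->burst;	/* cap the bucket */
	if (rl->tokens >= rl->interval) {
		rl->tokens -= rl->interval;	/* spend one message's worth */
		return 1;			/* caller may print */
	}
	return 0;				/* caller should stay quiet */
}

The real helper additionally counts how many messages it suppressed and reports that number once printing resumes, and it takes a spinlock so it can be called from any context.
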
diff --git a/kernel/profile.c b/kernel/profile.c index e64c2da11c0f..3b7a1b055122 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/cpumask.h> | 21 | #include <linux/cpumask.h> |
22 | #include <linux/cpu.h> | 22 | #include <linux/cpu.h> |
23 | #include <linux/profile.h> | ||
24 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
25 | #include <linux/mutex.h> | 24 | #include <linux/mutex.h> |
26 | #include <asm/sections.h> | 25 | #include <asm/sections.h> |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b0d4ab4dfd3d..fdb34e86f923 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/signal.h> | 20 | #include <linux/signal.h> |
21 | #include <linux/audit.h> | 21 | #include <linux/audit.h> |
22 | #include <linux/pid_namespace.h> | 22 | #include <linux/pid_namespace.h> |
23 | #include <linux/syscalls.h> | ||
23 | 24 | ||
24 | #include <asm/pgtable.h> | 25 | #include <asm/pgtable.h> |
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
@@ -53,7 +54,7 @@ void ptrace_untrace(struct task_struct *child) | |||
53 | spin_lock(&child->sighand->siglock); | 54 | spin_lock(&child->sighand->siglock); |
54 | if (task_is_traced(child)) { | 55 | if (task_is_traced(child)) { |
55 | if (child->signal->flags & SIGNAL_STOP_STOPPED) { | 56 | if (child->signal->flags & SIGNAL_STOP_STOPPED) { |
56 | child->state = TASK_STOPPED; | 57 | __set_task_state(child, TASK_STOPPED); |
57 | } else { | 58 | } else { |
58 | signal_wake_up(child, 1); | 59 | signal_wake_up(child, 1); |
59 | } | 60 | } |
@@ -98,23 +99,23 @@ int ptrace_check_attach(struct task_struct *child, int kill) | |||
98 | * be changed by us so it's not changing right after this. | 99 | * be changed by us so it's not changing right after this. |
99 | */ | 100 | */ |
100 | read_lock(&tasklist_lock); | 101 | read_lock(&tasklist_lock); |
101 | if ((child->ptrace & PT_PTRACED) && child->parent == current && | 102 | if ((child->ptrace & PT_PTRACED) && child->parent == current) { |
102 | (!(child->ptrace & PT_ATTACHED) || child->real_parent != current) | ||
103 | && child->signal != NULL) { | ||
104 | ret = 0; | 103 | ret = 0; |
104 | /* | ||
105 | * child->sighand can't be NULL, release_task() | ||
106 | * does ptrace_unlink() before __exit_signal(). | ||
107 | */ | ||
105 | spin_lock_irq(&child->sighand->siglock); | 108 | spin_lock_irq(&child->sighand->siglock); |
106 | if (task_is_stopped(child)) { | 109 | if (task_is_stopped(child)) |
107 | child->state = TASK_TRACED; | 110 | child->state = TASK_TRACED; |
108 | } else if (!task_is_traced(child) && !kill) { | 111 | else if (!task_is_traced(child) && !kill) |
109 | ret = -ESRCH; | 112 | ret = -ESRCH; |
110 | } | ||
111 | spin_unlock_irq(&child->sighand->siglock); | 113 | spin_unlock_irq(&child->sighand->siglock); |
112 | } | 114 | } |
113 | read_unlock(&tasklist_lock); | 115 | read_unlock(&tasklist_lock); |
114 | 116 | ||
115 | if (!ret && !kill) { | 117 | if (!ret && !kill) |
116 | wait_task_inactive(child); | 118 | wait_task_inactive(child); |
117 | } | ||
118 | 119 | ||
119 | /* All systems go.. */ | 120 | /* All systems go.. */ |
120 | return ret; | 121 | return ret; |
@@ -201,8 +202,7 @@ repeat: | |||
201 | goto bad; | 202 | goto bad; |
202 | 203 | ||
203 | /* Go */ | 204 | /* Go */ |
204 | task->ptrace |= PT_PTRACED | ((task->real_parent != current) | 205 | task->ptrace |= PT_PTRACED; |
205 | ? PT_ATTACHED : 0); | ||
206 | if (capable(CAP_SYS_PTRACE)) | 206 | if (capable(CAP_SYS_PTRACE)) |
207 | task->ptrace |= PT_PTRACE_CAP; | 207 | task->ptrace |= PT_PTRACE_CAP; |
208 | 208 | ||
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 760dfc233a00..c09605f8d16c 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -56,7 +56,10 @@ static atomic_t rcu_barrier_cpu_count; | |||
56 | static DEFINE_MUTEX(rcu_barrier_mutex); | 56 | static DEFINE_MUTEX(rcu_barrier_mutex); |
57 | static struct completion rcu_barrier_completion; | 57 | static struct completion rcu_barrier_completion; |
58 | 58 | ||
59 | /* Because of FASTCALL declaration of complete, we use this wrapper */ | 59 | /* |
60 | * Awaken the corresponding synchronize_rcu() instance now that a | ||
61 | * grace period has elapsed. | ||
62 | */ | ||
60 | static void wakeme_after_rcu(struct rcu_head *head) | 63 | static void wakeme_after_rcu(struct rcu_head *head) |
61 | { | 64 | { |
62 | struct rcu_synchronize *rcu; | 65 | struct rcu_synchronize *rcu; |
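
The rewritten comment describes the callback half of the call_rcu()/completion pairing that synchronize_rcu() is built on. The overall pattern, shown here as a sketch rather than the exact kernel source, is:

/* Sketch of the synchronize_rcu() pattern served by wakeme_after_rcu(). */
struct rcu_synchronize {
	struct rcu_head head;
	struct completion completion;
};

static void wakeme_after_rcu(struct rcu_head *head)
{
	struct rcu_synchronize *rcu;

	rcu = container_of(head, struct rcu_synchronize, head);
	complete(&rcu->completion);	/* grace period over: wake the waiter */
}

static void synchronize_rcu_sketch(void)
{
	struct rcu_synchronize rcu;

	init_completion(&rcu.completion);
	call_rcu(&rcu.head, wakeme_after_rcu);	/* runs after a grace period */
	wait_for_completion(&rcu.completion);	/* block until it has run */
}
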
diff --git a/kernel/relay.c b/kernel/relay.c index 7c0373322f18..d080b9d161a7 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -37,37 +37,31 @@ static void relay_file_mmap_close(struct vm_area_struct *vma) | |||
37 | } | 37 | } |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * nopage() vm_op implementation for relay file mapping. | 40 | * fault() vm_op implementation for relay file mapping. |
41 | */ | 41 | */ |
42 | static struct page *relay_buf_nopage(struct vm_area_struct *vma, | 42 | static int relay_buf_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
43 | unsigned long address, | ||
44 | int *type) | ||
45 | { | 43 | { |
46 | struct page *page; | 44 | struct page *page; |
47 | struct rchan_buf *buf = vma->vm_private_data; | 45 | struct rchan_buf *buf = vma->vm_private_data; |
48 | unsigned long offset = address - vma->vm_start; | 46 | pgoff_t pgoff = vmf->pgoff; |
49 | 47 | ||
50 | if (address > vma->vm_end) | ||
51 | return NOPAGE_SIGBUS; /* Disallow mremap */ | ||
52 | if (!buf) | 48 | if (!buf) |
53 | return NOPAGE_OOM; | 49 | return VM_FAULT_OOM; |
54 | 50 | ||
55 | page = vmalloc_to_page(buf->start + offset); | 51 | page = vmalloc_to_page(buf->start + (pgoff << PAGE_SHIFT)); |
56 | if (!page) | 52 | if (!page) |
57 | return NOPAGE_OOM; | 53 | return VM_FAULT_SIGBUS; |
58 | get_page(page); | 54 | get_page(page); |
55 | vmf->page = page; | ||
59 | 56 | ||
60 | if (type) | 57 | return 0; |
61 | *type = VM_FAULT_MINOR; | ||
62 | |||
63 | return page; | ||
64 | } | 58 | } |
65 | 59 | ||
66 | /* | 60 | /* |
67 | * vm_ops for relay file mappings. | 61 | * vm_ops for relay file mappings. |
68 | */ | 62 | */ |
69 | static struct vm_operations_struct relay_file_mmap_ops = { | 63 | static struct vm_operations_struct relay_file_mmap_ops = { |
70 | .nopage = relay_buf_nopage, | 64 | .fault = relay_buf_fault, |
71 | .close = relay_file_mmap_close, | 65 | .close = relay_file_mmap_close, |
72 | }; | 66 | }; |
73 | 67 | ||
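
The relay change follows the generic ->nopage to ->fault conversion: the handler now takes the page offset from vmf->pgoff, hands the resolved page back in vmf->page, and reports failure with VM_FAULT_* codes instead of the NOPAGE_* sentinel pages. A skeleton of such a handler for a vmalloc-backed mapping looks roughly like the following, where lookup_backing_page() is a hypothetical helper, not a real kernel function:

/* Sketch of a ->fault handler for a vmalloc-backed mapping.
 * lookup_backing_page() is hypothetical: it returns the page backing
 * the given page offset of the private buffer, or NULL. */
static int example_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page;

	page = lookup_backing_page(vma->vm_private_data, vmf->pgoff);
	if (!page)
		return VM_FAULT_SIGBUS;	/* offset beyond the buffer */

	get_page(page);			/* the fault core drops this reference */
	vmf->page = page;
	return 0;
}

static struct vm_operations_struct example_vm_ops = {
	.fault	= example_fault,
	/* .close, .open etc. as the mapping requires */
};
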
diff --git a/kernel/res_counter.c b/kernel/res_counter.c new file mode 100644 index 000000000000..16cbec2d5d60 --- /dev/null +++ b/kernel/res_counter.c | |||
@@ -0,0 +1,134 @@ | |||
1 | /* | ||
2 | * resource cgroups | ||
3 | * | ||
4 | * Copyright 2007 OpenVZ SWsoft Inc | ||
5 | * | ||
6 | * Author: Pavel Emelianov <xemul@openvz.org> | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | #include <linux/parser.h> | ||
12 | #include <linux/fs.h> | ||
13 | #include <linux/res_counter.h> | ||
14 | #include <linux/uaccess.h> | ||
15 | |||
16 | void res_counter_init(struct res_counter *counter) | ||
17 | { | ||
18 | spin_lock_init(&counter->lock); | ||
19 | counter->limit = (unsigned long long)LLONG_MAX; | ||
20 | } | ||
21 | |||
22 | int res_counter_charge_locked(struct res_counter *counter, unsigned long val) | ||
23 | { | ||
24 | if (counter->usage + val > counter->limit) { | ||
25 | counter->failcnt++; | ||
26 | return -ENOMEM; | ||
27 | } | ||
28 | |||
29 | counter->usage += val; | ||
30 | return 0; | ||
31 | } | ||
32 | |||
33 | int res_counter_charge(struct res_counter *counter, unsigned long val) | ||
34 | { | ||
35 | int ret; | ||
36 | unsigned long flags; | ||
37 | |||
38 | spin_lock_irqsave(&counter->lock, flags); | ||
39 | ret = res_counter_charge_locked(counter, val); | ||
40 | spin_unlock_irqrestore(&counter->lock, flags); | ||
41 | return ret; | ||
42 | } | ||
43 | |||
44 | void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) | ||
45 | { | ||
46 | if (WARN_ON(counter->usage < val)) | ||
47 | val = counter->usage; | ||
48 | |||
49 | counter->usage -= val; | ||
50 | } | ||
51 | |||
52 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) | ||
53 | { | ||
54 | unsigned long flags; | ||
55 | |||
56 | spin_lock_irqsave(&counter->lock, flags); | ||
57 | res_counter_uncharge_locked(counter, val); | ||
58 | spin_unlock_irqrestore(&counter->lock, flags); | ||
59 | } | ||
60 | |||
61 | |||
62 | static inline unsigned long long * | ||
63 | res_counter_member(struct res_counter *counter, int member) | ||
64 | { | ||
65 | switch (member) { | ||
66 | case RES_USAGE: | ||
67 | return &counter->usage; | ||
68 | case RES_LIMIT: | ||
69 | return &counter->limit; | ||
70 | case RES_FAILCNT: | ||
71 | return &counter->failcnt; | ||
72 | }; | ||
73 | |||
74 | BUG(); | ||
75 | return NULL; | ||
76 | } | ||
77 | |||
78 | ssize_t res_counter_read(struct res_counter *counter, int member, | ||
79 | const char __user *userbuf, size_t nbytes, loff_t *pos, | ||
80 | int (*read_strategy)(unsigned long long val, char *st_buf)) | ||
81 | { | ||
82 | unsigned long long *val; | ||
83 | char buf[64], *s; | ||
84 | |||
85 | s = buf; | ||
86 | val = res_counter_member(counter, member); | ||
87 | if (read_strategy) | ||
88 | s += read_strategy(*val, s); | ||
89 | else | ||
90 | s += sprintf(s, "%llu\n", *val); | ||
91 | return simple_read_from_buffer((void __user *)userbuf, nbytes, | ||
92 | pos, buf, s - buf); | ||
93 | } | ||
94 | |||
95 | ssize_t res_counter_write(struct res_counter *counter, int member, | ||
96 | const char __user *userbuf, size_t nbytes, loff_t *pos, | ||
97 | int (*write_strategy)(char *st_buf, unsigned long long *val)) | ||
98 | { | ||
99 | int ret; | ||
100 | char *buf, *end; | ||
101 | unsigned long flags; | ||
102 | unsigned long long tmp, *val; | ||
103 | |||
104 | buf = kmalloc(nbytes + 1, GFP_KERNEL); | ||
105 | ret = -ENOMEM; | ||
106 | if (buf == NULL) | ||
107 | goto out; | ||
108 | |||
109 | buf[nbytes] = '\0'; | ||
110 | ret = -EFAULT; | ||
111 | if (copy_from_user(buf, userbuf, nbytes)) | ||
112 | goto out_free; | ||
113 | |||
114 | ret = -EINVAL; | ||
115 | |||
116 | if (write_strategy) { | ||
117 | if (write_strategy(buf, &tmp)) { | ||
118 | goto out_free; | ||
119 | } | ||
120 | } else { | ||
121 | tmp = simple_strtoull(buf, &end, 10); | ||
122 | if (*end != '\0') | ||
123 | goto out_free; | ||
124 | } | ||
125 | spin_lock_irqsave(&counter->lock, flags); | ||
126 | val = res_counter_member(counter, member); | ||
127 | *val = tmp; | ||
128 | spin_unlock_irqrestore(&counter->lock, flags); | ||
129 | ret = nbytes; | ||
130 | out_free: | ||
131 | kfree(buf); | ||
132 | out: | ||
133 | return ret; | ||
134 | } | ||
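
The calling convention for the new res_counter API is charge-before-use and uncharge-on-release, with the charge failing once the limit would be exceeded. A minimal sketch of a hypothetical controller built on it (the structure and the PAGE_SIZE granularity are illustrative):

/* Hypothetical controller state wrapping a res_counter. */
struct example_ctl {
	struct res_counter res;
};

static void example_ctl_init(struct example_ctl *ctl)
{
	res_counter_init(&ctl->res);	/* limit starts at LLONG_MAX */
}

static int example_ctl_charge_page(struct example_ctl *ctl)
{
	/* Returns -ENOMEM (and bumps failcnt) if usage would exceed the limit. */
	return res_counter_charge(&ctl->res, PAGE_SIZE);
}

static void example_ctl_uncharge_page(struct example_ctl *ctl)
{
	res_counter_uncharge(&ctl->res, PAGE_SIZE);
}
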
diff --git a/kernel/resource.c b/kernel/resource.c index 2eb553d9b517..82aea814d409 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -228,7 +228,7 @@ int release_resource(struct resource *old) | |||
228 | 228 | ||
229 | EXPORT_SYMBOL(release_resource); | 229 | EXPORT_SYMBOL(release_resource); |
230 | 230 | ||
231 | #ifdef CONFIG_MEMORY_HOTPLUG | 231 | #if defined(CONFIG_MEMORY_HOTPLUG) && !defined(CONFIG_ARCH_HAS_WALK_MEMORY) |
232 | /* | 232 | /* |
233 | * Finds the lowest memory resource that exists within [res->start..res->end) | 233 | * Finds the lowest memory resource that exists within [res->start..res->end) |
234 | * the caller must specify res->start, res->end, res->flags. | 234 | * the caller must specify res->start, res->end, res->flags. |
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index 56d73cb8826d..5fcb4fe645e2 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c | |||
@@ -130,7 +130,7 @@ void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter, | |||
130 | 130 | ||
131 | task = rt_mutex_owner(act_waiter->lock); | 131 | task = rt_mutex_owner(act_waiter->lock); |
132 | if (task && task != current) { | 132 | if (task && task != current) { |
133 | act_waiter->deadlock_task_pid = task->pid; | 133 | act_waiter->deadlock_task_pid = get_pid(task_pid(task)); |
134 | act_waiter->deadlock_lock = lock; | 134 | act_waiter->deadlock_lock = lock; |
135 | } | 135 | } |
136 | } | 136 | } |
@@ -142,9 +142,12 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) | |||
142 | if (!waiter->deadlock_lock || !rt_trace_on) | 142 | if (!waiter->deadlock_lock || !rt_trace_on) |
143 | return; | 143 | return; |
144 | 144 | ||
145 | task = find_task_by_pid(waiter->deadlock_task_pid); | 145 | rcu_read_lock(); |
146 | if (!task) | 146 | task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID); |
147 | if (!task) { | ||
148 | rcu_read_unlock(); | ||
147 | return; | 149 | return; |
150 | } | ||
148 | 151 | ||
149 | TRACE_OFF_NOLOCK(); | 152 | TRACE_OFF_NOLOCK(); |
150 | 153 | ||
@@ -173,6 +176,7 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) | |||
173 | current->comm, task_pid_nr(current)); | 176 | current->comm, task_pid_nr(current)); |
174 | dump_stack(); | 177 | dump_stack(); |
175 | debug_show_all_locks(); | 178 | debug_show_all_locks(); |
179 | rcu_read_unlock(); | ||
176 | 180 | ||
177 | printk("[ turning off deadlock detection." | 181 | printk("[ turning off deadlock detection." |
178 | "Please report this trace. ]\n\n"); | 182 | "Please report this trace. ]\n\n"); |
@@ -203,10 +207,12 @@ void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) | |||
203 | memset(waiter, 0x11, sizeof(*waiter)); | 207 | memset(waiter, 0x11, sizeof(*waiter)); |
204 | plist_node_init(&waiter->list_entry, MAX_PRIO); | 208 | plist_node_init(&waiter->list_entry, MAX_PRIO); |
205 | plist_node_init(&waiter->pi_list_entry, MAX_PRIO); | 209 | plist_node_init(&waiter->pi_list_entry, MAX_PRIO); |
210 | waiter->deadlock_task_pid = NULL; | ||
206 | } | 211 | } |
207 | 212 | ||
208 | void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) | 213 | void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) |
209 | { | 214 | { |
215 | put_pid(waiter->deadlock_task_pid); | ||
210 | TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); | 216 | TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); |
211 | TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); | 217 | TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); |
212 | TRACE_WARN_ON(waiter->task); | 218 | TRACE_WARN_ON(waiter->task); |
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 0deef71ff8d2..6522ae5b14a2 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
@@ -630,9 +630,12 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
630 | set_current_state(state); | 630 | set_current_state(state); |
631 | 631 | ||
632 | /* Setup the timer, when timeout != NULL */ | 632 | /* Setup the timer, when timeout != NULL */ |
633 | if (unlikely(timeout)) | 633 | if (unlikely(timeout)) { |
634 | hrtimer_start(&timeout->timer, timeout->timer.expires, | 634 | hrtimer_start(&timeout->timer, timeout->timer.expires, |
635 | HRTIMER_MODE_ABS); | 635 | HRTIMER_MODE_ABS); |
636 | if (!hrtimer_active(&timeout->timer)) | ||
637 | timeout->task = NULL; | ||
638 | } | ||
636 | 639 | ||
637 | for (;;) { | 640 | for (;;) { |
638 | /* Try to acquire the lock: */ | 641 | /* Try to acquire the lock: */ |
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index 2d3b83593ca3..e124bf5800ea 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h | |||
@@ -51,7 +51,7 @@ struct rt_mutex_waiter { | |||
51 | struct rt_mutex *lock; | 51 | struct rt_mutex *lock; |
52 | #ifdef CONFIG_DEBUG_RT_MUTEXES | 52 | #ifdef CONFIG_DEBUG_RT_MUTEXES |
53 | unsigned long ip; | 53 | unsigned long ip; |
54 | pid_t deadlock_task_pid; | 54 | struct pid *deadlock_task_pid; |
55 | struct rt_mutex *deadlock_lock; | 55 | struct rt_mutex *deadlock_lock; |
56 | #endif | 56 | #endif |
57 | }; | 57 | }; |
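
Switching deadlock_task_pid from a pid_t to a struct pid pointer is part of the wider pid namespace conversion: rather than storing a numeric pid that may be reused or be ambiguous across namespaces, the waiter takes a counted reference and resolves it under RCU only when the trace is printed. In sketch form (the example_* names are illustrative):

/* Sketch of the struct pid reference pattern used above. */
struct example_record {
	struct pid *pid;	/* counted reference, may outlive the task */
};

static void example_store(struct example_record *rec, struct task_struct *task)
{
	rec->pid = get_pid(task_pid(task));	/* take a reference */
}

static void example_report(struct example_record *rec)
{
	struct task_struct *task;

	rcu_read_lock();
	task = pid_task(rec->pid, PIDTYPE_PID);	/* NULL if the task is gone */
	if (task)
		printk(KERN_DEBUG "task: %s\n", task->comm);
	rcu_read_unlock();
}

static void example_release(struct example_record *rec)
{
	put_pid(rec->pid);	/* put_pid() tolerates NULL */
}
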
diff --git a/kernel/sched.c b/kernel/sched.c index 9474b23c28bf..f28f19e65b59 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -155,7 +155,7 @@ struct rt_prio_array { | |||
155 | struct list_head queue[MAX_RT_PRIO]; | 155 | struct list_head queue[MAX_RT_PRIO]; |
156 | }; | 156 | }; |
157 | 157 | ||
158 | #ifdef CONFIG_FAIR_GROUP_SCHED | 158 | #ifdef CONFIG_GROUP_SCHED |
159 | 159 | ||
160 | #include <linux/cgroup.h> | 160 | #include <linux/cgroup.h> |
161 | 161 | ||
@@ -165,19 +165,16 @@ static LIST_HEAD(task_groups); | |||
165 | 165 | ||
166 | /* task group related information */ | 166 | /* task group related information */ |
167 | struct task_group { | 167 | struct task_group { |
168 | #ifdef CONFIG_FAIR_CGROUP_SCHED | 168 | #ifdef CONFIG_CGROUP_SCHED |
169 | struct cgroup_subsys_state css; | 169 | struct cgroup_subsys_state css; |
170 | #endif | 170 | #endif |
171 | |||
172 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
171 | /* schedulable entities of this group on each cpu */ | 173 | /* schedulable entities of this group on each cpu */ |
172 | struct sched_entity **se; | 174 | struct sched_entity **se; |
173 | /* runqueue "owned" by this group on each cpu */ | 175 | /* runqueue "owned" by this group on each cpu */ |
174 | struct cfs_rq **cfs_rq; | 176 | struct cfs_rq **cfs_rq; |
175 | 177 | ||
176 | struct sched_rt_entity **rt_se; | ||
177 | struct rt_rq **rt_rq; | ||
178 | |||
179 | unsigned int rt_ratio; | ||
180 | |||
181 | /* | 178 | /* |
182 | * shares assigned to a task group governs how much of cpu bandwidth | 179 | * shares assigned to a task group governs how much of cpu bandwidth |
183 | * is allocated to the group. The more shares a group has, the more is | 180 | * is allocated to the group. The more shares a group has, the more is |
@@ -213,33 +210,46 @@ struct task_group { | |||
213 | * | 210 | * |
214 | */ | 211 | */ |
215 | unsigned long shares; | 212 | unsigned long shares; |
213 | #endif | ||
214 | |||
215 | #ifdef CONFIG_RT_GROUP_SCHED | ||
216 | struct sched_rt_entity **rt_se; | ||
217 | struct rt_rq **rt_rq; | ||
218 | |||
219 | u64 rt_runtime; | ||
220 | #endif | ||
216 | 221 | ||
217 | struct rcu_head rcu; | 222 | struct rcu_head rcu; |
218 | struct list_head list; | 223 | struct list_head list; |
219 | }; | 224 | }; |
220 | 225 | ||
226 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
221 | /* Default task group's sched entity on each cpu */ | 227 | /* Default task group's sched entity on each cpu */ |
222 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); | 228 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); |
223 | /* Default task group's cfs_rq on each cpu */ | 229 | /* Default task group's cfs_rq on each cpu */ |
224 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; | 230 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; |
225 | 231 | ||
226 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | ||
227 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; | ||
228 | |||
229 | static struct sched_entity *init_sched_entity_p[NR_CPUS]; | 232 | static struct sched_entity *init_sched_entity_p[NR_CPUS]; |
230 | static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; | 233 | static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; |
234 | #endif | ||
235 | |||
236 | #ifdef CONFIG_RT_GROUP_SCHED | ||
237 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | ||
238 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; | ||
231 | 239 | ||
232 | static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS]; | 240 | static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS]; |
233 | static struct rt_rq *init_rt_rq_p[NR_CPUS]; | 241 | static struct rt_rq *init_rt_rq_p[NR_CPUS]; |
242 | #endif | ||
234 | 243 | ||
235 | /* task_group_mutex serializes add/remove of task groups and also changes to | 244 | /* task_group_lock serializes add/remove of task groups and also changes to |
236 | * a task group's cpu shares. | 245 | * a task group's cpu shares. |
237 | */ | 246 | */ |
238 | static DEFINE_MUTEX(task_group_mutex); | 247 | static DEFINE_SPINLOCK(task_group_lock); |
239 | 248 | ||
240 | /* doms_cur_mutex serializes access to doms_cur[] array */ | 249 | /* doms_cur_mutex serializes access to doms_cur[] array */ |
241 | static DEFINE_MUTEX(doms_cur_mutex); | 250 | static DEFINE_MUTEX(doms_cur_mutex); |
242 | 251 | ||
252 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
243 | #ifdef CONFIG_SMP | 253 | #ifdef CONFIG_SMP |
244 | /* kernel thread that runs rebalance_shares() periodically */ | 254 | /* kernel thread that runs rebalance_shares() periodically */ |
245 | static struct task_struct *lb_monitor_task; | 255 | static struct task_struct *lb_monitor_task; |
@@ -248,35 +258,40 @@ static int load_balance_monitor(void *unused); | |||
248 | 258 | ||
249 | static void set_se_shares(struct sched_entity *se, unsigned long shares); | 259 | static void set_se_shares(struct sched_entity *se, unsigned long shares); |
250 | 260 | ||
261 | #ifdef CONFIG_USER_SCHED | ||
262 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | ||
263 | #else | ||
264 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | ||
265 | #endif | ||
266 | |||
267 | #define MIN_GROUP_SHARES 2 | ||
268 | |||
269 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | ||
270 | #endif | ||
271 | |||
251 | /* Default task group. | 272 | /* Default task group. |
252 | * Every task in the system belongs to this group at bootup. | 273 | * Every task in the system belongs to this group at bootup. |
253 | */ | 274 | */ |
254 | struct task_group init_task_group = { | 275 | struct task_group init_task_group = { |
276 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
255 | .se = init_sched_entity_p, | 277 | .se = init_sched_entity_p, |
256 | .cfs_rq = init_cfs_rq_p, | 278 | .cfs_rq = init_cfs_rq_p, |
279 | #endif | ||
257 | 280 | ||
281 | #ifdef CONFIG_RT_GROUP_SCHED | ||
258 | .rt_se = init_sched_rt_entity_p, | 282 | .rt_se = init_sched_rt_entity_p, |
259 | .rt_rq = init_rt_rq_p, | 283 | .rt_rq = init_rt_rq_p, |
260 | }; | ||
261 | |||
262 | #ifdef CONFIG_FAIR_USER_SCHED | ||
263 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | ||
264 | #else | ||
265 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | ||
266 | #endif | 284 | #endif |
267 | 285 | }; | |
268 | #define MIN_GROUP_SHARES 2 | ||
269 | |||
270 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | ||
271 | 286 | ||
272 | /* return group to which a task belongs */ | 287 | /* return group to which a task belongs */ |
273 | static inline struct task_group *task_group(struct task_struct *p) | 288 | static inline struct task_group *task_group(struct task_struct *p) |
274 | { | 289 | { |
275 | struct task_group *tg; | 290 | struct task_group *tg; |
276 | 291 | ||
277 | #ifdef CONFIG_FAIR_USER_SCHED | 292 | #ifdef CONFIG_USER_SCHED |
278 | tg = p->user->tg; | 293 | tg = p->user->tg; |
279 | #elif defined(CONFIG_FAIR_CGROUP_SCHED) | 294 | #elif defined(CONFIG_CGROUP_SCHED) |
280 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), | 295 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), |
281 | struct task_group, css); | 296 | struct task_group, css); |
282 | #else | 297 | #else |
@@ -288,21 +303,15 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
288 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | 303 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ |
289 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | 304 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) |
290 | { | 305 | { |
306 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
291 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; | 307 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; |
292 | p->se.parent = task_group(p)->se[cpu]; | 308 | p->se.parent = task_group(p)->se[cpu]; |
309 | #endif | ||
293 | 310 | ||
311 | #ifdef CONFIG_RT_GROUP_SCHED | ||
294 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; | 312 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; |
295 | p->rt.parent = task_group(p)->rt_se[cpu]; | 313 | p->rt.parent = task_group(p)->rt_se[cpu]; |
296 | } | 314 | #endif |
297 | |||
298 | static inline void lock_task_group_list(void) | ||
299 | { | ||
300 | mutex_lock(&task_group_mutex); | ||
301 | } | ||
302 | |||
303 | static inline void unlock_task_group_list(void) | ||
304 | { | ||
305 | mutex_unlock(&task_group_mutex); | ||
306 | } | 315 | } |
307 | 316 | ||
308 | static inline void lock_doms_cur(void) | 317 | static inline void lock_doms_cur(void) |
@@ -318,12 +327,10 @@ static inline void unlock_doms_cur(void) | |||
318 | #else | 327 | #else |
319 | 328 | ||
320 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | 329 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } |
321 | static inline void lock_task_group_list(void) { } | ||
322 | static inline void unlock_task_group_list(void) { } | ||
323 | static inline void lock_doms_cur(void) { } | 330 | static inline void lock_doms_cur(void) { } |
324 | static inline void unlock_doms_cur(void) { } | 331 | static inline void unlock_doms_cur(void) { } |
325 | 332 | ||
326 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 333 | #endif /* CONFIG_GROUP_SCHED */ |
327 | 334 | ||
328 | /* CFS-related fields in a runqueue */ | 335 | /* CFS-related fields in a runqueue */ |
329 | struct cfs_rq { | 336 | struct cfs_rq { |
@@ -363,7 +370,7 @@ struct cfs_rq { | |||
363 | struct rt_rq { | 370 | struct rt_rq { |
364 | struct rt_prio_array active; | 371 | struct rt_prio_array active; |
365 | unsigned long rt_nr_running; | 372 | unsigned long rt_nr_running; |
366 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 373 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
367 | int highest_prio; /* highest queued rt task prio */ | 374 | int highest_prio; /* highest queued rt task prio */ |
368 | #endif | 375 | #endif |
369 | #ifdef CONFIG_SMP | 376 | #ifdef CONFIG_SMP |
@@ -373,7 +380,9 @@ struct rt_rq { | |||
373 | int rt_throttled; | 380 | int rt_throttled; |
374 | u64 rt_time; | 381 | u64 rt_time; |
375 | 382 | ||
376 | #ifdef CONFIG_FAIR_GROUP_SCHED | 383 | #ifdef CONFIG_RT_GROUP_SCHED |
384 | unsigned long rt_nr_boosted; | ||
385 | |||
377 | struct rq *rq; | 386 | struct rq *rq; |
378 | struct list_head leaf_rt_rq_list; | 387 | struct list_head leaf_rt_rq_list; |
379 | struct task_group *tg; | 388 | struct task_group *tg; |
@@ -447,6 +456,8 @@ struct rq { | |||
447 | #ifdef CONFIG_FAIR_GROUP_SCHED | 456 | #ifdef CONFIG_FAIR_GROUP_SCHED |
448 | /* list of leaf cfs_rq on this cpu: */ | 457 | /* list of leaf cfs_rq on this cpu: */ |
449 | struct list_head leaf_cfs_rq_list; | 458 | struct list_head leaf_cfs_rq_list; |
459 | #endif | ||
460 | #ifdef CONFIG_RT_GROUP_SCHED | ||
450 | struct list_head leaf_rt_rq_list; | 461 | struct list_head leaf_rt_rq_list; |
451 | #endif | 462 | #endif |
452 | 463 | ||
@@ -652,19 +663,21 @@ const_debug unsigned int sysctl_sched_features = | |||
652 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | 663 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
653 | 664 | ||
654 | /* | 665 | /* |
655 | * period over which we measure -rt task cpu usage in ms. | 666 | * period over which we measure -rt task cpu usage in us. |
656 | * default: 1s | 667 | * default: 1s |
657 | */ | 668 | */ |
658 | const_debug unsigned int sysctl_sched_rt_period = 1000; | 669 | unsigned int sysctl_sched_rt_period = 1000000; |
659 | 670 | ||
660 | #define SCHED_RT_FRAC_SHIFT 16 | 671 | /* |
661 | #define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT) | 672 | * part of the period that we allow rt tasks to run in us. |
673 | * default: 0.95s | ||
674 | */ | ||
675 | int sysctl_sched_rt_runtime = 950000; | ||
662 | 676 | ||
663 | /* | 677 | /* |
664 | * ratio of time -rt tasks may consume. | 678 | * single value that denotes runtime == period, ie unlimited time. |
665 | * default: 95% | ||
666 | */ | 679 | */ |
667 | const_debug unsigned int sysctl_sched_rt_ratio = 62259; | 680 | #define RUNTIME_INF ((u64)~0ULL) |
668 | 681 | ||
669 | /* | 682 | /* |
670 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | 683 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu |
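
The replacement of sysctl_sched_rt_ratio with a period/runtime pair keeps the default -rt budget unchanged: 62259 in the old 16-bit fixed-point encoding is 62259/65536, about 0.95, and the new defaults express the same 95% directly as 950000 us of runtime per 1000000 us period. A quick check of the equivalence (plain arithmetic, not kernel code):

#include <stdio.h>

int main(void)
{
	/* Old encoding: ratio as a 16-bit fixed-point fraction. */
	unsigned int old_ratio = 62259;
	double old_frac = (double)old_ratio / (1u << 16);	/* ~0.9500 */

	/* New encoding: explicit runtime/period in microseconds. */
	unsigned int rt_runtime_us = 950000;
	unsigned int rt_period_us = 1000000;
	double new_frac = (double)rt_runtime_us / rt_period_us;	/* 0.9500 */

	printf("old %.4f  new %.4f\n", old_frac, new_frac);
	return 0;
}
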
@@ -1893,13 +1906,13 @@ out: | |||
1893 | return success; | 1906 | return success; |
1894 | } | 1907 | } |
1895 | 1908 | ||
1896 | int fastcall wake_up_process(struct task_struct *p) | 1909 | int wake_up_process(struct task_struct *p) |
1897 | { | 1910 | { |
1898 | return try_to_wake_up(p, TASK_ALL, 0); | 1911 | return try_to_wake_up(p, TASK_ALL, 0); |
1899 | } | 1912 | } |
1900 | EXPORT_SYMBOL(wake_up_process); | 1913 | EXPORT_SYMBOL(wake_up_process); |
1901 | 1914 | ||
1902 | int fastcall wake_up_state(struct task_struct *p, unsigned int state) | 1915 | int wake_up_state(struct task_struct *p, unsigned int state) |
1903 | { | 1916 | { |
1904 | return try_to_wake_up(p, state, 0); | 1917 | return try_to_wake_up(p, state, 0); |
1905 | } | 1918 | } |
@@ -1986,7 +1999,7 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
1986 | * that must be done for every newly created context, then puts the task | 1999 | * that must be done for every newly created context, then puts the task |
1987 | * on the runqueue and wakes it. | 2000 | * on the runqueue and wakes it. |
1988 | */ | 2001 | */ |
1989 | void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | 2002 | void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) |
1990 | { | 2003 | { |
1991 | unsigned long flags; | 2004 | unsigned long flags; |
1992 | struct rq *rq; | 2005 | struct rq *rq; |
@@ -3753,7 +3766,7 @@ void scheduler_tick(void) | |||
3753 | 3766 | ||
3754 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) | 3767 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) |
3755 | 3768 | ||
3756 | void fastcall add_preempt_count(int val) | 3769 | void add_preempt_count(int val) |
3757 | { | 3770 | { |
3758 | /* | 3771 | /* |
3759 | * Underflow? | 3772 | * Underflow? |
@@ -3769,7 +3782,7 @@ void fastcall add_preempt_count(int val) | |||
3769 | } | 3782 | } |
3770 | EXPORT_SYMBOL(add_preempt_count); | 3783 | EXPORT_SYMBOL(add_preempt_count); |
3771 | 3784 | ||
3772 | void fastcall sub_preempt_count(int val) | 3785 | void sub_preempt_count(int val) |
3773 | { | 3786 | { |
3774 | /* | 3787 | /* |
3775 | * Underflow? | 3788 | * Underflow? |
@@ -4067,7 +4080,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | |||
4067 | * @nr_exclusive: how many wake-one or wake-many threads to wake up | 4080 | * @nr_exclusive: how many wake-one or wake-many threads to wake up |
4068 | * @key: is directly passed to the wakeup function | 4081 | * @key: is directly passed to the wakeup function |
4069 | */ | 4082 | */ |
4070 | void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, | 4083 | void __wake_up(wait_queue_head_t *q, unsigned int mode, |
4071 | int nr_exclusive, void *key) | 4084 | int nr_exclusive, void *key) |
4072 | { | 4085 | { |
4073 | unsigned long flags; | 4086 | unsigned long flags; |
@@ -4081,7 +4094,7 @@ EXPORT_SYMBOL(__wake_up); | |||
4081 | /* | 4094 | /* |
4082 | * Same as __wake_up but called with the spinlock in wait_queue_head_t held. | 4095 | * Same as __wake_up but called with the spinlock in wait_queue_head_t held. |
4083 | */ | 4096 | */ |
4084 | void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode) | 4097 | void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) |
4085 | { | 4098 | { |
4086 | __wake_up_common(q, mode, 1, 0, NULL); | 4099 | __wake_up_common(q, mode, 1, 0, NULL); |
4087 | } | 4100 | } |
@@ -4099,7 +4112,7 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode) | |||
4099 | * | 4112 | * |
4100 | * On UP it can prevent extra preemption. | 4113 | * On UP it can prevent extra preemption. |
4101 | */ | 4114 | */ |
4102 | void fastcall | 4115 | void |
4103 | __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) | 4116 | __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) |
4104 | { | 4117 | { |
4105 | unsigned long flags; | 4118 | unsigned long flags; |
@@ -4571,6 +4584,15 @@ recheck: | |||
4571 | return -EPERM; | 4584 | return -EPERM; |
4572 | } | 4585 | } |
4573 | 4586 | ||
4587 | #ifdef CONFIG_RT_GROUP_SCHED | ||
4588 | /* | ||
4589 | * Do not allow realtime tasks into groups that have no runtime | ||
4590 | * assigned. | ||
4591 | */ | ||
4592 | if (rt_policy(policy) && task_group(p)->rt_runtime == 0) | ||
4593 | return -EPERM; | ||
4594 | #endif | ||
4595 | |||
4574 | retval = security_task_setscheduler(p, policy, param); | 4596 | retval = security_task_setscheduler(p, policy, param); |
4575 | if (retval) | 4597 | if (retval) |
4576 | return retval; | 4598 | return retval; |
@@ -7112,7 +7134,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
7112 | /* delimiter for bitsearch: */ | 7134 | /* delimiter for bitsearch: */ |
7113 | __set_bit(MAX_RT_PRIO, array->bitmap); | 7135 | __set_bit(MAX_RT_PRIO, array->bitmap); |
7114 | 7136 | ||
7115 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 7137 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
7116 | rt_rq->highest_prio = MAX_RT_PRIO; | 7138 | rt_rq->highest_prio = MAX_RT_PRIO; |
7117 | #endif | 7139 | #endif |
7118 | #ifdef CONFIG_SMP | 7140 | #ifdef CONFIG_SMP |
@@ -7123,7 +7145,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
7123 | rt_rq->rt_time = 0; | 7145 | rt_rq->rt_time = 0; |
7124 | rt_rq->rt_throttled = 0; | 7146 | rt_rq->rt_throttled = 0; |
7125 | 7147 | ||
7126 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7148 | #ifdef CONFIG_RT_GROUP_SCHED |
7149 | rt_rq->rt_nr_boosted = 0; | ||
7127 | rt_rq->rq = rq; | 7150 | rt_rq->rq = rq; |
7128 | #endif | 7151 | #endif |
7129 | } | 7152 | } |
@@ -7146,7 +7169,9 @@ static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg, | |||
7146 | se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); | 7169 | se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); |
7147 | se->parent = NULL; | 7170 | se->parent = NULL; |
7148 | } | 7171 | } |
7172 | #endif | ||
7149 | 7173 | ||
7174 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7150 | static void init_tg_rt_entry(struct rq *rq, struct task_group *tg, | 7175 | static void init_tg_rt_entry(struct rq *rq, struct task_group *tg, |
7151 | struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, | 7176 | struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, |
7152 | int cpu, int add) | 7177 | int cpu, int add) |
@@ -7175,7 +7200,7 @@ void __init sched_init(void) | |||
7175 | init_defrootdomain(); | 7200 | init_defrootdomain(); |
7176 | #endif | 7201 | #endif |
7177 | 7202 | ||
7178 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7203 | #ifdef CONFIG_GROUP_SCHED |
7179 | list_add(&init_task_group.list, &task_groups); | 7204 | list_add(&init_task_group.list, &task_groups); |
7180 | #endif | 7205 | #endif |
7181 | 7206 | ||
@@ -7196,7 +7221,10 @@ void __init sched_init(void) | |||
7196 | &per_cpu(init_cfs_rq, i), | 7221 | &per_cpu(init_cfs_rq, i), |
7197 | &per_cpu(init_sched_entity, i), i, 1); | 7222 | &per_cpu(init_sched_entity, i), i, 1); |
7198 | 7223 | ||
7199 | init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */ | 7224 | #endif |
7225 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7226 | init_task_group.rt_runtime = | ||
7227 | sysctl_sched_rt_runtime * NSEC_PER_USEC; | ||
7200 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); | 7228 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); |
7201 | init_tg_rt_entry(rq, &init_task_group, | 7229 | init_tg_rt_entry(rq, &init_task_group, |
7202 | &per_cpu(init_rt_rq, i), | 7230 | &per_cpu(init_rt_rq, i), |
@@ -7303,7 +7331,7 @@ void normalize_rt_tasks(void) | |||
7303 | unsigned long flags; | 7331 | unsigned long flags; |
7304 | struct rq *rq; | 7332 | struct rq *rq; |
7305 | 7333 | ||
7306 | read_lock_irq(&tasklist_lock); | 7334 | read_lock_irqsave(&tasklist_lock, flags); |
7307 | do_each_thread(g, p) { | 7335 | do_each_thread(g, p) { |
7308 | /* | 7336 | /* |
7309 | * Only normalize user tasks: | 7337 | * Only normalize user tasks: |
@@ -7329,16 +7357,16 @@ void normalize_rt_tasks(void) | |||
7329 | continue; | 7357 | continue; |
7330 | } | 7358 | } |
7331 | 7359 | ||
7332 | spin_lock_irqsave(&p->pi_lock, flags); | 7360 | spin_lock(&p->pi_lock); |
7333 | rq = __task_rq_lock(p); | 7361 | rq = __task_rq_lock(p); |
7334 | 7362 | ||
7335 | normalize_task(rq, p); | 7363 | normalize_task(rq, p); |
7336 | 7364 | ||
7337 | __task_rq_unlock(rq); | 7365 | __task_rq_unlock(rq); |
7338 | spin_unlock_irqrestore(&p->pi_lock, flags); | 7366 | spin_unlock(&p->pi_lock); |
7339 | } while_each_thread(g, p); | 7367 | } while_each_thread(g, p); |
7340 | 7368 | ||
7341 | read_unlock_irq(&tasklist_lock); | 7369 | read_unlock_irqrestore(&tasklist_lock, flags); |
7342 | } | 7370 | } |
7343 | 7371 | ||
7344 | #endif /* CONFIG_MAGIC_SYSRQ */ | 7372 | #endif /* CONFIG_MAGIC_SYSRQ */ |
@@ -7387,9 +7415,9 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
7387 | 7415 | ||
7388 | #endif | 7416 | #endif |
7389 | 7417 | ||
7390 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7418 | #ifdef CONFIG_GROUP_SCHED |
7391 | 7419 | ||
7392 | #ifdef CONFIG_SMP | 7420 | #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP |
7393 | /* | 7421 | /* |
7394 | * distribute shares of all task groups among their schedulable entities, | 7422 | * distribute shares of all task groups among their schedulable entities, |
7395 | * to reflect load distribution across cpus. | 7423 | * to reflect load distribution across cpus. |
@@ -7540,7 +7568,8 @@ static int load_balance_monitor(void *unused) | |||
7540 | } | 7568 | } |
7541 | #endif /* CONFIG_SMP */ | 7569 | #endif /* CONFIG_SMP */ |
7542 | 7570 | ||
7543 | static void free_sched_group(struct task_group *tg) | 7571 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7572 | static void free_fair_sched_group(struct task_group *tg) | ||
7544 | { | 7573 | { |
7545 | int i; | 7574 | int i; |
7546 | 7575 | ||
@@ -7549,49 +7578,27 @@ static void free_sched_group(struct task_group *tg) | |||
7549 | kfree(tg->cfs_rq[i]); | 7578 | kfree(tg->cfs_rq[i]); |
7550 | if (tg->se) | 7579 | if (tg->se) |
7551 | kfree(tg->se[i]); | 7580 | kfree(tg->se[i]); |
7552 | if (tg->rt_rq) | ||
7553 | kfree(tg->rt_rq[i]); | ||
7554 | if (tg->rt_se) | ||
7555 | kfree(tg->rt_se[i]); | ||
7556 | } | 7581 | } |
7557 | 7582 | ||
7558 | kfree(tg->cfs_rq); | 7583 | kfree(tg->cfs_rq); |
7559 | kfree(tg->se); | 7584 | kfree(tg->se); |
7560 | kfree(tg->rt_rq); | ||
7561 | kfree(tg->rt_se); | ||
7562 | kfree(tg); | ||
7563 | } | 7585 | } |
7564 | 7586 | ||
7565 | /* allocate runqueue etc for a new task group */ | 7587 | static int alloc_fair_sched_group(struct task_group *tg) |
7566 | struct task_group *sched_create_group(void) | ||
7567 | { | 7588 | { |
7568 | struct task_group *tg; | ||
7569 | struct cfs_rq *cfs_rq; | 7589 | struct cfs_rq *cfs_rq; |
7570 | struct sched_entity *se; | 7590 | struct sched_entity *se; |
7571 | struct rt_rq *rt_rq; | ||
7572 | struct sched_rt_entity *rt_se; | ||
7573 | struct rq *rq; | 7591 | struct rq *rq; |
7574 | int i; | 7592 | int i; |
7575 | 7593 | ||
7576 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | ||
7577 | if (!tg) | ||
7578 | return ERR_PTR(-ENOMEM); | ||
7579 | |||
7580 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); | 7594 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); |
7581 | if (!tg->cfs_rq) | 7595 | if (!tg->cfs_rq) |
7582 | goto err; | 7596 | goto err; |
7583 | tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); | 7597 | tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); |
7584 | if (!tg->se) | 7598 | if (!tg->se) |
7585 | goto err; | 7599 | goto err; |
7586 | tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL); | ||
7587 | if (!tg->rt_rq) | ||
7588 | goto err; | ||
7589 | tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL); | ||
7590 | if (!tg->rt_se) | ||
7591 | goto err; | ||
7592 | 7600 | ||
7593 | tg->shares = NICE_0_LOAD; | 7601 | tg->shares = NICE_0_LOAD; |
7594 | tg->rt_ratio = 0; /* XXX */ | ||
7595 | 7602 | ||
7596 | for_each_possible_cpu(i) { | 7603 | for_each_possible_cpu(i) { |
7597 | rq = cpu_rq(i); | 7604 | rq = cpu_rq(i); |
@@ -7606,6 +7613,79 @@ struct task_group *sched_create_group(void) | |||
7606 | if (!se) | 7613 | if (!se) |
7607 | goto err; | 7614 | goto err; |
7608 | 7615 | ||
7616 | init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0); | ||
7617 | } | ||
7618 | |||
7619 | return 1; | ||
7620 | |||
7621 | err: | ||
7622 | return 0; | ||
7623 | } | ||
7624 | |||
7625 | static inline void register_fair_sched_group(struct task_group *tg, int cpu) | ||
7626 | { | ||
7627 | list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list, | ||
7628 | &cpu_rq(cpu)->leaf_cfs_rq_list); | ||
7629 | } | ||
7630 | |||
7631 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | ||
7632 | { | ||
7633 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); | ||
7634 | } | ||
7635 | #else | ||
7636 | static inline void free_fair_sched_group(struct task_group *tg) | ||
7637 | { | ||
7638 | } | ||
7639 | |||
7640 | static inline int alloc_fair_sched_group(struct task_group *tg) | ||
7641 | { | ||
7642 | return 1; | ||
7643 | } | ||
7644 | |||
7645 | static inline void register_fair_sched_group(struct task_group *tg, int cpu) | ||
7646 | { | ||
7647 | } | ||
7648 | |||
7649 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | ||
7650 | { | ||
7651 | } | ||
7652 | #endif | ||
7653 | |||
7654 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7655 | static void free_rt_sched_group(struct task_group *tg) | ||
7656 | { | ||
7657 | int i; | ||
7658 | |||
7659 | for_each_possible_cpu(i) { | ||
7660 | if (tg->rt_rq) | ||
7661 | kfree(tg->rt_rq[i]); | ||
7662 | if (tg->rt_se) | ||
7663 | kfree(tg->rt_se[i]); | ||
7664 | } | ||
7665 | |||
7666 | kfree(tg->rt_rq); | ||
7667 | kfree(tg->rt_se); | ||
7668 | } | ||
7669 | |||
7670 | static int alloc_rt_sched_group(struct task_group *tg) | ||
7671 | { | ||
7672 | struct rt_rq *rt_rq; | ||
7673 | struct sched_rt_entity *rt_se; | ||
7674 | struct rq *rq; | ||
7675 | int i; | ||
7676 | |||
7677 | tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL); | ||
7678 | if (!tg->rt_rq) | ||
7679 | goto err; | ||
7680 | tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL); | ||
7681 | if (!tg->rt_se) | ||
7682 | goto err; | ||
7683 | |||
7684 | tg->rt_runtime = 0; | ||
7685 | |||
7686 | for_each_possible_cpu(i) { | ||
7687 | rq = cpu_rq(i); | ||
7688 | |||
7609 | rt_rq = kmalloc_node(sizeof(struct rt_rq), | 7689 | rt_rq = kmalloc_node(sizeof(struct rt_rq), |
7610 | GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); | 7690 | GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); |
7611 | if (!rt_rq) | 7691 | if (!rt_rq) |
@@ -7616,20 +7696,75 @@ struct task_group *sched_create_group(void) | |||
7616 | if (!rt_se) | 7696 | if (!rt_se) |
7617 | goto err; | 7697 | goto err; |
7618 | 7698 | ||
7619 | init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0); | ||
7620 | init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0); | 7699 | init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0); |
7621 | } | 7700 | } |
7622 | 7701 | ||
7623 | lock_task_group_list(); | 7702 | return 1; |
7703 | |||
7704 | err: | ||
7705 | return 0; | ||
7706 | } | ||
7707 | |||
7708 | static inline void register_rt_sched_group(struct task_group *tg, int cpu) | ||
7709 | { | ||
7710 | list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list, | ||
7711 | &cpu_rq(cpu)->leaf_rt_rq_list); | ||
7712 | } | ||
7713 | |||
7714 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | ||
7715 | { | ||
7716 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); | ||
7717 | } | ||
7718 | #else | ||
7719 | static inline void free_rt_sched_group(struct task_group *tg) | ||
7720 | { | ||
7721 | } | ||
7722 | |||
7723 | static inline int alloc_rt_sched_group(struct task_group *tg) | ||
7724 | { | ||
7725 | return 1; | ||
7726 | } | ||
7727 | |||
7728 | static inline void register_rt_sched_group(struct task_group *tg, int cpu) | ||
7729 | { | ||
7730 | } | ||
7731 | |||
7732 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | ||
7733 | { | ||
7734 | } | ||
7735 | #endif | ||
7736 | |||
7737 | static void free_sched_group(struct task_group *tg) | ||
7738 | { | ||
7739 | free_fair_sched_group(tg); | ||
7740 | free_rt_sched_group(tg); | ||
7741 | kfree(tg); | ||
7742 | } | ||
7743 | |||
7744 | /* allocate runqueue etc for a new task group */ | ||
7745 | struct task_group *sched_create_group(void) | ||
7746 | { | ||
7747 | struct task_group *tg; | ||
7748 | unsigned long flags; | ||
7749 | int i; | ||
7750 | |||
7751 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | ||
7752 | if (!tg) | ||
7753 | return ERR_PTR(-ENOMEM); | ||
7754 | |||
7755 | if (!alloc_fair_sched_group(tg)) | ||
7756 | goto err; | ||
7757 | |||
7758 | if (!alloc_rt_sched_group(tg)) | ||
7759 | goto err; | ||
7760 | |||
7761 | spin_lock_irqsave(&task_group_lock, flags); | ||
7624 | for_each_possible_cpu(i) { | 7762 | for_each_possible_cpu(i) { |
7625 | rq = cpu_rq(i); | 7763 | register_fair_sched_group(tg, i); |
7626 | cfs_rq = tg->cfs_rq[i]; | 7764 | register_rt_sched_group(tg, i); |
7627 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | ||
7628 | rt_rq = tg->rt_rq[i]; | ||
7629 | list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list); | ||
7630 | } | 7765 | } |
7631 | list_add_rcu(&tg->list, &task_groups); | 7766 | list_add_rcu(&tg->list, &task_groups); |
7632 | unlock_task_group_list(); | 7767 | spin_unlock_irqrestore(&task_group_lock, flags); |
7633 | 7768 | ||
7634 | return tg; | 7769 | return tg; |
7635 | 7770 | ||
@@ -7648,21 +7783,16 @@ static void free_sched_group_rcu(struct rcu_head *rhp) | |||
7648 | /* Destroy runqueue etc associated with a task group */ | 7783 | /* Destroy runqueue etc associated with a task group */ |
7649 | void sched_destroy_group(struct task_group *tg) | 7784 | void sched_destroy_group(struct task_group *tg) |
7650 | { | 7785 | { |
7651 | struct cfs_rq *cfs_rq = NULL; | 7786 | unsigned long flags; |
7652 | struct rt_rq *rt_rq = NULL; | ||
7653 | int i; | 7787 | int i; |
7654 | 7788 | ||
7655 | lock_task_group_list(); | 7789 | spin_lock_irqsave(&task_group_lock, flags); |
7656 | for_each_possible_cpu(i) { | 7790 | for_each_possible_cpu(i) { |
7657 | cfs_rq = tg->cfs_rq[i]; | 7791 | unregister_fair_sched_group(tg, i); |
7658 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | 7792 | unregister_rt_sched_group(tg, i); |
7659 | rt_rq = tg->rt_rq[i]; | ||
7660 | list_del_rcu(&rt_rq->leaf_rt_rq_list); | ||
7661 | } | 7793 | } |
7662 | list_del_rcu(&tg->list); | 7794 | list_del_rcu(&tg->list); |
7663 | unlock_task_group_list(); | 7795 | spin_unlock_irqrestore(&task_group_lock, flags); |
7664 | |||
7665 | BUG_ON(!cfs_rq); | ||
7666 | 7796 | ||
7667 | /* wait for possible concurrent references to cfs_rqs complete */ | 7797 | /* wait for possible concurrent references to cfs_rqs complete */ |
7668 | call_rcu(&tg->rcu, free_sched_group_rcu); | 7798 | call_rcu(&tg->rcu, free_sched_group_rcu); |
@@ -7703,6 +7833,7 @@ void sched_move_task(struct task_struct *tsk) | |||
7703 | task_rq_unlock(rq, &flags); | 7833 | task_rq_unlock(rq, &flags); |
7704 | } | 7834 | } |
7705 | 7835 | ||
7836 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7706 | /* rq->lock to be locked by caller */ | 7837 | /* rq->lock to be locked by caller */ |
7707 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 7838 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
7708 | { | 7839 | { |
@@ -7728,13 +7859,14 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) | |||
7728 | } | 7859 | } |
7729 | } | 7860 | } |
7730 | 7861 | ||
7862 | static DEFINE_MUTEX(shares_mutex); | ||
7863 | |||
7731 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) | 7864 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) |
7732 | { | 7865 | { |
7733 | int i; | 7866 | int i; |
7734 | struct cfs_rq *cfs_rq; | 7867 | unsigned long flags; |
7735 | struct rq *rq; | ||
7736 | 7868 | ||
7737 | lock_task_group_list(); | 7869 | mutex_lock(&shares_mutex); |
7738 | if (tg->shares == shares) | 7870 | if (tg->shares == shares) |
7739 | goto done; | 7871 | goto done; |
7740 | 7872 | ||
@@ -7746,10 +7878,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
7746 | * load_balance_fair) from referring to this group first, | 7878 | * load_balance_fair) from referring to this group first, |
7747 | * by taking it off the rq->leaf_cfs_rq_list on each cpu. | 7879 | * by taking it off the rq->leaf_cfs_rq_list on each cpu. |
7748 | */ | 7880 | */ |
7749 | for_each_possible_cpu(i) { | 7881 | spin_lock_irqsave(&task_group_lock, flags); |
7750 | cfs_rq = tg->cfs_rq[i]; | 7882 | for_each_possible_cpu(i) |
7751 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | 7883 | unregister_fair_sched_group(tg, i); |
7752 | } | 7884 | spin_unlock_irqrestore(&task_group_lock, flags); |
7753 | 7885 | ||
7754 | /* wait for any ongoing reference to this group to finish */ | 7886 | /* wait for any ongoing reference to this group to finish */ |
7755 | synchronize_sched(); | 7887 | synchronize_sched(); |
@@ -7769,13 +7901,12 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
7769 | * Enable load balance activity on this group, by inserting it back on | 7901 | * Enable load balance activity on this group, by inserting it back on |
7770 | * each cpu's rq->leaf_cfs_rq_list. | 7902 | * each cpu's rq->leaf_cfs_rq_list. |
7771 | */ | 7903 | */ |
7772 | for_each_possible_cpu(i) { | 7904 | spin_lock_irqsave(&task_group_lock, flags); |
7773 | rq = cpu_rq(i); | 7905 | for_each_possible_cpu(i) |
7774 | cfs_rq = tg->cfs_rq[i]; | 7906 | register_fair_sched_group(tg, i); |
7775 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | 7907 | spin_unlock_irqrestore(&task_group_lock, flags); |
7776 | } | ||
7777 | done: | 7908 | done: |
7778 | unlock_task_group_list(); | 7909 | mutex_unlock(&shares_mutex); |
7779 | return 0; | 7910 | return 0; |
7780 | } | 7911 | } |
7781 | 7912 | ||
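Editor's note on the hunk above: the old lock_task_group_list() is split into two locks. A sleeping shares_mutex serializes share changes, while the task_group_lock spinlock is held only around unregistering and re-registering the group's per-cpu runqueues; the synchronize_sched() in between lets load-balancing readers drain before the shares are rewritten. A rough userspace analogue of that lock split using pthreads — the struct, its fields and the "registered" flag are invented for illustration, and the RCU grace period is represented only by a comment:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t shares_mutex = PTHREAD_MUTEX_INITIALIZER; /* slow config path */
static pthread_spinlock_t group_lock;                            /* guards the lists  */

struct group { unsigned long shares; int registered; };

static void set_shares(struct group *g, unsigned long shares)
{
	pthread_mutex_lock(&shares_mutex);   /* like shares_mutex          */
	pthread_spin_lock(&group_lock);      /* like task_group_lock       */
	g->registered = 0;                   /* unregister_fair_sched_group() */
	pthread_spin_unlock(&group_lock);

	/* the kernel waits for readers here: synchronize_sched() */

	g->shares = shares;                  /* no reader can see the group now */

	pthread_spin_lock(&group_lock);
	g->registered = 1;                   /* register_fair_sched_group() */
	pthread_spin_unlock(&group_lock);
	pthread_mutex_unlock(&shares_mutex);
}

int main(void)
{
	struct group g = { .shares = 1024, .registered = 1 };

	pthread_spin_init(&group_lock, PTHREAD_PROCESS_PRIVATE);
	set_shares(&g, 2048);
	printf("shares=%lu registered=%d\n", g.shares, g.registered);
	return 0;
}
```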
@@ -7783,35 +7914,84 @@ unsigned long sched_group_shares(struct task_group *tg) | |||
7783 | { | 7914 | { |
7784 | return tg->shares; | 7915 | return tg->shares; |
7785 | } | 7916 | } |
7917 | #endif | ||
7786 | 7918 | ||
7919 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7787 | /* | 7920 | /* |
7788 | * Ensure the total rt_ratio <= sysctl_sched_rt_ratio | 7921 | * Ensure that the real time constraints are schedulable. |
7789 | */ | 7922 | */ |
7790 | int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio) | 7923 | static DEFINE_MUTEX(rt_constraints_mutex); |
7924 | |||
7925 | static unsigned long to_ratio(u64 period, u64 runtime) | ||
7926 | { | ||
7927 | if (runtime == RUNTIME_INF) | ||
7928 | return 1ULL << 16; | ||
7929 | |||
7930 | runtime *= (1ULL << 16); | ||
7931 | div64_64(runtime, period); | ||
7932 | return runtime; | ||
7933 | } | ||
7934 | |||
7935 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | ||
7791 | { | 7936 | { |
7792 | struct task_group *tgi; | 7937 | struct task_group *tgi; |
7793 | unsigned long total = 0; | 7938 | unsigned long total = 0; |
7939 | unsigned long global_ratio = | ||
7940 | to_ratio(sysctl_sched_rt_period, | ||
7941 | sysctl_sched_rt_runtime < 0 ? | ||
7942 | RUNTIME_INF : sysctl_sched_rt_runtime); | ||
7794 | 7943 | ||
7795 | rcu_read_lock(); | 7944 | rcu_read_lock(); |
7796 | list_for_each_entry_rcu(tgi, &task_groups, list) | 7945 | list_for_each_entry_rcu(tgi, &task_groups, list) { |
7797 | total += tgi->rt_ratio; | 7946 | if (tgi == tg) |
7798 | rcu_read_unlock(); | 7947 | continue; |
7799 | 7948 | ||
7800 | if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio) | 7949 | total += to_ratio(period, tgi->rt_runtime); |
7801 | return -EINVAL; | 7950 | } |
7951 | rcu_read_unlock(); | ||
7802 | 7952 | ||
7803 | tg->rt_ratio = rt_ratio; | 7953 | return total + to_ratio(period, runtime) < global_ratio; |
7804 | return 0; | ||
7805 | } | 7954 | } |
7806 | 7955 | ||
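The new admission check works in 16.16 fixed point: to_ratio() maps each group's (period, runtime) pair onto a fraction of 1 << 16, and __rt_schedulable() accepts a new setting only if the summed fractions of all groups stay below the global ratio. A small standalone sketch of the same arithmetic — plain 64-bit division stands in for div64_64(), with the quotient assigned explicitly, and the period/runtime values are made up:

```c
#include <stdio.h>
#include <stdint.h>

/* 16.16 fixed point: runtime/period scaled by 1 << 16 */
static uint64_t to_ratio(uint64_t period_ns, uint64_t runtime_ns)
{
	return (runtime_ns << 16) / period_ns;
}

int main(void)
{
	uint64_t period = 1000000000ull;                    /* 1s period          */
	uint64_t global = to_ratio(period, 950000000ull);   /* 95% global budget  */
	uint64_t groups = to_ratio(period, 300000000ull)    /* existing groups    */
	                + to_ratio(period, 400000000ull);
	uint64_t newgrp = to_ratio(period, 200000000ull);   /* proposed setting   */

	/* mirrors the return expression of __rt_schedulable() */
	printf("schedulable: %s\n", groups + newgrp < global ? "yes" : "no");
	return 0;
}
```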
7807 | unsigned long sched_group_rt_ratio(struct task_group *tg) | 7956 | int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) |
7808 | { | 7957 | { |
7809 | return tg->rt_ratio; | 7958 | u64 rt_runtime, rt_period; |
7959 | int err = 0; | ||
7960 | |||
7961 | rt_period = sysctl_sched_rt_period * NSEC_PER_USEC; | ||
7962 | rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; | ||
7963 | if (rt_runtime_us == -1) | ||
7964 | rt_runtime = rt_period; | ||
7965 | |||
7966 | mutex_lock(&rt_constraints_mutex); | ||
7967 | if (!__rt_schedulable(tg, rt_period, rt_runtime)) { | ||
7968 | err = -EINVAL; | ||
7969 | goto unlock; | ||
7970 | } | ||
7971 | if (rt_runtime_us == -1) | ||
7972 | rt_runtime = RUNTIME_INF; | ||
7973 | tg->rt_runtime = rt_runtime; | ||
7974 | unlock: | ||
7975 | mutex_unlock(&rt_constraints_mutex); | ||
7976 | |||
7977 | return err; | ||
7810 | } | 7978 | } |
7811 | 7979 | ||
7812 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 7980 | long sched_group_rt_runtime(struct task_group *tg) |
7981 | { | ||
7982 | u64 rt_runtime_us; | ||
7983 | |||
7984 | if (tg->rt_runtime == RUNTIME_INF) | ||
7985 | return -1; | ||
7986 | |||
7987 | rt_runtime_us = tg->rt_runtime; | ||
7988 | do_div(rt_runtime_us, NSEC_PER_USEC); | ||
7989 | return rt_runtime_us; | ||
7990 | } | ||
7991 | #endif | ||
7992 | #endif /* CONFIG_GROUP_SCHED */ | ||
7813 | 7993 | ||
7814 | #ifdef CONFIG_FAIR_CGROUP_SCHED | 7994 | #ifdef CONFIG_CGROUP_SCHED |
7815 | 7995 | ||
7816 | /* return corresponding task_group object of a cgroup */ | 7996 | /* return corresponding task_group object of a cgroup */ |
7817 | static inline struct task_group *cgroup_tg(struct cgroup *cgrp) | 7997 | static inline struct task_group *cgroup_tg(struct cgroup *cgrp) |
@@ -7857,9 +8037,15 @@ static int | |||
7857 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 8037 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
7858 | struct task_struct *tsk) | 8038 | struct task_struct *tsk) |
7859 | { | 8039 | { |
8040 | #ifdef CONFIG_RT_GROUP_SCHED | ||
8041 | /* Don't accept realtime tasks when there is no way for them to run */ | ||
8042 | if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0) | ||
8043 | return -EINVAL; | ||
8044 | #else | ||
7860 | /* We don't support RT-tasks being in separate groups */ | 8045 | /* We don't support RT-tasks being in separate groups */ |
7861 | if (tsk->sched_class != &fair_sched_class) | 8046 | if (tsk->sched_class != &fair_sched_class) |
7862 | return -EINVAL; | 8047 | return -EINVAL; |
8048 | #endif | ||
7863 | 8049 | ||
7864 | return 0; | 8050 | return 0; |
7865 | } | 8051 | } |
@@ -7871,6 +8057,7 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
7871 | sched_move_task(tsk); | 8057 | sched_move_task(tsk); |
7872 | } | 8058 | } |
7873 | 8059 | ||
8060 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7874 | static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 8061 | static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, |
7875 | u64 shareval) | 8062 | u64 shareval) |
7876 | { | 8063 | { |
@@ -7883,31 +8070,70 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) | |||
7883 | 8070 | ||
7884 | return (u64) tg->shares; | 8071 | return (u64) tg->shares; |
7885 | } | 8072 | } |
8073 | #endif | ||
7886 | 8074 | ||
7887 | static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 8075 | #ifdef CONFIG_RT_GROUP_SCHED |
7888 | u64 rt_ratio_val) | 8076 | static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, |
8077 | struct file *file, | ||
8078 | const char __user *userbuf, | ||
8079 | size_t nbytes, loff_t *unused_ppos) | ||
7889 | { | 8080 | { |
7890 | return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val); | 8081 | char buffer[64]; |
8082 | int retval = 0; | ||
8083 | s64 val; | ||
8084 | char *end; | ||
8085 | |||
8086 | if (!nbytes) | ||
8087 | return -EINVAL; | ||
8088 | if (nbytes >= sizeof(buffer)) | ||
8089 | return -E2BIG; | ||
8090 | if (copy_from_user(buffer, userbuf, nbytes)) | ||
8091 | return -EFAULT; | ||
8092 | |||
8093 | buffer[nbytes] = 0; /* nul-terminate */ | ||
8094 | |||
8095 | /* strip newline if necessary */ | ||
8096 | if (nbytes && (buffer[nbytes-1] == '\n')) | ||
8097 | buffer[nbytes-1] = 0; | ||
8098 | val = simple_strtoll(buffer, &end, 0); | ||
8099 | if (*end) | ||
8100 | return -EINVAL; | ||
8101 | |||
8102 | /* Pass to subsystem */ | ||
8103 | retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val); | ||
8104 | if (!retval) | ||
8105 | retval = nbytes; | ||
8106 | return retval; | ||
7891 | } | 8107 | } |
7892 | 8108 | ||
7893 | static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft) | 8109 | static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft, |
8110 | struct file *file, | ||
8111 | char __user *buf, size_t nbytes, | ||
8112 | loff_t *ppos) | ||
7894 | { | 8113 | { |
7895 | struct task_group *tg = cgroup_tg(cgrp); | 8114 | char tmp[64]; |
8115 | long val = sched_group_rt_runtime(cgroup_tg(cgrp)); | ||
8116 | int len = sprintf(tmp, "%ld\n", val); | ||
7896 | 8117 | ||
7897 | return (u64) tg->rt_ratio; | 8118 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); |
7898 | } | 8119 | } |
8120 | #endif | ||
7899 | 8121 | ||
7900 | static struct cftype cpu_files[] = { | 8122 | static struct cftype cpu_files[] = { |
8123 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7901 | { | 8124 | { |
7902 | .name = "shares", | 8125 | .name = "shares", |
7903 | .read_uint = cpu_shares_read_uint, | 8126 | .read_uint = cpu_shares_read_uint, |
7904 | .write_uint = cpu_shares_write_uint, | 8127 | .write_uint = cpu_shares_write_uint, |
7905 | }, | 8128 | }, |
8129 | #endif | ||
8130 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7906 | { | 8131 | { |
7907 | .name = "rt_ratio", | 8132 | .name = "rt_runtime_us", |
7908 | .read_uint = cpu_rt_ratio_read_uint, | 8133 | .read = cpu_rt_runtime_read, |
7909 | .write_uint = cpu_rt_ratio_write_uint, | 8134 | .write = cpu_rt_runtime_write, |
7910 | }, | 8135 | }, |
8136 | #endif | ||
7911 | }; | 8137 | }; |
7912 | 8138 | ||
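With CONFIG_RT_GROUP_SCHED the cpu controller now exposes rt_runtime_us next to shares: a signed microsecond budget per sched_rt_period, with -1 meaning unlimited. A hedged userspace usage sketch — the mount point /cgroup and the group name "rtgroup" are assumptions, and error handling is minimal:

```c
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* assumed layout: cpu controller mounted at /cgroup, group already created */
	const char *path = "/cgroup/rtgroup/cpu.rt_runtime_us";
	char buf[64];
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* give this group 500ms of RT time per period; "-1\n" would mean unlimited */
	int len = snprintf(buf, sizeof(buf), "%d\n", 500000);
	if (write(fd, buf, len) != len)
		perror("write");	/* EINVAL here means __rt_schedulable() refused it */
	close(fd);
	return 0;
}
```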
7913 | static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) | 8139 | static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) |
@@ -7926,7 +8152,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
7926 | .early_init = 1, | 8152 | .early_init = 1, |
7927 | }; | 8153 | }; |
7928 | 8154 | ||
7929 | #endif /* CONFIG_FAIR_CGROUP_SCHED */ | 8155 | #endif /* CONFIG_CGROUP_SCHED */ |
7930 | 8156 | ||
7931 | #ifdef CONFIG_CGROUP_CPUACCT | 8157 | #ifdef CONFIG_CGROUP_CPUACCT |
7932 | 8158 | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 274b40d7bef2..f54792b175b2 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -55,14 +55,14 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se) | |||
55 | return !list_empty(&rt_se->run_list); | 55 | return !list_empty(&rt_se->run_list); |
56 | } | 56 | } |
57 | 57 | ||
58 | #ifdef CONFIG_FAIR_GROUP_SCHED | 58 | #ifdef CONFIG_RT_GROUP_SCHED |
59 | 59 | ||
60 | static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) | 60 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
61 | { | 61 | { |
62 | if (!rt_rq->tg) | 62 | if (!rt_rq->tg) |
63 | return SCHED_RT_FRAC; | 63 | return RUNTIME_INF; |
64 | 64 | ||
65 | return rt_rq->tg->rt_ratio; | 65 | return rt_rq->tg->rt_runtime; |
66 | } | 66 | } |
67 | 67 | ||
68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -89,7 +89,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | |||
89 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se); | 89 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se); |
90 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se); | 90 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se); |
91 | 91 | ||
92 | static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | 92 | static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
93 | { | 93 | { |
94 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | 94 | struct sched_rt_entity *rt_se = rt_rq->rt_se; |
95 | 95 | ||
@@ -102,7 +102,7 @@ static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | |||
102 | } | 102 | } |
103 | } | 103 | } |
104 | 104 | ||
105 | static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | 105 | static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
106 | { | 106 | { |
107 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | 107 | struct sched_rt_entity *rt_se = rt_rq->rt_se; |
108 | 108 | ||
@@ -110,11 +110,31 @@ static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | |||
110 | dequeue_rt_entity(rt_se); | 110 | dequeue_rt_entity(rt_se); |
111 | } | 111 | } |
112 | 112 | ||
113 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
114 | { | ||
115 | return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; | ||
116 | } | ||
117 | |||
118 | static int rt_se_boosted(struct sched_rt_entity *rt_se) | ||
119 | { | ||
120 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | ||
121 | struct task_struct *p; | ||
122 | |||
123 | if (rt_rq) | ||
124 | return !!rt_rq->rt_nr_boosted; | ||
125 | |||
126 | p = rt_task_of(rt_se); | ||
127 | return p->prio != p->normal_prio; | ||
128 | } | ||
129 | |||
113 | #else | 130 | #else |
114 | 131 | ||
115 | static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) | 132 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
116 | { | 133 | { |
117 | return sysctl_sched_rt_ratio; | 134 | if (sysctl_sched_rt_runtime == -1) |
135 | return RUNTIME_INF; | ||
136 | |||
137 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | ||
118 | } | 138 | } |
119 | 139 | ||
120 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 140 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -141,19 +161,23 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | |||
141 | return NULL; | 161 | return NULL; |
142 | } | 162 | } |
143 | 163 | ||
144 | static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | 164 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
145 | { | 165 | { |
146 | } | 166 | } |
147 | 167 | ||
148 | static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | 168 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
149 | { | 169 | { |
150 | } | 170 | } |
151 | 171 | ||
172 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
173 | { | ||
174 | return rt_rq->rt_throttled; | ||
175 | } | ||
152 | #endif | 176 | #endif |
153 | 177 | ||
154 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | 178 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) |
155 | { | 179 | { |
156 | #ifdef CONFIG_FAIR_GROUP_SCHED | 180 | #ifdef CONFIG_RT_GROUP_SCHED |
157 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | 181 | struct rt_rq *rt_rq = group_rt_rq(rt_se); |
158 | 182 | ||
159 | if (rt_rq) | 183 | if (rt_rq) |
@@ -163,28 +187,26 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se) | |||
163 | return rt_task_of(rt_se)->prio; | 187 | return rt_task_of(rt_se)->prio; |
164 | } | 188 | } |
165 | 189 | ||
166 | static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq) | 190 | static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) |
167 | { | 191 | { |
168 | unsigned int rt_ratio = sched_rt_ratio(rt_rq); | 192 | u64 runtime = sched_rt_runtime(rt_rq); |
169 | u64 period, ratio; | ||
170 | 193 | ||
171 | if (rt_ratio == SCHED_RT_FRAC) | 194 | if (runtime == RUNTIME_INF) |
172 | return 0; | 195 | return 0; |
173 | 196 | ||
174 | if (rt_rq->rt_throttled) | 197 | if (rt_rq->rt_throttled) |
175 | return 1; | 198 | return rt_rq_throttled(rt_rq); |
176 | |||
177 | period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; | ||
178 | ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; | ||
179 | 199 | ||
180 | if (rt_rq->rt_time > ratio) { | 200 | if (rt_rq->rt_time > runtime) { |
181 | struct rq *rq = rq_of_rt_rq(rt_rq); | 201 | struct rq *rq = rq_of_rt_rq(rt_rq); |
182 | 202 | ||
183 | rq->rt_throttled = 1; | 203 | rq->rt_throttled = 1; |
184 | rt_rq->rt_throttled = 1; | 204 | rt_rq->rt_throttled = 1; |
185 | 205 | ||
186 | sched_rt_ratio_dequeue(rt_rq); | 206 | if (rt_rq_throttled(rt_rq)) { |
187 | return 1; | 207 | sched_rt_rq_dequeue(rt_rq); |
208 | return 1; | ||
209 | } | ||
188 | } | 210 | } |
189 | 211 | ||
190 | return 0; | 212 | return 0; |
@@ -196,17 +218,16 @@ static void update_sched_rt_period(struct rq *rq) | |||
196 | u64 period; | 218 | u64 period; |
197 | 219 | ||
198 | while (rq->clock > rq->rt_period_expire) { | 220 | while (rq->clock > rq->rt_period_expire) { |
199 | period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; | 221 | period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC; |
200 | rq->rt_period_expire += period; | 222 | rq->rt_period_expire += period; |
201 | 223 | ||
202 | for_each_leaf_rt_rq(rt_rq, rq) { | 224 | for_each_leaf_rt_rq(rt_rq, rq) { |
203 | unsigned long rt_ratio = sched_rt_ratio(rt_rq); | 225 | u64 runtime = sched_rt_runtime(rt_rq); |
204 | u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; | ||
205 | 226 | ||
206 | rt_rq->rt_time -= min(rt_rq->rt_time, ratio); | 227 | rt_rq->rt_time -= min(rt_rq->rt_time, runtime); |
207 | if (rt_rq->rt_throttled) { | 228 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { |
208 | rt_rq->rt_throttled = 0; | 229 | rt_rq->rt_throttled = 0; |
209 | sched_rt_ratio_enqueue(rt_rq); | 230 | sched_rt_rq_enqueue(rt_rq); |
210 | } | 231 | } |
211 | } | 232 | } |
212 | 233 | ||
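Together these hunks implement a simple budget/period model: update_curr_rt() charges executed nanoseconds to rt_rq->rt_time, sched_rt_runtime_exceeded() throttles the queue once rt_time passes the runtime budget, and update_sched_rt_period() refills by subtracting up to one budget per elapsed period, unthrottling when the backlog drops below the budget again. A standalone simulation of that accounting (tick size and numbers are invented):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t runtime = 300;	/* budget per period (arbitrary units) */
	const uint64_t period  = 1000;	/* period length                      */
	uint64_t rt_time = 0, now = 0, period_expire = period;
	int throttled = 0;

	for (int tick = 0; tick < 25; tick++) {
		now += 100;
		if (!throttled)
			rt_time += 100;			/* update_curr_rt()              */
		if (!throttled && rt_time > runtime) {	/* sched_rt_runtime_exceeded()   */
			throttled = 1;
			printf("t=%llu throttled (rt_time=%llu)\n",
			       (unsigned long long)now, (unsigned long long)rt_time);
		}
		while (now > period_expire) {		/* update_sched_rt_period()      */
			period_expire += period;
			rt_time -= rt_time < runtime ? rt_time : runtime;
			if (throttled && rt_time < runtime) {
				throttled = 0;
				printf("t=%llu unthrottled\n", (unsigned long long)now);
			}
		}
	}
	return 0;
}
```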
@@ -239,12 +260,7 @@ static void update_curr_rt(struct rq *rq) | |||
239 | cpuacct_charge(curr, delta_exec); | 260 | cpuacct_charge(curr, delta_exec); |
240 | 261 | ||
241 | rt_rq->rt_time += delta_exec; | 262 | rt_rq->rt_time += delta_exec; |
242 | /* | 263 | if (sched_rt_runtime_exceeded(rt_rq)) |
243 | * might make it a tad more accurate: | ||
244 | * | ||
245 | * update_sched_rt_period(rq); | ||
246 | */ | ||
247 | if (sched_rt_ratio_exceeded(rt_rq)) | ||
248 | resched_task(curr); | 264 | resched_task(curr); |
249 | } | 265 | } |
250 | 266 | ||
@@ -253,7 +269,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
253 | { | 269 | { |
254 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 270 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
255 | rt_rq->rt_nr_running++; | 271 | rt_rq->rt_nr_running++; |
256 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 272 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
257 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) | 273 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) |
258 | rt_rq->highest_prio = rt_se_prio(rt_se); | 274 | rt_rq->highest_prio = rt_se_prio(rt_se); |
259 | #endif | 275 | #endif |
@@ -265,6 +281,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
265 | 281 | ||
266 | update_rt_migration(rq_of_rt_rq(rt_rq)); | 282 | update_rt_migration(rq_of_rt_rq(rt_rq)); |
267 | #endif | 283 | #endif |
284 | #ifdef CONFIG_RT_GROUP_SCHED | ||
285 | if (rt_se_boosted(rt_se)) | ||
286 | rt_rq->rt_nr_boosted++; | ||
287 | #endif | ||
268 | } | 288 | } |
269 | 289 | ||
270 | static inline | 290 | static inline |
@@ -273,7 +293,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
273 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 293 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
274 | WARN_ON(!rt_rq->rt_nr_running); | 294 | WARN_ON(!rt_rq->rt_nr_running); |
275 | rt_rq->rt_nr_running--; | 295 | rt_rq->rt_nr_running--; |
276 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 296 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
277 | if (rt_rq->rt_nr_running) { | 297 | if (rt_rq->rt_nr_running) { |
278 | struct rt_prio_array *array; | 298 | struct rt_prio_array *array; |
279 | 299 | ||
@@ -295,6 +315,12 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
295 | 315 | ||
296 | update_rt_migration(rq_of_rt_rq(rt_rq)); | 316 | update_rt_migration(rq_of_rt_rq(rt_rq)); |
297 | #endif /* CONFIG_SMP */ | 317 | #endif /* CONFIG_SMP */ |
318 | #ifdef CONFIG_RT_GROUP_SCHED | ||
319 | if (rt_se_boosted(rt_se)) | ||
320 | rt_rq->rt_nr_boosted--; | ||
321 | |||
322 | WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted); | ||
323 | #endif | ||
298 | } | 324 | } |
299 | 325 | ||
300 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | 326 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) |
@@ -303,7 +329,7 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | |||
303 | struct rt_prio_array *array = &rt_rq->active; | 329 | struct rt_prio_array *array = &rt_rq->active; |
304 | struct rt_rq *group_rq = group_rt_rq(rt_se); | 330 | struct rt_rq *group_rq = group_rt_rq(rt_se); |
305 | 331 | ||
306 | if (group_rq && group_rq->rt_throttled) | 332 | if (group_rq && rt_rq_throttled(group_rq)) |
307 | return; | 333 | return; |
308 | 334 | ||
309 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); | 335 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); |
@@ -496,7 +522,7 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) | |||
496 | if (unlikely(!rt_rq->rt_nr_running)) | 522 | if (unlikely(!rt_rq->rt_nr_running)) |
497 | return NULL; | 523 | return NULL; |
498 | 524 | ||
499 | if (sched_rt_ratio_exceeded(rt_rq)) | 525 | if (rt_rq_throttled(rt_rq)) |
500 | return NULL; | 526 | return NULL; |
501 | 527 | ||
502 | do { | 528 | do { |
diff --git a/kernel/signal.c b/kernel/signal.c index 4333b6dbb424..84917fe507f7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -911,27 +911,6 @@ __group_complete_signal(int sig, struct task_struct *p) | |||
911 | } while_each_thread(p, t); | 911 | } while_each_thread(p, t); |
912 | return; | 912 | return; |
913 | } | 913 | } |
914 | |||
915 | /* | ||
916 | * There will be a core dump. We make all threads other | ||
917 | * than the chosen one go into a group stop so that nothing | ||
918 | * happens until it gets scheduled, takes the signal off | ||
919 | * the shared queue, and does the core dump. This is a | ||
920 | * little more complicated than strictly necessary, but it | ||
921 | * keeps the signal state that winds up in the core dump | ||
922 | * unchanged from the death state, e.g. which thread had | ||
923 | * the core-dump signal unblocked. | ||
924 | */ | ||
925 | rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); | ||
926 | rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); | ||
927 | p->signal->group_stop_count = 0; | ||
928 | p->signal->group_exit_task = t; | ||
929 | p = t; | ||
930 | do { | ||
931 | p->signal->group_stop_count++; | ||
932 | signal_wake_up(t, t == p); | ||
933 | } while_each_thread(p, t); | ||
934 | return; | ||
935 | } | 914 | } |
936 | 915 | ||
937 | /* | 916 | /* |
@@ -978,7 +957,6 @@ void zap_other_threads(struct task_struct *p) | |||
978 | { | 957 | { |
979 | struct task_struct *t; | 958 | struct task_struct *t; |
980 | 959 | ||
981 | p->signal->flags = SIGNAL_GROUP_EXIT; | ||
982 | p->signal->group_stop_count = 0; | 960 | p->signal->group_stop_count = 0; |
983 | 961 | ||
984 | for (t = next_thread(p); t != p; t = next_thread(t)) { | 962 | for (t = next_thread(p); t != p; t = next_thread(t)) { |
@@ -994,7 +972,7 @@ void zap_other_threads(struct task_struct *p) | |||
994 | } | 972 | } |
995 | } | 973 | } |
996 | 974 | ||
997 | int fastcall __fatal_signal_pending(struct task_struct *tsk) | 975 | int __fatal_signal_pending(struct task_struct *tsk) |
998 | { | 976 | { |
999 | return sigismember(&tsk->pending.signal, SIGKILL); | 977 | return sigismember(&tsk->pending.signal, SIGKILL); |
1000 | } | 978 | } |
@@ -1040,7 +1018,7 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | |||
1040 | } | 1018 | } |
1041 | 1019 | ||
1042 | /* | 1020 | /* |
1043 | * kill_pgrp_info() sends a signal to a process group: this is what the tty | 1021 | * __kill_pgrp_info() sends a signal to a process group: this is what the tty |
1044 | * control characters do (^C, ^Z etc) | 1022 | * control characters do (^C, ^Z etc) |
1045 | */ | 1023 | */ |
1046 | 1024 | ||
@@ -1059,30 +1037,28 @@ int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) | |||
1059 | return success ? 0 : retval; | 1037 | return success ? 0 : retval; |
1060 | } | 1038 | } |
1061 | 1039 | ||
1062 | int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) | ||
1063 | { | ||
1064 | int retval; | ||
1065 | |||
1066 | read_lock(&tasklist_lock); | ||
1067 | retval = __kill_pgrp_info(sig, info, pgrp); | ||
1068 | read_unlock(&tasklist_lock); | ||
1069 | |||
1070 | return retval; | ||
1071 | } | ||
1072 | |||
1073 | int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) | 1040 | int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) |
1074 | { | 1041 | { |
1075 | int error; | 1042 | int error = -ESRCH; |
1076 | struct task_struct *p; | 1043 | struct task_struct *p; |
1077 | 1044 | ||
1078 | rcu_read_lock(); | 1045 | rcu_read_lock(); |
1079 | if (unlikely(sig_needs_tasklist(sig))) | 1046 | if (unlikely(sig_needs_tasklist(sig))) |
1080 | read_lock(&tasklist_lock); | 1047 | read_lock(&tasklist_lock); |
1081 | 1048 | ||
1049 | retry: | ||
1082 | p = pid_task(pid, PIDTYPE_PID); | 1050 | p = pid_task(pid, PIDTYPE_PID); |
1083 | error = -ESRCH; | 1051 | if (p) { |
1084 | if (p) | ||
1085 | error = group_send_sig_info(sig, info, p); | 1052 | error = group_send_sig_info(sig, info, p); |
1053 | if (unlikely(error == -ESRCH)) | ||
1054 | /* | ||
1055 | * The task was unhashed in between, try again. | ||
1056 | * If it is dead, pid_task() will return NULL, | ||
1057 | * if we race with de_thread() it will find the | ||
1058 | * new leader. | ||
1059 | */ | ||
1060 | goto retry; | ||
1061 | } | ||
1086 | 1062 | ||
1087 | if (unlikely(sig_needs_tasklist(sig))) | 1063 | if (unlikely(sig_needs_tasklist(sig))) |
1088 | read_unlock(&tasklist_lock); | 1064 | read_unlock(&tasklist_lock); |
@@ -1147,14 +1123,22 @@ EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); | |||
1147 | static int kill_something_info(int sig, struct siginfo *info, int pid) | 1123 | static int kill_something_info(int sig, struct siginfo *info, int pid) |
1148 | { | 1124 | { |
1149 | int ret; | 1125 | int ret; |
1150 | rcu_read_lock(); | 1126 | |
1151 | if (!pid) { | 1127 | if (pid > 0) { |
1152 | ret = kill_pgrp_info(sig, info, task_pgrp(current)); | 1128 | rcu_read_lock(); |
1153 | } else if (pid == -1) { | 1129 | ret = kill_pid_info(sig, info, find_vpid(pid)); |
1130 | rcu_read_unlock(); | ||
1131 | return ret; | ||
1132 | } | ||
1133 | |||
1134 | read_lock(&tasklist_lock); | ||
1135 | if (pid != -1) { | ||
1136 | ret = __kill_pgrp_info(sig, info, | ||
1137 | pid ? find_vpid(-pid) : task_pgrp(current)); | ||
1138 | } else { | ||
1154 | int retval = 0, count = 0; | 1139 | int retval = 0, count = 0; |
1155 | struct task_struct * p; | 1140 | struct task_struct * p; |
1156 | 1141 | ||
1157 | read_lock(&tasklist_lock); | ||
1158 | for_each_process(p) { | 1142 | for_each_process(p) { |
1159 | if (p->pid > 1 && !same_thread_group(p, current)) { | 1143 | if (p->pid > 1 && !same_thread_group(p, current)) { |
1160 | int err = group_send_sig_info(sig, info, p); | 1144 | int err = group_send_sig_info(sig, info, p); |
@@ -1163,14 +1147,10 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) | |||
1163 | retval = err; | 1147 | retval = err; |
1164 | } | 1148 | } |
1165 | } | 1149 | } |
1166 | read_unlock(&tasklist_lock); | ||
1167 | ret = count ? retval : -ESRCH; | 1150 | ret = count ? retval : -ESRCH; |
1168 | } else if (pid < 0) { | ||
1169 | ret = kill_pgrp_info(sig, info, find_vpid(-pid)); | ||
1170 | } else { | ||
1171 | ret = kill_pid_info(sig, info, find_vpid(pid)); | ||
1172 | } | 1151 | } |
1173 | rcu_read_unlock(); | 1152 | read_unlock(&tasklist_lock); |
1153 | |||
1174 | return ret; | 1154 | return ret; |
1175 | } | 1155 | } |
1176 | 1156 | ||
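The rewritten kill_something_info() makes the dispatch explicit: pid > 0 targets a single process under rcu_read_lock(), pid == 0 and pid < -1 target a process group, and pid == -1 broadcasts to everything except init and the caller's own thread group, all under tasklist_lock. The same cases are reachable from userspace through kill(2); a small hedged demo that only signals a child created here:

```c
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {		/* child: wait to be signalled */
		pause();
		_exit(0);
	}

	kill(child, 0);			/* pid > 0: existence check on one task     */
	kill(child, SIGTERM);		/* pid > 0 -> kill_pid_info() path          */
	/* kill(0, SIGTERM)  would hit the pid == 0 branch (caller's own pgrp)     */
	/* kill(-pgid, sig)  would hit the pid < -1 branch (explicit pgrp)         */
	/* kill(-1, sig)     is the broadcast branch, deliberately not shown here  */

	waitpid(child, NULL, 0);
	printf("child %d signalled and reaped\n", (int)child);
	return 0;
}
```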
@@ -1218,20 +1198,6 @@ send_sig(int sig, struct task_struct *p, int priv) | |||
1218 | return send_sig_info(sig, __si_special(priv), p); | 1198 | return send_sig_info(sig, __si_special(priv), p); |
1219 | } | 1199 | } |
1220 | 1200 | ||
1221 | /* | ||
1222 | * This is the entry point for "process-wide" signals. | ||
1223 | * They will go to an appropriate thread in the thread group. | ||
1224 | */ | ||
1225 | int | ||
1226 | send_group_sig_info(int sig, struct siginfo *info, struct task_struct *p) | ||
1227 | { | ||
1228 | int ret; | ||
1229 | read_lock(&tasklist_lock); | ||
1230 | ret = group_send_sig_info(sig, info, p); | ||
1231 | read_unlock(&tasklist_lock); | ||
1232 | return ret; | ||
1233 | } | ||
1234 | |||
1235 | void | 1201 | void |
1236 | force_sig(int sig, struct task_struct *p) | 1202 | force_sig(int sig, struct task_struct *p) |
1237 | { | 1203 | { |
@@ -1259,7 +1225,13 @@ force_sigsegv(int sig, struct task_struct *p) | |||
1259 | 1225 | ||
1260 | int kill_pgrp(struct pid *pid, int sig, int priv) | 1226 | int kill_pgrp(struct pid *pid, int sig, int priv) |
1261 | { | 1227 | { |
1262 | return kill_pgrp_info(sig, __si_special(priv), pid); | 1228 | int ret; |
1229 | |||
1230 | read_lock(&tasklist_lock); | ||
1231 | ret = __kill_pgrp_info(sig, __si_special(priv), pid); | ||
1232 | read_unlock(&tasklist_lock); | ||
1233 | |||
1234 | return ret; | ||
1263 | } | 1235 | } |
1264 | EXPORT_SYMBOL(kill_pgrp); | 1236 | EXPORT_SYMBOL(kill_pgrp); |
1265 | 1237 | ||
@@ -1578,11 +1550,6 @@ static inline int may_ptrace_stop(void) | |||
1578 | { | 1550 | { |
1579 | if (!likely(current->ptrace & PT_PTRACED)) | 1551 | if (!likely(current->ptrace & PT_PTRACED)) |
1580 | return 0; | 1552 | return 0; |
1581 | |||
1582 | if (unlikely(current->parent == current->real_parent && | ||
1583 | (current->ptrace & PT_ATTACHED))) | ||
1584 | return 0; | ||
1585 | |||
1586 | /* | 1553 | /* |
1587 | * Are we in the middle of do_coredump? | 1554 | * Are we in the middle of do_coredump? |
1588 | * If so and our tracer is also part of the coredump stopping | 1555 | * If so and our tracer is also part of the coredump stopping |
@@ -1600,6 +1567,17 @@ static inline int may_ptrace_stop(void) | |||
1600 | } | 1567 | } |
1601 | 1568 | ||
1602 | /* | 1569 | /* |
1570 | * Return nonzero if there is a SIGKILL that should be waking us up. | ||
1571 | * Called with the siglock held. | ||
1572 | */ | ||
1573 | static int sigkill_pending(struct task_struct *tsk) | ||
1574 | { | ||
1575 | return ((sigismember(&tsk->pending.signal, SIGKILL) || | ||
1576 | sigismember(&tsk->signal->shared_pending.signal, SIGKILL)) && | ||
1577 | !unlikely(sigismember(&tsk->blocked, SIGKILL))); | ||
1578 | } | ||
1579 | |||
1580 | /* | ||
1603 | * This must be called with current->sighand->siglock held. | 1581 | * This must be called with current->sighand->siglock held. |
1604 | * | 1582 | * |
1605 | * This should be the path for all ptrace stops. | 1583 | * This should be the path for all ptrace stops. |
@@ -1607,11 +1585,31 @@ static inline int may_ptrace_stop(void) | |||
1607 | * That makes it a way to test a stopped process for | 1585 | * That makes it a way to test a stopped process for |
1608 | * being ptrace-stopped vs being job-control-stopped. | 1586 | * being ptrace-stopped vs being job-control-stopped. |
1609 | * | 1587 | * |
1610 | * If we actually decide not to stop at all because the tracer is gone, | 1588 | * If we actually decide not to stop at all because the tracer |
1611 | * we leave nostop_code in current->exit_code. | 1589 | * is gone, we keep current->exit_code unless clear_code. |
1612 | */ | 1590 | */ |
1613 | static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) | 1591 | static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) |
1614 | { | 1592 | { |
1593 | int killed = 0; | ||
1594 | |||
1595 | if (arch_ptrace_stop_needed(exit_code, info)) { | ||
1596 | /* | ||
1597 | * The arch code has something special to do before a | ||
1598 | * ptrace stop. This is allowed to block, e.g. for faults | ||
1599 | * on user stack pages. We can't keep the siglock while | ||
1600 | * calling arch_ptrace_stop, so we must release it now. | ||
1601 | * To preserve proper semantics, we must do this before | ||
1602 | * any signal bookkeeping like checking group_stop_count. | ||
1603 | * Meanwhile, a SIGKILL could come in before we retake the | ||
1604 | * siglock. That must prevent us from sleeping in TASK_TRACED. | ||
1605 | * So after regaining the lock, we must check for SIGKILL. | ||
1606 | */ | ||
1607 | spin_unlock_irq(¤t->sighand->siglock); | ||
1608 | arch_ptrace_stop(exit_code, info); | ||
1609 | spin_lock_irq(¤t->sighand->siglock); | ||
1610 | killed = sigkill_pending(current); | ||
1611 | } | ||
1612 | |||
1615 | /* | 1613 | /* |
1616 | * If there is a group stop in progress, | 1614 | * If there is a group stop in progress, |
1617 | * we must participate in the bookkeeping. | 1615 | * we must participate in the bookkeeping. |
@@ -1623,22 +1621,23 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) | |||
1623 | current->exit_code = exit_code; | 1621 | current->exit_code = exit_code; |
1624 | 1622 | ||
1625 | /* Let the debugger run. */ | 1623 | /* Let the debugger run. */ |
1626 | set_current_state(TASK_TRACED); | 1624 | __set_current_state(TASK_TRACED); |
1627 | spin_unlock_irq(¤t->sighand->siglock); | 1625 | spin_unlock_irq(¤t->sighand->siglock); |
1628 | try_to_freeze(); | 1626 | try_to_freeze(); |
1629 | read_lock(&tasklist_lock); | 1627 | read_lock(&tasklist_lock); |
1630 | if (may_ptrace_stop()) { | 1628 | if (!unlikely(killed) && may_ptrace_stop()) { |
1631 | do_notify_parent_cldstop(current, CLD_TRAPPED); | 1629 | do_notify_parent_cldstop(current, CLD_TRAPPED); |
1632 | read_unlock(&tasklist_lock); | 1630 | read_unlock(&tasklist_lock); |
1633 | schedule(); | 1631 | schedule(); |
1634 | } else { | 1632 | } else { |
1635 | /* | 1633 | /* |
1636 | * By the time we got the lock, our tracer went away. | 1634 | * By the time we got the lock, our tracer went away. |
1637 | * Don't stop here. | 1635 | * Don't drop the lock yet, another tracer may come. |
1638 | */ | 1636 | */ |
1637 | __set_current_state(TASK_RUNNING); | ||
1638 | if (clear_code) | ||
1639 | current->exit_code = 0; | ||
1639 | read_unlock(&tasklist_lock); | 1640 | read_unlock(&tasklist_lock); |
1640 | set_current_state(TASK_RUNNING); | ||
1641 | current->exit_code = nostop_code; | ||
1642 | } | 1641 | } |
1643 | 1642 | ||
1644 | /* | 1643 | /* |
@@ -1671,7 +1670,7 @@ void ptrace_notify(int exit_code) | |||
1671 | 1670 | ||
1672 | /* Let the debugger run. */ | 1671 | /* Let the debugger run. */ |
1673 | spin_lock_irq(¤t->sighand->siglock); | 1672 | spin_lock_irq(¤t->sighand->siglock); |
1674 | ptrace_stop(exit_code, 0, &info); | 1673 | ptrace_stop(exit_code, 1, &info); |
1675 | spin_unlock_irq(¤t->sighand->siglock); | 1674 | spin_unlock_irq(¤t->sighand->siglock); |
1676 | } | 1675 | } |
1677 | 1676 | ||
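sigkill_pending() simply checks whether SIGKILL sits in the private or shared pending set and is not blocked; ptrace_stop() uses it to avoid sleeping in TASK_TRACED when a kill raced in while arch_ptrace_stop() had dropped the siglock. The per-set membership test has a direct userspace counterpart; a tiny sketch with sigset_t, using SIGTERM since SIGKILL cannot be blocked from userspace:

```c
#include <signal.h>
#include <stdio.h>

int main(void)
{
	sigset_t block, pending;

	/* park SIGTERM in the pending set: block it, then raise it */
	sigemptyset(&block);
	sigaddset(&block, SIGTERM);
	sigprocmask(SIG_BLOCK, &block, NULL);
	raise(SIGTERM);

	sigpending(&pending);
	/* sigkill_pending() requires "pending AND not blocked" before it refuses to sleep */
	printf("SIGTERM pending=%d blocked=%d\n",
	       sigismember(&pending, SIGTERM), sigismember(&block, SIGTERM));
	return 0;
}
```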
@@ -1709,9 +1708,6 @@ static int do_signal_stop(int signr) | |||
1709 | struct signal_struct *sig = current->signal; | 1708 | struct signal_struct *sig = current->signal; |
1710 | int stop_count; | 1709 | int stop_count; |
1711 | 1710 | ||
1712 | if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED)) | ||
1713 | return 0; | ||
1714 | |||
1715 | if (sig->group_stop_count > 0) { | 1711 | if (sig->group_stop_count > 0) { |
1716 | /* | 1712 | /* |
1717 | * There is a group stop in progress. We don't need to | 1713 | * There is a group stop in progress. We don't need to |
@@ -1719,12 +1715,15 @@ static int do_signal_stop(int signr) | |||
1719 | */ | 1715 | */ |
1720 | stop_count = --sig->group_stop_count; | 1716 | stop_count = --sig->group_stop_count; |
1721 | } else { | 1717 | } else { |
1718 | struct task_struct *t; | ||
1719 | |||
1720 | if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || | ||
1721 | unlikely(sig->group_exit_task)) | ||
1722 | return 0; | ||
1722 | /* | 1723 | /* |
1723 | * There is no group stop already in progress. | 1724 | * There is no group stop already in progress. |
1724 | * We must initiate one now. | 1725 | * We must initiate one now. |
1725 | */ | 1726 | */ |
1726 | struct task_struct *t; | ||
1727 | |||
1728 | sig->group_exit_code = signr; | 1727 | sig->group_exit_code = signr; |
1729 | 1728 | ||
1730 | stop_count = 0; | 1729 | stop_count = 0; |
@@ -1734,7 +1733,7 @@ static int do_signal_stop(int signr) | |||
1734 | * stop is always done with the siglock held, | 1733 | * stop is always done with the siglock held, |
1735 | * so this check has no races. | 1734 | * so this check has no races. |
1736 | */ | 1735 | */ |
1737 | if (!t->exit_state && | 1736 | if (!(t->flags & PF_EXITING) && |
1738 | !task_is_stopped_or_traced(t)) { | 1737 | !task_is_stopped_or_traced(t)) { |
1739 | stop_count++; | 1738 | stop_count++; |
1740 | signal_wake_up(t, 0); | 1739 | signal_wake_up(t, 0); |
@@ -1752,47 +1751,6 @@ static int do_signal_stop(int signr) | |||
1752 | return 1; | 1751 | return 1; |
1753 | } | 1752 | } |
1754 | 1753 | ||
1755 | /* | ||
1756 | * Do appropriate magic when group_stop_count > 0. | ||
1757 | * We return nonzero if we stopped, after releasing the siglock. | ||
1758 | * We return zero if we still hold the siglock and should look | ||
1759 | * for another signal without checking group_stop_count again. | ||
1760 | */ | ||
1761 | static int handle_group_stop(void) | ||
1762 | { | ||
1763 | int stop_count; | ||
1764 | |||
1765 | if (current->signal->group_exit_task == current) { | ||
1766 | /* | ||
1767 | * Group stop is so we can do a core dump, | ||
1768 | * We are the initiating thread, so get on with it. | ||
1769 | */ | ||
1770 | current->signal->group_exit_task = NULL; | ||
1771 | return 0; | ||
1772 | } | ||
1773 | |||
1774 | if (current->signal->flags & SIGNAL_GROUP_EXIT) | ||
1775 | /* | ||
1776 | * Group stop is so another thread can do a core dump, | ||
1777 | * or else we are racing against a death signal. | ||
1778 | * Just punt the stop so we can get the next signal. | ||
1779 | */ | ||
1780 | return 0; | ||
1781 | |||
1782 | /* | ||
1783 | * There is a group stop in progress. We stop | ||
1784 | * without any associated signal being in our queue. | ||
1785 | */ | ||
1786 | stop_count = --current->signal->group_stop_count; | ||
1787 | if (stop_count == 0) | ||
1788 | current->signal->flags = SIGNAL_STOP_STOPPED; | ||
1789 | current->exit_code = current->signal->group_exit_code; | ||
1790 | set_current_state(TASK_STOPPED); | ||
1791 | spin_unlock_irq(¤t->sighand->siglock); | ||
1792 | finish_stop(stop_count); | ||
1793 | return 1; | ||
1794 | } | ||
1795 | |||
1796 | int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, | 1754 | int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, |
1797 | struct pt_regs *regs, void *cookie) | 1755 | struct pt_regs *regs, void *cookie) |
1798 | { | 1756 | { |
@@ -1807,7 +1765,7 @@ relock: | |||
1807 | struct k_sigaction *ka; | 1765 | struct k_sigaction *ka; |
1808 | 1766 | ||
1809 | if (unlikely(current->signal->group_stop_count > 0) && | 1767 | if (unlikely(current->signal->group_stop_count > 0) && |
1810 | handle_group_stop()) | 1768 | do_signal_stop(0)) |
1811 | goto relock; | 1769 | goto relock; |
1812 | 1770 | ||
1813 | signr = dequeue_signal(current, mask, info); | 1771 | signr = dequeue_signal(current, mask, info); |
@@ -1819,7 +1777,7 @@ relock: | |||
1819 | ptrace_signal_deliver(regs, cookie); | 1777 | ptrace_signal_deliver(regs, cookie); |
1820 | 1778 | ||
1821 | /* Let the debugger run. */ | 1779 | /* Let the debugger run. */ |
1822 | ptrace_stop(signr, signr, info); | 1780 | ptrace_stop(signr, 0, info); |
1823 | 1781 | ||
1824 | /* We're back. Did the debugger cancel the sig? */ | 1782 | /* We're back. Did the debugger cancel the sig? */ |
1825 | signr = current->exit_code; | 1783 | signr = current->exit_code; |
@@ -1936,6 +1894,48 @@ relock: | |||
1936 | return signr; | 1894 | return signr; |
1937 | } | 1895 | } |
1938 | 1896 | ||
1897 | void exit_signals(struct task_struct *tsk) | ||
1898 | { | ||
1899 | int group_stop = 0; | ||
1900 | struct task_struct *t; | ||
1901 | |||
1902 | if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { | ||
1903 | tsk->flags |= PF_EXITING; | ||
1904 | return; | ||
1905 | } | ||
1906 | |||
1907 | spin_lock_irq(&tsk->sighand->siglock); | ||
1908 | /* | ||
1909 | * From now this task is not visible for group-wide signals, | ||
1910 | * see wants_signal(), do_signal_stop(). | ||
1911 | */ | ||
1912 | tsk->flags |= PF_EXITING; | ||
1913 | if (!signal_pending(tsk)) | ||
1914 | goto out; | ||
1915 | |||
1916 | /* It could be that __group_complete_signal() choose us to | ||
1917 | * notify about group-wide signal. Another thread should be | ||
1918 | * woken now to take the signal since we will not. | ||
1919 | */ | ||
1920 | for (t = tsk; (t = next_thread(t)) != tsk; ) | ||
1921 | if (!signal_pending(t) && !(t->flags & PF_EXITING)) | ||
1922 | recalc_sigpending_and_wake(t); | ||
1923 | |||
1924 | if (unlikely(tsk->signal->group_stop_count) && | ||
1925 | !--tsk->signal->group_stop_count) { | ||
1926 | tsk->signal->flags = SIGNAL_STOP_STOPPED; | ||
1927 | group_stop = 1; | ||
1928 | } | ||
1929 | out: | ||
1930 | spin_unlock_irq(&tsk->sighand->siglock); | ||
1931 | |||
1932 | if (unlikely(group_stop)) { | ||
1933 | read_lock(&tasklist_lock); | ||
1934 | do_notify_parent_cldstop(tsk, CLD_STOPPED); | ||
1935 | read_unlock(&tasklist_lock); | ||
1936 | } | ||
1937 | } | ||
1938 | |||
1939 | EXPORT_SYMBOL(recalc_sigpending); | 1939 | EXPORT_SYMBOL(recalc_sigpending); |
1940 | EXPORT_SYMBOL_GPL(dequeue_signal); | 1940 | EXPORT_SYMBOL_GPL(dequeue_signal); |
1941 | EXPORT_SYMBOL(flush_signals); | 1941 | EXPORT_SYMBOL(flush_signals); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index d7837d45419e..5b3aea5f471e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -320,7 +320,7 @@ void irq_exit(void) | |||
320 | /* | 320 | /* |
321 | * This function must run with irqs disabled! | 321 | * This function must run with irqs disabled! |
322 | */ | 322 | */ |
323 | inline fastcall void raise_softirq_irqoff(unsigned int nr) | 323 | inline void raise_softirq_irqoff(unsigned int nr) |
324 | { | 324 | { |
325 | __raise_softirq_irqoff(nr); | 325 | __raise_softirq_irqoff(nr); |
326 | 326 | ||
@@ -337,7 +337,7 @@ inline fastcall void raise_softirq_irqoff(unsigned int nr) | |||
337 | wakeup_softirqd(); | 337 | wakeup_softirqd(); |
338 | } | 338 | } |
339 | 339 | ||
340 | void fastcall raise_softirq(unsigned int nr) | 340 | void raise_softirq(unsigned int nr) |
341 | { | 341 | { |
342 | unsigned long flags; | 342 | unsigned long flags; |
343 | 343 | ||
@@ -363,7 +363,7 @@ struct tasklet_head | |||
363 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL }; | 363 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL }; |
364 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL }; | 364 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL }; |
365 | 365 | ||
366 | void fastcall __tasklet_schedule(struct tasklet_struct *t) | 366 | void __tasklet_schedule(struct tasklet_struct *t) |
367 | { | 367 | { |
368 | unsigned long flags; | 368 | unsigned long flags; |
369 | 369 | ||
@@ -376,7 +376,7 @@ void fastcall __tasklet_schedule(struct tasklet_struct *t) | |||
376 | 376 | ||
377 | EXPORT_SYMBOL(__tasklet_schedule); | 377 | EXPORT_SYMBOL(__tasklet_schedule); |
378 | 378 | ||
379 | void fastcall __tasklet_hi_schedule(struct tasklet_struct *t) | 379 | void __tasklet_hi_schedule(struct tasklet_struct *t) |
380 | { | 380 | { |
381 | unsigned long flags; | 381 | unsigned long flags; |
382 | 382 | ||
diff --git a/kernel/srcu.c b/kernel/srcu.c index 3507cabe963b..b0aeeaf22ce4 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
@@ -74,7 +74,7 @@ static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) | |||
74 | * severe errors when invoked on an active srcu_struct. That said, it | 74 | * severe errors when invoked on an active srcu_struct. That said, it |
75 | * can be useful as an error check at cleanup time. | 75 | * can be useful as an error check at cleanup time. |
76 | */ | 76 | */ |
77 | int srcu_readers_active(struct srcu_struct *sp) | 77 | static int srcu_readers_active(struct srcu_struct *sp) |
78 | { | 78 | { |
79 | return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); | 79 | return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); |
80 | } | 80 | } |
@@ -255,4 +255,3 @@ EXPORT_SYMBOL_GPL(srcu_read_lock); | |||
255 | EXPORT_SYMBOL_GPL(srcu_read_unlock); | 255 | EXPORT_SYMBOL_GPL(srcu_read_unlock); |
256 | EXPORT_SYMBOL_GPL(synchronize_srcu); | 256 | EXPORT_SYMBOL_GPL(synchronize_srcu); |
257 | EXPORT_SYMBOL_GPL(srcu_batches_completed); | 257 | EXPORT_SYMBOL_GPL(srcu_batches_completed); |
258 | EXPORT_SYMBOL_GPL(srcu_readers_active); | ||
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 51b5ee53571a..6f4e0e13f70c 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -29,7 +29,6 @@ enum stopmachine_state { | |||
29 | static enum stopmachine_state stopmachine_state; | 29 | static enum stopmachine_state stopmachine_state; |
30 | static unsigned int stopmachine_num_threads; | 30 | static unsigned int stopmachine_num_threads; |
31 | static atomic_t stopmachine_thread_ack; | 31 | static atomic_t stopmachine_thread_ack; |
32 | static DECLARE_MUTEX(stopmachine_mutex); | ||
33 | 32 | ||
34 | static int stopmachine(void *cpu) | 33 | static int stopmachine(void *cpu) |
35 | { | 34 | { |
@@ -170,6 +169,7 @@ static int do_stop(void *_smdata) | |||
170 | struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, | 169 | struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, |
171 | unsigned int cpu) | 170 | unsigned int cpu) |
172 | { | 171 | { |
172 | static DEFINE_MUTEX(stopmachine_mutex); | ||
173 | struct stop_machine_data smdata; | 173 | struct stop_machine_data smdata; |
174 | struct task_struct *p; | 174 | struct task_struct *p; |
175 | 175 | ||
@@ -177,7 +177,7 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, | |||
177 | smdata.data = data; | 177 | smdata.data = data; |
178 | init_completion(&smdata.done); | 178 | init_completion(&smdata.done); |
179 | 179 | ||
180 | down(&stopmachine_mutex); | 180 | mutex_lock(&stopmachine_mutex); |
181 | 181 | ||
182 | /* If they don't care which CPU fn runs on, bind to any online one. */ | 182 | /* If they don't care which CPU fn runs on, bind to any online one. */ |
183 | if (cpu == NR_CPUS) | 183 | if (cpu == NR_CPUS) |
@@ -193,7 +193,7 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, | |||
193 | wake_up_process(p); | 193 | wake_up_process(p); |
194 | wait_for_completion(&smdata.done); | 194 | wait_for_completion(&smdata.done); |
195 | } | 195 | } |
196 | up(&stopmachine_mutex); | 196 | mutex_unlock(&stopmachine_mutex); |
197 | return p; | 197 | return p; |
198 | } | 198 | } |
199 | 199 | ||
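The stop_machine change is a straight semaphore-to-mutex conversion: the file-scope DECLARE_MUTEX() semaphore becomes a DEFINE_MUTEX() local to the only function that takes it, and down()/up() become mutex_lock()/mutex_unlock(). A purely illustrative userspace analogue of scoping the lock to its single user, with pthreads:

```c
#include <pthread.h>
#include <stdio.h>

static int runs;	/* pretend shared state, like smdata in the kernel code */

static void *run_once(void *arg)
{
	/* static mutex scoped to the only user, as in __stop_machine_run() */
	static pthread_mutex_t run_lock = PTHREAD_MUTEX_INITIALIZER;

	(void)arg;
	pthread_mutex_lock(&run_lock);
	runs++;				/* serialized section */
	pthread_mutex_unlock(&run_lock);
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, run_once, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("runs=%d\n", runs);	/* 4 */
	return 0;
}
```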
diff --git a/kernel/sys.c b/kernel/sys.c index d1fe71eb4546..a626116af5db 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -315,7 +315,7 @@ static void kernel_kexec(void) | |||
315 | #endif | 315 | #endif |
316 | } | 316 | } |
317 | 317 | ||
318 | void kernel_shutdown_prepare(enum system_states state) | 318 | static void kernel_shutdown_prepare(enum system_states state) |
319 | { | 319 | { |
320 | blocking_notifier_call_chain(&reboot_notifier_list, | 320 | blocking_notifier_call_chain(&reboot_notifier_list, |
321 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); | 321 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); |
@@ -916,8 +916,8 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) | |||
916 | { | 916 | { |
917 | struct task_struct *p; | 917 | struct task_struct *p; |
918 | struct task_struct *group_leader = current->group_leader; | 918 | struct task_struct *group_leader = current->group_leader; |
919 | int err = -EINVAL; | 919 | struct pid *pgrp; |
920 | struct pid_namespace *ns; | 920 | int err; |
921 | 921 | ||
922 | if (!pid) | 922 | if (!pid) |
923 | pid = task_pid_vnr(group_leader); | 923 | pid = task_pid_vnr(group_leader); |
@@ -929,12 +929,10 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) | |||
929 | /* From this point forward we keep holding onto the tasklist lock | 929 | /* From this point forward we keep holding onto the tasklist lock |
930 | * so that our parent does not change from under us. -DaveM | 930 | * so that our parent does not change from under us. -DaveM |
931 | */ | 931 | */ |
932 | ns = current->nsproxy->pid_ns; | ||
933 | |||
934 | write_lock_irq(&tasklist_lock); | 932 | write_lock_irq(&tasklist_lock); |
935 | 933 | ||
936 | err = -ESRCH; | 934 | err = -ESRCH; |
937 | p = find_task_by_pid_ns(pid, ns); | 935 | p = find_task_by_vpid(pid); |
938 | if (!p) | 936 | if (!p) |
939 | goto out; | 937 | goto out; |
940 | 938 | ||
@@ -942,7 +940,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) | |||
942 | if (!thread_group_leader(p)) | 940 | if (!thread_group_leader(p)) |
943 | goto out; | 941 | goto out; |
944 | 942 | ||
945 | if (p->real_parent->tgid == group_leader->tgid) { | 943 | if (same_thread_group(p->real_parent, group_leader)) { |
946 | err = -EPERM; | 944 | err = -EPERM; |
947 | if (task_session(p) != task_session(group_leader)) | 945 | if (task_session(p) != task_session(group_leader)) |
948 | goto out; | 946 | goto out; |
@@ -959,10 +957,12 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) | |||
959 | if (p->signal->leader) | 957 | if (p->signal->leader) |
960 | goto out; | 958 | goto out; |
961 | 959 | ||
960 | pgrp = task_pid(p); | ||
962 | if (pgid != pid) { | 961 | if (pgid != pid) { |
963 | struct task_struct *g; | 962 | struct task_struct *g; |
964 | 963 | ||
965 | g = find_task_by_pid_type_ns(PIDTYPE_PGID, pgid, ns); | 964 | pgrp = find_vpid(pgid); |
965 | g = pid_task(pgrp, PIDTYPE_PGID); | ||
966 | if (!g || task_session(g) != task_session(group_leader)) | 966 | if (!g || task_session(g) != task_session(group_leader)) |
967 | goto out; | 967 | goto out; |
968 | } | 968 | } |
@@ -971,13 +971,10 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) | |||
971 | if (err) | 971 | if (err) |
972 | goto out; | 972 | goto out; |
973 | 973 | ||
974 | if (task_pgrp_nr_ns(p, ns) != pgid) { | 974 | if (task_pgrp(p) != pgrp) { |
975 | struct pid *pid; | ||
976 | |||
977 | detach_pid(p, PIDTYPE_PGID); | 975 | detach_pid(p, PIDTYPE_PGID); |
978 | pid = find_vpid(pgid); | 976 | attach_pid(p, PIDTYPE_PGID, pgrp); |
979 | attach_pid(p, PIDTYPE_PGID, pid); | 977 | set_task_pgrp(p, pid_nr(pgrp)); |
980 | set_task_pgrp(p, pid_nr(pid)); | ||
981 | } | 978 | } |
982 | 979 | ||
983 | err = 0; | 980 | err = 0; |
@@ -994,17 +991,14 @@ asmlinkage long sys_getpgid(pid_t pid) | |||
994 | else { | 991 | else { |
995 | int retval; | 992 | int retval; |
996 | struct task_struct *p; | 993 | struct task_struct *p; |
997 | struct pid_namespace *ns; | ||
998 | |||
999 | ns = current->nsproxy->pid_ns; | ||
1000 | 994 | ||
1001 | read_lock(&tasklist_lock); | 995 | read_lock(&tasklist_lock); |
1002 | p = find_task_by_pid_ns(pid, ns); | 996 | p = find_task_by_vpid(pid); |
1003 | retval = -ESRCH; | 997 | retval = -ESRCH; |
1004 | if (p) { | 998 | if (p) { |
1005 | retval = security_task_getpgid(p); | 999 | retval = security_task_getpgid(p); |
1006 | if (!retval) | 1000 | if (!retval) |
1007 | retval = task_pgrp_nr_ns(p, ns); | 1001 | retval = task_pgrp_vnr(p); |
1008 | } | 1002 | } |
1009 | read_unlock(&tasklist_lock); | 1003 | read_unlock(&tasklist_lock); |
1010 | return retval; | 1004 | return retval; |
@@ -1028,19 +1022,16 @@ asmlinkage long sys_getsid(pid_t pid) | |||
1028 | else { | 1022 | else { |
1029 | int retval; | 1023 | int retval; |
1030 | struct task_struct *p; | 1024 | struct task_struct *p; |
1031 | struct pid_namespace *ns; | ||
1032 | |||
1033 | ns = current->nsproxy->pid_ns; | ||
1034 | 1025 | ||
1035 | read_lock(&tasklist_lock); | 1026 | rcu_read_lock(); |
1036 | p = find_task_by_pid_ns(pid, ns); | 1027 | p = find_task_by_vpid(pid); |
1037 | retval = -ESRCH; | 1028 | retval = -ESRCH; |
1038 | if (p) { | 1029 | if (p) { |
1039 | retval = security_task_getsid(p); | 1030 | retval = security_task_getsid(p); |
1040 | if (!retval) | 1031 | if (!retval) |
1041 | retval = task_session_nr_ns(p, ns); | 1032 | retval = task_session_vnr(p); |
1042 | } | 1033 | } |
1043 | read_unlock(&tasklist_lock); | 1034 | rcu_read_unlock(); |
1044 | return retval; | 1035 | return retval; |
1045 | } | 1036 | } |
1046 | } | 1037 | } |
@@ -1048,35 +1039,29 @@ asmlinkage long sys_getsid(pid_t pid) | |||
1048 | asmlinkage long sys_setsid(void) | 1039 | asmlinkage long sys_setsid(void) |
1049 | { | 1040 | { |
1050 | struct task_struct *group_leader = current->group_leader; | 1041 | struct task_struct *group_leader = current->group_leader; |
1051 | pid_t session; | 1042 | struct pid *sid = task_pid(group_leader); |
1043 | pid_t session = pid_vnr(sid); | ||
1052 | int err = -EPERM; | 1044 | int err = -EPERM; |
1053 | 1045 | ||
1054 | write_lock_irq(&tasklist_lock); | 1046 | write_lock_irq(&tasklist_lock); |
1055 | |||
1056 | /* Fail if I am already a session leader */ | 1047 | /* Fail if I am already a session leader */ |
1057 | if (group_leader->signal->leader) | 1048 | if (group_leader->signal->leader) |
1058 | goto out; | 1049 | goto out; |
1059 | 1050 | ||
1060 | session = group_leader->pid; | ||
1061 | /* Fail if a process group id already exists that equals the | 1051 | /* Fail if a process group id already exists that equals the |
1062 | * proposed session id. | 1052 | * proposed session id. |
1063 | * | ||
1064 | * Don't check if session id == 1 because kernel threads use this | ||
1065 | * session id and so the check will always fail and make it so | ||
1066 | * init cannot successfully call setsid. | ||
1067 | */ | 1053 | */ |
1068 | if (session > 1 && find_task_by_pid_type_ns(PIDTYPE_PGID, | 1054 | if (pid_task(sid, PIDTYPE_PGID)) |
1069 | session, &init_pid_ns)) | ||
1070 | goto out; | 1055 | goto out; |
1071 | 1056 | ||
1072 | group_leader->signal->leader = 1; | 1057 | group_leader->signal->leader = 1; |
1073 | __set_special_pids(session, session); | 1058 | __set_special_pids(sid); |
1074 | 1059 | ||
1075 | spin_lock(&group_leader->sighand->siglock); | 1060 | spin_lock(&group_leader->sighand->siglock); |
1076 | group_leader->signal->tty = NULL; | 1061 | group_leader->signal->tty = NULL; |
1077 | spin_unlock(&group_leader->sighand->siglock); | 1062 | spin_unlock(&group_leader->sighand->siglock); |
1078 | 1063 | ||
1079 | err = task_pgrp_vnr(group_leader); | 1064 | err = session; |
1080 | out: | 1065 | out: |
1081 | write_unlock_irq(&tasklist_lock); | 1066 | write_unlock_irq(&tasklist_lock); |
1082 | return err; | 1067 | return err; |
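sys_setsid() now works on struct pid: the session id is the group leader's pid, the "pgid already exists" test becomes a pid_task(sid, PIDTYPE_PGID) lookup, and the syscall returns the new namespace-relative session id instead of task_pgrp_vnr(). From userspace the semantics are unchanged; a minimal hedged demo of the call:

```c
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		/* a freshly forked child is neither a session nor a group leader,
		 * so setsid() succeeds and returns the new session id */
		pid_t sid = setsid();
		printf("child %d: sid=%d pgid=%d\n",
		       (int)getpid(), (int)sid, (int)getpgrp());
		_exit(sid == -1);
	}
	waitpid(child, NULL, 0);

	/* a process that is already a session leader, or whose pid is in use
	 * as a pgid, gets EPERM instead - the two checks in the hunk above */
	return 0;
}
```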
@@ -1145,16 +1130,16 @@ static int groups_to_user(gid_t __user *grouplist, | |||
1145 | struct group_info *group_info) | 1130 | struct group_info *group_info) |
1146 | { | 1131 | { |
1147 | int i; | 1132 | int i; |
1148 | int count = group_info->ngroups; | 1133 | unsigned int count = group_info->ngroups; |
1149 | 1134 | ||
1150 | for (i = 0; i < group_info->nblocks; i++) { | 1135 | for (i = 0; i < group_info->nblocks; i++) { |
1151 | int cp_count = min(NGROUPS_PER_BLOCK, count); | 1136 | unsigned int cp_count = min(NGROUPS_PER_BLOCK, count); |
1152 | int off = i * NGROUPS_PER_BLOCK; | 1137 | unsigned int len = cp_count * sizeof(*grouplist); |
1153 | int len = cp_count * sizeof(*grouplist); | ||
1154 | 1138 | ||
1155 | if (copy_to_user(grouplist+off, group_info->blocks[i], len)) | 1139 | if (copy_to_user(grouplist, group_info->blocks[i], len)) |
1156 | return -EFAULT; | 1140 | return -EFAULT; |
1157 | 1141 | ||
1142 | grouplist += NGROUPS_PER_BLOCK; | ||
1158 | count -= cp_count; | 1143 | count -= cp_count; |
1159 | } | 1144 | } |
1160 | return 0; | 1145 | return 0; |
@@ -1165,16 +1150,16 @@ static int groups_from_user(struct group_info *group_info, | |||
1165 | gid_t __user *grouplist) | 1150 | gid_t __user *grouplist) |
1166 | { | 1151 | { |
1167 | int i; | 1152 | int i; |
1168 | int count = group_info->ngroups; | 1153 | unsigned int count = group_info->ngroups; |
1169 | 1154 | ||
1170 | for (i = 0; i < group_info->nblocks; i++) { | 1155 | for (i = 0; i < group_info->nblocks; i++) { |
1171 | int cp_count = min(NGROUPS_PER_BLOCK, count); | 1156 | unsigned int cp_count = min(NGROUPS_PER_BLOCK, count); |
1172 | int off = i * NGROUPS_PER_BLOCK; | 1157 | unsigned int len = cp_count * sizeof(*grouplist); |
1173 | int len = cp_count * sizeof(*grouplist); | ||
1174 | 1158 | ||
1175 | if (copy_from_user(group_info->blocks[i], grouplist+off, len)) | 1159 | if (copy_from_user(group_info->blocks[i], grouplist, len)) |
1176 | return -EFAULT; | 1160 | return -EFAULT; |
1177 | 1161 | ||
1162 | grouplist += NGROUPS_PER_BLOCK; | ||
1178 | count -= cp_count; | 1163 | count -= cp_count; |
1179 | } | 1164 | } |
1180 | return 0; | 1165 | return 0; |
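Both copy loops now step the user pointer forward by NGROUPS_PER_BLOCK per iteration instead of recomputing a block offset, and the counters are switched to unsigned int. A standalone sketch of the same block-walking pattern (block size and types are made up for illustration):

/* Sketch: copy `count` ids out of fixed-size blocks, advancing the
 * destination pointer one whole block per iteration, as above. */
#include <stdio.h>
#include <string.h>

#define PER_BLOCK 4u                    /* stand-in for NGROUPS_PER_BLOCK */

static void blocks_to_list(unsigned int *dst, unsigned int blocks[][PER_BLOCK],
                           unsigned int nblocks, unsigned int count)
{
        unsigned int i;

        for (i = 0; i < nblocks; i++) {
                unsigned int cp = count < PER_BLOCK ? count : PER_BLOCK;

                memcpy(dst, blocks[i], cp * sizeof(*dst));
                dst += PER_BLOCK;       /* mirrors grouplist += NGROUPS_PER_BLOCK */
                count -= cp;
        }
}

int main(void)
{
        unsigned int blocks[2][PER_BLOCK] = { { 1, 2, 3, 4 }, { 5, 6 } };
        unsigned int out[8] = { 0 };

        blocks_to_list(out, blocks, 2, 6);
        for (unsigned int i = 0; i < 6; i++)
                printf("%u ", out[i]);          /* prints 1 2 3 4 5 6 */
        printf("\n");
        return 0;
}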
@@ -1472,7 +1457,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | |||
1472 | if ((new_rlim.rlim_max > old_rlim->rlim_max) && | 1457 | if ((new_rlim.rlim_max > old_rlim->rlim_max) && |
1473 | !capable(CAP_SYS_RESOURCE)) | 1458 | !capable(CAP_SYS_RESOURCE)) |
1474 | return -EPERM; | 1459 | return -EPERM; |
1475 | if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) | 1460 | if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open) |
1476 | return -EPERM; | 1461 | return -EPERM; |
1477 | 1462 | ||
1478 | retval = security_task_setrlimit(resource, &new_rlim); | 1463 | retval = security_task_setrlimit(resource, &new_rlim); |
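With this change the RLIMIT_NOFILE hard maximum is bounded by the runtime-tunable fs.nr_open instead of the compile-time NR_OPEN, matching the new /proc/sys/fs/nr_open entry added to fs_table further down. A rough userspace sketch of the resulting behaviour (error handling trimmed, output text is illustrative):

/* Sketch: read fs.nr_open and try to raise RLIMIT_NOFILE one past it,
 * which the sys_setrlimit() check above should reject with EPERM. */
#include <errno.h>
#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
        unsigned long nr_open = 0;
        FILE *f = fopen("/proc/sys/fs/nr_open", "r");
        struct rlimit rl;

        if (!f || fscanf(f, "%lu", &nr_open) != 1)
                return 1;
        fclose(f);

        rl.rlim_cur = rl.rlim_max = nr_open + 1;        /* one past the cap */
        if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
                printf("setrlimit rejected: errno=%d (EPERM=%d)\n",
                       errno, EPERM);
        return 0;
}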
@@ -1637,7 +1622,7 @@ asmlinkage long sys_umask(int mask) | |||
1637 | mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); | 1622 | mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); |
1638 | return mask; | 1623 | return mask; |
1639 | } | 1624 | } |
1640 | 1625 | ||
1641 | asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | 1626 | asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, |
1642 | unsigned long arg4, unsigned long arg5) | 1627 | unsigned long arg4, unsigned long arg5) |
1643 | { | 1628 | { |
@@ -1742,6 +1727,17 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
1742 | error = prctl_set_seccomp(arg2); | 1727 | error = prctl_set_seccomp(arg2); |
1743 | break; | 1728 | break; |
1744 | 1729 | ||
1730 | case PR_CAPBSET_READ: | ||
1731 | if (!cap_valid(arg2)) | ||
1732 | return -EINVAL; | ||
1733 | return !!cap_raised(current->cap_bset, arg2); | ||
1734 | case PR_CAPBSET_DROP: | ||
1735 | #ifdef CONFIG_SECURITY_FILE_CAPABILITIES | ||
1736 | return cap_prctl_drop(arg2); | ||
1737 | #else | ||
1738 | return -EINVAL; | ||
1739 | #endif | ||
1740 | |||
1745 | default: | 1741 | default: |
1746 | error = -EINVAL; | 1742 | error = -EINVAL; |
1747 | break; | 1743 | break; |
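The two new prctl options expose the per-process capability bounding set: PR_CAPBSET_READ reports whether a capability is still present in current->cap_bset, and PR_CAPBSET_DROP removes one (only when file capabilities are configured, and only with CAP_SETPCAP). A minimal userspace sketch, assuming headers that expose these constants:

/* Sketch: query and drop CAP_NET_RAW from the bounding set. */
#include <stdio.h>
#include <sys/prctl.h>
#include <linux/capability.h>

#ifndef PR_CAPBSET_READ                 /* older userspace headers may lack these */
#define PR_CAPBSET_READ  23
#define PR_CAPBSET_DROP  24
#endif

int main(void)
{
        int have = prctl(PR_CAPBSET_READ, CAP_NET_RAW, 0, 0, 0);

        printf("CAP_NET_RAW in bounding set: %d\n", have);

        /* Needs CAP_SETPCAP and CONFIG_SECURITY_FILE_CAPABILITIES. */
        if (prctl(PR_CAPBSET_DROP, CAP_NET_RAW, 0, 0, 0) == 0)
                printf("dropped; now: %d\n",
                       prctl(PR_CAPBSET_READ, CAP_NET_RAW, 0, 0, 0));
        return 0;
}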
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index beee5b3b68a2..5b9b467de070 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -154,7 +154,10 @@ cond_syscall(sys_ioprio_get); | |||
154 | 154 | ||
155 | /* New file descriptors */ | 155 | /* New file descriptors */ |
156 | cond_syscall(sys_signalfd); | 156 | cond_syscall(sys_signalfd); |
157 | cond_syscall(sys_timerfd); | ||
158 | cond_syscall(compat_sys_signalfd); | 157 | cond_syscall(compat_sys_signalfd); |
159 | cond_syscall(compat_sys_timerfd); | 158 | cond_syscall(sys_timerfd_create); |
159 | cond_syscall(sys_timerfd_settime); | ||
160 | cond_syscall(sys_timerfd_gettime); | ||
161 | cond_syscall(compat_sys_timerfd_settime); | ||
162 | cond_syscall(compat_sys_timerfd_gettime); | ||
160 | cond_syscall(sys_eventfd); | 163 | cond_syscall(sys_eventfd); |
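The single sys_timerfd stub is replaced by stubs for the split interface: timerfd_create plus separate settime/gettime calls and their compat variants. A small usage sketch of that interface, assuming the glibc wrappers in <sys/timerfd.h>:

/* Sketch: arm a 1-second periodic timerfd and read two expirations. */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/timerfd.h>

int main(void)
{
        int fd = timerfd_create(CLOCK_MONOTONIC, 0);
        struct itimerspec its = {
                .it_interval = { .tv_sec = 1 },         /* period */
                .it_value    = { .tv_sec = 1 },         /* first expiry */
        };
        uint64_t expirations;

        if (fd < 0 || timerfd_settime(fd, 0, &its, NULL) < 0)
                return 1;

        for (int i = 0; i < 2; i++) {
                if (read(fd, &expirations, sizeof(expirations)) == sizeof(expirations))
                        printf("tick, %llu expiration(s)\n",
                               (unsigned long long)expirations);
        }
        close(fd);
        return 0;
}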
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7cb1ac3e6fff..8b7e95411795 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -37,7 +37,6 @@ | |||
37 | #include <linux/highuid.h> | 37 | #include <linux/highuid.h> |
38 | #include <linux/writeback.h> | 38 | #include <linux/writeback.h> |
39 | #include <linux/hugetlb.h> | 39 | #include <linux/hugetlb.h> |
40 | #include <linux/security.h> | ||
41 | #include <linux/initrd.h> | 40 | #include <linux/initrd.h> |
42 | #include <linux/times.h> | 41 | #include <linux/times.h> |
43 | #include <linux/limits.h> | 42 | #include <linux/limits.h> |
@@ -67,14 +66,13 @@ extern int sysctl_overcommit_memory; | |||
67 | extern int sysctl_overcommit_ratio; | 66 | extern int sysctl_overcommit_ratio; |
68 | extern int sysctl_panic_on_oom; | 67 | extern int sysctl_panic_on_oom; |
69 | extern int sysctl_oom_kill_allocating_task; | 68 | extern int sysctl_oom_kill_allocating_task; |
69 | extern int sysctl_oom_dump_tasks; | ||
70 | extern int max_threads; | 70 | extern int max_threads; |
71 | extern int core_uses_pid; | 71 | extern int core_uses_pid; |
72 | extern int suid_dumpable; | 72 | extern int suid_dumpable; |
73 | extern char core_pattern[]; | 73 | extern char core_pattern[]; |
74 | extern int pid_max; | 74 | extern int pid_max; |
75 | extern int min_free_kbytes; | 75 | extern int min_free_kbytes; |
76 | extern int printk_ratelimit_jiffies; | ||
77 | extern int printk_ratelimit_burst; | ||
78 | extern int pid_max_min, pid_max_max; | 76 | extern int pid_max_min, pid_max_max; |
79 | extern int sysctl_drop_caches; | 77 | extern int sysctl_drop_caches; |
80 | extern int percpu_pagelist_fraction; | 78 | extern int percpu_pagelist_fraction; |
@@ -84,8 +82,11 @@ extern int sysctl_stat_interval; | |||
84 | extern int latencytop_enabled; | 82 | extern int latencytop_enabled; |
85 | 83 | ||
86 | /* Constants used for minimum and maximum */ | 84 | /* Constants used for minimum and maximum */ |
87 | #ifdef CONFIG_DETECT_SOFTLOCKUP | 85 | #if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) |
88 | static int one = 1; | 86 | static int one = 1; |
87 | #endif | ||
88 | |||
89 | #ifdef CONFIG_DETECT_SOFTLOCKUP | ||
89 | static int sixty = 60; | 90 | static int sixty = 60; |
90 | #endif | 91 | #endif |
91 | 92 | ||
@@ -310,22 +311,6 @@ static struct ctl_table kern_table[] = { | |||
310 | .mode = 0644, | 311 | .mode = 0644, |
311 | .proc_handler = &proc_dointvec, | 312 | .proc_handler = &proc_dointvec, |
312 | }, | 313 | }, |
313 | { | ||
314 | .ctl_name = CTL_UNNUMBERED, | ||
315 | .procname = "sched_rt_period_ms", | ||
316 | .data = &sysctl_sched_rt_period, | ||
317 | .maxlen = sizeof(unsigned int), | ||
318 | .mode = 0644, | ||
319 | .proc_handler = &proc_dointvec, | ||
320 | }, | ||
321 | { | ||
322 | .ctl_name = CTL_UNNUMBERED, | ||
323 | .procname = "sched_rt_ratio", | ||
324 | .data = &sysctl_sched_rt_ratio, | ||
325 | .maxlen = sizeof(unsigned int), | ||
326 | .mode = 0644, | ||
327 | .proc_handler = &proc_dointvec, | ||
328 | }, | ||
329 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) | 314 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) |
330 | { | 315 | { |
331 | .ctl_name = CTL_UNNUMBERED, | 316 | .ctl_name = CTL_UNNUMBERED, |
@@ -347,6 +332,22 @@ static struct ctl_table kern_table[] = { | |||
347 | #endif | 332 | #endif |
348 | { | 333 | { |
349 | .ctl_name = CTL_UNNUMBERED, | 334 | .ctl_name = CTL_UNNUMBERED, |
335 | .procname = "sched_rt_period_us", | ||
336 | .data = &sysctl_sched_rt_period, | ||
337 | .maxlen = sizeof(unsigned int), | ||
338 | .mode = 0644, | ||
339 | .proc_handler = &proc_dointvec, | ||
340 | }, | ||
341 | { | ||
342 | .ctl_name = CTL_UNNUMBERED, | ||
343 | .procname = "sched_rt_runtime_us", | ||
344 | .data = &sysctl_sched_rt_runtime, | ||
345 | .maxlen = sizeof(int), | ||
346 | .mode = 0644, | ||
347 | .proc_handler = &proc_dointvec, | ||
348 | }, | ||
349 | { | ||
350 | .ctl_name = CTL_UNNUMBERED, | ||
350 | .procname = "sched_compat_yield", | 351 | .procname = "sched_compat_yield", |
351 | .data = &sysctl_sched_compat_yield, | 352 | .data = &sysctl_sched_compat_yield, |
352 | .maxlen = sizeof(unsigned int), | 353 | .maxlen = sizeof(unsigned int), |
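These renamed entries export the real-time bandwidth knobs in microseconds: kernel.sched_rt_period_us is the accounting period and kernel.sched_rt_runtime_us the share of it that realtime tasks may consume (the old sched_rt_period_ms/sched_rt_ratio pair is removed in the earlier hunk). A sketch that reads them back through procfs (illustrative only):

/* Sketch: print the RT bandwidth settings exposed above. */
#include <stdio.h>

static long read_long(const char *path)
{
        long val = -1;
        FILE *f = fopen(path, "r");

        if (f) {
                if (fscanf(f, "%ld", &val) != 1)
                        val = -1;
                fclose(f);
        }
        return val;
}

int main(void)
{
        long period  = read_long("/proc/sys/kernel/sched_rt_period_us");
        long runtime = read_long("/proc/sys/kernel/sched_rt_runtime_us");

        printf("rt period  = %ld us\n", period);
        printf("rt runtime = %ld us (-1 means unthrottled)\n", runtime);
        return 0;
}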
@@ -416,15 +417,6 @@ static struct ctl_table kern_table[] = { | |||
416 | .proc_handler = &proc_dointvec, | 417 | .proc_handler = &proc_dointvec, |
417 | }, | 418 | }, |
418 | #endif | 419 | #endif |
419 | #ifdef CONFIG_SECURITY_CAPABILITIES | ||
420 | { | ||
421 | .procname = "cap-bound", | ||
422 | .data = &cap_bset, | ||
423 | .maxlen = sizeof(kernel_cap_t), | ||
424 | .mode = 0600, | ||
425 | .proc_handler = &proc_dointvec_bset, | ||
426 | }, | ||
427 | #endif /* def CONFIG_SECURITY_CAPABILITIES */ | ||
428 | #ifdef CONFIG_BLK_DEV_INITRD | 420 | #ifdef CONFIG_BLK_DEV_INITRD |
429 | { | 421 | { |
430 | .ctl_name = KERN_REALROOTDEV, | 422 | .ctl_name = KERN_REALROOTDEV, |
@@ -496,14 +488,6 @@ static struct ctl_table kern_table[] = { | |||
496 | .mode = 0644, | 488 | .mode = 0644, |
497 | .proc_handler = &proc_dointvec, | 489 | .proc_handler = &proc_dointvec, |
498 | }, | 490 | }, |
499 | { | ||
500 | .ctl_name = KERN_PRINTK, | ||
501 | .procname = "printk", | ||
502 | .data = &console_loglevel, | ||
503 | .maxlen = 4*sizeof(int), | ||
504 | .mode = 0644, | ||
505 | .proc_handler = &proc_dointvec, | ||
506 | }, | ||
507 | #ifdef CONFIG_KMOD | 491 | #ifdef CONFIG_KMOD |
508 | { | 492 | { |
509 | .ctl_name = KERN_MODPROBE, | 493 | .ctl_name = KERN_MODPROBE, |
@@ -650,6 +634,15 @@ static struct ctl_table kern_table[] = { | |||
650 | .mode = 0644, | 634 | .mode = 0644, |
651 | .proc_handler = &proc_dointvec, | 635 | .proc_handler = &proc_dointvec, |
652 | }, | 636 | }, |
637 | #if defined CONFIG_PRINTK | ||
638 | { | ||
639 | .ctl_name = KERN_PRINTK, | ||
640 | .procname = "printk", | ||
641 | .data = &console_loglevel, | ||
642 | .maxlen = 4*sizeof(int), | ||
643 | .mode = 0644, | ||
644 | .proc_handler = &proc_dointvec, | ||
645 | }, | ||
653 | { | 646 | { |
654 | .ctl_name = KERN_PRINTK_RATELIMIT, | 647 | .ctl_name = KERN_PRINTK_RATELIMIT, |
655 | .procname = "printk_ratelimit", | 648 | .procname = "printk_ratelimit", |
@@ -667,6 +660,7 @@ static struct ctl_table kern_table[] = { | |||
667 | .mode = 0644, | 660 | .mode = 0644, |
668 | .proc_handler = &proc_dointvec, | 661 | .proc_handler = &proc_dointvec, |
669 | }, | 662 | }, |
663 | #endif | ||
670 | { | 664 | { |
671 | .ctl_name = KERN_NGROUPS_MAX, | 665 | .ctl_name = KERN_NGROUPS_MAX, |
672 | .procname = "ngroups_max", | 666 | .procname = "ngroups_max", |
@@ -877,6 +871,14 @@ static struct ctl_table vm_table[] = { | |||
877 | .proc_handler = &proc_dointvec, | 871 | .proc_handler = &proc_dointvec, |
878 | }, | 872 | }, |
879 | { | 873 | { |
874 | .ctl_name = CTL_UNNUMBERED, | ||
875 | .procname = "oom_dump_tasks", | ||
876 | .data = &sysctl_oom_dump_tasks, | ||
877 | .maxlen = sizeof(sysctl_oom_dump_tasks), | ||
878 | .mode = 0644, | ||
879 | .proc_handler = &proc_dointvec, | ||
880 | }, | ||
881 | { | ||
880 | .ctl_name = VM_OVERCOMMIT_RATIO, | 882 | .ctl_name = VM_OVERCOMMIT_RATIO, |
881 | .procname = "overcommit_ratio", | 883 | .procname = "overcommit_ratio", |
882 | .data = &sysctl_overcommit_ratio, | 884 | .data = &sysctl_overcommit_ratio, |
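vm.oom_dump_tasks is a plain integer handled by proc_dointvec; when non-zero the OOM killer dumps a per-task memory summary before choosing a victim. Enabling it from a program is just a procfs write, for example (sketch, needs root):

/* Sketch: turn on the OOM task dump added above. */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/vm/oom_dump_tasks", "w");

        if (!f)
                return 1;
        fputs("1\n", f);
        fclose(f);
        return 0;
}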
@@ -976,10 +978,10 @@ static struct ctl_table vm_table[] = { | |||
976 | { | 978 | { |
977 | .ctl_name = CTL_UNNUMBERED, | 979 | .ctl_name = CTL_UNNUMBERED, |
978 | .procname = "nr_overcommit_hugepages", | 980 | .procname = "nr_overcommit_hugepages", |
979 | .data = &nr_overcommit_huge_pages, | 981 | .data = &sysctl_overcommit_huge_pages, |
980 | .maxlen = sizeof(nr_overcommit_huge_pages), | 982 | .maxlen = sizeof(sysctl_overcommit_huge_pages), |
981 | .mode = 0644, | 983 | .mode = 0644, |
982 | .proc_handler = &proc_doulongvec_minmax, | 984 | .proc_handler = &hugetlb_overcommit_handler, |
983 | }, | 985 | }, |
984 | #endif | 986 | #endif |
985 | { | 987 | { |
@@ -1150,6 +1152,19 @@ static struct ctl_table vm_table[] = { | |||
1150 | .extra1 = &zero, | 1152 | .extra1 = &zero, |
1151 | }, | 1153 | }, |
1152 | #endif | 1154 | #endif |
1155 | #ifdef CONFIG_HIGHMEM | ||
1156 | { | ||
1157 | .ctl_name = CTL_UNNUMBERED, | ||
1158 | .procname = "highmem_is_dirtyable", | ||
1159 | .data = &vm_highmem_is_dirtyable, | ||
1160 | .maxlen = sizeof(vm_highmem_is_dirtyable), | ||
1161 | .mode = 0644, | ||
1162 | .proc_handler = &proc_dointvec_minmax, | ||
1163 | .strategy = &sysctl_intvec, | ||
1164 | .extra1 = &zero, | ||
1165 | .extra2 = &one, | ||
1166 | }, | ||
1167 | #endif | ||
1153 | /* | 1168 | /* |
1154 | * NOTE: do not add new entries to this table unless you have read | 1169 | * NOTE: do not add new entries to this table unless you have read |
1155 | * Documentation/sysctl/ctl_unnumbered.txt | 1170 | * Documentation/sysctl/ctl_unnumbered.txt |
@@ -1196,6 +1211,14 @@ static struct ctl_table fs_table[] = { | |||
1196 | .proc_handler = &proc_dointvec, | 1211 | .proc_handler = &proc_dointvec, |
1197 | }, | 1212 | }, |
1198 | { | 1213 | { |
1214 | .ctl_name = CTL_UNNUMBERED, | ||
1215 | .procname = "nr_open", | ||
1216 | .data = &sysctl_nr_open, | ||
1217 | .maxlen = sizeof(int), | ||
1218 | .mode = 0644, | ||
1219 | .proc_handler = &proc_dointvec, | ||
1220 | }, | ||
1221 | { | ||
1199 | .ctl_name = FS_DENTRY, | 1222 | .ctl_name = FS_DENTRY, |
1200 | .procname = "dentry-state", | 1223 | .procname = "dentry-state", |
1201 | .data = &dentry_stat, | 1224 | .data = &dentry_stat, |
@@ -2080,26 +2103,6 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, | |||
2080 | return 0; | 2103 | return 0; |
2081 | } | 2104 | } |
2082 | 2105 | ||
2083 | #ifdef CONFIG_SECURITY_CAPABILITIES | ||
2084 | /* | ||
2085 | * init may raise the set. | ||
2086 | */ | ||
2087 | |||
2088 | int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp, | ||
2089 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2090 | { | ||
2091 | int op; | ||
2092 | |||
2093 | if (write && !capable(CAP_SYS_MODULE)) { | ||
2094 | return -EPERM; | ||
2095 | } | ||
2096 | |||
2097 | op = is_global_init(current) ? OP_SET : OP_AND; | ||
2098 | return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, | ||
2099 | do_proc_dointvec_bset_conv,&op); | ||
2100 | } | ||
2101 | #endif /* def CONFIG_SECURITY_CAPABILITIES */ | ||
2102 | |||
2103 | /* | 2106 | /* |
2104 | * Taint values can only be increased | 2107 | * Taint values can only be increased |
2105 | */ | 2108 | */ |
@@ -2484,7 +2487,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp | |||
2484 | pid_t tmp; | 2487 | pid_t tmp; |
2485 | int r; | 2488 | int r; |
2486 | 2489 | ||
2487 | tmp = pid_nr_ns(cad_pid, current->nsproxy->pid_ns); | 2490 | tmp = pid_vnr(cad_pid); |
2488 | 2491 | ||
2489 | r = __do_proc_dointvec(&tmp, table, write, filp, buffer, | 2492 | r = __do_proc_dointvec(&tmp, table, write, filp, buffer, |
2490 | lenp, ppos, NULL, NULL); | 2493 | lenp, ppos, NULL, NULL); |
@@ -2513,12 +2516,6 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp, | |||
2513 | return -ENOSYS; | 2516 | return -ENOSYS; |
2514 | } | 2517 | } |
2515 | 2518 | ||
2516 | int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp, | ||
2517 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2518 | { | ||
2519 | return -ENOSYS; | ||
2520 | } | ||
2521 | |||
2522 | int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, | 2519 | int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, |
2523 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2520 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2524 | { | 2521 | { |
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index c3206fa50048..c09350d564f2 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c | |||
@@ -8,10 +8,10 @@ | |||
8 | struct trans_ctl_table { | 8 | struct trans_ctl_table { |
9 | int ctl_name; | 9 | int ctl_name; |
10 | const char *procname; | 10 | const char *procname; |
11 | struct trans_ctl_table *child; | 11 | const struct trans_ctl_table *child; |
12 | }; | 12 | }; |
13 | 13 | ||
14 | static struct trans_ctl_table trans_random_table[] = { | 14 | static const struct trans_ctl_table trans_random_table[] = { |
15 | { RANDOM_POOLSIZE, "poolsize" }, | 15 | { RANDOM_POOLSIZE, "poolsize" }, |
16 | { RANDOM_ENTROPY_COUNT, "entropy_avail" }, | 16 | { RANDOM_ENTROPY_COUNT, "entropy_avail" }, |
17 | { RANDOM_READ_THRESH, "read_wakeup_threshold" }, | 17 | { RANDOM_READ_THRESH, "read_wakeup_threshold" }, |
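Const-qualifying the child pointer is what allows every translation table below to become const (and live in read-only data): a const table can only legally store pointers to other const tables if the member itself points to const. A tiny standalone illustration of that propagation (all names here are made up):

/* Sketch: with a non-const child pointer, initializing a const table
 * with the address of another const table would discard qualifiers. */
struct demo_entry {
        int ctl_name;
        const char *procname;
        const struct demo_entry *child;   /* must be const-qualified ... */
};

static const struct demo_entry demo_leaf[] = {
        { 1, "leaf" },
        {}
};

static const struct demo_entry demo_root[] = {
        { 2, "dir", demo_leaf },          /* ... so this initializer is legal */
        {}
};

int main(void)
{
        return demo_root[0].child == demo_leaf ? 0 : 1;
}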
@@ -21,13 +21,13 @@ static struct trans_ctl_table trans_random_table[] = { | |||
21 | {} | 21 | {} |
22 | }; | 22 | }; |
23 | 23 | ||
24 | static struct trans_ctl_table trans_pty_table[] = { | 24 | static const struct trans_ctl_table trans_pty_table[] = { |
25 | { PTY_MAX, "max" }, | 25 | { PTY_MAX, "max" }, |
26 | { PTY_NR, "nr" }, | 26 | { PTY_NR, "nr" }, |
27 | {} | 27 | {} |
28 | }; | 28 | }; |
29 | 29 | ||
30 | static struct trans_ctl_table trans_kern_table[] = { | 30 | static const struct trans_ctl_table trans_kern_table[] = { |
31 | { KERN_OSTYPE, "ostype" }, | 31 | { KERN_OSTYPE, "ostype" }, |
32 | { KERN_OSRELEASE, "osrelease" }, | 32 | { KERN_OSRELEASE, "osrelease" }, |
33 | /* KERN_OSREV not used */ | 33 | /* KERN_OSREV not used */ |
@@ -37,10 +37,6 @@ static struct trans_ctl_table trans_kern_table[] = { | |||
37 | { KERN_NODENAME, "hostname" }, | 37 | { KERN_NODENAME, "hostname" }, |
38 | { KERN_DOMAINNAME, "domainname" }, | 38 | { KERN_DOMAINNAME, "domainname" }, |
39 | 39 | ||
40 | #ifdef CONFIG_SECURITY_CAPABILITIES | ||
41 | { KERN_CAP_BSET, "cap-bound" }, | ||
42 | #endif /* def CONFIG_SECURITY_CAPABILITIES */ | ||
43 | |||
44 | { KERN_PANIC, "panic" }, | 40 | { KERN_PANIC, "panic" }, |
45 | { KERN_REALROOTDEV, "real-root-dev" }, | 41 | { KERN_REALROOTDEV, "real-root-dev" }, |
46 | 42 | ||
@@ -111,7 +107,7 @@ static struct trans_ctl_table trans_kern_table[] = { | |||
111 | {} | 107 | {} |
112 | }; | 108 | }; |
113 | 109 | ||
114 | static struct trans_ctl_table trans_vm_table[] = { | 110 | static const struct trans_ctl_table trans_vm_table[] = { |
115 | { VM_OVERCOMMIT_MEMORY, "overcommit_memory" }, | 111 | { VM_OVERCOMMIT_MEMORY, "overcommit_memory" }, |
116 | { VM_PAGE_CLUSTER, "page-cluster" }, | 112 | { VM_PAGE_CLUSTER, "page-cluster" }, |
117 | { VM_DIRTY_BACKGROUND, "dirty_background_ratio" }, | 113 | { VM_DIRTY_BACKGROUND, "dirty_background_ratio" }, |
@@ -143,7 +139,7 @@ static struct trans_ctl_table trans_vm_table[] = { | |||
143 | {} | 139 | {} |
144 | }; | 140 | }; |
145 | 141 | ||
146 | static struct trans_ctl_table trans_net_core_table[] = { | 142 | static const struct trans_ctl_table trans_net_core_table[] = { |
147 | { NET_CORE_WMEM_MAX, "wmem_max" }, | 143 | { NET_CORE_WMEM_MAX, "wmem_max" }, |
148 | { NET_CORE_RMEM_MAX, "rmem_max" }, | 144 | { NET_CORE_RMEM_MAX, "rmem_max" }, |
149 | { NET_CORE_WMEM_DEFAULT, "wmem_default" }, | 145 | { NET_CORE_WMEM_DEFAULT, "wmem_default" }, |
@@ -169,14 +165,14 @@ static struct trans_ctl_table trans_net_core_table[] = { | |||
169 | {}, | 165 | {}, |
170 | }; | 166 | }; |
171 | 167 | ||
172 | static struct trans_ctl_table trans_net_unix_table[] = { | 168 | static const struct trans_ctl_table trans_net_unix_table[] = { |
173 | /* NET_UNIX_DESTROY_DELAY unused */ | 169 | /* NET_UNIX_DESTROY_DELAY unused */ |
174 | /* NET_UNIX_DELETE_DELAY unused */ | 170 | /* NET_UNIX_DELETE_DELAY unused */ |
175 | { NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen" }, | 171 | { NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen" }, |
176 | {} | 172 | {} |
177 | }; | 173 | }; |
178 | 174 | ||
179 | static struct trans_ctl_table trans_net_ipv4_route_table[] = { | 175 | static const struct trans_ctl_table trans_net_ipv4_route_table[] = { |
180 | { NET_IPV4_ROUTE_FLUSH, "flush" }, | 176 | { NET_IPV4_ROUTE_FLUSH, "flush" }, |
181 | { NET_IPV4_ROUTE_MIN_DELAY, "min_delay" }, | 177 | { NET_IPV4_ROUTE_MIN_DELAY, "min_delay" }, |
182 | { NET_IPV4_ROUTE_MAX_DELAY, "max_delay" }, | 178 | { NET_IPV4_ROUTE_MAX_DELAY, "max_delay" }, |
@@ -199,7 +195,7 @@ static struct trans_ctl_table trans_net_ipv4_route_table[] = { | |||
199 | {} | 195 | {} |
200 | }; | 196 | }; |
201 | 197 | ||
202 | static struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = { | 198 | static const struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = { |
203 | { NET_IPV4_CONF_FORWARDING, "forwarding" }, | 199 | { NET_IPV4_CONF_FORWARDING, "forwarding" }, |
204 | { NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding" }, | 200 | { NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding" }, |
205 | 201 | ||
@@ -226,14 +222,14 @@ static struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = { | |||
226 | {} | 222 | {} |
227 | }; | 223 | }; |
228 | 224 | ||
229 | static struct trans_ctl_table trans_net_ipv4_conf_table[] = { | 225 | static const struct trans_ctl_table trans_net_ipv4_conf_table[] = { |
230 | { NET_PROTO_CONF_ALL, "all", trans_net_ipv4_conf_vars_table }, | 226 | { NET_PROTO_CONF_ALL, "all", trans_net_ipv4_conf_vars_table }, |
231 | { NET_PROTO_CONF_DEFAULT, "default", trans_net_ipv4_conf_vars_table }, | 227 | { NET_PROTO_CONF_DEFAULT, "default", trans_net_ipv4_conf_vars_table }, |
232 | { 0, NULL, trans_net_ipv4_conf_vars_table }, | 228 | { 0, NULL, trans_net_ipv4_conf_vars_table }, |
233 | {} | 229 | {} |
234 | }; | 230 | }; |
235 | 231 | ||
236 | static struct trans_ctl_table trans_net_neigh_vars_table[] = { | 232 | static const struct trans_ctl_table trans_net_neigh_vars_table[] = { |
237 | { NET_NEIGH_MCAST_SOLICIT, "mcast_solicit" }, | 233 | { NET_NEIGH_MCAST_SOLICIT, "mcast_solicit" }, |
238 | { NET_NEIGH_UCAST_SOLICIT, "ucast_solicit" }, | 234 | { NET_NEIGH_UCAST_SOLICIT, "ucast_solicit" }, |
239 | { NET_NEIGH_APP_SOLICIT, "app_solicit" }, | 235 | { NET_NEIGH_APP_SOLICIT, "app_solicit" }, |
@@ -255,13 +251,13 @@ static struct trans_ctl_table trans_net_neigh_vars_table[] = { | |||
255 | {} | 251 | {} |
256 | }; | 252 | }; |
257 | 253 | ||
258 | static struct trans_ctl_table trans_net_neigh_table[] = { | 254 | static const struct trans_ctl_table trans_net_neigh_table[] = { |
259 | { NET_PROTO_CONF_DEFAULT, "default", trans_net_neigh_vars_table }, | 255 | { NET_PROTO_CONF_DEFAULT, "default", trans_net_neigh_vars_table }, |
260 | { 0, NULL, trans_net_neigh_vars_table }, | 256 | { 0, NULL, trans_net_neigh_vars_table }, |
261 | {} | 257 | {} |
262 | }; | 258 | }; |
263 | 259 | ||
264 | static struct trans_ctl_table trans_net_ipv4_netfilter_table[] = { | 260 | static const struct trans_ctl_table trans_net_ipv4_netfilter_table[] = { |
265 | { NET_IPV4_NF_CONNTRACK_MAX, "ip_conntrack_max" }, | 261 | { NET_IPV4_NF_CONNTRACK_MAX, "ip_conntrack_max" }, |
266 | 262 | ||
267 | { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "ip_conntrack_tcp_timeout_syn_sent" }, | 263 | { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "ip_conntrack_tcp_timeout_syn_sent" }, |
@@ -298,7 +294,7 @@ static struct trans_ctl_table trans_net_ipv4_netfilter_table[] = { | |||
298 | {} | 294 | {} |
299 | }; | 295 | }; |
300 | 296 | ||
301 | static struct trans_ctl_table trans_net_ipv4_table[] = { | 297 | static const struct trans_ctl_table trans_net_ipv4_table[] = { |
302 | { NET_IPV4_FORWARD, "ip_forward" }, | 298 | { NET_IPV4_FORWARD, "ip_forward" }, |
303 | { NET_IPV4_DYNADDR, "ip_dynaddr" }, | 299 | { NET_IPV4_DYNADDR, "ip_dynaddr" }, |
304 | 300 | ||
@@ -397,13 +393,13 @@ static struct trans_ctl_table trans_net_ipv4_table[] = { | |||
397 | {} | 393 | {} |
398 | }; | 394 | }; |
399 | 395 | ||
400 | static struct trans_ctl_table trans_net_ipx_table[] = { | 396 | static const struct trans_ctl_table trans_net_ipx_table[] = { |
401 | { NET_IPX_PPROP_BROADCASTING, "ipx_pprop_broadcasting" }, | 397 | { NET_IPX_PPROP_BROADCASTING, "ipx_pprop_broadcasting" }, |
402 | /* NET_IPX_FORWARDING unused */ | 398 | /* NET_IPX_FORWARDING unused */ |
403 | {} | 399 | {} |
404 | }; | 400 | }; |
405 | 401 | ||
406 | static struct trans_ctl_table trans_net_atalk_table[] = { | 402 | static const struct trans_ctl_table trans_net_atalk_table[] = { |
407 | { NET_ATALK_AARP_EXPIRY_TIME, "aarp-expiry-time" }, | 403 | { NET_ATALK_AARP_EXPIRY_TIME, "aarp-expiry-time" }, |
408 | { NET_ATALK_AARP_TICK_TIME, "aarp-tick-time" }, | 404 | { NET_ATALK_AARP_TICK_TIME, "aarp-tick-time" }, |
409 | { NET_ATALK_AARP_RETRANSMIT_LIMIT, "aarp-retransmit-limit" }, | 405 | { NET_ATALK_AARP_RETRANSMIT_LIMIT, "aarp-retransmit-limit" }, |
@@ -411,7 +407,7 @@ static struct trans_ctl_table trans_net_atalk_table[] = { | |||
411 | {}, | 407 | {}, |
412 | }; | 408 | }; |
413 | 409 | ||
414 | static struct trans_ctl_table trans_net_netrom_table[] = { | 410 | static const struct trans_ctl_table trans_net_netrom_table[] = { |
415 | { NET_NETROM_DEFAULT_PATH_QUALITY, "default_path_quality" }, | 411 | { NET_NETROM_DEFAULT_PATH_QUALITY, "default_path_quality" }, |
416 | { NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER, "obsolescence_count_initialiser" }, | 412 | { NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER, "obsolescence_count_initialiser" }, |
417 | { NET_NETROM_NETWORK_TTL_INITIALISER, "network_ttl_initialiser" }, | 413 | { NET_NETROM_NETWORK_TTL_INITIALISER, "network_ttl_initialiser" }, |
@@ -427,7 +423,7 @@ static struct trans_ctl_table trans_net_netrom_table[] = { | |||
427 | {} | 423 | {} |
428 | }; | 424 | }; |
429 | 425 | ||
430 | static struct trans_ctl_table trans_net_ax25_param_table[] = { | 426 | static const struct trans_ctl_table trans_net_ax25_param_table[] = { |
431 | { NET_AX25_IP_DEFAULT_MODE, "ip_default_mode" }, | 427 | { NET_AX25_IP_DEFAULT_MODE, "ip_default_mode" }, |
432 | { NET_AX25_DEFAULT_MODE, "ax25_default_mode" }, | 428 | { NET_AX25_DEFAULT_MODE, "ax25_default_mode" }, |
433 | { NET_AX25_BACKOFF_TYPE, "backoff_type" }, | 429 | { NET_AX25_BACKOFF_TYPE, "backoff_type" }, |
@@ -445,12 +441,12 @@ static struct trans_ctl_table trans_net_ax25_param_table[] = { | |||
445 | {} | 441 | {} |
446 | }; | 442 | }; |
447 | 443 | ||
448 | static struct trans_ctl_table trans_net_ax25_table[] = { | 444 | static const struct trans_ctl_table trans_net_ax25_table[] = { |
449 | { 0, NULL, trans_net_ax25_param_table }, | 445 | { 0, NULL, trans_net_ax25_param_table }, |
450 | {} | 446 | {} |
451 | }; | 447 | }; |
452 | 448 | ||
453 | static struct trans_ctl_table trans_net_bridge_table[] = { | 449 | static const struct trans_ctl_table trans_net_bridge_table[] = { |
454 | { NET_BRIDGE_NF_CALL_ARPTABLES, "bridge-nf-call-arptables" }, | 450 | { NET_BRIDGE_NF_CALL_ARPTABLES, "bridge-nf-call-arptables" }, |
455 | { NET_BRIDGE_NF_CALL_IPTABLES, "bridge-nf-call-iptables" }, | 451 | { NET_BRIDGE_NF_CALL_IPTABLES, "bridge-nf-call-iptables" }, |
456 | { NET_BRIDGE_NF_CALL_IP6TABLES, "bridge-nf-call-ip6tables" }, | 452 | { NET_BRIDGE_NF_CALL_IP6TABLES, "bridge-nf-call-ip6tables" }, |
@@ -459,7 +455,7 @@ static struct trans_ctl_table trans_net_bridge_table[] = { | |||
459 | {} | 455 | {} |
460 | }; | 456 | }; |
461 | 457 | ||
462 | static struct trans_ctl_table trans_net_rose_table[] = { | 458 | static const struct trans_ctl_table trans_net_rose_table[] = { |
463 | { NET_ROSE_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" }, | 459 | { NET_ROSE_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" }, |
464 | { NET_ROSE_CALL_REQUEST_TIMEOUT, "call_request_timeout" }, | 460 | { NET_ROSE_CALL_REQUEST_TIMEOUT, "call_request_timeout" }, |
465 | { NET_ROSE_RESET_REQUEST_TIMEOUT, "reset_request_timeout" }, | 461 | { NET_ROSE_RESET_REQUEST_TIMEOUT, "reset_request_timeout" }, |
@@ -473,7 +469,7 @@ static struct trans_ctl_table trans_net_rose_table[] = { | |||
473 | {} | 469 | {} |
474 | }; | 470 | }; |
475 | 471 | ||
476 | static struct trans_ctl_table trans_net_ipv6_conf_var_table[] = { | 472 | static const struct trans_ctl_table trans_net_ipv6_conf_var_table[] = { |
477 | { NET_IPV6_FORWARDING, "forwarding" }, | 473 | { NET_IPV6_FORWARDING, "forwarding" }, |
478 | { NET_IPV6_HOP_LIMIT, "hop_limit" }, | 474 | { NET_IPV6_HOP_LIMIT, "hop_limit" }, |
479 | { NET_IPV6_MTU, "mtu" }, | 475 | { NET_IPV6_MTU, "mtu" }, |
@@ -501,14 +497,14 @@ static struct trans_ctl_table trans_net_ipv6_conf_var_table[] = { | |||
501 | {} | 497 | {} |
502 | }; | 498 | }; |
503 | 499 | ||
504 | static struct trans_ctl_table trans_net_ipv6_conf_table[] = { | 500 | static const struct trans_ctl_table trans_net_ipv6_conf_table[] = { |
505 | { NET_PROTO_CONF_ALL, "all", trans_net_ipv6_conf_var_table }, | 501 | { NET_PROTO_CONF_ALL, "all", trans_net_ipv6_conf_var_table }, |
506 | { NET_PROTO_CONF_DEFAULT, "default", trans_net_ipv6_conf_var_table }, | 502 | { NET_PROTO_CONF_DEFAULT, "default", trans_net_ipv6_conf_var_table }, |
507 | { 0, NULL, trans_net_ipv6_conf_var_table }, | 503 | { 0, NULL, trans_net_ipv6_conf_var_table }, |
508 | {} | 504 | {} |
509 | }; | 505 | }; |
510 | 506 | ||
511 | static struct trans_ctl_table trans_net_ipv6_route_table[] = { | 507 | static const struct trans_ctl_table trans_net_ipv6_route_table[] = { |
512 | { NET_IPV6_ROUTE_FLUSH, "flush" }, | 508 | { NET_IPV6_ROUTE_FLUSH, "flush" }, |
513 | { NET_IPV6_ROUTE_GC_THRESH, "gc_thresh" }, | 509 | { NET_IPV6_ROUTE_GC_THRESH, "gc_thresh" }, |
514 | { NET_IPV6_ROUTE_MAX_SIZE, "max_size" }, | 510 | { NET_IPV6_ROUTE_MAX_SIZE, "max_size" }, |
@@ -522,12 +518,12 @@ static struct trans_ctl_table trans_net_ipv6_route_table[] = { | |||
522 | {} | 518 | {} |
523 | }; | 519 | }; |
524 | 520 | ||
525 | static struct trans_ctl_table trans_net_ipv6_icmp_table[] = { | 521 | static const struct trans_ctl_table trans_net_ipv6_icmp_table[] = { |
526 | { NET_IPV6_ICMP_RATELIMIT, "ratelimit" }, | 522 | { NET_IPV6_ICMP_RATELIMIT, "ratelimit" }, |
527 | {} | 523 | {} |
528 | }; | 524 | }; |
529 | 525 | ||
530 | static struct trans_ctl_table trans_net_ipv6_table[] = { | 526 | static const struct trans_ctl_table trans_net_ipv6_table[] = { |
531 | { NET_IPV6_CONF, "conf", trans_net_ipv6_conf_table }, | 527 | { NET_IPV6_CONF, "conf", trans_net_ipv6_conf_table }, |
532 | { NET_IPV6_NEIGH, "neigh", trans_net_neigh_table }, | 528 | { NET_IPV6_NEIGH, "neigh", trans_net_neigh_table }, |
533 | { NET_IPV6_ROUTE, "route", trans_net_ipv6_route_table }, | 529 | { NET_IPV6_ROUTE, "route", trans_net_ipv6_route_table }, |
@@ -542,7 +538,7 @@ static struct trans_ctl_table trans_net_ipv6_table[] = { | |||
542 | {} | 538 | {} |
543 | }; | 539 | }; |
544 | 540 | ||
545 | static struct trans_ctl_table trans_net_x25_table[] = { | 541 | static const struct trans_ctl_table trans_net_x25_table[] = { |
546 | { NET_X25_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" }, | 542 | { NET_X25_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" }, |
547 | { NET_X25_CALL_REQUEST_TIMEOUT, "call_request_timeout" }, | 543 | { NET_X25_CALL_REQUEST_TIMEOUT, "call_request_timeout" }, |
548 | { NET_X25_RESET_REQUEST_TIMEOUT, "reset_request_timeout" }, | 544 | { NET_X25_RESET_REQUEST_TIMEOUT, "reset_request_timeout" }, |
@@ -552,13 +548,13 @@ static struct trans_ctl_table trans_net_x25_table[] = { | |||
552 | {} | 548 | {} |
553 | }; | 549 | }; |
554 | 550 | ||
555 | static struct trans_ctl_table trans_net_tr_table[] = { | 551 | static const struct trans_ctl_table trans_net_tr_table[] = { |
556 | { NET_TR_RIF_TIMEOUT, "rif_timeout" }, | 552 | { NET_TR_RIF_TIMEOUT, "rif_timeout" }, |
557 | {} | 553 | {} |
558 | }; | 554 | }; |
559 | 555 | ||
560 | 556 | ||
561 | static struct trans_ctl_table trans_net_decnet_conf_vars[] = { | 557 | static const struct trans_ctl_table trans_net_decnet_conf_vars[] = { |
562 | { NET_DECNET_CONF_DEV_FORWARDING, "forwarding" }, | 558 | { NET_DECNET_CONF_DEV_FORWARDING, "forwarding" }, |
563 | { NET_DECNET_CONF_DEV_PRIORITY, "priority" }, | 559 | { NET_DECNET_CONF_DEV_PRIORITY, "priority" }, |
564 | { NET_DECNET_CONF_DEV_T2, "t2" }, | 560 | { NET_DECNET_CONF_DEV_T2, "t2" }, |
@@ -566,12 +562,12 @@ static struct trans_ctl_table trans_net_decnet_conf_vars[] = { | |||
566 | {} | 562 | {} |
567 | }; | 563 | }; |
568 | 564 | ||
569 | static struct trans_ctl_table trans_net_decnet_conf[] = { | 565 | static const struct trans_ctl_table trans_net_decnet_conf[] = { |
570 | { 0, NULL, trans_net_decnet_conf_vars }, | 566 | { 0, NULL, trans_net_decnet_conf_vars }, |
571 | {} | 567 | {} |
572 | }; | 568 | }; |
573 | 569 | ||
574 | static struct trans_ctl_table trans_net_decnet_table[] = { | 570 | static const struct trans_ctl_table trans_net_decnet_table[] = { |
575 | { NET_DECNET_CONF, "conf", trans_net_decnet_conf }, | 571 | { NET_DECNET_CONF, "conf", trans_net_decnet_conf }, |
576 | { NET_DECNET_NODE_ADDRESS, "node_address" }, | 572 | { NET_DECNET_NODE_ADDRESS, "node_address" }, |
577 | { NET_DECNET_NODE_NAME, "node_name" }, | 573 | { NET_DECNET_NODE_NAME, "node_name" }, |
@@ -589,7 +585,7 @@ static struct trans_ctl_table trans_net_decnet_table[] = { | |||
589 | {} | 585 | {} |
590 | }; | 586 | }; |
591 | 587 | ||
592 | static struct trans_ctl_table trans_net_sctp_table[] = { | 588 | static const struct trans_ctl_table trans_net_sctp_table[] = { |
593 | { NET_SCTP_RTO_INITIAL, "rto_initial" }, | 589 | { NET_SCTP_RTO_INITIAL, "rto_initial" }, |
594 | { NET_SCTP_RTO_MIN, "rto_min" }, | 590 | { NET_SCTP_RTO_MIN, "rto_min" }, |
595 | { NET_SCTP_RTO_MAX, "rto_max" }, | 591 | { NET_SCTP_RTO_MAX, "rto_max" }, |
@@ -610,7 +606,7 @@ static struct trans_ctl_table trans_net_sctp_table[] = { | |||
610 | {} | 606 | {} |
611 | }; | 607 | }; |
612 | 608 | ||
613 | static struct trans_ctl_table trans_net_llc_llc2_timeout_table[] = { | 609 | static const struct trans_ctl_table trans_net_llc_llc2_timeout_table[] = { |
614 | { NET_LLC2_ACK_TIMEOUT, "ack" }, | 610 | { NET_LLC2_ACK_TIMEOUT, "ack" }, |
615 | { NET_LLC2_P_TIMEOUT, "p" }, | 611 | { NET_LLC2_P_TIMEOUT, "p" }, |
616 | { NET_LLC2_REJ_TIMEOUT, "rej" }, | 612 | { NET_LLC2_REJ_TIMEOUT, "rej" }, |
@@ -618,23 +614,23 @@ static struct trans_ctl_table trans_net_llc_llc2_timeout_table[] = { | |||
618 | {} | 614 | {} |
619 | }; | 615 | }; |
620 | 616 | ||
621 | static struct trans_ctl_table trans_net_llc_station_table[] = { | 617 | static const struct trans_ctl_table trans_net_llc_station_table[] = { |
622 | { NET_LLC_STATION_ACK_TIMEOUT, "ack_timeout" }, | 618 | { NET_LLC_STATION_ACK_TIMEOUT, "ack_timeout" }, |
623 | {} | 619 | {} |
624 | }; | 620 | }; |
625 | 621 | ||
626 | static struct trans_ctl_table trans_net_llc_llc2_table[] = { | 622 | static const struct trans_ctl_table trans_net_llc_llc2_table[] = { |
627 | { NET_LLC2, "timeout", trans_net_llc_llc2_timeout_table }, | 623 | { NET_LLC2, "timeout", trans_net_llc_llc2_timeout_table }, |
628 | {} | 624 | {} |
629 | }; | 625 | }; |
630 | 626 | ||
631 | static struct trans_ctl_table trans_net_llc_table[] = { | 627 | static const struct trans_ctl_table trans_net_llc_table[] = { |
632 | { NET_LLC2, "llc2", trans_net_llc_llc2_table }, | 628 | { NET_LLC2, "llc2", trans_net_llc_llc2_table }, |
633 | { NET_LLC_STATION, "station", trans_net_llc_station_table }, | 629 | { NET_LLC_STATION, "station", trans_net_llc_station_table }, |
634 | {} | 630 | {} |
635 | }; | 631 | }; |
636 | 632 | ||
637 | static struct trans_ctl_table trans_net_netfilter_table[] = { | 633 | static const struct trans_ctl_table trans_net_netfilter_table[] = { |
638 | { NET_NF_CONNTRACK_MAX, "nf_conntrack_max" }, | 634 | { NET_NF_CONNTRACK_MAX, "nf_conntrack_max" }, |
639 | { NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "nf_conntrack_tcp_timeout_syn_sent" }, | 635 | { NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "nf_conntrack_tcp_timeout_syn_sent" }, |
640 | { NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, "nf_conntrack_tcp_timeout_syn_recv" }, | 636 | { NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, "nf_conntrack_tcp_timeout_syn_recv" }, |
@@ -671,12 +667,12 @@ static struct trans_ctl_table trans_net_netfilter_table[] = { | |||
671 | {} | 667 | {} |
672 | }; | 668 | }; |
673 | 669 | ||
674 | static struct trans_ctl_table trans_net_dccp_table[] = { | 670 | static const struct trans_ctl_table trans_net_dccp_table[] = { |
675 | { NET_DCCP_DEFAULT, "default" }, | 671 | { NET_DCCP_DEFAULT, "default" }, |
676 | {} | 672 | {} |
677 | }; | 673 | }; |
678 | 674 | ||
679 | static struct trans_ctl_table trans_net_irda_table[] = { | 675 | static const struct trans_ctl_table trans_net_irda_table[] = { |
680 | { NET_IRDA_DISCOVERY, "discovery" }, | 676 | { NET_IRDA_DISCOVERY, "discovery" }, |
681 | { NET_IRDA_DEVNAME, "devname" }, | 677 | { NET_IRDA_DEVNAME, "devname" }, |
682 | { NET_IRDA_DEBUG, "debug" }, | 678 | { NET_IRDA_DEBUG, "debug" }, |
@@ -694,7 +690,7 @@ static struct trans_ctl_table trans_net_irda_table[] = { | |||
694 | {} | 690 | {} |
695 | }; | 691 | }; |
696 | 692 | ||
697 | static struct trans_ctl_table trans_net_table[] = { | 693 | static const struct trans_ctl_table trans_net_table[] = { |
698 | { NET_CORE, "core", trans_net_core_table }, | 694 | { NET_CORE, "core", trans_net_core_table }, |
699 | /* NET_ETHER not used */ | 695 | /* NET_ETHER not used */ |
700 | /* NET_802 not used */ | 696 | /* NET_802 not used */ |
@@ -720,7 +716,7 @@ static struct trans_ctl_table trans_net_table[] = { | |||
720 | {} | 716 | {} |
721 | }; | 717 | }; |
722 | 718 | ||
723 | static struct trans_ctl_table trans_fs_quota_table[] = { | 719 | static const struct trans_ctl_table trans_fs_quota_table[] = { |
724 | { FS_DQ_LOOKUPS, "lookups" }, | 720 | { FS_DQ_LOOKUPS, "lookups" }, |
725 | { FS_DQ_DROPS, "drops" }, | 721 | { FS_DQ_DROPS, "drops" }, |
726 | { FS_DQ_READS, "reads" }, | 722 | { FS_DQ_READS, "reads" }, |
@@ -733,7 +729,7 @@ static struct trans_ctl_table trans_fs_quota_table[] = { | |||
733 | {} | 729 | {} |
734 | }; | 730 | }; |
735 | 731 | ||
736 | static struct trans_ctl_table trans_fs_xfs_table[] = { | 732 | static const struct trans_ctl_table trans_fs_xfs_table[] = { |
737 | { XFS_RESTRICT_CHOWN, "restrict_chown" }, | 733 | { XFS_RESTRICT_CHOWN, "restrict_chown" }, |
738 | { XFS_SGID_INHERIT, "irix_sgid_inherit" }, | 734 | { XFS_SGID_INHERIT, "irix_sgid_inherit" }, |
739 | { XFS_SYMLINK_MODE, "irix_symlink_mode" }, | 735 | { XFS_SYMLINK_MODE, "irix_symlink_mode" }, |
@@ -754,24 +750,24 @@ static struct trans_ctl_table trans_fs_xfs_table[] = { | |||
754 | {} | 750 | {} |
755 | }; | 751 | }; |
756 | 752 | ||
757 | static struct trans_ctl_table trans_fs_ocfs2_nm_table[] = { | 753 | static const struct trans_ctl_table trans_fs_ocfs2_nm_table[] = { |
758 | { 1, "hb_ctl_path" }, | 754 | { 1, "hb_ctl_path" }, |
759 | {} | 755 | {} |
760 | }; | 756 | }; |
761 | 757 | ||
762 | static struct trans_ctl_table trans_fs_ocfs2_table[] = { | 758 | static const struct trans_ctl_table trans_fs_ocfs2_table[] = { |
763 | { 1, "nm", trans_fs_ocfs2_nm_table }, | 759 | { 1, "nm", trans_fs_ocfs2_nm_table }, |
764 | {} | 760 | {} |
765 | }; | 761 | }; |
766 | 762 | ||
767 | static struct trans_ctl_table trans_inotify_table[] = { | 763 | static const struct trans_ctl_table trans_inotify_table[] = { |
768 | { INOTIFY_MAX_USER_INSTANCES, "max_user_instances" }, | 764 | { INOTIFY_MAX_USER_INSTANCES, "max_user_instances" }, |
769 | { INOTIFY_MAX_USER_WATCHES, "max_user_watches" }, | 765 | { INOTIFY_MAX_USER_WATCHES, "max_user_watches" }, |
770 | { INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" }, | 766 | { INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" }, |
771 | {} | 767 | {} |
772 | }; | 768 | }; |
773 | 769 | ||
774 | static struct trans_ctl_table trans_fs_table[] = { | 770 | static const struct trans_ctl_table trans_fs_table[] = { |
775 | { FS_NRINODE, "inode-nr" }, | 771 | { FS_NRINODE, "inode-nr" }, |
776 | { FS_STATINODE, "inode-state" }, | 772 | { FS_STATINODE, "inode-state" }, |
777 | /* FS_MAXINODE unused */ | 773 | /* FS_MAXINODE unused */ |
@@ -797,11 +793,11 @@ static struct trans_ctl_table trans_fs_table[] = { | |||
797 | {} | 793 | {} |
798 | }; | 794 | }; |
799 | 795 | ||
800 | static struct trans_ctl_table trans_debug_table[] = { | 796 | static const struct trans_ctl_table trans_debug_table[] = { |
801 | {} | 797 | {} |
802 | }; | 798 | }; |
803 | 799 | ||
804 | static struct trans_ctl_table trans_cdrom_table[] = { | 800 | static const struct trans_ctl_table trans_cdrom_table[] = { |
805 | { DEV_CDROM_INFO, "info" }, | 801 | { DEV_CDROM_INFO, "info" }, |
806 | { DEV_CDROM_AUTOCLOSE, "autoclose" }, | 802 | { DEV_CDROM_AUTOCLOSE, "autoclose" }, |
807 | { DEV_CDROM_AUTOEJECT, "autoeject" }, | 803 | { DEV_CDROM_AUTOEJECT, "autoeject" }, |
@@ -811,12 +807,12 @@ static struct trans_ctl_table trans_cdrom_table[] = { | |||
811 | {} | 807 | {} |
812 | }; | 808 | }; |
813 | 809 | ||
814 | static struct trans_ctl_table trans_ipmi_table[] = { | 810 | static const struct trans_ctl_table trans_ipmi_table[] = { |
815 | { DEV_IPMI_POWEROFF_POWERCYCLE, "poweroff_powercycle" }, | 811 | { DEV_IPMI_POWEROFF_POWERCYCLE, "poweroff_powercycle" }, |
816 | {} | 812 | {} |
817 | }; | 813 | }; |
818 | 814 | ||
819 | static struct trans_ctl_table trans_mac_hid_files[] = { | 815 | static const struct trans_ctl_table trans_mac_hid_files[] = { |
820 | /* DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES unused */ | 816 | /* DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES unused */ |
821 | /* DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES unused */ | 817 | /* DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES unused */ |
822 | { DEV_MAC_HID_MOUSE_BUTTON_EMULATION, "mouse_button_emulation" }, | 818 | { DEV_MAC_HID_MOUSE_BUTTON_EMULATION, "mouse_button_emulation" }, |
@@ -826,35 +822,35 @@ static struct trans_ctl_table trans_mac_hid_files[] = { | |||
826 | {} | 822 | {} |
827 | }; | 823 | }; |
828 | 824 | ||
829 | static struct trans_ctl_table trans_raid_table[] = { | 825 | static const struct trans_ctl_table trans_raid_table[] = { |
830 | { DEV_RAID_SPEED_LIMIT_MIN, "speed_limit_min" }, | 826 | { DEV_RAID_SPEED_LIMIT_MIN, "speed_limit_min" }, |
831 | { DEV_RAID_SPEED_LIMIT_MAX, "speed_limit_max" }, | 827 | { DEV_RAID_SPEED_LIMIT_MAX, "speed_limit_max" }, |
832 | {} | 828 | {} |
833 | }; | 829 | }; |
834 | 830 | ||
835 | static struct trans_ctl_table trans_scsi_table[] = { | 831 | static const struct trans_ctl_table trans_scsi_table[] = { |
836 | { DEV_SCSI_LOGGING_LEVEL, "logging_level" }, | 832 | { DEV_SCSI_LOGGING_LEVEL, "logging_level" }, |
837 | {} | 833 | {} |
838 | }; | 834 | }; |
839 | 835 | ||
840 | static struct trans_ctl_table trans_parport_default_table[] = { | 836 | static const struct trans_ctl_table trans_parport_default_table[] = { |
841 | { DEV_PARPORT_DEFAULT_TIMESLICE, "timeslice" }, | 837 | { DEV_PARPORT_DEFAULT_TIMESLICE, "timeslice" }, |
842 | { DEV_PARPORT_DEFAULT_SPINTIME, "spintime" }, | 838 | { DEV_PARPORT_DEFAULT_SPINTIME, "spintime" }, |
843 | {} | 839 | {} |
844 | }; | 840 | }; |
845 | 841 | ||
846 | static struct trans_ctl_table trans_parport_device_table[] = { | 842 | static const struct trans_ctl_table trans_parport_device_table[] = { |
847 | { DEV_PARPORT_DEVICE_TIMESLICE, "timeslice" }, | 843 | { DEV_PARPORT_DEVICE_TIMESLICE, "timeslice" }, |
848 | {} | 844 | {} |
849 | }; | 845 | }; |
850 | 846 | ||
851 | static struct trans_ctl_table trans_parport_devices_table[] = { | 847 | static const struct trans_ctl_table trans_parport_devices_table[] = { |
852 | { DEV_PARPORT_DEVICES_ACTIVE, "active" }, | 848 | { DEV_PARPORT_DEVICES_ACTIVE, "active" }, |
853 | { 0, NULL, trans_parport_device_table }, | 849 | { 0, NULL, trans_parport_device_table }, |
854 | {} | 850 | {} |
855 | }; | 851 | }; |
856 | 852 | ||
857 | static struct trans_ctl_table trans_parport_parport_table[] = { | 853 | static const struct trans_ctl_table trans_parport_parport_table[] = { |
858 | { DEV_PARPORT_SPINTIME, "spintime" }, | 854 | { DEV_PARPORT_SPINTIME, "spintime" }, |
859 | { DEV_PARPORT_BASE_ADDR, "base-addr" }, | 855 | { DEV_PARPORT_BASE_ADDR, "base-addr" }, |
860 | { DEV_PARPORT_IRQ, "irq" }, | 856 | { DEV_PARPORT_IRQ, "irq" }, |
@@ -868,13 +864,13 @@ static struct trans_ctl_table trans_parport_parport_table[] = { | |||
868 | { DEV_PARPORT_AUTOPROBE + 4, "autoprobe3" }, | 864 | { DEV_PARPORT_AUTOPROBE + 4, "autoprobe3" }, |
869 | {} | 865 | {} |
870 | }; | 866 | }; |
871 | static struct trans_ctl_table trans_parport_table[] = { | 867 | static const struct trans_ctl_table trans_parport_table[] = { |
872 | { DEV_PARPORT_DEFAULT, "default", trans_parport_default_table }, | 868 | { DEV_PARPORT_DEFAULT, "default", trans_parport_default_table }, |
873 | { 0, NULL, trans_parport_parport_table }, | 869 | { 0, NULL, trans_parport_parport_table }, |
874 | {} | 870 | {} |
875 | }; | 871 | }; |
876 | 872 | ||
877 | static struct trans_ctl_table trans_dev_table[] = { | 873 | static const struct trans_ctl_table trans_dev_table[] = { |
878 | { DEV_CDROM, "cdrom", trans_cdrom_table }, | 874 | { DEV_CDROM, "cdrom", trans_cdrom_table }, |
879 | /* DEV_HWMON unused */ | 875 | /* DEV_HWMON unused */ |
880 | { DEV_PARPORT, "parport", trans_parport_table }, | 876 | { DEV_PARPORT, "parport", trans_parport_table }, |
@@ -885,19 +881,19 @@ static struct trans_ctl_table trans_dev_table[] = { | |||
885 | {} | 881 | {} |
886 | }; | 882 | }; |
887 | 883 | ||
888 | static struct trans_ctl_table trans_bus_isa_table[] = { | 884 | static const struct trans_ctl_table trans_bus_isa_table[] = { |
889 | { BUS_ISA_MEM_BASE, "membase" }, | 885 | { BUS_ISA_MEM_BASE, "membase" }, |
890 | { BUS_ISA_PORT_BASE, "portbase" }, | 886 | { BUS_ISA_PORT_BASE, "portbase" }, |
891 | { BUS_ISA_PORT_SHIFT, "portshift" }, | 887 | { BUS_ISA_PORT_SHIFT, "portshift" }, |
892 | {} | 888 | {} |
893 | }; | 889 | }; |
894 | 890 | ||
895 | static struct trans_ctl_table trans_bus_table[] = { | 891 | static const struct trans_ctl_table trans_bus_table[] = { |
896 | { CTL_BUS_ISA, "isa", trans_bus_isa_table }, | 892 | { CTL_BUS_ISA, "isa", trans_bus_isa_table }, |
897 | {} | 893 | {} |
898 | }; | 894 | }; |
899 | 895 | ||
900 | static struct trans_ctl_table trans_arlan_conf_table0[] = { | 896 | static const struct trans_ctl_table trans_arlan_conf_table0[] = { |
901 | { 1, "spreadingCode" }, | 897 | { 1, "spreadingCode" }, |
902 | { 2, "channelNumber" }, | 898 | { 2, "channelNumber" }, |
903 | { 3, "scramblingDisable" }, | 899 | { 3, "scramblingDisable" }, |
@@ -968,7 +964,7 @@ static struct trans_ctl_table trans_arlan_conf_table0[] = { | |||
968 | {} | 964 | {} |
969 | }; | 965 | }; |
970 | 966 | ||
971 | static struct trans_ctl_table trans_arlan_conf_table1[] = { | 967 | static const struct trans_ctl_table trans_arlan_conf_table1[] = { |
972 | { 1, "spreadingCode" }, | 968 | { 1, "spreadingCode" }, |
973 | { 2, "channelNumber" }, | 969 | { 2, "channelNumber" }, |
974 | { 3, "scramblingDisable" }, | 970 | { 3, "scramblingDisable" }, |
@@ -1039,7 +1035,7 @@ static struct trans_ctl_table trans_arlan_conf_table1[] = { | |||
1039 | {} | 1035 | {} |
1040 | }; | 1036 | }; |
1041 | 1037 | ||
1042 | static struct trans_ctl_table trans_arlan_conf_table2[] = { | 1038 | static const struct trans_ctl_table trans_arlan_conf_table2[] = { |
1043 | { 1, "spreadingCode" }, | 1039 | { 1, "spreadingCode" }, |
1044 | { 2, "channelNumber" }, | 1040 | { 2, "channelNumber" }, |
1045 | { 3, "scramblingDisable" }, | 1041 | { 3, "scramblingDisable" }, |
@@ -1110,7 +1106,7 @@ static struct trans_ctl_table trans_arlan_conf_table2[] = { | |||
1110 | {} | 1106 | {} |
1111 | }; | 1107 | }; |
1112 | 1108 | ||
1113 | static struct trans_ctl_table trans_arlan_conf_table3[] = { | 1109 | static const struct trans_ctl_table trans_arlan_conf_table3[] = { |
1114 | { 1, "spreadingCode" }, | 1110 | { 1, "spreadingCode" }, |
1115 | { 2, "channelNumber" }, | 1111 | { 2, "channelNumber" }, |
1116 | { 3, "scramblingDisable" }, | 1112 | { 3, "scramblingDisable" }, |
@@ -1181,7 +1177,7 @@ static struct trans_ctl_table trans_arlan_conf_table3[] = { | |||
1181 | {} | 1177 | {} |
1182 | }; | 1178 | }; |
1183 | 1179 | ||
1184 | static struct trans_ctl_table trans_arlan_table[] = { | 1180 | static const struct trans_ctl_table trans_arlan_table[] = { |
1185 | { 1, "arlan0", trans_arlan_conf_table0 }, | 1181 | { 1, "arlan0", trans_arlan_conf_table0 }, |
1186 | { 2, "arlan1", trans_arlan_conf_table1 }, | 1182 | { 2, "arlan1", trans_arlan_conf_table1 }, |
1187 | { 3, "arlan2", trans_arlan_conf_table2 }, | 1183 | { 3, "arlan2", trans_arlan_conf_table2 }, |
@@ -1189,13 +1185,13 @@ static struct trans_ctl_table trans_arlan_table[] = { | |||
1189 | {} | 1185 | {} |
1190 | }; | 1186 | }; |
1191 | 1187 | ||
1192 | static struct trans_ctl_table trans_s390dbf_table[] = { | 1188 | static const struct trans_ctl_table trans_s390dbf_table[] = { |
1193 | { 5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" }, | 1189 | { 5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" }, |
1194 | { 5679 /* CTL_S390DBF_ACTIVE */, "debug_active" }, | 1190 | { 5679 /* CTL_S390DBF_ACTIVE */, "debug_active" }, |
1195 | {} | 1191 | {} |
1196 | }; | 1192 | }; |
1197 | 1193 | ||
1198 | static struct trans_ctl_table trans_sunrpc_table[] = { | 1194 | static const struct trans_ctl_table trans_sunrpc_table[] = { |
1199 | { CTL_RPCDEBUG, "rpc_debug" }, | 1195 | { CTL_RPCDEBUG, "rpc_debug" }, |
1200 | { CTL_NFSDEBUG, "nfs_debug" }, | 1196 | { CTL_NFSDEBUG, "nfs_debug" }, |
1201 | { CTL_NFSDDEBUG, "nfsd_debug" }, | 1197 | { CTL_NFSDDEBUG, "nfsd_debug" }, |
@@ -1207,7 +1203,7 @@ static struct trans_ctl_table trans_sunrpc_table[] = { | |||
1207 | {} | 1203 | {} |
1208 | }; | 1204 | }; |
1209 | 1205 | ||
1210 | static struct trans_ctl_table trans_pm_table[] = { | 1206 | static const struct trans_ctl_table trans_pm_table[] = { |
1211 | { 1 /* CTL_PM_SUSPEND */, "suspend" }, | 1207 | { 1 /* CTL_PM_SUSPEND */, "suspend" }, |
1212 | { 2 /* CTL_PM_CMODE */, "cmode" }, | 1208 | { 2 /* CTL_PM_CMODE */, "cmode" }, |
1213 | { 3 /* CTL_PM_P0 */, "p0" }, | 1209 | { 3 /* CTL_PM_P0 */, "p0" }, |
@@ -1215,13 +1211,13 @@ static struct trans_ctl_table trans_pm_table[] = { | |||
1215 | {} | 1211 | {} |
1216 | }; | 1212 | }; |
1217 | 1213 | ||
1218 | static struct trans_ctl_table trans_frv_table[] = { | 1214 | static const struct trans_ctl_table trans_frv_table[] = { |
1219 | { 1, "cache-mode" }, | 1215 | { 1, "cache-mode" }, |
1220 | { 2, "pin-cxnr" }, | 1216 | { 2, "pin-cxnr" }, |
1221 | {} | 1217 | {} |
1222 | }; | 1218 | }; |
1223 | 1219 | ||
1224 | static struct trans_ctl_table trans_root_table[] = { | 1220 | static const struct trans_ctl_table trans_root_table[] = { |
1225 | { CTL_KERN, "kernel", trans_kern_table }, | 1221 | { CTL_KERN, "kernel", trans_kern_table }, |
1226 | { CTL_VM, "vm", trans_vm_table }, | 1222 | { CTL_VM, "vm", trans_vm_table }, |
1227 | { CTL_NET, "net", trans_net_table }, | 1223 | { CTL_NET, "net", trans_net_table }, |
@@ -1265,15 +1261,14 @@ static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) | |||
1265 | return table; | 1261 | return table; |
1266 | } | 1262 | } |
1267 | 1263 | ||
1268 | static struct trans_ctl_table *sysctl_binary_lookup(struct ctl_table *table) | 1264 | static const struct trans_ctl_table *sysctl_binary_lookup(struct ctl_table *table) |
1269 | { | 1265 | { |
1270 | struct ctl_table *test; | 1266 | struct ctl_table *test; |
1271 | struct trans_ctl_table *ref; | 1267 | const struct trans_ctl_table *ref; |
1272 | int depth, cur_depth; | 1268 | int cur_depth; |
1273 | 1269 | ||
1274 | depth = sysctl_depth(table); | 1270 | cur_depth = sysctl_depth(table); |
1275 | 1271 | ||
1276 | cur_depth = depth; | ||
1277 | ref = trans_root_table; | 1272 | ref = trans_root_table; |
1278 | repeat: | 1273 | repeat: |
1279 | test = sysctl_parent(table, cur_depth); | 1274 | test = sysctl_parent(table, cur_depth); |
@@ -1441,7 +1436,7 @@ static void sysctl_check_leaf(struct nsproxy *namespaces, | |||
1441 | 1436 | ||
1442 | static void sysctl_check_bin_path(struct ctl_table *table, const char **fail) | 1437 | static void sysctl_check_bin_path(struct ctl_table *table, const char **fail) |
1443 | { | 1438 | { |
1444 | struct trans_ctl_table *ref; | 1439 | const struct trans_ctl_table *ref; |
1445 | 1440 | ||
1446 | ref = sysctl_binary_lookup(table); | 1441 | ref = sysctl_binary_lookup(table); |
1447 | if (table->ctl_name && !ref) | 1442 | if (table->ctl_name && !ref) |
@@ -1498,9 +1493,6 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | |||
1498 | (table->strategy == sysctl_ms_jiffies) || | 1493 | (table->strategy == sysctl_ms_jiffies) || |
1499 | (table->proc_handler == proc_dostring) || | 1494 | (table->proc_handler == proc_dostring) || |
1500 | (table->proc_handler == proc_dointvec) || | 1495 | (table->proc_handler == proc_dointvec) || |
1501 | #ifdef CONFIG_SECURITY_CAPABILITIES | ||
1502 | (table->proc_handler == proc_dointvec_bset) || | ||
1503 | #endif /* def CONFIG_SECURITY_CAPABILITIES */ | ||
1504 | (table->proc_handler == proc_dointvec_minmax) || | 1496 | (table->proc_handler == proc_dointvec_minmax) || |
1505 | (table->proc_handler == proc_dointvec_jiffies) || | 1497 | (table->proc_handler == proc_dointvec_jiffies) || |
1506 | (table->proc_handler == proc_dointvec_userhz_jiffies) || | 1498 | (table->proc_handler == proc_dointvec_userhz_jiffies) || |
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c index 88cdb109e13c..06b6395b45b2 100644 --- a/kernel/test_kprobes.c +++ b/kernel/test_kprobes.c | |||
@@ -135,6 +135,12 @@ static int test_jprobe(void) | |||
135 | #ifdef CONFIG_KRETPROBES | 135 | #ifdef CONFIG_KRETPROBES |
136 | static u32 krph_val; | 136 | static u32 krph_val; |
137 | 137 | ||
138 | static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) | ||
139 | { | ||
140 | krph_val = (rand1 / div_factor); | ||
141 | return 0; | ||
142 | } | ||
143 | |||
138 | static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) | 144 | static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) |
139 | { | 145 | { |
140 | unsigned long ret = regs_return_value(regs); | 146 | unsigned long ret = regs_return_value(regs); |
@@ -144,13 +150,19 @@ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) | |||
144 | printk(KERN_ERR "Kprobe smoke test failed: " | 150 | printk(KERN_ERR "Kprobe smoke test failed: " |
145 | "incorrect value in kretprobe handler\n"); | 151 | "incorrect value in kretprobe handler\n"); |
146 | } | 152 | } |
153 | if (krph_val == 0) { | ||
154 | handler_errors++; | ||
155 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
156 | "call to kretprobe entry handler failed\n"); | ||
157 | } | ||
147 | 158 | ||
148 | krph_val = (rand1 / div_factor); | 159 | krph_val = rand1; |
149 | return 0; | 160 | return 0; |
150 | } | 161 | } |
151 | 162 | ||
152 | static struct kretprobe rp = { | 163 | static struct kretprobe rp = { |
153 | .handler = return_handler, | 164 | .handler = return_handler, |
165 | .entry_handler = entry_handler, | ||
154 | .kp.symbol_name = "kprobe_target" | 166 | .kp.symbol_name = "kprobe_target" |
155 | }; | 167 | }; |
156 | 168 | ||
@@ -167,7 +179,7 @@ static int test_kretprobe(void) | |||
167 | 179 | ||
168 | ret = kprobe_target(rand1); | 180 | ret = kprobe_target(rand1); |
169 | unregister_kretprobe(&rp); | 181 | unregister_kretprobe(&rp); |
170 | if (krph_val == 0) { | 182 | if (krph_val != rand1) { |
171 | printk(KERN_ERR "Kprobe smoke test failed: " | 183 | printk(KERN_ERR "Kprobe smoke test failed: " |
172 | "kretprobe handler not called\n"); | 184 | "kretprobe handler not called\n"); |
173 | handler_errors++; | 185 | handler_errors++; |
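The smoke test now exercises the kretprobe entry_handler path: the entry handler stamps krph_val when the probed function is entered, the return handler verifies both the return value and that the entry handler ran, then overwrites krph_val with rand1 so test_kretprobe() can confirm the return handler itself was called. Outside the test, the same handler pairing looks roughly like this module sketch (the probed symbol and messages are illustrative):

/* Sketch of a kretprobe using both handlers, as the test above does. */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

static int my_entry(struct kretprobe_instance *ri, struct pt_regs *regs)
{
        printk(KERN_INFO "entering probed function\n");
        return 0;                       /* 0 = also run the return handler */
}

static int my_ret(struct kretprobe_instance *ri, struct pt_regs *regs)
{
        printk(KERN_INFO "returned %lu\n", regs_return_value(regs));
        return 0;
}

static struct kretprobe my_rp = {
        .entry_handler  = my_entry,
        .handler        = my_ret,
        .kp.symbol_name = "do_fork",    /* assumed example target */
};

static int __init my_init(void)
{
        return register_kretprobe(&my_rp);
}

static void __exit my_exit(void)
{
        unregister_kretprobe(&my_rp);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");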
diff --git a/kernel/time.c b/kernel/time.c index 4064c0566e77..a5ec013b6c80 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -39,6 +39,8 @@ | |||
39 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
40 | #include <asm/unistd.h> | 40 | #include <asm/unistd.h> |
41 | 41 | ||
42 | #include "timeconst.h" | ||
43 | |||
42 | /* | 44 | /* |
43 | * The timezone where the local system is located. Used as a default by some | 45 | * The timezone where the local system is located. Used as a default by some |
44 | * programs who obtain this value by using gettimeofday. | 46 | * programs who obtain this value by using gettimeofday. |
@@ -93,7 +95,8 @@ asmlinkage long sys_stime(time_t __user *tptr) | |||
93 | 95 | ||
94 | #endif /* __ARCH_WANT_SYS_TIME */ | 96 | #endif /* __ARCH_WANT_SYS_TIME */ |
95 | 97 | ||
96 | asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __user *tz) | 98 | asmlinkage long sys_gettimeofday(struct timeval __user *tv, |
99 | struct timezone __user *tz) | ||
97 | { | 100 | { |
98 | if (likely(tv != NULL)) { | 101 | if (likely(tv != NULL)) { |
99 | struct timeval ktv; | 102 | struct timeval ktv; |
@@ -118,7 +121,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __us | |||
118 | * hard to make the program warp the clock precisely n hours) or | 121 | * hard to make the program warp the clock precisely n hours) or |
119 | * compile in the timezone information into the kernel. Bad, bad.... | 122 | * compile in the timezone information into the kernel. Bad, bad.... |
120 | * | 123 | * |
121 | * - TYT, 1992-01-01 | 124 | * - TYT, 1992-01-01 |
122 | * | 125 | * |
123 | * The best thing to do is to keep the CMOS clock in universal time (UTC) | 126 | * The best thing to do is to keep the CMOS clock in universal time (UTC) |
124 | * as real UNIX machines always do it. This avoids all headaches about | 127 | * as real UNIX machines always do it. This avoids all headaches about |
@@ -240,7 +243,11 @@ unsigned int inline jiffies_to_msecs(const unsigned long j) | |||
240 | #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) | 243 | #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) |
241 | return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); | 244 | return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); |
242 | #else | 245 | #else |
243 | return (j * MSEC_PER_SEC) / HZ; | 246 | # if BITS_PER_LONG == 32 |
247 | return ((u64)HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32; | ||
248 | # else | ||
249 | return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN; | ||
250 | # endif | ||
244 | #endif | 251 | #endif |
245 | } | 252 | } |
246 | EXPORT_SYMBOL(jiffies_to_msecs); | 253 | EXPORT_SYMBOL(jiffies_to_msecs); |
@@ -252,7 +259,11 @@ unsigned int inline jiffies_to_usecs(const unsigned long j) | |||
252 | #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) | 259 | #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) |
253 | return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC); | 260 | return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC); |
254 | #else | 261 | #else |
255 | return (j * USEC_PER_SEC) / HZ; | 262 | # if BITS_PER_LONG == 32 |
263 | return ((u64)HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32; | ||
264 | # else | ||
265 | return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN; | ||
266 | # endif | ||
256 | #endif | 267 | #endif |
257 | } | 268 | } |
258 | EXPORT_SYMBOL(jiffies_to_usecs); | 269 | EXPORT_SYMBOL(jiffies_to_usecs); |
@@ -267,7 +278,7 @@ EXPORT_SYMBOL(jiffies_to_usecs); | |||
267 | * | 278 | * |
268 | * This function should be only used for timestamps returned by | 279 | * This function should be only used for timestamps returned by |
269 | * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because | 280 | * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because |
270 | * it doesn't handle the better resolution of the later. | 281 | * it doesn't handle the better resolution of the latter. |
271 | */ | 282 | */ |
272 | struct timespec timespec_trunc(struct timespec t, unsigned gran) | 283 | struct timespec timespec_trunc(struct timespec t, unsigned gran) |
273 | { | 284 | { |
@@ -315,7 +326,7 @@ EXPORT_SYMBOL_GPL(getnstimeofday); | |||
315 | * This algorithm was first published by Gauss (I think). | 326 | * This algorithm was first published by Gauss (I think). |
316 | * | 327 | * |
317 | * WARNING: this function will overflow on 2106-02-07 06:28:16 on | 328 | * WARNING: this function will overflow on 2106-02-07 06:28:16 on |
318 | * machines were long is 32-bit! (However, as time_t is signed, we | 329 | * machines where long is 32-bit! (However, as time_t is signed, we |
319 | * will already get problems at other places on 2038-01-19 03:14:08) | 330 | * will already get problems at other places on 2038-01-19 03:14:08) |
320 | */ | 331 | */ |
321 | unsigned long | 332 | unsigned long |
@@ -352,7 +363,7 @@ EXPORT_SYMBOL(mktime); | |||
352 | * normalize to the timespec storage format | 363 | * normalize to the timespec storage format |
353 | * | 364 | * |
354 | * Note: The tv_nsec part is always in the range of | 365 | * Note: The tv_nsec part is always in the range of |
355 | * 0 <= tv_nsec < NSEC_PER_SEC | 366 | * 0 <= tv_nsec < NSEC_PER_SEC |
356 | * For negative values only the tv_sec field is negative ! | 367 | * For negative values only the tv_sec field is negative ! |
357 | */ | 368 | */ |
358 | void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) | 369 | void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) |
@@ -453,12 +464,13 @@ unsigned long msecs_to_jiffies(const unsigned int m) | |||
453 | /* | 464 | /* |
454 | * Generic case - multiply, round and divide. But first | 465 | * Generic case - multiply, round and divide. But first |
455 | * check that if we are doing a net multiplication, that | 466 | * check that if we are doing a net multiplication, that |
456 | * we wouldnt overflow: | 467 | * we wouldn't overflow: |
457 | */ | 468 | */ |
458 | if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) | 469 | if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) |
459 | return MAX_JIFFY_OFFSET; | 470 | return MAX_JIFFY_OFFSET; |
460 | 471 | ||
461 | return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; | 472 | return ((u64)MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) |
473 | >> MSEC_TO_HZ_SHR32; | ||
462 | #endif | 474 | #endif |
463 | } | 475 | } |
464 | EXPORT_SYMBOL(msecs_to_jiffies); | 476 | EXPORT_SYMBOL(msecs_to_jiffies); |
@@ -472,7 +484,8 @@ unsigned long usecs_to_jiffies(const unsigned int u) | |||
472 | #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) | 484 | #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) |
473 | return u * (HZ / USEC_PER_SEC); | 485 | return u * (HZ / USEC_PER_SEC); |
474 | #else | 486 | #else |
475 | return (u * HZ + USEC_PER_SEC - 1) / USEC_PER_SEC; | 487 | return ((u64)USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) |
488 | >> USEC_TO_HZ_SHR32; | ||
476 | #endif | 489 | #endif |
477 | } | 490 | } |
478 | EXPORT_SYMBOL(usecs_to_jiffies); | 491 | EXPORT_SYMBOL(usecs_to_jiffies); |
@@ -566,7 +579,11 @@ EXPORT_SYMBOL(jiffies_to_timeval); | |||
566 | clock_t jiffies_to_clock_t(long x) | 579 | clock_t jiffies_to_clock_t(long x) |
567 | { | 580 | { |
568 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 | 581 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 |
582 | # if HZ < USER_HZ | ||
583 | return x * (USER_HZ / HZ); | ||
584 | # else | ||
569 | return x / (HZ / USER_HZ); | 585 | return x / (HZ / USER_HZ); |
586 | # endif | ||
570 | #else | 587 | #else |
571 | u64 tmp = (u64)x * TICK_NSEC; | 588 | u64 tmp = (u64)x * TICK_NSEC; |
572 | do_div(tmp, (NSEC_PER_SEC / USER_HZ)); | 589 | do_div(tmp, (NSEC_PER_SEC / USER_HZ)); |
@@ -599,7 +616,14 @@ EXPORT_SYMBOL(clock_t_to_jiffies); | |||
599 | u64 jiffies_64_to_clock_t(u64 x) | 616 | u64 jiffies_64_to_clock_t(u64 x) |
600 | { | 617 | { |
601 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 | 618 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 |
619 | # if HZ < USER_HZ | ||
620 | x *= USER_HZ; | ||
621 | do_div(x, HZ); | ||
622 | # elif HZ > USER_HZ | ||
602 | do_div(x, HZ / USER_HZ); | 623 | do_div(x, HZ / USER_HZ); |
624 | # else | ||
625 | /* Nothing to do */ | ||
626 | # endif | ||
603 | #else | 627 | #else |
604 | /* | 628 | /* |
605 | * There are better ways that don't overflow early, | 629 | * There are better ways that don't overflow early, |
@@ -611,7 +635,6 @@ u64 jiffies_64_to_clock_t(u64 x) | |||
611 | #endif | 635 | #endif |
612 | return x; | 636 | return x; |
613 | } | 637 | } |
614 | |||
615 | EXPORT_SYMBOL(jiffies_64_to_clock_t); | 638 | EXPORT_SYMBOL(jiffies_64_to_clock_t); |
616 | 639 | ||
617 | u64 nsec_to_clock_t(u64 x) | 640 | u64 nsec_to_clock_t(u64 x) |
@@ -646,7 +669,6 @@ u64 get_jiffies_64(void) | |||
646 | } while (read_seqretry(&xtime_lock, seq)); | 669 | } while (read_seqretry(&xtime_lock, seq)); |
647 | return ret; | 670 | return ret; |
648 | } | 671 | } |
649 | |||
650 | EXPORT_SYMBOL(get_jiffies_64); | 672 | EXPORT_SYMBOL(get_jiffies_64); |
651 | #endif | 673 | #endif |
652 | 674 | ||
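
The #else branches above swap the old multiply-and-divide for a reciprocal multiply and shift, using constants from the generated timeconst.h. As a worked illustration (not part of the patch), take HZ == 300, whose 32-bit constants appear in the canned table of kernel/timeconst.pl further down: one jiffy is 10/3 ms, HZ_TO_MSEC_MUL32 is 0xd5555556 with a shift of 30, and the reverse direction uses MUL32 0x9999999a, ADJ32 0x1cccccccc and a shift of 33. A stand-alone round-trip check:

/* Worked example of the 32-bit multiply-and-shift conversion for
 * HZ == 300; constant values are taken from the canned table in
 * kernel/timeconst.pl below, and the macro names mirror the
 * generated timeconst.h. */
#include <stdint.h>
#include <stdio.h>

#define HZ_TO_MSEC_MUL32	0xd5555556	/* ceil((10/3) * 2^30) */
#define HZ_TO_MSEC_SHR32	30
#define MSEC_TO_HZ_MUL32	0x9999999a	/* ceil((3/10) * 2^33) */
#define MSEC_TO_HZ_ADJ32	0x1cccccccc	/* ((10 - 1) << 33) / 10 */
#define MSEC_TO_HZ_SHR32	33

int main(void)
{
	unsigned long j = 3, m;

	/* jiffies -> msecs: 3 jiffies at HZ == 300 are exactly 10 ms */
	m = ((uint64_t)HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;

	/* msecs -> jiffies: the ADJ term rounds up, giving 3 back */
	j = ((uint64_t)MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32)
		>> MSEC_TO_HZ_SHR32;

	printf("%lu ms, %lu jiffies\n", m, j);	/* prints "10 ms, 3 jiffies" */
	return 0;
}
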
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 3e59fce6dd43..3d1e3e1a1971 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -133,7 +133,7 @@ static void clockevents_do_notify(unsigned long reason, void *dev) | |||
133 | } | 133 | } |
134 | 134 | ||
135 | /* | 135 | /* |
136 | * Called after a notify add to make devices availble which were | 136 | * Called after a notify add to make devices available which were |
137 | * released from the notifier call. | 137 | * released from the notifier call. |
138 | */ | 138 | */ |
139 | static void clockevents_notify_released(void) | 139 | static void clockevents_notify_released(void) |
@@ -218,6 +218,8 @@ void clockevents_exchange_device(struct clock_event_device *old, | |||
218 | */ | 218 | */ |
219 | void clockevents_notify(unsigned long reason, void *arg) | 219 | void clockevents_notify(unsigned long reason, void *arg) |
220 | { | 220 | { |
221 | struct list_head *node, *tmp; | ||
222 | |||
221 | spin_lock(&clockevents_lock); | 223 | spin_lock(&clockevents_lock); |
222 | clockevents_do_notify(reason, arg); | 224 | clockevents_do_notify(reason, arg); |
223 | 225 | ||
@@ -227,13 +229,8 @@ void clockevents_notify(unsigned long reason, void *arg) | |||
227 | * Unregister the clock event devices which were | 229 | * Unregister the clock event devices which were |
228 | * released from the users in the notify chain. | 230 | * released from the users in the notify chain. |
229 | */ | 231 | */ |
230 | while (!list_empty(&clockevents_released)) { | 232 | list_for_each_safe(node, tmp, &clockevents_released) |
231 | struct clock_event_device *dev; | 233 | list_del(node); |
232 | |||
233 | dev = list_entry(clockevents_released.next, | ||
234 | struct clock_event_device, list); | ||
235 | list_del(&dev->list); | ||
236 | } | ||
237 | break; | 234 | break; |
238 | default: | 235 | default: |
239 | break; | 236 | break; |
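
The loop rewrite above leans on list_for_each_safe(), which caches the next pointer before each iteration so the current node can be unlinked inside the body. A minimal sketch of the idiom, assuming a hypothetical list head some_list:

#include <linux/list.h>

static LIST_HEAD(some_list);	/* hypothetical list of released devices */

static void drop_all_nodes(void)
{
	struct list_head *node, *tmp;

	/*
	 * 'tmp' holds node->next before the body runs, so list_del(node)
	 * cannot break the traversal; plain list_for_each() would chase a
	 * poisoned next pointer after the first deletion.
	 */
	list_for_each_safe(node, tmp, &some_list)
		list_del(node);
}
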
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 6e9259a5d501..548c436a776b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -91,7 +91,6 @@ static void clocksource_ratewd(struct clocksource *cs, int64_t delta) | |||
91 | cs->name, delta); | 91 | cs->name, delta); |
92 | cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); | 92 | cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); |
93 | clocksource_change_rating(cs, 0); | 93 | clocksource_change_rating(cs, 0); |
94 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
95 | list_del(&cs->wd_list); | 94 | list_del(&cs->wd_list); |
96 | } | 95 | } |
97 | 96 | ||
@@ -363,15 +362,13 @@ void clocksource_unregister(struct clocksource *cs) | |||
363 | static ssize_t | 362 | static ssize_t |
364 | sysfs_show_current_clocksources(struct sys_device *dev, char *buf) | 363 | sysfs_show_current_clocksources(struct sys_device *dev, char *buf) |
365 | { | 364 | { |
366 | char *curr = buf; | 365 | ssize_t count = 0; |
367 | 366 | ||
368 | spin_lock_irq(&clocksource_lock); | 367 | spin_lock_irq(&clocksource_lock); |
369 | curr += sprintf(curr, "%s ", curr_clocksource->name); | 368 | count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); |
370 | spin_unlock_irq(&clocksource_lock); | 369 | spin_unlock_irq(&clocksource_lock); |
371 | 370 | ||
372 | curr += sprintf(curr, "\n"); | 371 | return count; |
373 | |||
374 | return curr - buf; | ||
375 | } | 372 | } |
376 | 373 | ||
377 | /** | 374 | /** |
@@ -439,17 +436,20 @@ static ssize_t | |||
439 | sysfs_show_available_clocksources(struct sys_device *dev, char *buf) | 436 | sysfs_show_available_clocksources(struct sys_device *dev, char *buf) |
440 | { | 437 | { |
441 | struct clocksource *src; | 438 | struct clocksource *src; |
442 | char *curr = buf; | 439 | ssize_t count = 0; |
443 | 440 | ||
444 | spin_lock_irq(&clocksource_lock); | 441 | spin_lock_irq(&clocksource_lock); |
445 | list_for_each_entry(src, &clocksource_list, list) { | 442 | list_for_each_entry(src, &clocksource_list, list) { |
446 | curr += sprintf(curr, "%s ", src->name); | 443 | count += snprintf(buf + count, |
444 | max((ssize_t)PAGE_SIZE - count, (ssize_t)0), | ||
445 | "%s ", src->name); | ||
447 | } | 446 | } |
448 | spin_unlock_irq(&clocksource_lock); | 447 | spin_unlock_irq(&clocksource_lock); |
449 | 448 | ||
450 | curr += sprintf(curr, "\n"); | 449 | count += snprintf(buf + count, |
450 | max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); | ||
451 | 451 | ||
452 | return curr - buf; | 452 | return count; |
453 | } | 453 | } |
454 | 454 | ||
455 | /* | 455 | /* |
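
Both show() methods above switch from sprintf() into the raw sysfs buffer to snprintf() capped at PAGE_SIZE, accumulating the length in count and clamping the remaining space so a long list of clocksource names cannot overflow the one-page buffer. The same append pattern in a stand-alone user-space form; the names and buffer size here are illustrative only:

#include <stdio.h>
#include <sys/types.h>

#define PAGE_SIZE 4096	/* a sysfs show() buffer is one page */

/* Append names into buf, never writing past PAGE_SIZE; mirrors the
 * bounded-append pattern of sysfs_show_available_clocksources(). */
static ssize_t format_names(char *buf, const char *const *names, int n)
{
	ssize_t count = 0;
	int i;

	for (i = 0; i < n; i++)
		count += snprintf(buf + count,
				  count < PAGE_SIZE ? PAGE_SIZE - count : 0,
				  "%s ", names[i]);
	count += snprintf(buf + count,
			  count < PAGE_SIZE ? PAGE_SIZE - count : 0, "\n");
	return count;	/* may exceed PAGE_SIZE if output was truncated */
}

int main(void)
{
	static const char *const names[] = { "tsc", "hpet", "acpi_pm" };
	char page[PAGE_SIZE];

	format_names(page, names, 3);
	fputs(page, stdout);	/* "tsc hpet acpi_pm \n" */
	return 0;
}
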
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index e64efaf957e8..c88b5910e7ab 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -43,10 +43,6 @@ long time_freq; /* frequency offset (scaled ppm)*/ | |||
43 | static long time_reftime; /* time at last adjustment (s) */ | 43 | static long time_reftime; /* time at last adjustment (s) */ |
44 | long time_adjust; | 44 | long time_adjust; |
45 | 45 | ||
46 | #define CLOCK_TICK_OVERFLOW (LATCH * HZ - CLOCK_TICK_RATE) | ||
47 | #define CLOCK_TICK_ADJUST (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \ | ||
48 | (s64)CLOCK_TICK_RATE) | ||
49 | |||
50 | static void ntp_update_frequency(void) | 46 | static void ntp_update_frequency(void) |
51 | { | 47 | { |
52 | u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) | 48 | u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 88267f0a8471..fa9bb73dbdb4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -681,7 +681,7 @@ int tick_check_oneshot_change(int allow_nohz) | |||
681 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) | 681 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) |
682 | return 0; | 682 | return 0; |
683 | 683 | ||
684 | if (!timekeeping_is_continuous() || !tick_is_oneshot_available()) | 684 | if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) |
685 | return 0; | 685 | return 0; |
686 | 686 | ||
687 | if (!allow_nohz) | 687 | if (!allow_nohz) |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cd5dbc4579c9..1af9fb050fe2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -201,9 +201,9 @@ static inline s64 __get_nsec_offset(void) { return 0; } | |||
201 | #endif | 201 | #endif |
202 | 202 | ||
203 | /** | 203 | /** |
204 | * timekeeping_is_continuous - check to see if timekeeping is free running | 204 | * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres |
205 | */ | 205 | */ |
206 | int timekeeping_is_continuous(void) | 206 | int timekeeping_valid_for_hres(void) |
207 | { | 207 | { |
208 | unsigned long seq; | 208 | unsigned long seq; |
209 | int ret; | 209 | int ret; |
@@ -364,7 +364,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, | |||
364 | * with losing too many ticks, otherwise we would overadjust and | 364 | * with losing too many ticks, otherwise we would overadjust and |
365 | * produce an even larger error. The smaller the adjustment the | 365 | * produce an even larger error. The smaller the adjustment the |
366 | * faster we try to adjust for it, as lost ticks can do less harm | 366 | * faster we try to adjust for it, as lost ticks can do less harm |
367 | * here. This is tuned so that an error of about 1 msec is adusted | 367 | * here. This is tuned so that an error of about 1 msec is adjusted |
368 | * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). | 368 | * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). |
369 | */ | 369 | */ |
370 | error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ); | 370 | error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ); |
diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl new file mode 100644 index 000000000000..41468035473c --- /dev/null +++ b/kernel/timeconst.pl | |||
@@ -0,0 +1,402 @@ | |||
1 | #!/usr/bin/perl | ||
2 | # ----------------------------------------------------------------------- | ||
3 | # | ||
4 | # Copyright 2007 rPath, Inc. - All Rights Reserved | ||
5 | # | ||
6 | # This file is part of the Linux kernel, and is made available under | ||
7 | # the terms of the GNU General Public License version 2 or (at your | ||
8 | # option) any later version; incorporated herein by reference. | ||
9 | # | ||
10 | # ----------------------------------------------------------------------- | ||
11 | # | ||
12 | |||
13 | # | ||
14 | # Usage: timeconst.pl HZ > timeconst.h | ||
15 | # | ||
16 | |||
17 | # Precomputed values for systems without Math::BigInt | ||
18 | # Generated by: | ||
19 | # timeconst.pl --can 24 32 48 64 100 122 128 200 250 256 300 512 1000 1024 1200 | ||
20 | %canned_values = ( | ||
21 | 24 => [ | ||
22 | '0xa6aaaaab','0x2aaaaaa',26, | ||
23 | '0xa6aaaaaaaaaaaaab','0x2aaaaaaaaaaaaaa',58, | ||
24 | 125,3, | ||
25 | '0xc49ba5e4','0x1fbe76c8b4',37, | ||
26 | '0xc49ba5e353f7ceda','0x1fbe76c8b439581062',69, | ||
27 | 3,125, | ||
28 | '0xa2c2aaab','0xaaaa',16, | ||
29 | '0xa2c2aaaaaaaaaaab','0xaaaaaaaaaaaa',48, | ||
30 | 125000,3, | ||
31 | '0xc9539b89','0x7fffbce4217d',47, | ||
32 | '0xc9539b8887229e91','0x7fffbce4217d2849cb25',79, | ||
33 | 3,125000, | ||
34 | ], 32 => [ | ||
35 | '0xfa000000','0x6000000',27, | ||
36 | '0xfa00000000000000','0x600000000000000',59, | ||
37 | 125,4, | ||
38 | '0x83126e98','0xfdf3b645a',36, | ||
39 | '0x83126e978d4fdf3c','0xfdf3b645a1cac0831',68, | ||
40 | 4,125, | ||
41 | '0xf4240000','0x0',17, | ||
42 | '0xf424000000000000','0x0',49, | ||
43 | 31250,1, | ||
44 | '0x8637bd06','0x3fff79c842fa',46, | ||
45 | '0x8637bd05af6c69b6','0x3fff79c842fa5093964a',78, | ||
46 | 1,31250, | ||
47 | ], 48 => [ | ||
48 | '0xa6aaaaab','0x6aaaaaa',27, | ||
49 | '0xa6aaaaaaaaaaaaab','0x6aaaaaaaaaaaaaa',59, | ||
50 | 125,6, | ||
51 | '0xc49ba5e4','0xfdf3b645a',36, | ||
52 | '0xc49ba5e353f7ceda','0xfdf3b645a1cac0831',68, | ||
53 | 6,125, | ||
54 | '0xa2c2aaab','0x15555',17, | ||
55 | '0xa2c2aaaaaaaaaaab','0x1555555555555',49, | ||
56 | 62500,3, | ||
57 | '0xc9539b89','0x3fffbce4217d',46, | ||
58 | '0xc9539b8887229e91','0x3fffbce4217d2849cb25',78, | ||
59 | 3,62500, | ||
60 | ], 64 => [ | ||
61 | '0xfa000000','0xe000000',28, | ||
62 | '0xfa00000000000000','0xe00000000000000',60, | ||
63 | 125,8, | ||
64 | '0x83126e98','0x7ef9db22d',35, | ||
65 | '0x83126e978d4fdf3c','0x7ef9db22d0e560418',67, | ||
66 | 8,125, | ||
67 | '0xf4240000','0x0',18, | ||
68 | '0xf424000000000000','0x0',50, | ||
69 | 15625,1, | ||
70 | '0x8637bd06','0x1fff79c842fa',45, | ||
71 | '0x8637bd05af6c69b6','0x1fff79c842fa5093964a',77, | ||
72 | 1,15625, | ||
73 | ], 100 => [ | ||
74 | '0xa0000000','0x0',28, | ||
75 | '0xa000000000000000','0x0',60, | ||
76 | 10,1, | ||
77 | '0xcccccccd','0x733333333',35, | ||
78 | '0xcccccccccccccccd','0x73333333333333333',67, | ||
79 | 1,10, | ||
80 | '0x9c400000','0x0',18, | ||
81 | '0x9c40000000000000','0x0',50, | ||
82 | 10000,1, | ||
83 | '0xd1b71759','0x1fff2e48e8a7',45, | ||
84 | '0xd1b71758e219652c','0x1fff2e48e8a71de69ad4',77, | ||
85 | 1,10000, | ||
86 | ], 122 => [ | ||
87 | '0x8325c53f','0xfbcda3a',28, | ||
88 | '0x8325c53ef368eb05','0xfbcda3ac10c9714',60, | ||
89 | 500,61, | ||
90 | '0xf9db22d1','0x7fbe76c8b',35, | ||
91 | '0xf9db22d0e560418a','0x7fbe76c8b43958106',67, | ||
92 | 61,500, | ||
93 | '0x8012e2a0','0x3ef36',18, | ||
94 | '0x8012e29f79b47583','0x3ef368eb04325',50, | ||
95 | 500000,61, | ||
96 | '0xffda4053','0x1ffffbce4217',45, | ||
97 | '0xffda4052d666a983','0x1ffffbce4217d2849cb2',77, | ||
98 | 61,500000, | ||
99 | ], 128 => [ | ||
100 | '0xfa000000','0x1e000000',29, | ||
101 | '0xfa00000000000000','0x1e00000000000000',61, | ||
102 | 125,16, | ||
103 | '0x83126e98','0x3f7ced916',34, | ||
104 | '0x83126e978d4fdf3c','0x3f7ced916872b020c',66, | ||
105 | 16,125, | ||
106 | '0xf4240000','0x40000',19, | ||
107 | '0xf424000000000000','0x4000000000000',51, | ||
108 | 15625,2, | ||
109 | '0x8637bd06','0xfffbce4217d',44, | ||
110 | '0x8637bd05af6c69b6','0xfffbce4217d2849cb25',76, | ||
111 | 2,15625, | ||
112 | ], 200 => [ | ||
113 | '0xa0000000','0x0',29, | ||
114 | '0xa000000000000000','0x0',61, | ||
115 | 5,1, | ||
116 | '0xcccccccd','0x333333333',34, | ||
117 | '0xcccccccccccccccd','0x33333333333333333',66, | ||
118 | 1,5, | ||
119 | '0x9c400000','0x0',19, | ||
120 | '0x9c40000000000000','0x0',51, | ||
121 | 5000,1, | ||
122 | '0xd1b71759','0xfff2e48e8a7',44, | ||
123 | '0xd1b71758e219652c','0xfff2e48e8a71de69ad4',76, | ||
124 | 1,5000, | ||
125 | ], 250 => [ | ||
126 | '0x80000000','0x0',29, | ||
127 | '0x8000000000000000','0x0',61, | ||
128 | 4,1, | ||
129 | '0x80000000','0x180000000',33, | ||
130 | '0x8000000000000000','0x18000000000000000',65, | ||
131 | 1,4, | ||
132 | '0xfa000000','0x0',20, | ||
133 | '0xfa00000000000000','0x0',52, | ||
134 | 4000,1, | ||
135 | '0x83126e98','0x7ff7ced9168',43, | ||
136 | '0x83126e978d4fdf3c','0x7ff7ced916872b020c4',75, | ||
137 | 1,4000, | ||
138 | ], 256 => [ | ||
139 | '0xfa000000','0x3e000000',30, | ||
140 | '0xfa00000000000000','0x3e00000000000000',62, | ||
141 | 125,32, | ||
142 | '0x83126e98','0x1fbe76c8b',33, | ||
143 | '0x83126e978d4fdf3c','0x1fbe76c8b43958106',65, | ||
144 | 32,125, | ||
145 | '0xf4240000','0xc0000',20, | ||
146 | '0xf424000000000000','0xc000000000000',52, | ||
147 | 15625,4, | ||
148 | '0x8637bd06','0x7ffde7210be',43, | ||
149 | '0x8637bd05af6c69b6','0x7ffde7210be9424e592',75, | ||
150 | 4,15625, | ||
151 | ], 300 => [ | ||
152 | '0xd5555556','0x2aaaaaaa',30, | ||
153 | '0xd555555555555556','0x2aaaaaaaaaaaaaaa',62, | ||
154 | 10,3, | ||
155 | '0x9999999a','0x1cccccccc',33, | ||
156 | '0x999999999999999a','0x1cccccccccccccccc',65, | ||
157 | 3,10, | ||
158 | '0xd0555556','0xaaaaa',20, | ||
159 | '0xd055555555555556','0xaaaaaaaaaaaaa',52, | ||
160 | 10000,3, | ||
161 | '0x9d495183','0x7ffcb923a29',43, | ||
162 | '0x9d495182a9930be1','0x7ffcb923a29c779a6b5',75, | ||
163 | 3,10000, | ||
164 | ], 512 => [ | ||
165 | '0xfa000000','0x7e000000',31, | ||
166 | '0xfa00000000000000','0x7e00000000000000',63, | ||
167 | 125,64, | ||
168 | '0x83126e98','0xfdf3b645',32, | ||
169 | '0x83126e978d4fdf3c','0xfdf3b645a1cac083',64, | ||
170 | 64,125, | ||
171 | '0xf4240000','0x1c0000',21, | ||
172 | '0xf424000000000000','0x1c000000000000',53, | ||
173 | 15625,8, | ||
174 | '0x8637bd06','0x3ffef39085f',42, | ||
175 | '0x8637bd05af6c69b6','0x3ffef39085f4a1272c9',74, | ||
176 | 8,15625, | ||
177 | ], 1000 => [ | ||
178 | '0x80000000','0x0',31, | ||
179 | '0x8000000000000000','0x0',63, | ||
180 | 1,1, | ||
181 | '0x80000000','0x0',31, | ||
182 | '0x8000000000000000','0x0',63, | ||
183 | 1,1, | ||
184 | '0xfa000000','0x0',22, | ||
185 | '0xfa00000000000000','0x0',54, | ||
186 | 1000,1, | ||
187 | '0x83126e98','0x1ff7ced9168',41, | ||
188 | '0x83126e978d4fdf3c','0x1ff7ced916872b020c4',73, | ||
189 | 1,1000, | ||
190 | ], 1024 => [ | ||
191 | '0xfa000000','0xfe000000',32, | ||
192 | '0xfa00000000000000','0xfe00000000000000',64, | ||
193 | 125,128, | ||
194 | '0x83126e98','0x7ef9db22',31, | ||
195 | '0x83126e978d4fdf3c','0x7ef9db22d0e56041',63, | ||
196 | 128,125, | ||
197 | '0xf4240000','0x3c0000',22, | ||
198 | '0xf424000000000000','0x3c000000000000',54, | ||
199 | 15625,16, | ||
200 | '0x8637bd06','0x1fff79c842f',41, | ||
201 | '0x8637bd05af6c69b6','0x1fff79c842fa5093964',73, | ||
202 | 16,15625, | ||
203 | ], 1200 => [ | ||
204 | '0xd5555556','0xd5555555',32, | ||
205 | '0xd555555555555556','0xd555555555555555',64, | ||
206 | 5,6, | ||
207 | '0x9999999a','0x66666666',31, | ||
208 | '0x999999999999999a','0x6666666666666666',63, | ||
209 | 6,5, | ||
210 | '0xd0555556','0x2aaaaa',22, | ||
211 | '0xd055555555555556','0x2aaaaaaaaaaaaa',54, | ||
212 | 2500,3, | ||
213 | '0x9d495183','0x1ffcb923a29',41, | ||
214 | '0x9d495182a9930be1','0x1ffcb923a29c779a6b5',73, | ||
215 | 3,2500, | ||
216 | ] | ||
217 | ); | ||
218 | |||
219 | $has_bigint = eval 'use Math::BigInt qw(bgcd); 1;'; | ||
220 | |||
221 | sub bint($) | ||
222 | { | ||
223 | my($x) = @_; | ||
224 | return Math::BigInt->new($x); | ||
225 | } | ||
226 | |||
227 | # | ||
228 | # Constants for division by reciprocal multiplication. | ||
229 | # (bits, numerator, denominator) | ||
230 | # | ||
231 | sub fmul($$$) | ||
232 | { | ||
233 | my ($b,$n,$d) = @_; | ||
234 | |||
235 | $n = bint($n); | ||
236 | $d = bint($d); | ||
237 | |||
238 | return scalar (($n << $b)+$d-bint(1))/$d; | ||
239 | } | ||
240 | |||
241 | sub fadj($$$) | ||
242 | { | ||
243 | my($b,$n,$d) = @_; | ||
244 | |||
245 | $n = bint($n); | ||
246 | $d = bint($d); | ||
247 | |||
248 | $d = $d/bgcd($n, $d); | ||
249 | return scalar (($d-bint(1)) << $b)/$d; | ||
250 | } | ||
251 | |||
252 | sub fmuls($$$) { | ||
253 | my($b,$n,$d) = @_; | ||
254 | my($s,$m); | ||
255 | my($thres) = bint(1) << ($b-1); | ||
256 | |||
257 | $n = bint($n); | ||
258 | $d = bint($d); | ||
259 | |||
260 | for ($s = 0; 1; $s++) { | ||
261 | $m = fmul($s,$n,$d); | ||
262 | return $s if ($m >= $thres); | ||
263 | } | ||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | # Provides mul, adj, and shr factors for a specific | ||
268 | # (bit, time, hz) combination | ||
269 | sub muladj($$$) { | ||
270 | my($b, $t, $hz) = @_; | ||
271 | my $s = fmuls($b, $t, $hz); | ||
272 | my $m = fmul($s, $t, $hz); | ||
273 | my $a = fadj($s, $t, $hz); | ||
274 | return ($m->as_hex(), $a->as_hex(), $s); | ||
275 | } | ||
276 | |||
277 | # Provides numerator, denominator values | ||
278 | sub numden($$) { | ||
279 | my($n, $d) = @_; | ||
280 | my $g = bgcd($n, $d); | ||
281 | return ($n/$g, $d/$g); | ||
282 | } | ||
283 | |||
284 | # All values for a specific (time, hz) combo | ||
285 | sub conversions($$) { | ||
286 | my ($t, $hz) = @_; | ||
287 | my @val = (); | ||
288 | |||
289 | # HZ_TO_xx | ||
290 | push(@val, muladj(32, $t, $hz)); | ||
291 | push(@val, muladj(64, $t, $hz)); | ||
292 | push(@val, numden($t, $hz)); | ||
293 | |||
294 | # xx_TO_HZ | ||
295 | push(@val, muladj(32, $hz, $t)); | ||
296 | push(@val, muladj(64, $hz, $t)); | ||
297 | push(@val, numden($hz, $t)); | ||
298 | |||
299 | return @val; | ||
300 | } | ||
301 | |||
302 | sub compute_values($) { | ||
303 | my($hz) = @_; | ||
304 | my @val = (); | ||
305 | my $s, $m, $a, $g; | ||
306 | |||
307 | if (!$has_bigint) { | ||
308 | die "$0: HZ == $hz not canned and ". | ||
309 | "Math::BigInt not available\n"; | ||
310 | } | ||
311 | |||
312 | # MSEC conversions | ||
313 | push(@val, conversions(1000, $hz)); | ||
314 | |||
315 | # USEC conversions | ||
316 | push(@val, conversions(1000000, $hz)); | ||
317 | |||
318 | return @val; | ||
319 | } | ||
320 | |||
321 | sub output($@) | ||
322 | { | ||
323 | my($hz, @val) = @_; | ||
324 | my $pfx, $bit, $suf, $s, $m, $a; | ||
325 | |||
326 | print "/* Automatically generated by kernel/timeconst.pl */\n"; | ||
327 | print "/* Conversion constants for HZ == $hz */\n"; | ||
328 | print "\n"; | ||
329 | print "#ifndef KERNEL_TIMECONST_H\n"; | ||
330 | print "#define KERNEL_TIMECONST_H\n"; | ||
331 | print "\n"; | ||
332 | |||
333 | print "#include <linux/param.h>\n"; | ||
334 | |||
335 | print "\n"; | ||
336 | print "#if HZ != $hz\n"; | ||
337 | print "#error \"kernel/timeconst.h has the wrong HZ value!\"\n"; | ||
338 | print "#endif\n"; | ||
339 | print "\n"; | ||
340 | |||
341 | foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ', | ||
342 | 'HZ_TO_USEC','USEC_TO_HZ') { | ||
343 | foreach $bit (32, 64) { | ||
344 | foreach $suf ('MUL', 'ADJ', 'SHR') { | ||
345 | printf "#define %-23s %s\n", | ||
346 | "${pfx}_$suf$bit", shift(@val); | ||
347 | } | ||
348 | } | ||
349 | foreach $suf ('NUM', 'DEN') { | ||
350 | printf "#define %-23s %s\n", | ||
351 | "${pfx}_$suf", shift(@val); | ||
352 | } | ||
353 | } | ||
354 | |||
355 | print "\n"; | ||
356 | print "#endif /* KERNEL_TIMECONST_H */\n"; | ||
357 | } | ||
358 | |||
359 | ($hz) = @ARGV; | ||
360 | |||
361 | # Use this to generate the %canned_values structure | ||
362 | if ($hz eq '--can') { | ||
363 | shift(@ARGV); | ||
364 | @hzlist = sort {$a <=> $b} (@ARGV); | ||
365 | |||
366 | print "# Precomputed values for systems without Math::BigInt\n"; | ||
367 | print "# Generated by:\n"; | ||
368 | print "# timeconst.pl --can ", join(' ', @hzlist), "\n"; | ||
369 | print "\%canned_values = (\n"; | ||
370 | my $pf = "\t"; | ||
371 | foreach $hz (@hzlist) { | ||
372 | my @values = compute_values($hz); | ||
373 | print "$pf$hz => [\n"; | ||
374 | while (scalar(@values)) { | ||
375 | my $bit; | ||
376 | foreach $bit (32, 64) { | ||
377 | my $m = shift(@values); | ||
378 | my $a = shift(@values); | ||
379 | my $s = shift(@values); | ||
380 | print "\t\t\'",$m,"\',\'",$a,"\',",$s,",\n"; | ||
381 | } | ||
382 | my $n = shift(@values); | ||
383 | my $d = shift(@values); | ||
384 | print "\t\t",$n,',',$d,",\n"; | ||
385 | } | ||
386 | print "\t]"; | ||
387 | $pf = ', '; | ||
388 | } | ||
389 | print "\n);\n"; | ||
390 | } else { | ||
391 | $hz += 0; # Force to number | ||
392 | if ($hz < 1) { | ||
393 | die "Usage: $0 HZ\n"; | ||
394 | } | ||
395 | |||
396 | @val = @{$canned_values{$hz}}; | ||
397 | if (!defined(@val)) { | ||
398 | @val = compute_values($hz); | ||
399 | } | ||
400 | output($hz, @val); | ||
401 | } | ||
402 | exit 0; | ||
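
timeconst.pl emits a small header of #define lines: for each direction (HZ_TO_MSEC, MSEC_TO_HZ, HZ_TO_USEC, USEC_TO_HZ) a MUL/ADJ/SHR triple at 32 and 64 bits plus a NUM/DEN pair, which kernel/time.c above consumes. Reconstructed from the canned table and the output() loop in the script (not copied from an actual build), the header generated for HZ == 1000 would look roughly like this:

/* Automatically generated by kernel/timeconst.pl */
/* Conversion constants for HZ == 1000 */

#ifndef KERNEL_TIMECONST_H
#define KERNEL_TIMECONST_H

#include <linux/param.h>

#if HZ != 1000
#error "kernel/timeconst.h has the wrong HZ value!"
#endif

#define HZ_TO_MSEC_MUL32        0x80000000
#define HZ_TO_MSEC_ADJ32        0x0
#define HZ_TO_MSEC_SHR32        31
#define HZ_TO_MSEC_MUL64        0x8000000000000000
#define HZ_TO_MSEC_ADJ64        0x0
#define HZ_TO_MSEC_SHR64        63
#define HZ_TO_MSEC_NUM          1
#define HZ_TO_MSEC_DEN          1
#define MSEC_TO_HZ_MUL32        0x80000000
#define MSEC_TO_HZ_ADJ32        0x0
#define MSEC_TO_HZ_SHR32        31
#define MSEC_TO_HZ_MUL64        0x8000000000000000
#define MSEC_TO_HZ_ADJ64        0x0
#define MSEC_TO_HZ_SHR64        63
#define MSEC_TO_HZ_NUM          1
#define MSEC_TO_HZ_DEN          1
#define HZ_TO_USEC_MUL32        0xfa000000
#define HZ_TO_USEC_ADJ32        0x0
#define HZ_TO_USEC_SHR32        22
#define HZ_TO_USEC_MUL64        0xfa00000000000000
#define HZ_TO_USEC_ADJ64        0x0
#define HZ_TO_USEC_SHR64        54
#define HZ_TO_USEC_NUM          1000
#define HZ_TO_USEC_DEN          1
#define USEC_TO_HZ_MUL32        0x83126e98
#define USEC_TO_HZ_ADJ32        0x1ff7ced9168
#define USEC_TO_HZ_SHR32        41
#define USEC_TO_HZ_MUL64        0x83126e978d4fdf3c
#define USEC_TO_HZ_ADJ64        0x1ff7ced916872b020c4
#define USEC_TO_HZ_SHR64        73
#define USEC_TO_HZ_NUM          1
#define USEC_TO_HZ_DEN          1000

#endif /* KERNEL_TIMECONST_H */
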
diff --git a/kernel/timer.c b/kernel/timer.c index 9fbb472b8cf0..99b00a25f88b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -327,7 +327,7 @@ static void timer_stats_account_timer(struct timer_list *timer) {} | |||
327 | * init_timer() must be done to a timer prior calling *any* of the | 327 | * init_timer() must be done to a timer prior calling *any* of the |
328 | * other timer functions. | 328 | * other timer functions. |
329 | */ | 329 | */ |
330 | void fastcall init_timer(struct timer_list *timer) | 330 | void init_timer(struct timer_list *timer) |
331 | { | 331 | { |
332 | timer->entry.next = NULL; | 332 | timer->entry.next = NULL; |
333 | timer->base = __raw_get_cpu_var(tvec_bases); | 333 | timer->base = __raw_get_cpu_var(tvec_bases); |
@@ -339,7 +339,7 @@ void fastcall init_timer(struct timer_list *timer) | |||
339 | } | 339 | } |
340 | EXPORT_SYMBOL(init_timer); | 340 | EXPORT_SYMBOL(init_timer); |
341 | 341 | ||
342 | void fastcall init_timer_deferrable(struct timer_list *timer) | 342 | void init_timer_deferrable(struct timer_list *timer) |
343 | { | 343 | { |
344 | init_timer(timer); | 344 | init_timer(timer); |
345 | timer_set_deferrable(timer); | 345 | timer_set_deferrable(timer); |
@@ -818,12 +818,14 @@ unsigned long next_timer_interrupt(void) | |||
818 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 818 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
819 | void account_process_tick(struct task_struct *p, int user_tick) | 819 | void account_process_tick(struct task_struct *p, int user_tick) |
820 | { | 820 | { |
821 | cputime_t one_jiffy = jiffies_to_cputime(1); | ||
822 | |||
821 | if (user_tick) { | 823 | if (user_tick) { |
822 | account_user_time(p, jiffies_to_cputime(1)); | 824 | account_user_time(p, one_jiffy); |
823 | account_user_time_scaled(p, jiffies_to_cputime(1)); | 825 | account_user_time_scaled(p, cputime_to_scaled(one_jiffy)); |
824 | } else { | 826 | } else { |
825 | account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); | 827 | account_system_time(p, HARDIRQ_OFFSET, one_jiffy); |
826 | account_system_time_scaled(p, jiffies_to_cputime(1)); | 828 | account_system_time_scaled(p, cputime_to_scaled(one_jiffy)); |
827 | } | 829 | } |
828 | } | 830 | } |
829 | #endif | 831 | #endif |
@@ -977,7 +979,7 @@ asmlinkage long sys_getppid(void) | |||
977 | int pid; | 979 | int pid; |
978 | 980 | ||
979 | rcu_read_lock(); | 981 | rcu_read_lock(); |
980 | pid = task_tgid_nr_ns(current->real_parent, current->nsproxy->pid_ns); | 982 | pid = task_tgid_vnr(current->real_parent); |
981 | rcu_read_unlock(); | 983 | rcu_read_unlock(); |
982 | 984 | ||
983 | return pid; | 985 | return pid; |
@@ -1040,7 +1042,7 @@ static void process_timeout(unsigned long __data) | |||
1040 | * | 1042 | * |
1041 | * In all cases the return value is guaranteed to be non-negative. | 1043 | * In all cases the return value is guaranteed to be non-negative. |
1042 | */ | 1044 | */ |
1043 | fastcall signed long __sched schedule_timeout(signed long timeout) | 1045 | signed long __sched schedule_timeout(signed long timeout) |
1044 | { | 1046 | { |
1045 | struct timer_list timer; | 1047 | struct timer_list timer; |
1046 | unsigned long expire; | 1048 | unsigned long expire; |
diff --git a/kernel/user.c b/kernel/user.c index bc1c48d35cb3..7132022a040c 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -17,6 +17,14 @@ | |||
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/user_namespace.h> | 18 | #include <linux/user_namespace.h> |
19 | 19 | ||
20 | struct user_namespace init_user_ns = { | ||
21 | .kref = { | ||
22 | .refcount = ATOMIC_INIT(2), | ||
23 | }, | ||
24 | .root_user = &root_user, | ||
25 | }; | ||
26 | EXPORT_SYMBOL_GPL(init_user_ns); | ||
27 | |||
20 | /* | 28 | /* |
21 | * UID task count cache, to get fast user lookup in "alloc_uid" | 29 | * UID task count cache, to get fast user lookup in "alloc_uid" |
22 | * when changing user ID's (ie setuid() and friends). | 30 | * when changing user ID's (ie setuid() and friends). |
@@ -49,7 +57,7 @@ struct user_struct root_user = { | |||
49 | .uid_keyring = &root_user_keyring, | 57 | .uid_keyring = &root_user_keyring, |
50 | .session_keyring = &root_session_keyring, | 58 | .session_keyring = &root_session_keyring, |
51 | #endif | 59 | #endif |
52 | #ifdef CONFIG_FAIR_USER_SCHED | 60 | #ifdef CONFIG_USER_SCHED |
53 | .tg = &init_task_group, | 61 | .tg = &init_task_group, |
54 | #endif | 62 | #endif |
55 | }; | 63 | }; |
@@ -82,7 +90,7 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) | |||
82 | return NULL; | 90 | return NULL; |
83 | } | 91 | } |
84 | 92 | ||
85 | #ifdef CONFIG_FAIR_USER_SCHED | 93 | #ifdef CONFIG_USER_SCHED |
86 | 94 | ||
87 | static void sched_destroy_user(struct user_struct *up) | 95 | static void sched_destroy_user(struct user_struct *up) |
88 | { | 96 | { |
@@ -105,15 +113,15 @@ static void sched_switch_user(struct task_struct *p) | |||
105 | sched_move_task(p); | 113 | sched_move_task(p); |
106 | } | 114 | } |
107 | 115 | ||
108 | #else /* CONFIG_FAIR_USER_SCHED */ | 116 | #else /* CONFIG_USER_SCHED */ |
109 | 117 | ||
110 | static void sched_destroy_user(struct user_struct *up) { } | 118 | static void sched_destroy_user(struct user_struct *up) { } |
111 | static int sched_create_user(struct user_struct *up) { return 0; } | 119 | static int sched_create_user(struct user_struct *up) { return 0; } |
112 | static void sched_switch_user(struct task_struct *p) { } | 120 | static void sched_switch_user(struct task_struct *p) { } |
113 | 121 | ||
114 | #endif /* CONFIG_FAIR_USER_SCHED */ | 122 | #endif /* CONFIG_USER_SCHED */ |
115 | 123 | ||
116 | #if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS) | 124 | #if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS) |
117 | 125 | ||
118 | static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ | 126 | static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ |
119 | static DEFINE_MUTEX(uids_mutex); | 127 | static DEFINE_MUTEX(uids_mutex); |
@@ -129,6 +137,7 @@ static inline void uids_mutex_unlock(void) | |||
129 | } | 137 | } |
130 | 138 | ||
131 | /* uid directory attributes */ | 139 | /* uid directory attributes */ |
140 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
132 | static ssize_t cpu_shares_show(struct kobject *kobj, | 141 | static ssize_t cpu_shares_show(struct kobject *kobj, |
133 | struct kobj_attribute *attr, | 142 | struct kobj_attribute *attr, |
134 | char *buf) | 143 | char *buf) |
@@ -155,10 +164,45 @@ static ssize_t cpu_shares_store(struct kobject *kobj, | |||
155 | 164 | ||
156 | static struct kobj_attribute cpu_share_attr = | 165 | static struct kobj_attribute cpu_share_attr = |
157 | __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store); | 166 | __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store); |
167 | #endif | ||
168 | |||
169 | #ifdef CONFIG_RT_GROUP_SCHED | ||
170 | static ssize_t cpu_rt_runtime_show(struct kobject *kobj, | ||
171 | struct kobj_attribute *attr, | ||
172 | char *buf) | ||
173 | { | ||
174 | struct user_struct *up = container_of(kobj, struct user_struct, kobj); | ||
175 | |||
176 | return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg)); | ||
177 | } | ||
178 | |||
179 | static ssize_t cpu_rt_runtime_store(struct kobject *kobj, | ||
180 | struct kobj_attribute *attr, | ||
181 | const char *buf, size_t size) | ||
182 | { | ||
183 | struct user_struct *up = container_of(kobj, struct user_struct, kobj); | ||
184 | unsigned long rt_runtime; | ||
185 | int rc; | ||
186 | |||
187 | sscanf(buf, "%lu", &rt_runtime); | ||
188 | |||
189 | rc = sched_group_set_rt_runtime(up->tg, rt_runtime); | ||
190 | |||
191 | return (rc ? rc : size); | ||
192 | } | ||
193 | |||
194 | static struct kobj_attribute cpu_rt_runtime_attr = | ||
195 | __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store); | ||
196 | #endif | ||
158 | 197 | ||
159 | /* default attributes per uid directory */ | 198 | /* default attributes per uid directory */ |
160 | static struct attribute *uids_attributes[] = { | 199 | static struct attribute *uids_attributes[] = { |
200 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
161 | &cpu_share_attr.attr, | 201 | &cpu_share_attr.attr, |
202 | #endif | ||
203 | #ifdef CONFIG_RT_GROUP_SCHED | ||
204 | &cpu_rt_runtime_attr.attr, | ||
205 | #endif | ||
162 | NULL | 206 | NULL |
163 | }; | 207 | }; |
164 | 208 | ||
@@ -261,7 +305,7 @@ static inline void free_user(struct user_struct *up, unsigned long flags) | |||
261 | schedule_work(&up->work); | 305 | schedule_work(&up->work); |
262 | } | 306 | } |
263 | 307 | ||
264 | #else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */ | 308 | #else /* CONFIG_USER_SCHED && CONFIG_SYSFS */ |
265 | 309 | ||
266 | int uids_sysfs_init(void) { return 0; } | 310 | int uids_sysfs_init(void) { return 0; } |
267 | static inline int uids_user_create(struct user_struct *up) { return 0; } | 311 | static inline int uids_user_create(struct user_struct *up) { return 0; } |
@@ -365,7 +409,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) | |||
365 | spin_lock_irq(&uidhash_lock); | 409 | spin_lock_irq(&uidhash_lock); |
366 | up = uid_hash_find(uid, hashent); | 410 | up = uid_hash_find(uid, hashent); |
367 | if (up) { | 411 | if (up) { |
368 | /* This case is not possible when CONFIG_FAIR_USER_SCHED | 412 | /* This case is not possible when CONFIG_USER_SCHED |
369 | * is defined, since we serialize alloc_uid() using | 413 | * is defined, since we serialize alloc_uid() using |
370 | * uids_mutex. Hence no need to call | 414 | * uids_mutex. Hence no need to call |
371 | * sched_destroy_user() or remove_user_sysfs_dir(). | 415 | * sched_destroy_user() or remove_user_sysfs_dir(). |
@@ -427,6 +471,7 @@ void switch_uid(struct user_struct *new_user) | |||
427 | suid_keys(current); | 471 | suid_keys(current); |
428 | } | 472 | } |
429 | 473 | ||
474 | #ifdef CONFIG_USER_NS | ||
430 | void release_uids(struct user_namespace *ns) | 475 | void release_uids(struct user_namespace *ns) |
431 | { | 476 | { |
432 | int i; | 477 | int i; |
@@ -451,6 +496,7 @@ void release_uids(struct user_namespace *ns) | |||
451 | 496 | ||
452 | free_uid(ns->root_user); | 497 | free_uid(ns->root_user); |
453 | } | 498 | } |
499 | #endif | ||
454 | 500 | ||
455 | static int __init uid_cache_init(void) | 501 | static int __init uid_cache_init(void) |
456 | { | 502 | { |
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 7af90fc4f0fd..4c9006275df7 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -10,17 +10,6 @@ | |||
10 | #include <linux/nsproxy.h> | 10 | #include <linux/nsproxy.h> |
11 | #include <linux/user_namespace.h> | 11 | #include <linux/user_namespace.h> |
12 | 12 | ||
13 | struct user_namespace init_user_ns = { | ||
14 | .kref = { | ||
15 | .refcount = ATOMIC_INIT(2), | ||
16 | }, | ||
17 | .root_user = &root_user, | ||
18 | }; | ||
19 | |||
20 | EXPORT_SYMBOL_GPL(init_user_ns); | ||
21 | |||
22 | #ifdef CONFIG_USER_NS | ||
23 | |||
24 | /* | 13 | /* |
25 | * Clone a new ns copying an original user ns, setting refcount to 1 | 14 | * Clone a new ns copying an original user ns, setting refcount to 1 |
26 | * @old_ns: namespace to clone | 15 | * @old_ns: namespace to clone |
@@ -84,5 +73,3 @@ void free_user_ns(struct kref *kref) | |||
84 | release_uids(ns); | 73 | release_uids(ns); |
85 | kfree(ns); | 74 | kfree(ns); |
86 | } | 75 | } |
87 | |||
88 | #endif /* CONFIG_USER_NS */ | ||
diff --git a/kernel/wait.c b/kernel/wait.c index f9876888a569..c275c56cf2d3 100644 --- a/kernel/wait.c +++ b/kernel/wait.c | |||
@@ -18,7 +18,7 @@ void init_waitqueue_head(wait_queue_head_t *q) | |||
18 | 18 | ||
19 | EXPORT_SYMBOL(init_waitqueue_head); | 19 | EXPORT_SYMBOL(init_waitqueue_head); |
20 | 20 | ||
21 | void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) | 21 | void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) |
22 | { | 22 | { |
23 | unsigned long flags; | 23 | unsigned long flags; |
24 | 24 | ||
@@ -29,7 +29,7 @@ void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) | |||
29 | } | 29 | } |
30 | EXPORT_SYMBOL(add_wait_queue); | 30 | EXPORT_SYMBOL(add_wait_queue); |
31 | 31 | ||
32 | void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) | 32 | void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) |
33 | { | 33 | { |
34 | unsigned long flags; | 34 | unsigned long flags; |
35 | 35 | ||
@@ -40,7 +40,7 @@ void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) | |||
40 | } | 40 | } |
41 | EXPORT_SYMBOL(add_wait_queue_exclusive); | 41 | EXPORT_SYMBOL(add_wait_queue_exclusive); |
42 | 42 | ||
43 | void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) | 43 | void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) |
44 | { | 44 | { |
45 | unsigned long flags; | 45 | unsigned long flags; |
46 | 46 | ||
@@ -63,7 +63,7 @@ EXPORT_SYMBOL(remove_wait_queue); | |||
63 | * stops them from bleeding out - it would still allow subsequent | 63 | * stops them from bleeding out - it would still allow subsequent |
64 | * loads to move into the critical region). | 64 | * loads to move into the critical region). |
65 | */ | 65 | */ |
66 | void fastcall | 66 | void |
67 | prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) | 67 | prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) |
68 | { | 68 | { |
69 | unsigned long flags; | 69 | unsigned long flags; |
@@ -82,7 +82,7 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) | |||
82 | } | 82 | } |
83 | EXPORT_SYMBOL(prepare_to_wait); | 83 | EXPORT_SYMBOL(prepare_to_wait); |
84 | 84 | ||
85 | void fastcall | 85 | void |
86 | prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) | 86 | prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) |
87 | { | 87 | { |
88 | unsigned long flags; | 88 | unsigned long flags; |
@@ -101,7 +101,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) | |||
101 | } | 101 | } |
102 | EXPORT_SYMBOL(prepare_to_wait_exclusive); | 102 | EXPORT_SYMBOL(prepare_to_wait_exclusive); |
103 | 103 | ||
104 | void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait) | 104 | void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) |
105 | { | 105 | { |
106 | unsigned long flags; | 106 | unsigned long flags; |
107 | 107 | ||
@@ -157,7 +157,7 @@ EXPORT_SYMBOL(wake_bit_function); | |||
157 | * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are | 157 | * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are |
158 | * permitted return codes. Nonzero return codes halt waiting and return. | 158 | * permitted return codes. Nonzero return codes halt waiting and return. |
159 | */ | 159 | */ |
160 | int __sched fastcall | 160 | int __sched |
161 | __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, | 161 | __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, |
162 | int (*action)(void *), unsigned mode) | 162 | int (*action)(void *), unsigned mode) |
163 | { | 163 | { |
@@ -173,7 +173,7 @@ __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, | |||
173 | } | 173 | } |
174 | EXPORT_SYMBOL(__wait_on_bit); | 174 | EXPORT_SYMBOL(__wait_on_bit); |
175 | 175 | ||
176 | int __sched fastcall out_of_line_wait_on_bit(void *word, int bit, | 176 | int __sched out_of_line_wait_on_bit(void *word, int bit, |
177 | int (*action)(void *), unsigned mode) | 177 | int (*action)(void *), unsigned mode) |
178 | { | 178 | { |
179 | wait_queue_head_t *wq = bit_waitqueue(word, bit); | 179 | wait_queue_head_t *wq = bit_waitqueue(word, bit); |
@@ -183,7 +183,7 @@ int __sched fastcall out_of_line_wait_on_bit(void *word, int bit, | |||
183 | } | 183 | } |
184 | EXPORT_SYMBOL(out_of_line_wait_on_bit); | 184 | EXPORT_SYMBOL(out_of_line_wait_on_bit); |
185 | 185 | ||
186 | int __sched fastcall | 186 | int __sched |
187 | __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, | 187 | __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, |
188 | int (*action)(void *), unsigned mode) | 188 | int (*action)(void *), unsigned mode) |
189 | { | 189 | { |
@@ -201,7 +201,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, | |||
201 | } | 201 | } |
202 | EXPORT_SYMBOL(__wait_on_bit_lock); | 202 | EXPORT_SYMBOL(__wait_on_bit_lock); |
203 | 203 | ||
204 | int __sched fastcall out_of_line_wait_on_bit_lock(void *word, int bit, | 204 | int __sched out_of_line_wait_on_bit_lock(void *word, int bit, |
205 | int (*action)(void *), unsigned mode) | 205 | int (*action)(void *), unsigned mode) |
206 | { | 206 | { |
207 | wait_queue_head_t *wq = bit_waitqueue(word, bit); | 207 | wait_queue_head_t *wq = bit_waitqueue(word, bit); |
@@ -211,7 +211,7 @@ int __sched fastcall out_of_line_wait_on_bit_lock(void *word, int bit, | |||
211 | } | 211 | } |
212 | EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); | 212 | EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); |
213 | 213 | ||
214 | void fastcall __wake_up_bit(wait_queue_head_t *wq, void *word, int bit) | 214 | void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit) |
215 | { | 215 | { |
216 | struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); | 216 | struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); |
217 | if (waitqueue_active(wq)) | 217 | if (waitqueue_active(wq)) |
@@ -236,13 +236,13 @@ EXPORT_SYMBOL(__wake_up_bit); | |||
236 | * may need to use a less regular barrier, such fs/inode.c's smp_mb(), | 236 | * may need to use a less regular barrier, such fs/inode.c's smp_mb(), |
237 | * because spin_unlock() does not guarantee a memory barrier. | 237 | * because spin_unlock() does not guarantee a memory barrier. |
238 | */ | 238 | */ |
239 | void fastcall wake_up_bit(void *word, int bit) | 239 | void wake_up_bit(void *word, int bit) |
240 | { | 240 | { |
241 | __wake_up_bit(bit_waitqueue(word, bit), word, bit); | 241 | __wake_up_bit(bit_waitqueue(word, bit), word, bit); |
242 | } | 242 | } |
243 | EXPORT_SYMBOL(wake_up_bit); | 243 | EXPORT_SYMBOL(wake_up_bit); |
244 | 244 | ||
245 | fastcall wait_queue_head_t *bit_waitqueue(void *word, int bit) | 245 | wait_queue_head_t *bit_waitqueue(void *word, int bit) |
246 | { | 246 | { |
247 | const int shift = BITS_PER_LONG == 32 ? 5 : 6; | 247 | const int shift = BITS_PER_LONG == 32 ? 5 : 6; |
248 | const struct zone *zone = page_zone(virt_to_page(word)); | 248 | const struct zone *zone = page_zone(virt_to_page(word)); |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 52db48e7f6e7..ff06611655af 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -161,7 +161,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, | |||
161 | * We queue the work to the CPU it was submitted, but there is no | 161 | * We queue the work to the CPU it was submitted, but there is no |
162 | * guarantee that it will be processed by that CPU. | 162 | * guarantee that it will be processed by that CPU. |
163 | */ | 163 | */ |
164 | int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) | 164 | int queue_work(struct workqueue_struct *wq, struct work_struct *work) |
165 | { | 165 | { |
166 | int ret = 0; | 166 | int ret = 0; |
167 | 167 | ||
@@ -175,7 +175,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) | |||
175 | } | 175 | } |
176 | EXPORT_SYMBOL_GPL(queue_work); | 176 | EXPORT_SYMBOL_GPL(queue_work); |
177 | 177 | ||
178 | void delayed_work_timer_fn(unsigned long __data) | 178 | static void delayed_work_timer_fn(unsigned long __data) |
179 | { | 179 | { |
180 | struct delayed_work *dwork = (struct delayed_work *)__data; | 180 | struct delayed_work *dwork = (struct delayed_work *)__data; |
181 | struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work); | 181 | struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work); |
@@ -192,7 +192,7 @@ void delayed_work_timer_fn(unsigned long __data) | |||
192 | * | 192 | * |
193 | * Returns 0 if @work was already on a queue, non-zero otherwise. | 193 | * Returns 0 if @work was already on a queue, non-zero otherwise. |
194 | */ | 194 | */ |
195 | int fastcall queue_delayed_work(struct workqueue_struct *wq, | 195 | int queue_delayed_work(struct workqueue_struct *wq, |
196 | struct delayed_work *dwork, unsigned long delay) | 196 | struct delayed_work *dwork, unsigned long delay) |
197 | { | 197 | { |
198 | timer_stats_timer_set_start_info(&dwork->timer); | 198 | timer_stats_timer_set_start_info(&dwork->timer); |
@@ -388,7 +388,7 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) | |||
388 | * This function used to run the workqueues itself. Now we just wait for the | 388 | * This function used to run the workqueues itself. Now we just wait for the |
389 | * helper threads to do it. | 389 | * helper threads to do it. |
390 | */ | 390 | */ |
391 | void fastcall flush_workqueue(struct workqueue_struct *wq) | 391 | void flush_workqueue(struct workqueue_struct *wq) |
392 | { | 392 | { |
393 | const cpumask_t *cpu_map = wq_cpu_map(wq); | 393 | const cpumask_t *cpu_map = wq_cpu_map(wq); |
394 | int cpu; | 394 | int cpu; |
@@ -546,7 +546,7 @@ static struct workqueue_struct *keventd_wq __read_mostly; | |||
546 | * | 546 | * |
547 | * This puts a job in the kernel-global workqueue. | 547 | * This puts a job in the kernel-global workqueue. |
548 | */ | 548 | */ |
549 | int fastcall schedule_work(struct work_struct *work) | 549 | int schedule_work(struct work_struct *work) |
550 | { | 550 | { |
551 | return queue_work(keventd_wq, work); | 551 | return queue_work(keventd_wq, work); |
552 | } | 552 | } |
@@ -560,7 +560,7 @@ EXPORT_SYMBOL(schedule_work); | |||
560 | * After waiting for a given time this puts a job in the kernel-global | 560 | * After waiting for a given time this puts a job in the kernel-global |
561 | * workqueue. | 561 | * workqueue. |
562 | */ | 562 | */ |
563 | int fastcall schedule_delayed_work(struct delayed_work *dwork, | 563 | int schedule_delayed_work(struct delayed_work *dwork, |
564 | unsigned long delay) | 564 | unsigned long delay) |
565 | { | 565 | { |
566 | timer_stats_timer_set_start_info(&dwork->timer); | 566 | timer_stats_timer_set_start_info(&dwork->timer); |