Diffstat (limited to 'kernel')
70 files changed, 2218 insertions, 2580 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c072b6da239..bbde5f1a4486 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -7,7 +7,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ | |||
7 | sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ | 7 | sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ |
8 | signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ |
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
12 | notifier.o ksysfs.o cred.o \ | 12 | notifier.o ksysfs.o cred.o \ |
13 | async.o range.o groups.o lglock.o smpboot.o | 13 | async.o range.o groups.o lglock.o smpboot.o |
@@ -25,9 +25,7 @@ endif | |||
25 | obj-y += sched/ | 25 | obj-y += sched/ |
26 | obj-y += power/ | 26 | obj-y += power/ |
27 | 27 | ||
28 | ifeq ($(CONFIG_CHECKPOINT_RESTORE),y) | 28 | obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o |
29 | obj-$(CONFIG_X86) += kcmp.o | ||
30 | endif | ||
31 | obj-$(CONFIG_FREEZER) += freezer.o | 29 | obj-$(CONFIG_FREEZER) += freezer.o |
32 | obj-$(CONFIG_PROFILING) += profile.o | 30 | obj-$(CONFIG_PROFILING) += profile.o |
33 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 31 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
@@ -127,11 +125,19 @@ $(obj)/config_data.h: $(obj)/config_data.gz FORCE | |||
127 | 125 | ||
128 | $(obj)/time.o: $(obj)/timeconst.h | 126 | $(obj)/time.o: $(obj)/timeconst.h |
129 | 127 | ||
130 | quiet_cmd_timeconst = TIMEC $@ | 128 | quiet_cmd_hzfile = HZFILE $@ |
131 | cmd_timeconst = $(PERL) $< $(CONFIG_HZ) > $@ | 129 | cmd_hzfile = echo "hz=$(CONFIG_HZ)" > $@ |
130 | |||
131 | targets += hz.bc | ||
132 | $(obj)/hz.bc: $(objtree)/include/config/hz.h FORCE | ||
133 | $(call if_changed,hzfile) | ||
134 | |||
135 | quiet_cmd_bc = BC $@ | ||
136 | cmd_bc = bc -q $(filter-out FORCE,$^) > $@ | ||
137 | |||
132 | targets += timeconst.h | 138 | targets += timeconst.h |
133 | $(obj)/timeconst.h: $(src)/timeconst.pl FORCE | 139 | $(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE |
134 | $(call if_changed,timeconst) | 140 | $(call if_changed,bc) |
135 | 141 | ||
136 | ifeq ($(CONFIG_MODULE_SIG),y) | 142 | ifeq ($(CONFIG_MODULE_SIG),y) |
137 | # | 143 | # |
@@ -153,23 +159,7 @@ kernel/modsign_certificate.o: signing_key.x509 extra_certificates | |||
153 | # fail and that the kernel may be used afterwards. | 159 | # fail and that the kernel may be used afterwards. |
154 | # | 160 | # |
155 | ############################################################################### | 161 | ############################################################################### |
156 | sign_key_with_hash := | 162 | ifndef CONFIG_MODULE_SIG_HASH |
157 | ifeq ($(CONFIG_MODULE_SIG_SHA1),y) | ||
158 | sign_key_with_hash := -sha1 | ||
159 | endif | ||
160 | ifeq ($(CONFIG_MODULE_SIG_SHA224),y) | ||
161 | sign_key_with_hash := -sha224 | ||
162 | endif | ||
163 | ifeq ($(CONFIG_MODULE_SIG_SHA256),y) | ||
164 | sign_key_with_hash := -sha256 | ||
165 | endif | ||
166 | ifeq ($(CONFIG_MODULE_SIG_SHA384),y) | ||
167 | sign_key_with_hash := -sha384 | ||
168 | endif | ||
169 | ifeq ($(CONFIG_MODULE_SIG_SHA512),y) | ||
170 | sign_key_with_hash := -sha512 | ||
171 | endif | ||
172 | ifeq ($(sign_key_with_hash),) | ||
173 | $(error Could not determine digest type to use from kernel config) | 163 | $(error Could not determine digest type to use from kernel config) |
174 | endif | 164 | endif |
175 | 165 | ||
@@ -182,8 +172,8 @@ signing_key.priv signing_key.x509: x509.genkey | |||
182 | @echo "### needs to be run as root, and uses a hardware random" | 172 | @echo "### needs to be run as root, and uses a hardware random" |
183 | @echo "### number generator if one is available." | 173 | @echo "### number generator if one is available." |
184 | @echo "###" | 174 | @echo "###" |
185 | openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \ | 175 | openssl req -new -nodes -utf8 -$(CONFIG_MODULE_SIG_HASH) -days 36500 \ |
186 | -x509 -config x509.genkey \ | 176 | -batch -x509 -config x509.genkey \ |
187 | -outform DER -out signing_key.x509 \ | 177 | -outform DER -out signing_key.x509 \ |
188 | -keyout signing_key.priv | 178 | -keyout signing_key.priv |
189 | @echo "###" | 179 | @echo "###" |
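The Makefile hunk above replaces the x86-only ifeq block with a plain obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o, so kcmp is built on any architecture that enables checkpoint/restore. For context, a minimal userspace sketch of calling kcmp(2) follows; it is hypothetical illustration, not part of this commit, and assumes the libc headers expose SYS_kcmp and <linux/kcmp.h>.

/*
 * Hypothetical userspace sketch (not from this commit): exercising kcmp(2),
 * whose object file the Makefile hunk above now builds for all architectures
 * with CONFIG_CHECKPOINT_RESTORE=y.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/kcmp.h>

static long kcmp(pid_t pid1, pid_t pid2, int type,
		 unsigned long idx1, unsigned long idx2)
{
	/* no glibc wrapper at the time, so go through syscall(2) */
	return syscall(SYS_kcmp, pid1, pid2, type, idx1, idx2);
}

int main(void)
{
	pid_t self = getpid();

	/* compare fd 0 and fd 1 of the current process */
	long ret = kcmp(self, self, KCMP_FILE, 0, 1);

	/* 0 means both fds share the same struct file */
	printf("kcmp(KCMP_FILE, 0, 1) = %ld\n", ret);
	return 0;
}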
diff --git a/kernel/acct.c b/kernel/acct.c
index e8b1627ab9c7..b9bd7f098ee5 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -205,7 +205,7 @@ static int acct_on(struct filename *pathname) | |||
205 | if (IS_ERR(file)) | 205 | if (IS_ERR(file)) |
206 | return PTR_ERR(file); | 206 | return PTR_ERR(file); |
207 | 207 | ||
208 | if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) { | 208 | if (!S_ISREG(file_inode(file)->i_mode)) { |
209 | filp_close(file, NULL); | 209 | filp_close(file, NULL); |
210 | return -EACCES; | 210 | return -EACCES; |
211 | } | 211 | } |
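The acct.c hunk above, and several cgroup.c hunks below, switch from open-coded file->f_path.dentry->d_inode chains to the file_inode() helper. A minimal sketch of the same idiom follows, using a hypothetical helper name that is not taken from this commit.

/*
 * Minimal sketch (hypothetical helper, not from this commit) of the
 * file_inode() idiom the hunk above converts to.
 */
#include <linux/fs.h>

static bool example_is_regular(struct file *file)
{
	/* file_inode() returns the inode cached in the struct file at open
	 * time, avoiding the manual dentry dereference. */
	return S_ISREG(file_inode(file)->i_mode);
}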
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4855892798fd..a32f9432666c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -52,7 +52,7 @@ | |||
52 | #include <linux/module.h> | 52 | #include <linux/module.h> |
53 | #include <linux/delayacct.h> | 53 | #include <linux/delayacct.h> |
54 | #include <linux/cgroupstats.h> | 54 | #include <linux/cgroupstats.h> |
55 | #include <linux/hash.h> | 55 | #include <linux/hashtable.h> |
56 | #include <linux/namei.h> | 56 | #include <linux/namei.h> |
57 | #include <linux/pid_namespace.h> | 57 | #include <linux/pid_namespace.h> |
58 | #include <linux/idr.h> | 58 | #include <linux/idr.h> |
@@ -376,22 +376,18 @@ static int css_set_count; | |||
376 | * account cgroups in empty hierarchies. | 376 | * account cgroups in empty hierarchies. |
377 | */ | 377 | */ |
378 | #define CSS_SET_HASH_BITS 7 | 378 | #define CSS_SET_HASH_BITS 7 |
379 | #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) | 379 | static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS); |
380 | static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; | ||
381 | 380 | ||
382 | static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) | 381 | static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) |
383 | { | 382 | { |
384 | int i; | 383 | int i; |
385 | int index; | 384 | unsigned long key = 0UL; |
386 | unsigned long tmp = 0UL; | ||
387 | 385 | ||
388 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) | 386 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) |
389 | tmp += (unsigned long)css[i]; | 387 | key += (unsigned long)css[i]; |
390 | tmp = (tmp >> 16) ^ tmp; | 388 | key = (key >> 16) ^ key; |
391 | 389 | ||
392 | index = hash_long(tmp, CSS_SET_HASH_BITS); | 390 | return key; |
393 | |||
394 | return &css_set_table[index]; | ||
395 | } | 391 | } |
396 | 392 | ||
397 | /* We don't maintain the lists running through each css_set to its | 393 | /* We don't maintain the lists running through each css_set to its |
@@ -418,7 +414,7 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
418 | } | 414 | } |
419 | 415 | ||
420 | /* This css_set is dead. unlink it and release cgroup refcounts */ | 416 | /* This css_set is dead. unlink it and release cgroup refcounts */ |
421 | hlist_del(&cg->hlist); | 417 | hash_del(&cg->hlist); |
422 | css_set_count--; | 418 | css_set_count--; |
423 | 419 | ||
424 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, | 420 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, |
@@ -426,12 +422,20 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
426 | struct cgroup *cgrp = link->cgrp; | 422 | struct cgroup *cgrp = link->cgrp; |
427 | list_del(&link->cg_link_list); | 423 | list_del(&link->cg_link_list); |
428 | list_del(&link->cgrp_link_list); | 424 | list_del(&link->cgrp_link_list); |
425 | |||
426 | /* | ||
427 | * We may not be holding cgroup_mutex, and if cgrp->count is | ||
428 | * dropped to 0 the cgroup can be destroyed at any time, hence | ||
429 | * rcu_read_lock is used to keep it alive. | ||
430 | */ | ||
431 | rcu_read_lock(); | ||
429 | if (atomic_dec_and_test(&cgrp->count) && | 432 | if (atomic_dec_and_test(&cgrp->count) && |
430 | notify_on_release(cgrp)) { | 433 | notify_on_release(cgrp)) { |
431 | if (taskexit) | 434 | if (taskexit) |
432 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 435 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
433 | check_for_release(cgrp); | 436 | check_for_release(cgrp); |
434 | } | 437 | } |
438 | rcu_read_unlock(); | ||
435 | 439 | ||
436 | kfree(link); | 440 | kfree(link); |
437 | } | 441 | } |
@@ -550,9 +554,8 @@ static struct css_set *find_existing_css_set( | |||
550 | { | 554 | { |
551 | int i; | 555 | int i; |
552 | struct cgroupfs_root *root = cgrp->root; | 556 | struct cgroupfs_root *root = cgrp->root; |
553 | struct hlist_head *hhead; | ||
554 | struct hlist_node *node; | ||
555 | struct css_set *cg; | 557 | struct css_set *cg; |
558 | unsigned long key; | ||
556 | 559 | ||
557 | /* | 560 | /* |
558 | * Build the set of subsystem state objects that we want to see in the | 561 | * Build the set of subsystem state objects that we want to see in the |
@@ -572,8 +575,8 @@ static struct css_set *find_existing_css_set( | |||
572 | } | 575 | } |
573 | } | 576 | } |
574 | 577 | ||
575 | hhead = css_set_hash(template); | 578 | key = css_set_hash(template); |
576 | hlist_for_each_entry(cg, node, hhead, hlist) { | 579 | hash_for_each_possible(css_set_table, cg, hlist, key) { |
577 | if (!compare_css_sets(cg, oldcg, cgrp, template)) | 580 | if (!compare_css_sets(cg, oldcg, cgrp, template)) |
578 | continue; | 581 | continue; |
579 | 582 | ||
@@ -657,8 +660,8 @@ static struct css_set *find_css_set( | |||
657 | 660 | ||
658 | struct list_head tmp_cg_links; | 661 | struct list_head tmp_cg_links; |
659 | 662 | ||
660 | struct hlist_head *hhead; | ||
661 | struct cg_cgroup_link *link; | 663 | struct cg_cgroup_link *link; |
664 | unsigned long key; | ||
662 | 665 | ||
663 | /* First see if we already have a cgroup group that matches | 666 | /* First see if we already have a cgroup group that matches |
664 | * the desired set */ | 667 | * the desired set */ |
@@ -704,8 +707,8 @@ static struct css_set *find_css_set( | |||
704 | css_set_count++; | 707 | css_set_count++; |
705 | 708 | ||
706 | /* Add this cgroup group to the hash table */ | 709 | /* Add this cgroup group to the hash table */ |
707 | hhead = css_set_hash(res->subsys); | 710 | key = css_set_hash(res->subsys); |
708 | hlist_add_head(&res->hlist, hhead); | 711 | hash_add(css_set_table, &res->hlist, key); |
709 | 712 | ||
710 | write_unlock(&css_set_lock); | 713 | write_unlock(&css_set_lock); |
711 | 714 | ||
@@ -856,47 +859,54 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) | |||
856 | return inode; | 859 | return inode; |
857 | } | 860 | } |
858 | 861 | ||
859 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 862 | static void cgroup_free_fn(struct work_struct *work) |
860 | { | 863 | { |
861 | /* is dentry a directory ? if so, kfree() associated cgroup */ | 864 | struct cgroup *cgrp = container_of(work, struct cgroup, free_work); |
862 | if (S_ISDIR(inode->i_mode)) { | 865 | struct cgroup_subsys *ss; |
863 | struct cgroup *cgrp = dentry->d_fsdata; | ||
864 | struct cgroup_subsys *ss; | ||
865 | BUG_ON(!(cgroup_is_removed(cgrp))); | ||
866 | /* It's possible for external users to be holding css | ||
867 | * reference counts on a cgroup; css_put() needs to | ||
868 | * be able to access the cgroup after decrementing | ||
869 | * the reference count in order to know if it needs to | ||
870 | * queue the cgroup to be handled by the release | ||
871 | * agent */ | ||
872 | synchronize_rcu(); | ||
873 | 866 | ||
874 | mutex_lock(&cgroup_mutex); | 867 | mutex_lock(&cgroup_mutex); |
875 | /* | 868 | /* |
876 | * Release the subsystem state objects. | 869 | * Release the subsystem state objects. |
877 | */ | 870 | */ |
878 | for_each_subsys(cgrp->root, ss) | 871 | for_each_subsys(cgrp->root, ss) |
879 | ss->css_free(cgrp); | 872 | ss->css_free(cgrp); |
880 | 873 | ||
881 | cgrp->root->number_of_cgroups--; | 874 | cgrp->root->number_of_cgroups--; |
882 | mutex_unlock(&cgroup_mutex); | 875 | mutex_unlock(&cgroup_mutex); |
883 | 876 | ||
884 | /* | 877 | /* |
885 | * Drop the active superblock reference that we took when we | 878 | * Drop the active superblock reference that we took when we |
886 | * created the cgroup | 879 | * created the cgroup |
887 | */ | 880 | */ |
888 | deactivate_super(cgrp->root->sb); | 881 | deactivate_super(cgrp->root->sb); |
889 | 882 | ||
890 | /* | 883 | /* |
891 | * if we're getting rid of the cgroup, refcount should ensure | 884 | * if we're getting rid of the cgroup, refcount should ensure |
892 | * that there are no pidlists left. | 885 | * that there are no pidlists left. |
893 | */ | 886 | */ |
894 | BUG_ON(!list_empty(&cgrp->pidlists)); | 887 | BUG_ON(!list_empty(&cgrp->pidlists)); |
888 | |||
889 | simple_xattrs_free(&cgrp->xattrs); | ||
890 | |||
891 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); | ||
892 | kfree(cgrp); | ||
893 | } | ||
895 | 894 | ||
896 | simple_xattrs_free(&cgrp->xattrs); | 895 | static void cgroup_free_rcu(struct rcu_head *head) |
896 | { | ||
897 | struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); | ||
898 | |||
899 | schedule_work(&cgrp->free_work); | ||
900 | } | ||
901 | |||
902 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | ||
903 | { | ||
904 | /* is dentry a directory ? if so, kfree() associated cgroup */ | ||
905 | if (S_ISDIR(inode->i_mode)) { | ||
906 | struct cgroup *cgrp = dentry->d_fsdata; | ||
897 | 907 | ||
898 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); | 908 | BUG_ON(!(cgroup_is_removed(cgrp))); |
899 | kfree_rcu(cgrp, rcu_head); | 909 | call_rcu(&cgrp->rcu_head, cgroup_free_rcu); |
900 | } else { | 910 | } else { |
901 | struct cfent *cfe = __d_cfe(dentry); | 911 | struct cfent *cfe = __d_cfe(dentry); |
902 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; | 912 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; |
@@ -925,13 +935,17 @@ static void remove_dir(struct dentry *d) | |||
925 | dput(parent); | 935 | dput(parent); |
926 | } | 936 | } |
927 | 937 | ||
928 | static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | 938 | static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) |
929 | { | 939 | { |
930 | struct cfent *cfe; | 940 | struct cfent *cfe; |
931 | 941 | ||
932 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); | 942 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); |
933 | lockdep_assert_held(&cgroup_mutex); | 943 | lockdep_assert_held(&cgroup_mutex); |
934 | 944 | ||
945 | /* | ||
946 | * If we're doing cleanup due to failure of cgroup_create(), | ||
947 | * the corresponding @cfe may not exist. | ||
948 | */ | ||
935 | list_for_each_entry(cfe, &cgrp->files, node) { | 949 | list_for_each_entry(cfe, &cgrp->files, node) { |
936 | struct dentry *d = cfe->dentry; | 950 | struct dentry *d = cfe->dentry; |
937 | 951 | ||
@@ -944,9 +958,8 @@ static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | |||
944 | list_del_init(&cfe->node); | 958 | list_del_init(&cfe->node); |
945 | dput(d); | 959 | dput(d); |
946 | 960 | ||
947 | return 0; | 961 | break; |
948 | } | 962 | } |
949 | return -ENOENT; | ||
950 | } | 963 | } |
951 | 964 | ||
952 | /** | 965 | /** |
@@ -1083,7 +1096,6 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
1083 | } | 1096 | } |
1084 | } | 1097 | } |
1085 | root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; | 1098 | root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; |
1086 | synchronize_rcu(); | ||
1087 | 1099 | ||
1088 | return 0; | 1100 | return 0; |
1089 | } | 1101 | } |
@@ -1393,6 +1405,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
1393 | INIT_LIST_HEAD(&cgrp->allcg_node); | 1405 | INIT_LIST_HEAD(&cgrp->allcg_node); |
1394 | INIT_LIST_HEAD(&cgrp->release_list); | 1406 | INIT_LIST_HEAD(&cgrp->release_list); |
1395 | INIT_LIST_HEAD(&cgrp->pidlists); | 1407 | INIT_LIST_HEAD(&cgrp->pidlists); |
1408 | INIT_WORK(&cgrp->free_work, cgroup_free_fn); | ||
1396 | mutex_init(&cgrp->pidlist_mutex); | 1409 | mutex_init(&cgrp->pidlist_mutex); |
1397 | INIT_LIST_HEAD(&cgrp->event_list); | 1410 | INIT_LIST_HEAD(&cgrp->event_list); |
1398 | spin_lock_init(&cgrp->event_list_lock); | 1411 | spin_lock_init(&cgrp->event_list_lock); |
@@ -1597,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1597 | struct cgroupfs_root *existing_root; | 1610 | struct cgroupfs_root *existing_root; |
1598 | const struct cred *cred; | 1611 | const struct cred *cred; |
1599 | int i; | 1612 | int i; |
1613 | struct css_set *cg; | ||
1600 | 1614 | ||
1601 | BUG_ON(sb->s_root != NULL); | 1615 | BUG_ON(sb->s_root != NULL); |
1602 | 1616 | ||
@@ -1650,14 +1664,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1650 | /* Link the top cgroup in this hierarchy into all | 1664 | /* Link the top cgroup in this hierarchy into all |
1651 | * the css_set objects */ | 1665 | * the css_set objects */ |
1652 | write_lock(&css_set_lock); | 1666 | write_lock(&css_set_lock); |
1653 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { | 1667 | hash_for_each(css_set_table, i, cg, hlist) |
1654 | struct hlist_head *hhead = &css_set_table[i]; | 1668 | link_css_set(&tmp_cg_links, cg, root_cgrp); |
1655 | struct hlist_node *node; | ||
1656 | struct css_set *cg; | ||
1657 | |||
1658 | hlist_for_each_entry(cg, node, hhead, hlist) | ||
1659 | link_css_set(&tmp_cg_links, cg, root_cgrp); | ||
1660 | } | ||
1661 | write_unlock(&css_set_lock); | 1669 | write_unlock(&css_set_lock); |
1662 | 1670 | ||
1663 | free_cg_links(&tmp_cg_links); | 1671 | free_cg_links(&tmp_cg_links); |
@@ -1773,7 +1781,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
1773 | rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(), | 1781 | rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(), |
1774 | "cgroup_path() called without proper locking"); | 1782 | "cgroup_path() called without proper locking"); |
1775 | 1783 | ||
1776 | if (!dentry || cgrp == dummytop) { | 1784 | if (cgrp == dummytop) { |
1777 | /* | 1785 | /* |
1778 | * Inactive subsystems have no dentry for their root | 1786 | * Inactive subsystems have no dentry for their root |
1779 | * cgroup | 1787 | * cgroup |
@@ -1982,7 +1990,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1982 | ss->attach(cgrp, &tset); | 1990 | ss->attach(cgrp, &tset); |
1983 | } | 1991 | } |
1984 | 1992 | ||
1985 | synchronize_rcu(); | ||
1986 | out: | 1993 | out: |
1987 | if (retval) { | 1994 | if (retval) { |
1988 | for_each_subsys(root, ss) { | 1995 | for_each_subsys(root, ss) { |
@@ -2151,7 +2158,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
2151 | /* | 2158 | /* |
2152 | * step 5: success! and cleanup | 2159 | * step 5: success! and cleanup |
2153 | */ | 2160 | */ |
2154 | synchronize_rcu(); | ||
2155 | retval = 0; | 2161 | retval = 0; |
2156 | out_put_css_set_refs: | 2162 | out_put_css_set_refs: |
2157 | if (retval) { | 2163 | if (retval) { |
@@ -2637,7 +2643,7 @@ static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, un | |||
2637 | */ | 2643 | */ |
2638 | static inline struct cftype *__file_cft(struct file *file) | 2644 | static inline struct cftype *__file_cft(struct file *file) |
2639 | { | 2645 | { |
2640 | if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations) | 2646 | if (file_inode(file)->i_fop != &cgroup_file_operations) |
2641 | return ERR_PTR(-EINVAL); | 2647 | return ERR_PTR(-EINVAL); |
2642 | return __d_cft(file->f_dentry); | 2648 | return __d_cft(file->f_dentry); |
2643 | } | 2649 | } |
@@ -2769,14 +2775,14 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
2769 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) | 2775 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) |
2770 | continue; | 2776 | continue; |
2771 | 2777 | ||
2772 | if (is_add) | 2778 | if (is_add) { |
2773 | err = cgroup_add_file(cgrp, subsys, cft); | 2779 | err = cgroup_add_file(cgrp, subsys, cft); |
2774 | else | 2780 | if (err) |
2775 | err = cgroup_rm_file(cgrp, cft); | 2781 | pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n", |
2776 | if (err) { | 2782 | cft->name, err); |
2777 | pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n", | ||
2778 | is_add ? "add" : "remove", cft->name, err); | ||
2779 | ret = err; | 2783 | ret = err; |
2784 | } else { | ||
2785 | cgroup_rm_file(cgrp, cft); | ||
2780 | } | 2786 | } |
2781 | } | 2787 | } |
2782 | return ret; | 2788 | return ret; |
@@ -3017,6 +3023,32 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | |||
3017 | } | 3023 | } |
3018 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); | 3024 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); |
3019 | 3025 | ||
3026 | /** | ||
3027 | * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup | ||
3028 | * @pos: cgroup of interest | ||
3029 | * | ||
3030 | * Return the rightmost descendant of @pos. If there's no descendant, | ||
3031 | * @pos is returned. This can be used during pre-order traversal to skip | ||
3032 | * subtree of @pos. | ||
3033 | */ | ||
3034 | struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) | ||
3035 | { | ||
3036 | struct cgroup *last, *tmp; | ||
3037 | |||
3038 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
3039 | |||
3040 | do { | ||
3041 | last = pos; | ||
3042 | /* ->prev isn't RCU safe, walk ->next till the end */ | ||
3043 | pos = NULL; | ||
3044 | list_for_each_entry_rcu(tmp, &last->children, sibling) | ||
3045 | pos = tmp; | ||
3046 | } while (pos); | ||
3047 | |||
3048 | return last; | ||
3049 | } | ||
3050 | EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant); | ||
3051 | |||
3020 | static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) | 3052 | static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) |
3021 | { | 3053 | { |
3022 | struct cgroup *last; | 3054 | struct cgroup *last; |
@@ -3752,8 +3784,13 @@ static void cgroup_event_remove(struct work_struct *work) | |||
3752 | remove); | 3784 | remove); |
3753 | struct cgroup *cgrp = event->cgrp; | 3785 | struct cgroup *cgrp = event->cgrp; |
3754 | 3786 | ||
3787 | remove_wait_queue(event->wqh, &event->wait); | ||
3788 | |||
3755 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); | 3789 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); |
3756 | 3790 | ||
3791 | /* Notify userspace the event is going away. */ | ||
3792 | eventfd_signal(event->eventfd, 1); | ||
3793 | |||
3757 | eventfd_ctx_put(event->eventfd); | 3794 | eventfd_ctx_put(event->eventfd); |
3758 | kfree(event); | 3795 | kfree(event); |
3759 | dput(cgrp->dentry); | 3796 | dput(cgrp->dentry); |
@@ -3773,15 +3810,25 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, | |||
3773 | unsigned long flags = (unsigned long)key; | 3810 | unsigned long flags = (unsigned long)key; |
3774 | 3811 | ||
3775 | if (flags & POLLHUP) { | 3812 | if (flags & POLLHUP) { |
3776 | __remove_wait_queue(event->wqh, &event->wait); | ||
3777 | spin_lock(&cgrp->event_list_lock); | ||
3778 | list_del_init(&event->list); | ||
3779 | spin_unlock(&cgrp->event_list_lock); | ||
3780 | /* | 3813 | /* |
3781 | * We are in atomic context, but cgroup_event_remove() may | 3814 | * If the event has been detached at cgroup removal, we |
3782 | * sleep, so we have to call it in workqueue. | 3815 | * can simply return knowing the other side will cleanup |
3816 | * for us. | ||
3817 | * | ||
3818 | * We can't race against event freeing since the other | ||
3819 | * side will require wqh->lock via remove_wait_queue(), | ||
3820 | * which we hold. | ||
3783 | */ | 3821 | */ |
3784 | schedule_work(&event->remove); | 3822 | spin_lock(&cgrp->event_list_lock); |
3823 | if (!list_empty(&event->list)) { | ||
3824 | list_del_init(&event->list); | ||
3825 | /* | ||
3826 | * We are in atomic context, but cgroup_event_remove() | ||
3827 | * may sleep, so we have to call it in workqueue. | ||
3828 | */ | ||
3829 | schedule_work(&event->remove); | ||
3830 | } | ||
3831 | spin_unlock(&cgrp->event_list_lock); | ||
3785 | } | 3832 | } |
3786 | 3833 | ||
3787 | return 0; | 3834 | return 0; |
@@ -3807,6 +3854,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
3807 | const char *buffer) | 3854 | const char *buffer) |
3808 | { | 3855 | { |
3809 | struct cgroup_event *event = NULL; | 3856 | struct cgroup_event *event = NULL; |
3857 | struct cgroup *cgrp_cfile; | ||
3810 | unsigned int efd, cfd; | 3858 | unsigned int efd, cfd; |
3811 | struct file *efile = NULL; | 3859 | struct file *efile = NULL; |
3812 | struct file *cfile = NULL; | 3860 | struct file *cfile = NULL; |
@@ -3852,7 +3900,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
3852 | 3900 | ||
3853 | /* the process need read permission on control file */ | 3901 | /* the process need read permission on control file */ |
3854 | /* AV: shouldn't we check that it's been opened for read instead? */ | 3902 | /* AV: shouldn't we check that it's been opened for read instead? */ |
3855 | ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ); | 3903 | ret = inode_permission(file_inode(cfile), MAY_READ); |
3856 | if (ret < 0) | 3904 | if (ret < 0) |
3857 | goto fail; | 3905 | goto fail; |
3858 | 3906 | ||
@@ -3862,6 +3910,16 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
3862 | goto fail; | 3910 | goto fail; |
3863 | } | 3911 | } |
3864 | 3912 | ||
3913 | /* | ||
3914 | * The file to be monitored must be in the same cgroup as | ||
3915 | * cgroup.event_control is. | ||
3916 | */ | ||
3917 | cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent); | ||
3918 | if (cgrp_cfile != cgrp) { | ||
3919 | ret = -EINVAL; | ||
3920 | goto fail; | ||
3921 | } | ||
3922 | |||
3865 | if (!event->cft->register_event || !event->cft->unregister_event) { | 3923 | if (!event->cft->register_event || !event->cft->unregister_event) { |
3866 | ret = -EINVAL; | 3924 | ret = -EINVAL; |
3867 | goto fail; | 3925 | goto fail; |
@@ -4135,6 +4193,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4135 | 4193 | ||
4136 | init_cgroup_housekeeping(cgrp); | 4194 | init_cgroup_housekeeping(cgrp); |
4137 | 4195 | ||
4196 | dentry->d_fsdata = cgrp; | ||
4197 | cgrp->dentry = dentry; | ||
4198 | |||
4138 | cgrp->parent = parent; | 4199 | cgrp->parent = parent; |
4139 | cgrp->root = parent->root; | 4200 | cgrp->root = parent->root; |
4140 | cgrp->top_cgroup = parent->top_cgroup; | 4201 | cgrp->top_cgroup = parent->top_cgroup; |
@@ -4172,8 +4233,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4172 | lockdep_assert_held(&dentry->d_inode->i_mutex); | 4233 | lockdep_assert_held(&dentry->d_inode->i_mutex); |
4173 | 4234 | ||
4174 | /* allocation complete, commit to creation */ | 4235 | /* allocation complete, commit to creation */ |
4175 | dentry->d_fsdata = cgrp; | ||
4176 | cgrp->dentry = dentry; | ||
4177 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | 4236 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); |
4178 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); | 4237 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); |
4179 | root->number_of_cgroups++; | 4238 | root->number_of_cgroups++; |
@@ -4340,20 +4399,14 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4340 | /* | 4399 | /* |
4341 | * Unregister events and notify userspace. | 4400 | * Unregister events and notify userspace. |
4342 | * Notify userspace about cgroup removing only after rmdir of cgroup | 4401 | * Notify userspace about cgroup removing only after rmdir of cgroup |
4343 | * directory to avoid race between userspace and kernelspace. Use | 4402 | * directory to avoid race between userspace and kernelspace. |
4344 | * a temporary list to avoid a deadlock with cgroup_event_wake(). Since | ||
4345 | * cgroup_event_wake() is called with the wait queue head locked, | ||
4346 | * remove_wait_queue() cannot be called while holding event_list_lock. | ||
4347 | */ | 4403 | */ |
4348 | spin_lock(&cgrp->event_list_lock); | 4404 | spin_lock(&cgrp->event_list_lock); |
4349 | list_splice_init(&cgrp->event_list, &tmp_list); | 4405 | list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { |
4350 | spin_unlock(&cgrp->event_list_lock); | ||
4351 | list_for_each_entry_safe(event, tmp, &tmp_list, list) { | ||
4352 | list_del_init(&event->list); | 4406 | list_del_init(&event->list); |
4353 | remove_wait_queue(event->wqh, &event->wait); | ||
4354 | eventfd_signal(event->eventfd, 1); | ||
4355 | schedule_work(&event->remove); | 4407 | schedule_work(&event->remove); |
4356 | } | 4408 | } |
4409 | spin_unlock(&cgrp->event_list_lock); | ||
4357 | 4410 | ||
4358 | return 0; | 4411 | return 0; |
4359 | } | 4412 | } |
@@ -4438,6 +4491,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4438 | { | 4491 | { |
4439 | struct cgroup_subsys_state *css; | 4492 | struct cgroup_subsys_state *css; |
4440 | int i, ret; | 4493 | int i, ret; |
4494 | struct hlist_node *tmp; | ||
4495 | struct css_set *cg; | ||
4496 | unsigned long key; | ||
4441 | 4497 | ||
4442 | /* check name and function validity */ | 4498 | /* check name and function validity */ |
4443 | if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN || | 4499 | if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN || |
@@ -4503,23 +4559,17 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4503 | * this is all done under the css_set_lock. | 4559 | * this is all done under the css_set_lock. |
4504 | */ | 4560 | */ |
4505 | write_lock(&css_set_lock); | 4561 | write_lock(&css_set_lock); |
4506 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { | 4562 | hash_for_each_safe(css_set_table, i, tmp, cg, hlist) { |
4507 | struct css_set *cg; | 4563 | /* skip entries that we already rehashed */ |
4508 | struct hlist_node *node, *tmp; | 4564 | if (cg->subsys[ss->subsys_id]) |
4509 | struct hlist_head *bucket = &css_set_table[i], *new_bucket; | 4565 | continue; |
4510 | 4566 | /* remove existing entry */ | |
4511 | hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) { | 4567 | hash_del(&cg->hlist); |
4512 | /* skip entries that we already rehashed */ | 4568 | /* set new value */ |
4513 | if (cg->subsys[ss->subsys_id]) | 4569 | cg->subsys[ss->subsys_id] = css; |
4514 | continue; | 4570 | /* recompute hash and restore entry */ |
4515 | /* remove existing entry */ | 4571 | key = css_set_hash(cg->subsys); |
4516 | hlist_del(&cg->hlist); | 4572 | hash_add(css_set_table, &cg->hlist, key); |
4517 | /* set new value */ | ||
4518 | cg->subsys[ss->subsys_id] = css; | ||
4519 | /* recompute hash and restore entry */ | ||
4520 | new_bucket = css_set_hash(cg->subsys); | ||
4521 | hlist_add_head(&cg->hlist, new_bucket); | ||
4522 | } | ||
4523 | } | 4573 | } |
4524 | write_unlock(&css_set_lock); | 4574 | write_unlock(&css_set_lock); |
4525 | 4575 | ||
@@ -4551,7 +4601,6 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys); | |||
4551 | void cgroup_unload_subsys(struct cgroup_subsys *ss) | 4601 | void cgroup_unload_subsys(struct cgroup_subsys *ss) |
4552 | { | 4602 | { |
4553 | struct cg_cgroup_link *link; | 4603 | struct cg_cgroup_link *link; |
4554 | struct hlist_head *hhead; | ||
4555 | 4604 | ||
4556 | BUG_ON(ss->module == NULL); | 4605 | BUG_ON(ss->module == NULL); |
4557 | 4606 | ||
@@ -4567,10 +4616,8 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
4567 | offline_css(ss, dummytop); | 4616 | offline_css(ss, dummytop); |
4568 | ss->active = 0; | 4617 | ss->active = 0; |
4569 | 4618 | ||
4570 | if (ss->use_id) { | 4619 | if (ss->use_id) |
4571 | idr_remove_all(&ss->idr); | ||
4572 | idr_destroy(&ss->idr); | 4620 | idr_destroy(&ss->idr); |
4573 | } | ||
4574 | 4621 | ||
4575 | /* deassign the subsys_id */ | 4622 | /* deassign the subsys_id */ |
4576 | subsys[ss->subsys_id] = NULL; | 4623 | subsys[ss->subsys_id] = NULL; |
@@ -4585,11 +4632,12 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
4585 | write_lock(&css_set_lock); | 4632 | write_lock(&css_set_lock); |
4586 | list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { | 4633 | list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { |
4587 | struct css_set *cg = link->cg; | 4634 | struct css_set *cg = link->cg; |
4635 | unsigned long key; | ||
4588 | 4636 | ||
4589 | hlist_del(&cg->hlist); | 4637 | hash_del(&cg->hlist); |
4590 | cg->subsys[ss->subsys_id] = NULL; | 4638 | cg->subsys[ss->subsys_id] = NULL; |
4591 | hhead = css_set_hash(cg->subsys); | 4639 | key = css_set_hash(cg->subsys); |
4592 | hlist_add_head(&cg->hlist, hhead); | 4640 | hash_add(css_set_table, &cg->hlist, key); |
4593 | } | 4641 | } |
4594 | write_unlock(&css_set_lock); | 4642 | write_unlock(&css_set_lock); |
4595 | 4643 | ||
@@ -4631,9 +4679,6 @@ int __init cgroup_init_early(void) | |||
4631 | list_add(&init_css_set_link.cg_link_list, | 4679 | list_add(&init_css_set_link.cg_link_list, |
4632 | &init_css_set.cg_links); | 4680 | &init_css_set.cg_links); |
4633 | 4681 | ||
4634 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) | ||
4635 | INIT_HLIST_HEAD(&css_set_table[i]); | ||
4636 | |||
4637 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4682 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
4638 | struct cgroup_subsys *ss = subsys[i]; | 4683 | struct cgroup_subsys *ss = subsys[i]; |
4639 | 4684 | ||
@@ -4667,7 +4712,7 @@ int __init cgroup_init(void) | |||
4667 | { | 4712 | { |
4668 | int err; | 4713 | int err; |
4669 | int i; | 4714 | int i; |
4670 | struct hlist_head *hhead; | 4715 | unsigned long key; |
4671 | 4716 | ||
4672 | err = bdi_init(&cgroup_backing_dev_info); | 4717 | err = bdi_init(&cgroup_backing_dev_info); |
4673 | if (err) | 4718 | if (err) |
@@ -4686,8 +4731,8 @@ int __init cgroup_init(void) | |||
4686 | } | 4731 | } |
4687 | 4732 | ||
4688 | /* Add init_css_set to the hash table */ | 4733 | /* Add init_css_set to the hash table */ |
4689 | hhead = css_set_hash(init_css_set.subsys); | 4734 | key = css_set_hash(init_css_set.subsys); |
4690 | hlist_add_head(&init_css_set.hlist, hhead); | 4735 | hash_add(css_set_table, &init_css_set.hlist, key); |
4691 | BUG_ON(!init_root_id(&rootnode)); | 4736 | BUG_ON(!init_root_id(&rootnode)); |
4692 | 4737 | ||
4693 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); | 4738 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); |
@@ -4982,8 +5027,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
4982 | } | 5027 | } |
4983 | task_unlock(tsk); | 5028 | task_unlock(tsk); |
4984 | 5029 | ||
4985 | if (cg) | 5030 | put_css_set_taskexit(cg); |
4986 | put_css_set_taskexit(cg); | ||
4987 | } | 5031 | } |
4988 | 5032 | ||
4989 | /** | 5033 | /** |
@@ -5274,7 +5318,7 @@ EXPORT_SYMBOL_GPL(free_css_id); | |||
5274 | static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) | 5318 | static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) |
5275 | { | 5319 | { |
5276 | struct css_id *newid; | 5320 | struct css_id *newid; |
5277 | int myid, error, size; | 5321 | int ret, size; |
5278 | 5322 | ||
5279 | BUG_ON(!ss->use_id); | 5323 | BUG_ON(!ss->use_id); |
5280 | 5324 | ||
@@ -5282,35 +5326,24 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) | |||
5282 | newid = kzalloc(size, GFP_KERNEL); | 5326 | newid = kzalloc(size, GFP_KERNEL); |
5283 | if (!newid) | 5327 | if (!newid) |
5284 | return ERR_PTR(-ENOMEM); | 5328 | return ERR_PTR(-ENOMEM); |
5285 | /* get id */ | 5329 | |
5286 | if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) { | 5330 | idr_preload(GFP_KERNEL); |
5287 | error = -ENOMEM; | ||
5288 | goto err_out; | ||
5289 | } | ||
5290 | spin_lock(&ss->id_lock); | 5331 | spin_lock(&ss->id_lock); |
5291 | /* Don't use 0. allocates an ID of 1-65535 */ | 5332 | /* Don't use 0. allocates an ID of 1-65535 */ |
5292 | error = idr_get_new_above(&ss->idr, newid, 1, &myid); | 5333 | ret = idr_alloc(&ss->idr, newid, 1, CSS_ID_MAX + 1, GFP_NOWAIT); |
5293 | spin_unlock(&ss->id_lock); | 5334 | spin_unlock(&ss->id_lock); |
5335 | idr_preload_end(); | ||
5294 | 5336 | ||
5295 | /* Returns error when there are no free spaces for new ID.*/ | 5337 | /* Returns error when there are no free spaces for new ID.*/ |
5296 | if (error) { | 5338 | if (ret < 0) |
5297 | error = -ENOSPC; | ||
5298 | goto err_out; | 5339 | goto err_out; |
5299 | } | ||
5300 | if (myid > CSS_ID_MAX) | ||
5301 | goto remove_idr; | ||
5302 | 5340 | ||
5303 | newid->id = myid; | 5341 | newid->id = ret; |
5304 | newid->depth = depth; | 5342 | newid->depth = depth; |
5305 | return newid; | 5343 | return newid; |
5306 | remove_idr: | ||
5307 | error = -ENOSPC; | ||
5308 | spin_lock(&ss->id_lock); | ||
5309 | idr_remove(&ss->idr, myid); | ||
5310 | spin_unlock(&ss->id_lock); | ||
5311 | err_out: | 5344 | err_out: |
5312 | kfree(newid); | 5345 | kfree(newid); |
5313 | return ERR_PTR(error); | 5346 | return ERR_PTR(ret); |
5314 | 5347 | ||
5315 | } | 5348 | } |
5316 | 5349 | ||
@@ -5441,7 +5474,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id) | |||
5441 | struct inode *inode; | 5474 | struct inode *inode; |
5442 | struct cgroup_subsys_state *css; | 5475 | struct cgroup_subsys_state *css; |
5443 | 5476 | ||
5444 | inode = f->f_dentry->d_inode; | 5477 | inode = file_inode(f); |
5445 | /* check in cgroup filesystem dir */ | 5478 | /* check in cgroup filesystem dir */ |
5446 | if (inode->i_op != &cgroup_dir_inode_operations) | 5479 | if (inode->i_op != &cgroup_dir_inode_operations) |
5447 | return ERR_PTR(-EBADF); | 5480 | return ERR_PTR(-EBADF); |
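Much of the cgroup.c diff above converts the open-coded css_set_table hlist array to the generic <linux/hashtable.h> API (DEFINE_HASHTABLE, hash_add, hash_for_each_possible, hash_del). A self-contained sketch of that API follows, with hypothetical names; it is illustration only, not code from this commit.

/*
 * Self-contained sketch (hypothetical example, not from this commit) of the
 * <linux/hashtable.h> API that css_set_table is converted to above.
 */
#include <linux/hashtable.h>

struct example_obj {
	unsigned long key;
	struct hlist_node node;
};

/* 2^7 = 128 buckets, mirroring CSS_SET_HASH_BITS */
static DEFINE_HASHTABLE(example_table, 7);

static void example_insert(struct example_obj *obj)
{
	/* hash_add() hashes the key internally and links into a bucket */
	hash_add(example_table, &obj->node, obj->key);
}

static struct example_obj *example_find(unsigned long key)
{
	struct example_obj *obj;

	/* walks only the bucket that @key hashes to */
	hash_for_each_possible(example_table, obj, node, key)
		if (obj->key == key)
			return obj;
	return NULL;
}

static void example_remove(struct example_obj *obj)
{
	hash_del(&obj->node);
}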
diff --git a/kernel/compat.c b/kernel/compat.c
index 36700e9e2be9..19971d8c7299 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -290,8 +290,8 @@ static inline long put_compat_itimerval(struct compat_itimerval __user *o, | |||
290 | __put_user(i->it_value.tv_usec, &o->it_value.tv_usec))); | 290 | __put_user(i->it_value.tv_usec, &o->it_value.tv_usec))); |
291 | } | 291 | } |
292 | 292 | ||
293 | asmlinkage long compat_sys_getitimer(int which, | 293 | COMPAT_SYSCALL_DEFINE2(getitimer, int, which, |
294 | struct compat_itimerval __user *it) | 294 | struct compat_itimerval __user *, it) |
295 | { | 295 | { |
296 | struct itimerval kit; | 296 | struct itimerval kit; |
297 | int error; | 297 | int error; |
@@ -302,9 +302,9 @@ asmlinkage long compat_sys_getitimer(int which, | |||
302 | return error; | 302 | return error; |
303 | } | 303 | } |
304 | 304 | ||
305 | asmlinkage long compat_sys_setitimer(int which, | 305 | COMPAT_SYSCALL_DEFINE3(setitimer, int, which, |
306 | struct compat_itimerval __user *in, | 306 | struct compat_itimerval __user *, in, |
307 | struct compat_itimerval __user *out) | 307 | struct compat_itimerval __user *, out) |
308 | { | 308 | { |
309 | struct itimerval kin, kout; | 309 | struct itimerval kin, kout; |
310 | int error; | 310 | int error; |
@@ -381,9 +381,9 @@ static inline void compat_sig_setmask(sigset_t *blocked, compat_sigset_word set) | |||
381 | memcpy(blocked->sig, &set, sizeof(set)); | 381 | memcpy(blocked->sig, &set, sizeof(set)); |
382 | } | 382 | } |
383 | 383 | ||
384 | asmlinkage long compat_sys_sigprocmask(int how, | 384 | COMPAT_SYSCALL_DEFINE3(sigprocmask, int, how, |
385 | compat_old_sigset_t __user *nset, | 385 | compat_old_sigset_t __user *, nset, |
386 | compat_old_sigset_t __user *oset) | 386 | compat_old_sigset_t __user *, oset) |
387 | { | 387 | { |
388 | old_sigset_t old_set, new_set; | 388 | old_sigset_t old_set, new_set; |
389 | sigset_t new_blocked; | 389 | sigset_t new_blocked; |
@@ -593,7 +593,7 @@ COMPAT_SYSCALL_DEFINE5(waitid, | |||
593 | else | 593 | else |
594 | ret = put_compat_rusage(&ru, uru); | 594 | ret = put_compat_rusage(&ru, uru); |
595 | if (ret) | 595 | if (ret) |
596 | return ret; | 596 | return -EFAULT; |
597 | } | 597 | } |
598 | 598 | ||
599 | BUG_ON(info.si_code & __SI_MASK); | 599 | BUG_ON(info.si_code & __SI_MASK); |
@@ -971,7 +971,7 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, | |||
971 | } | 971 | } |
972 | 972 | ||
973 | void | 973 | void |
974 | sigset_from_compat (sigset_t *set, compat_sigset_t *compat) | 974 | sigset_from_compat(sigset_t *set, const compat_sigset_t *compat) |
975 | { | 975 | { |
976 | switch (_NSIG_WORDS) { | 976 | switch (_NSIG_WORDS) { |
977 | case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 ); | 977 | case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 ); |
@@ -982,10 +982,20 @@ sigset_from_compat (sigset_t *set, compat_sigset_t *compat) | |||
982 | } | 982 | } |
983 | EXPORT_SYMBOL_GPL(sigset_from_compat); | 983 | EXPORT_SYMBOL_GPL(sigset_from_compat); |
984 | 984 | ||
985 | asmlinkage long | 985 | void |
986 | compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, | 986 | sigset_to_compat(compat_sigset_t *compat, const sigset_t *set) |
987 | struct compat_siginfo __user *uinfo, | 987 | { |
988 | struct compat_timespec __user *uts, compat_size_t sigsetsize) | 988 | switch (_NSIG_WORDS) { |
989 | case 4: compat->sig[7] = (set->sig[3] >> 32); compat->sig[6] = set->sig[3]; | ||
990 | case 3: compat->sig[5] = (set->sig[2] >> 32); compat->sig[4] = set->sig[2]; | ||
991 | case 2: compat->sig[3] = (set->sig[1] >> 32); compat->sig[2] = set->sig[1]; | ||
992 | case 1: compat->sig[1] = (set->sig[0] >> 32); compat->sig[0] = set->sig[0]; | ||
993 | } | ||
994 | } | ||
995 | |||
996 | COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese, | ||
997 | struct compat_siginfo __user *, uinfo, | ||
998 | struct compat_timespec __user *, uts, compat_size_t, sigsetsize) | ||
989 | { | 999 | { |
990 | compat_sigset_t s32; | 1000 | compat_sigset_t s32; |
991 | sigset_t s; | 1001 | sigset_t s; |
@@ -1013,18 +1023,6 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, | |||
1013 | } | 1023 | } |
1014 | 1024 | ||
1015 | return ret; | 1025 | return ret; |
1016 | |||
1017 | } | ||
1018 | |||
1019 | asmlinkage long | ||
1020 | compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, | ||
1021 | struct compat_siginfo __user *uinfo) | ||
1022 | { | ||
1023 | siginfo_t info; | ||
1024 | |||
1025 | if (copy_siginfo_from_user32(&info, uinfo)) | ||
1026 | return -EFAULT; | ||
1027 | return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); | ||
1028 | } | 1026 | } |
1029 | 1027 | ||
1030 | #ifdef __ARCH_WANT_COMPAT_SYS_TIME | 1028 | #ifdef __ARCH_WANT_COMPAT_SYS_TIME |
@@ -1067,23 +1065,6 @@ asmlinkage long compat_sys_stime(compat_time_t __user *tptr) | |||
1067 | 1065 | ||
1068 | #endif /* __ARCH_WANT_COMPAT_SYS_TIME */ | 1066 | #endif /* __ARCH_WANT_COMPAT_SYS_TIME */ |
1069 | 1067 | ||
1070 | #ifdef __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND | ||
1071 | asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat_size_t sigsetsize) | ||
1072 | { | ||
1073 | sigset_t newset; | ||
1074 | compat_sigset_t newset32; | ||
1075 | |||
1076 | /* XXX: Don't preclude handling different sized sigset_t's. */ | ||
1077 | if (sigsetsize != sizeof(sigset_t)) | ||
1078 | return -EINVAL; | ||
1079 | |||
1080 | if (copy_from_user(&newset32, unewset, sizeof(compat_sigset_t))) | ||
1081 | return -EFAULT; | ||
1082 | sigset_from_compat(&newset, &newset32); | ||
1083 | return sigsuspend(&newset); | ||
1084 | } | ||
1085 | #endif /* __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND */ | ||
1086 | |||
1087 | asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) | 1068 | asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) |
1088 | { | 1069 | { |
1089 | struct timex txc; | 1070 | struct timex txc; |
@@ -1222,9 +1203,9 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info) | |||
1222 | return 0; | 1203 | return 0; |
1223 | } | 1204 | } |
1224 | 1205 | ||
1225 | #ifdef __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL | 1206 | COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, |
1226 | asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, | 1207 | compat_pid_t, pid, |
1227 | struct compat_timespec __user *interval) | 1208 | struct compat_timespec __user *, interval) |
1228 | { | 1209 | { |
1229 | struct timespec t; | 1210 | struct timespec t; |
1230 | int ret; | 1211 | int ret; |
@@ -1237,7 +1218,6 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, | |||
1237 | return -EFAULT; | 1218 | return -EFAULT; |
1238 | return ret; | 1219 | return ret; |
1239 | } | 1220 | } |
1240 | #endif /* __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL */ | ||
1241 | 1221 | ||
1242 | /* | 1222 | /* |
1243 | * Allocate user-space memory for the duration of a single system call, | 1223 | * Allocate user-space memory for the duration of a single system call, |
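The compat.c hunks above convert open-coded asmlinkage compat_sys_*() definitions to the COMPAT_SYSCALL_DEFINEn() wrappers. A sketch of the pattern with a hypothetical syscall follows; it is not part of this commit. The macro generates the compat_sys_<name>() entry point and, on architectures that need it (e.g. s390), sanitises 32-bit pointer arguments before they reach the body.

/*
 * Illustrative sketch (hypothetical syscall, not from this commit) of the
 * COMPAT_SYSCALL_DEFINEn() wrappers the hunks above convert to.
 */
#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/uaccess.h>

COMPAT_SYSCALL_DEFINE2(example_gettimeval, int, which,
		       struct compat_timeval __user *, tv)
{
	struct compat_timeval ktv = { .tv_sec = 0, .tv_usec = 0 };

	if (which != 0)
		return -EINVAL;

	/* copy the 32-bit layout back to userspace */
	if (copy_to_user(tv, &ktv, sizeof(ktv)))
		return -EFAULT;
	return 0;
}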
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7bb63eea6eb8..4f9dfe43ecbd 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,14 +61,6 @@ | |||
61 | #include <linux/cgroup.h> | 61 | #include <linux/cgroup.h> |
62 | 62 | ||
63 | /* | 63 | /* |
64 | * Workqueue for cpuset related tasks. | ||
65 | * | ||
66 | * Using kevent workqueue may cause deadlock when memory_migrate | ||
67 | * is set. So we create a separate workqueue thread for cpuset. | ||
68 | */ | ||
69 | static struct workqueue_struct *cpuset_wq; | ||
70 | |||
71 | /* | ||
72 | * Tracks how many cpusets are currently defined in system. | 64 | * Tracks how many cpusets are currently defined in system. |
73 | * When there is only one cpuset (the root cpuset) we can | 65 | * When there is only one cpuset (the root cpuset) we can |
74 | * short circuit some hooks. | 66 | * short circuit some hooks. |
@@ -95,18 +87,21 @@ struct cpuset { | |||
95 | cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ | 87 | cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ |
96 | nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ | 88 | nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ |
97 | 89 | ||
98 | struct cpuset *parent; /* my parent */ | ||
99 | |||
100 | struct fmeter fmeter; /* memory_pressure filter */ | 90 | struct fmeter fmeter; /* memory_pressure filter */ |
101 | 91 | ||
92 | /* | ||
93 | * Tasks are being attached to this cpuset. Used to prevent | ||
94 | * zeroing cpus/mems_allowed between ->can_attach() and ->attach(). | ||
95 | */ | ||
96 | int attach_in_progress; | ||
97 | |||
102 | /* partition number for rebuild_sched_domains() */ | 98 | /* partition number for rebuild_sched_domains() */ |
103 | int pn; | 99 | int pn; |
104 | 100 | ||
105 | /* for custom sched domain */ | 101 | /* for custom sched domain */ |
106 | int relax_domain_level; | 102 | int relax_domain_level; |
107 | 103 | ||
108 | /* used for walking a cpuset hierarchy */ | 104 | struct work_struct hotplug_work; |
109 | struct list_head stack_list; | ||
110 | }; | 105 | }; |
111 | 106 | ||
112 | /* Retrieve the cpuset for a cgroup */ | 107 | /* Retrieve the cpuset for a cgroup */ |
@@ -123,6 +118,15 @@ static inline struct cpuset *task_cs(struct task_struct *task) | |||
123 | struct cpuset, css); | 118 | struct cpuset, css); |
124 | } | 119 | } |
125 | 120 | ||
121 | static inline struct cpuset *parent_cs(const struct cpuset *cs) | ||
122 | { | ||
123 | struct cgroup *pcgrp = cs->css.cgroup->parent; | ||
124 | |||
125 | if (pcgrp) | ||
126 | return cgroup_cs(pcgrp); | ||
127 | return NULL; | ||
128 | } | ||
129 | |||
126 | #ifdef CONFIG_NUMA | 130 | #ifdef CONFIG_NUMA |
127 | static inline bool task_has_mempolicy(struct task_struct *task) | 131 | static inline bool task_has_mempolicy(struct task_struct *task) |
128 | { | 132 | { |
@@ -138,6 +142,7 @@ static inline bool task_has_mempolicy(struct task_struct *task) | |||
138 | 142 | ||
139 | /* bits in struct cpuset flags field */ | 143 | /* bits in struct cpuset flags field */ |
140 | typedef enum { | 144 | typedef enum { |
145 | CS_ONLINE, | ||
141 | CS_CPU_EXCLUSIVE, | 146 | CS_CPU_EXCLUSIVE, |
142 | CS_MEM_EXCLUSIVE, | 147 | CS_MEM_EXCLUSIVE, |
143 | CS_MEM_HARDWALL, | 148 | CS_MEM_HARDWALL, |
@@ -147,13 +152,12 @@ typedef enum { | |||
147 | CS_SPREAD_SLAB, | 152 | CS_SPREAD_SLAB, |
148 | } cpuset_flagbits_t; | 153 | } cpuset_flagbits_t; |
149 | 154 | ||
150 | /* the type of hotplug event */ | ||
151 | enum hotplug_event { | ||
152 | CPUSET_CPU_OFFLINE, | ||
153 | CPUSET_MEM_OFFLINE, | ||
154 | }; | ||
155 | |||
156 | /* convenient tests for these bits */ | 155 | /* convenient tests for these bits */ |
156 | static inline bool is_cpuset_online(const struct cpuset *cs) | ||
157 | { | ||
158 | return test_bit(CS_ONLINE, &cs->flags); | ||
159 | } | ||
160 | |||
157 | static inline int is_cpu_exclusive(const struct cpuset *cs) | 161 | static inline int is_cpu_exclusive(const struct cpuset *cs) |
158 | { | 162 | { |
159 | return test_bit(CS_CPU_EXCLUSIVE, &cs->flags); | 163 | return test_bit(CS_CPU_EXCLUSIVE, &cs->flags); |
@@ -190,27 +194,52 @@ static inline int is_spread_slab(const struct cpuset *cs) | |||
190 | } | 194 | } |
191 | 195 | ||
192 | static struct cpuset top_cpuset = { | 196 | static struct cpuset top_cpuset = { |
193 | .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), | 197 | .flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) | |
198 | (1 << CS_MEM_EXCLUSIVE)), | ||
194 | }; | 199 | }; |
195 | 200 | ||
201 | /** | ||
202 | * cpuset_for_each_child - traverse online children of a cpuset | ||
203 | * @child_cs: loop cursor pointing to the current child | ||
204 | * @pos_cgrp: used for iteration | ||
205 | * @parent_cs: target cpuset to walk children of | ||
206 | * | ||
207 | * Walk @child_cs through the online children of @parent_cs. Must be used | ||
208 | * with RCU read locked. | ||
209 | */ | ||
210 | #define cpuset_for_each_child(child_cs, pos_cgrp, parent_cs) \ | ||
211 | cgroup_for_each_child((pos_cgrp), (parent_cs)->css.cgroup) \ | ||
212 | if (is_cpuset_online(((child_cs) = cgroup_cs((pos_cgrp))))) | ||
213 | |||
214 | /** | ||
215 | * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants | ||
216 | * @des_cs: loop cursor pointing to the current descendant | ||
217 | * @pos_cgrp: used for iteration | ||
218 | * @root_cs: target cpuset to walk ancestor of | ||
219 | * | ||
220 | * Walk @des_cs through the online descendants of @root_cs. Must be used | ||
221 | * with RCU read locked. The caller may modify @pos_cgrp by calling | ||
222 | * cgroup_rightmost_descendant() to skip subtree. | ||
223 | */ | ||
224 | #define cpuset_for_each_descendant_pre(des_cs, pos_cgrp, root_cs) \ | ||
225 | cgroup_for_each_descendant_pre((pos_cgrp), (root_cs)->css.cgroup) \ | ||
226 | if (is_cpuset_online(((des_cs) = cgroup_cs((pos_cgrp))))) | ||
227 | |||
196 | /* | 228 | /* |
197 | * There are two global mutexes guarding cpuset structures. The first | 229 | * There are two global mutexes guarding cpuset structures - cpuset_mutex |
198 | * is the main control groups cgroup_mutex, accessed via | 230 | * and callback_mutex. The latter may nest inside the former. We also |
199 | * cgroup_lock()/cgroup_unlock(). The second is the cpuset-specific | 231 | * require taking task_lock() when dereferencing a task's cpuset pointer. |
200 | * callback_mutex, below. They can nest. It is ok to first take | 232 | * See "The task_lock() exception", at the end of this comment. |
201 | * cgroup_mutex, then nest callback_mutex. We also require taking | 233 | * |
202 | * task_lock() when dereferencing a task's cpuset pointer. See "The | 234 | * A task must hold both mutexes to modify cpusets. If a task holds |
203 | * task_lock() exception", at the end of this comment. | 235 | * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it |
204 | * | 236 | * is the only task able to also acquire callback_mutex and be able to |
205 | * A task must hold both mutexes to modify cpusets. If a task | 237 | * modify cpusets. It can perform various checks on the cpuset structure |
206 | * holds cgroup_mutex, then it blocks others wanting that mutex, | 238 | * first, knowing nothing will change. It can also allocate memory while |
207 | * ensuring that it is the only task able to also acquire callback_mutex | 239 | * just holding cpuset_mutex. While it is performing these checks, various |
208 | * and be able to modify cpusets. It can perform various checks on | 240 | * callback routines can briefly acquire callback_mutex to query cpusets. |
209 | * the cpuset structure first, knowing nothing will change. It can | 241 | * Once it is ready to make the changes, it takes callback_mutex, blocking |
210 | * also allocate memory while just holding cgroup_mutex. While it is | 242 | * everyone else. |
211 | * performing these checks, various callback routines can briefly | ||
212 | * acquire callback_mutex to query cpusets. Once it is ready to make | ||
213 | * the changes, it takes callback_mutex, blocking everyone else. | ||
214 | * | 243 | * |
215 | * Calls to the kernel memory allocator can not be made while holding | 244 | * Calls to the kernel memory allocator can not be made while holding |
216 | * callback_mutex, as that would risk double tripping on callback_mutex | 245 | * callback_mutex, as that would risk double tripping on callback_mutex |
@@ -232,6 +261,7 @@ static struct cpuset top_cpuset = { | |||
232 | * guidelines for accessing subsystem state in kernel/cgroup.c | 261 | * guidelines for accessing subsystem state in kernel/cgroup.c |
233 | */ | 262 | */ |
234 | 263 | ||
264 | static DEFINE_MUTEX(cpuset_mutex); | ||
235 | static DEFINE_MUTEX(callback_mutex); | 265 | static DEFINE_MUTEX(callback_mutex); |
236 | 266 | ||
237 | /* | 267 | /* |
@@ -246,6 +276,17 @@ static char cpuset_nodelist[CPUSET_NODELIST_LEN]; | |||
246 | static DEFINE_SPINLOCK(cpuset_buffer_lock); | 276 | static DEFINE_SPINLOCK(cpuset_buffer_lock); |
247 | 277 | ||
248 | /* | 278 | /* |
279 | * CPU / memory hotplug is handled asynchronously. | ||
280 | */ | ||
281 | static struct workqueue_struct *cpuset_propagate_hotplug_wq; | ||
282 | |||
283 | static void cpuset_hotplug_workfn(struct work_struct *work); | ||
284 | static void cpuset_propagate_hotplug_workfn(struct work_struct *work); | ||
285 | static void schedule_cpuset_propagate_hotplug(struct cpuset *cs); | ||
286 | |||
287 | static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); | ||
288 | |||
289 | /* | ||
249 | * This is ugly, but preserves the userspace API for existing cpuset | 290 | * This is ugly, but preserves the userspace API for existing cpuset |
250 | * users. If someone tries to mount the "cpuset" filesystem, we | 291 | * users. If someone tries to mount the "cpuset" filesystem, we |
251 | * silently switch it to mount "cgroup" instead | 292 | * silently switch it to mount "cgroup" instead |
@@ -289,7 +330,7 @@ static void guarantee_online_cpus(const struct cpuset *cs, | |||
289 | struct cpumask *pmask) | 330 | struct cpumask *pmask) |
290 | { | 331 | { |
291 | while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) | 332 | while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) |
292 | cs = cs->parent; | 333 | cs = parent_cs(cs); |
293 | if (cs) | 334 | if (cs) |
294 | cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); | 335 | cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); |
295 | else | 336 | else |
@@ -314,7 +355,7 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | |||
314 | { | 355 | { |
315 | while (cs && !nodes_intersects(cs->mems_allowed, | 356 | while (cs && !nodes_intersects(cs->mems_allowed, |
316 | node_states[N_MEMORY])) | 357 | node_states[N_MEMORY])) |
317 | cs = cs->parent; | 358 | cs = parent_cs(cs); |
318 | if (cs) | 359 | if (cs) |
319 | nodes_and(*pmask, cs->mems_allowed, | 360 | nodes_and(*pmask, cs->mems_allowed, |
320 | node_states[N_MEMORY]); | 361 | node_states[N_MEMORY]); |
@@ -326,7 +367,7 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | |||
326 | /* | 367 | /* |
327 | * update task's spread flag if cpuset's page/slab spread flag is set | 368 | * update task's spread flag if cpuset's page/slab spread flag is set |
328 | * | 369 | * |
329 | * Called with callback_mutex/cgroup_mutex held | 370 | * Called with callback_mutex/cpuset_mutex held |
330 | */ | 371 | */ |
331 | static void cpuset_update_task_spread_flag(struct cpuset *cs, | 372 | static void cpuset_update_task_spread_flag(struct cpuset *cs, |
332 | struct task_struct *tsk) | 373 | struct task_struct *tsk) |
@@ -346,7 +387,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs, | |||
346 | * | 387 | * |
347 | * One cpuset is a subset of another if all its allowed CPUs and | 388 | * One cpuset is a subset of another if all its allowed CPUs and |
348 | * Memory Nodes are a subset of the other, and its exclusive flags | 389 | * Memory Nodes are a subset of the other, and its exclusive flags |
349 | * are only set if the other's are set. Call holding cgroup_mutex. | 390 | * are only set if the other's are set. Call holding cpuset_mutex. |
350 | */ | 391 | */ |
351 | 392 | ||
352 | static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) | 393 | static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) |
@@ -395,7 +436,7 @@ static void free_trial_cpuset(struct cpuset *trial) | |||
395 | * If we replaced the flag and mask values of the current cpuset | 436 | * If we replaced the flag and mask values of the current cpuset |
396 | * (cur) with those values in the trial cpuset (trial), would | 437 | * (cur) with those values in the trial cpuset (trial), would |
397 | * our various subset and exclusive rules still be valid? Presumes | 438 | * our various subset and exclusive rules still be valid? Presumes |
398 | * cgroup_mutex held. | 439 | * cpuset_mutex held. |
399 | * | 440 | * |
400 | * 'cur' is the address of an actual, in-use cpuset. Operations | 441 | * 'cur' is the address of an actual, in-use cpuset. Operations |
401 | * such as list traversal that depend on the actual address of the | 442 | * such as list traversal that depend on the actual address of the |
@@ -412,48 +453,58 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
412 | { | 453 | { |
413 | struct cgroup *cont; | 454 | struct cgroup *cont; |
414 | struct cpuset *c, *par; | 455 | struct cpuset *c, *par; |
456 | int ret; | ||
457 | |||
458 | rcu_read_lock(); | ||
415 | 459 | ||
416 | /* Each of our child cpusets must be a subset of us */ | 460 | /* Each of our child cpusets must be a subset of us */ |
417 | list_for_each_entry(cont, &cur->css.cgroup->children, sibling) { | 461 | ret = -EBUSY; |
418 | if (!is_cpuset_subset(cgroup_cs(cont), trial)) | 462 | cpuset_for_each_child(c, cont, cur) |
419 | return -EBUSY; | 463 | if (!is_cpuset_subset(c, trial)) |
420 | } | 464 | goto out; |
421 | 465 | ||
422 | /* Remaining checks don't apply to root cpuset */ | 466 | /* Remaining checks don't apply to root cpuset */ |
467 | ret = 0; | ||
423 | if (cur == &top_cpuset) | 468 | if (cur == &top_cpuset) |
424 | return 0; | 469 | goto out; |
425 | 470 | ||
426 | par = cur->parent; | 471 | par = parent_cs(cur); |
427 | 472 | ||
428 | /* We must be a subset of our parent cpuset */ | 473 | /* We must be a subset of our parent cpuset */ |
474 | ret = -EACCES; | ||
429 | if (!is_cpuset_subset(trial, par)) | 475 | if (!is_cpuset_subset(trial, par)) |
430 | return -EACCES; | 476 | goto out; |
431 | 477 | ||
432 | /* | 478 | /* |
433 | * If either I or some sibling (!= me) is exclusive, we can't | 479 | * If either I or some sibling (!= me) is exclusive, we can't |
434 | * overlap | 480 | * overlap |
435 | */ | 481 | */ |
436 | list_for_each_entry(cont, &par->css.cgroup->children, sibling) { | 482 | ret = -EINVAL; |
437 | c = cgroup_cs(cont); | 483 | cpuset_for_each_child(c, cont, par) { |
438 | if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && | 484 | if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && |
439 | c != cur && | 485 | c != cur && |
440 | cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) | 486 | cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) |
441 | return -EINVAL; | 487 | goto out; |
442 | if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && | 488 | if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && |
443 | c != cur && | 489 | c != cur && |
444 | nodes_intersects(trial->mems_allowed, c->mems_allowed)) | 490 | nodes_intersects(trial->mems_allowed, c->mems_allowed)) |
445 | return -EINVAL; | 491 | goto out; |
446 | } | 492 | } |
447 | 493 | ||
448 | /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */ | 494 | /* |
449 | if (cgroup_task_count(cur->css.cgroup)) { | 495 | * Cpusets with tasks - existing or newly being attached - can't |
450 | if (cpumask_empty(trial->cpus_allowed) || | 496 | * have empty cpus_allowed or mems_allowed. |
451 | nodes_empty(trial->mems_allowed)) { | 497 | */ |
452 | return -ENOSPC; | 498 | ret = -ENOSPC; |
453 | } | 499 | if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) && |
454 | } | 500 | (cpumask_empty(trial->cpus_allowed) || |
501 | nodes_empty(trial->mems_allowed))) | ||
502 | goto out; | ||
455 | 503 | ||
456 | return 0; | 504 | ret = 0; |
505 | out: | ||
506 | rcu_read_unlock(); | ||
507 | return ret; | ||
457 | } | 508 | } |
458 | 509 | ||
459 | #ifdef CONFIG_SMP | 510 | #ifdef CONFIG_SMP |
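validate_change() now iterates children through the cpuset_for_each_child() helper under rcu_read_lock() and funnels every failure through a single unlock-and-return path. A sketch of that shape under the same assumptions (example_validate() is hypothetical; only the first subset check is shown):

        static int example_validate(struct cpuset *cur, struct cpuset *trial)
        {
                struct cpuset *c;
                struct cgroup *pos_cgrp;
                int ret = -EBUSY;

                rcu_read_lock();                        /* the child list is RCU protected */
                cpuset_for_each_child(c, pos_cgrp, cur)
                        if (!is_cpuset_subset(c, trial))
                                goto out;               /* ret still carries -EBUSY */
                ret = 0;
        out:
                rcu_read_unlock();
                return ret;
        }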
@@ -474,31 +525,24 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) | |||
474 | return; | 525 | return; |
475 | } | 526 | } |
476 | 527 | ||
477 | static void | 528 | static void update_domain_attr_tree(struct sched_domain_attr *dattr, |
478 | update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) | 529 | struct cpuset *root_cs) |
479 | { | 530 | { |
480 | LIST_HEAD(q); | 531 | struct cpuset *cp; |
481 | 532 | struct cgroup *pos_cgrp; | |
482 | list_add(&c->stack_list, &q); | ||
483 | while (!list_empty(&q)) { | ||
484 | struct cpuset *cp; | ||
485 | struct cgroup *cont; | ||
486 | struct cpuset *child; | ||
487 | |||
488 | cp = list_first_entry(&q, struct cpuset, stack_list); | ||
489 | list_del(q.next); | ||
490 | 533 | ||
491 | if (cpumask_empty(cp->cpus_allowed)) | 534 | rcu_read_lock(); |
535 | cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) { | ||
536 | /* skip the whole subtree if @cp doesn't have any CPU */ | ||
537 | if (cpumask_empty(cp->cpus_allowed)) { | ||
538 | pos_cgrp = cgroup_rightmost_descendant(pos_cgrp); | ||
492 | continue; | 539 | continue; |
540 | } | ||
493 | 541 | ||
494 | if (is_sched_load_balance(cp)) | 542 | if (is_sched_load_balance(cp)) |
495 | update_domain_attr(dattr, cp); | 543 | update_domain_attr(dattr, cp); |
496 | |||
497 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { | ||
498 | child = cgroup_cs(cont); | ||
499 | list_add_tail(&child->stack_list, &q); | ||
500 | } | ||
501 | } | 544 | } |
545 | rcu_read_unlock(); | ||
502 | } | 546 | } |
503 | 547 | ||
504 | /* | 548 | /* |
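The open-coded queue walk is replaced by a pre-order descendant iteration, and whole subtrees are skipped by advancing the cursor to cgroup_rightmost_descendant(). A sketch of that idiom using the helpers this series relies on (example_count_balanced() is hypothetical):

        static int example_count_balanced(struct cpuset *root_cs)
        {
                struct cpuset *cp;
                struct cgroup *pos_cgrp;
                int n = 0;

                rcu_read_lock();
                cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
                        if (cpumask_empty(cp->cpus_allowed)) {
                                /* children only shrink the mask, so the whole subtree is empty too */
                                pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
                                continue;
                        }
                        if (is_sched_load_balance(cp))
                                n++;
                }
                rcu_read_unlock();
                return n;
        }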
@@ -520,7 +564,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) | |||
520 | * domains when operating in the severe memory shortage situations | 564 | * domains when operating in the severe memory shortage situations |
521 | * that could cause allocation failures below. | 565 | * that could cause allocation failures below. |
522 | * | 566 | * |
523 | * Must be called with cgroup_lock held. | 567 | * Must be called with cpuset_mutex held. |
524 | * | 568 | * |
525 | * The three key local variables below are: | 569 | * The three key local variables below are: |
526 | * q - a linked-list queue of cpuset pointers, used to implement a | 570 | * q - a linked-list queue of cpuset pointers, used to implement a |
@@ -558,7 +602,6 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) | |||
558 | static int generate_sched_domains(cpumask_var_t **domains, | 602 | static int generate_sched_domains(cpumask_var_t **domains, |
559 | struct sched_domain_attr **attributes) | 603 | struct sched_domain_attr **attributes) |
560 | { | 604 | { |
561 | LIST_HEAD(q); /* queue of cpusets to be scanned */ | ||
562 | struct cpuset *cp; /* scans q */ | 605 | struct cpuset *cp; /* scans q */ |
563 | struct cpuset **csa; /* array of all cpuset ptrs */ | 606 | struct cpuset **csa; /* array of all cpuset ptrs */ |
564 | int csn; /* how many cpuset ptrs in csa so far */ | 607 | int csn; /* how many cpuset ptrs in csa so far */ |
@@ -567,6 +610,7 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
567 | struct sched_domain_attr *dattr; /* attributes for custom domains */ | 610 | struct sched_domain_attr *dattr; /* attributes for custom domains */ |
568 | int ndoms = 0; /* number of sched domains in result */ | 611 | int ndoms = 0; /* number of sched domains in result */ |
569 | int nslot; /* next empty doms[] struct cpumask slot */ | 612 | int nslot; /* next empty doms[] struct cpumask slot */ |
613 | struct cgroup *pos_cgrp; | ||
570 | 614 | ||
571 | doms = NULL; | 615 | doms = NULL; |
572 | dattr = NULL; | 616 | dattr = NULL; |
@@ -594,33 +638,27 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
594 | goto done; | 638 | goto done; |
595 | csn = 0; | 639 | csn = 0; |
596 | 640 | ||
597 | list_add(&top_cpuset.stack_list, &q); | 641 | rcu_read_lock(); |
598 | while (!list_empty(&q)) { | 642 | cpuset_for_each_descendant_pre(cp, pos_cgrp, &top_cpuset) { |
599 | struct cgroup *cont; | ||
600 | struct cpuset *child; /* scans child cpusets of cp */ | ||
601 | |||
602 | cp = list_first_entry(&q, struct cpuset, stack_list); | ||
603 | list_del(q.next); | ||
604 | |||
605 | if (cpumask_empty(cp->cpus_allowed)) | ||
606 | continue; | ||
607 | |||
608 | /* | 643 | /* |
609 | * All child cpusets contain a subset of the parent's cpus, so | 644 | * Continue traversing beyond @cp iff @cp has some CPUs and |
610 | * just skip them, and then we call update_domain_attr_tree() | 645 | * isn't load balancing. The former is obvious. The |
611 | * to calc relax_domain_level of the corresponding sched | 646 | * latter: All child cpusets contain a subset of the |
612 | * domain. | 647 | * parent's cpus, so just skip them, and then we call |
648 | * update_domain_attr_tree() to calc relax_domain_level of | ||
649 | * the corresponding sched domain. | ||
613 | */ | 650 | */ |
614 | if (is_sched_load_balance(cp)) { | 651 | if (!cpumask_empty(cp->cpus_allowed) && |
615 | csa[csn++] = cp; | 652 | !is_sched_load_balance(cp)) |
616 | continue; | 653 | continue; |
617 | } | ||
618 | 654 | ||
619 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { | 655 | if (is_sched_load_balance(cp)) |
620 | child = cgroup_cs(cont); | 656 | csa[csn++] = cp; |
621 | list_add_tail(&child->stack_list, &q); | 657 | |
622 | } | 658 | /* skip @cp's subtree */ |
623 | } | 659 | pos_cgrp = cgroup_rightmost_descendant(pos_cgrp); |
660 | } | ||
661 | rcu_read_unlock(); | ||
624 | 662 | ||
625 | for (i = 0; i < csn; i++) | 663 | for (i = 0; i < csn; i++) |
626 | csa[i]->pn = i; | 664 | csa[i]->pn = i; |
@@ -725,25 +763,25 @@ done: | |||
725 | /* | 763 | /* |
726 | * Rebuild scheduler domains. | 764 | * Rebuild scheduler domains. |
727 | * | 765 | * |
728 | * Call with neither cgroup_mutex held nor within get_online_cpus(). | 766 | * If the flag 'sched_load_balance' of any cpuset with non-empty |
729 | * Takes both cgroup_mutex and get_online_cpus(). | 767 | * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset |
768 | * which has that flag enabled, or if any cpuset with a non-empty | ||
769 | * 'cpus' is removed, then call this routine to rebuild the | ||
770 | * scheduler's dynamic sched domains. | ||
730 | * | 771 | * |
731 | * Cannot be directly called from cpuset code handling changes | 772 | * Call with cpuset_mutex held. Takes get_online_cpus(). |
732 | * to the cpuset pseudo-filesystem, because it cannot be called | ||
733 | * from code that already holds cgroup_mutex. | ||
734 | */ | 773 | */ |
735 | static void do_rebuild_sched_domains(struct work_struct *unused) | 774 | static void rebuild_sched_domains_locked(void) |
736 | { | 775 | { |
737 | struct sched_domain_attr *attr; | 776 | struct sched_domain_attr *attr; |
738 | cpumask_var_t *doms; | 777 | cpumask_var_t *doms; |
739 | int ndoms; | 778 | int ndoms; |
740 | 779 | ||
780 | lockdep_assert_held(&cpuset_mutex); | ||
741 | get_online_cpus(); | 781 | get_online_cpus(); |
742 | 782 | ||
743 | /* Generate domain masks and attrs */ | 783 | /* Generate domain masks and attrs */ |
744 | cgroup_lock(); | ||
745 | ndoms = generate_sched_domains(&doms, &attr); | 784 | ndoms = generate_sched_domains(&doms, &attr); |
746 | cgroup_unlock(); | ||
747 | 785 | ||
748 | /* Have scheduler rebuild the domains */ | 786 | /* Have scheduler rebuild the domains */ |
749 | partition_sched_domains(ndoms, doms, attr); | 787 | partition_sched_domains(ndoms, doms, attr); |
@@ -751,7 +789,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused) | |||
751 | put_online_cpus(); | 789 | put_online_cpus(); |
752 | } | 790 | } |
753 | #else /* !CONFIG_SMP */ | 791 | #else /* !CONFIG_SMP */ |
754 | static void do_rebuild_sched_domains(struct work_struct *unused) | 792 | static void rebuild_sched_domains_locked(void) |
755 | { | 793 | { |
756 | } | 794 | } |
757 | 795 | ||
@@ -763,44 +801,11 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
763 | } | 801 | } |
764 | #endif /* CONFIG_SMP */ | 802 | #endif /* CONFIG_SMP */ |
765 | 803 | ||
766 | static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains); | ||
767 | |||
768 | /* | ||
769 | * Rebuild scheduler domains, asynchronously via workqueue. | ||
770 | * | ||
771 | * If the flag 'sched_load_balance' of any cpuset with non-empty | ||
772 | * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset | ||
773 | * which has that flag enabled, or if any cpuset with a non-empty | ||
774 | * 'cpus' is removed, then call this routine to rebuild the | ||
775 | * scheduler's dynamic sched domains. | ||
776 | * | ||
777 | * The rebuild_sched_domains() and partition_sched_domains() | ||
778 | * routines must nest cgroup_lock() inside get_online_cpus(), | ||
779 | * but such cpuset changes as these must nest that locking the | ||
780 | * other way, holding cgroup_lock() for much of the code. | ||
781 | * | ||
782 | * So in order to avoid an ABBA deadlock, the cpuset code handling | ||
783 | * these user changes delegates the actual sched domain rebuilding | ||
784 | * to a separate workqueue thread, which ends up processing the | ||
785 | * above do_rebuild_sched_domains() function. | ||
786 | */ | ||
787 | static void async_rebuild_sched_domains(void) | ||
788 | { | ||
789 | queue_work(cpuset_wq, &rebuild_sched_domains_work); | ||
790 | } | ||
791 | |||
792 | /* | ||
793 | * Accomplishes the same scheduler domain rebuild as the above | ||
794 | * async_rebuild_sched_domains(), however it directly calls the | ||
795 | * rebuild routine synchronously rather than calling it via an | ||
796 | * asynchronous work thread. | ||
797 | * | ||
798 | * This can only be called from code that is not holding | ||
799 | * cgroup_mutex (not nested in a cgroup_lock() call.) | ||
800 | */ | ||
801 | void rebuild_sched_domains(void) | 804 | void rebuild_sched_domains(void) |
802 | { | 805 | { |
803 | do_rebuild_sched_domains(NULL); | 806 | mutex_lock(&cpuset_mutex); |
807 | rebuild_sched_domains_locked(); | ||
808 | mutex_unlock(&cpuset_mutex); | ||
804 | } | 809 | } |
805 | 810 | ||
806 | /** | 811 | /** |
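With the workqueue indirection gone there are two entry points: rebuild_sched_domains_locked() for paths that already hold cpuset_mutex (it asserts this via lockdep), and rebuild_sched_domains() for callers outside the cpuset code, which simply wraps the locked variant. A rough sketch of the intended usage (example_flag_write() and example_external_rebuild() are hypothetical):

        /* inside a cpuset write handler: cpuset_mutex is already held */
        static void example_flag_write(struct cpuset *cs)
        {
                lockdep_assert_held(&cpuset_mutex);
                if (is_sched_load_balance(cs))
                        rebuild_sched_domains_locked(); /* no extra locking needed */
        }

        /* from code that does not hold cpuset_mutex */
        static void example_external_rebuild(void)
        {
                rebuild_sched_domains();                /* takes cpuset_mutex internally */
        }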
@@ -808,7 +813,7 @@ void rebuild_sched_domains(void) | |||
808 | * @tsk: task to test | 813 | * @tsk: task to test |
809 | * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner | 814 | * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner |
810 | * | 815 | * |
811 | * Call with cgroup_mutex held. May take callback_mutex during call. | 816 | * Call with cpuset_mutex held. May take callback_mutex during call. |
812 | * Called for each task in a cgroup by cgroup_scan_tasks(). | 817 | * Called for each task in a cgroup by cgroup_scan_tasks(). |
813 | * Return nonzero if this task's cpus_allowed mask should be changed (in other | 818 | * Return nonzero if this task's cpus_allowed mask should be changed (in other |

814 | * words, if its mask is not equal to its cpuset's mask). | 819 | * words, if its mask is not equal to its cpuset's mask). |
@@ -829,7 +834,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk, | |||
829 | * cpus_allowed mask needs to be changed. | 834 | * cpus_allowed mask needs to be changed. |
830 | * | 835 | * |
831 | * We don't need to re-check for the cgroup/cpuset membership, since we're | 836 | * We don't need to re-check for the cgroup/cpuset membership, since we're |
832 | * holding cgroup_lock() at this point. | 837 | * holding cpuset_mutex at this point. |
833 | */ | 838 | */ |
834 | static void cpuset_change_cpumask(struct task_struct *tsk, | 839 | static void cpuset_change_cpumask(struct task_struct *tsk, |
835 | struct cgroup_scanner *scan) | 840 | struct cgroup_scanner *scan) |
@@ -842,7 +847,7 @@ static void cpuset_change_cpumask(struct task_struct *tsk, | |||
842 | * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed | 847 | * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed |
843 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() | 848 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() |
844 | * | 849 | * |
845 | * Called with cgroup_mutex held | 850 | * Called with cpuset_mutex held |
846 | * | 851 | * |
847 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, | 852 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, |
848 | * calling callback functions for each. | 853 | * calling callback functions for each. |
@@ -920,7 +925,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
920 | heap_free(&heap); | 925 | heap_free(&heap); |
921 | 926 | ||
922 | if (is_load_balanced) | 927 | if (is_load_balanced) |
923 | async_rebuild_sched_domains(); | 928 | rebuild_sched_domains_locked(); |
924 | return 0; | 929 | return 0; |
925 | } | 930 | } |
926 | 931 | ||
@@ -932,7 +937,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
932 | * Temporarily set tasks' mems_allowed to target nodes of migration, | 937 | * Temporarily set tasks' mems_allowed to target nodes of migration, |
933 | * so that the migration code can allocate pages on these nodes. | 938 | * so that the migration code can allocate pages on these nodes. |
934 | * | 939 | * |
935 | * Call holding cgroup_mutex, so current's cpuset won't change | 940 | * Call holding cpuset_mutex, so current's cpuset won't change |
936 | * during this call, as manage_mutex holds off any cpuset_attach() | 941 | * during this call, as manage_mutex holds off any cpuset_attach() |
937 | * calls. Therefore we don't need to take task_lock around the | 942 | * calls. Therefore we don't need to take task_lock around the |
938 | * call to guarantee_online_mems(), as we know no one is changing | 943 | * call to guarantee_online_mems(), as we know no one is changing |
@@ -1007,7 +1012,7 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, | |||
1007 | /* | 1012 | /* |
1008 | * Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy | 1013 | * Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy |
1009 | * of it to cpuset's new mems_allowed, and migrate pages to new nodes if | 1014 | * of it to cpuset's new mems_allowed, and migrate pages to new nodes if |
1010 | * memory_migrate flag is set. Called with cgroup_mutex held. | 1015 | * memory_migrate flag is set. Called with cpuset_mutex held. |
1011 | */ | 1016 | */ |
1012 | static void cpuset_change_nodemask(struct task_struct *p, | 1017 | static void cpuset_change_nodemask(struct task_struct *p, |
1013 | struct cgroup_scanner *scan) | 1018 | struct cgroup_scanner *scan) |
@@ -1016,7 +1021,7 @@ static void cpuset_change_nodemask(struct task_struct *p, | |||
1016 | struct cpuset *cs; | 1021 | struct cpuset *cs; |
1017 | int migrate; | 1022 | int migrate; |
1018 | const nodemask_t *oldmem = scan->data; | 1023 | const nodemask_t *oldmem = scan->data; |
1019 | static nodemask_t newmems; /* protected by cgroup_mutex */ | 1024 | static nodemask_t newmems; /* protected by cpuset_mutex */ |
1020 | 1025 | ||
1021 | cs = cgroup_cs(scan->cg); | 1026 | cs = cgroup_cs(scan->cg); |
1022 | guarantee_online_mems(cs, &newmems); | 1027 | guarantee_online_mems(cs, &newmems); |
@@ -1043,7 +1048,7 @@ static void *cpuset_being_rebound; | |||
1043 | * @oldmem: old mems_allowed of cpuset cs | 1048 | * @oldmem: old mems_allowed of cpuset cs |
1044 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() | 1049 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() |
1045 | * | 1050 | * |
1046 | * Called with cgroup_mutex held | 1051 | * Called with cpuset_mutex held |
1047 | * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0 | 1052 | * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0 |
1048 | * if @heap != NULL. | 1053 | * if @heap != NULL. |
1049 | */ | 1054 | */ |
@@ -1065,7 +1070,7 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, | |||
1065 | * take while holding tasklist_lock. Forks can happen - the | 1070 | * take while holding tasklist_lock. Forks can happen - the |
1066 | * mpol_dup() cpuset_being_rebound check will catch such forks, | 1071 | * mpol_dup() cpuset_being_rebound check will catch such forks, |
1067 | * and rebind their vma mempolicies too. Because we still hold | 1072 | * and rebind their vma mempolicies too. Because we still hold |
1068 | * the global cgroup_mutex, we know that no other rebind effort | 1073 | * the global cpuset_mutex, we know that no other rebind effort |
1069 | * will be contending for the global variable cpuset_being_rebound. | 1074 | * will be contending for the global variable cpuset_being_rebound. |
1070 | * It's ok if we rebind the same mm twice; mpol_rebind_mm() | 1075 | * It's ok if we rebind the same mm twice; mpol_rebind_mm() |
1071 | * is idempotent. Also migrate pages in each mm to new nodes. | 1076 | * is idempotent. Also migrate pages in each mm to new nodes. |
@@ -1084,7 +1089,7 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, | |||
1084 | * mempolicies and if the cpuset is marked 'memory_migrate', | 1089 | * mempolicies and if the cpuset is marked 'memory_migrate', |
1085 | * migrate the tasks pages to the new memory. | 1090 | * migrate the tasks pages to the new memory. |
1086 | * | 1091 | * |
1087 | * Call with cgroup_mutex held. May take callback_mutex during call. | 1092 | * Call with cpuset_mutex held. May take callback_mutex during call. |
1088 | * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, | 1093 | * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, |
1089 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind | 1094 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind |
1090 | * their mempolicies to the cpusets new mems_allowed. | 1095 | * their mempolicies to the cpusets new mems_allowed. |
@@ -1168,7 +1173,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) | |||
1168 | cs->relax_domain_level = val; | 1173 | cs->relax_domain_level = val; |
1169 | if (!cpumask_empty(cs->cpus_allowed) && | 1174 | if (!cpumask_empty(cs->cpus_allowed) && |
1170 | is_sched_load_balance(cs)) | 1175 | is_sched_load_balance(cs)) |
1171 | async_rebuild_sched_domains(); | 1176 | rebuild_sched_domains_locked(); |
1172 | } | 1177 | } |
1173 | 1178 | ||
1174 | return 0; | 1179 | return 0; |
@@ -1182,7 +1187,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) | |||
1182 | * Called by cgroup_scan_tasks() for each task in a cgroup. | 1187 | * Called by cgroup_scan_tasks() for each task in a cgroup. |
1183 | * | 1188 | * |
1184 | * We don't need to re-check for the cgroup/cpuset membership, since we're | 1189 | * We don't need to re-check for the cgroup/cpuset membership, since we're |
1185 | * holding cgroup_lock() at this point. | 1190 | * holding cpuset_mutex at this point. |
1186 | */ | 1191 | */ |
1187 | static void cpuset_change_flag(struct task_struct *tsk, | 1192 | static void cpuset_change_flag(struct task_struct *tsk, |
1188 | struct cgroup_scanner *scan) | 1193 | struct cgroup_scanner *scan) |
@@ -1195,7 +1200,7 @@ static void cpuset_change_flag(struct task_struct *tsk, | |||
1195 | * @cs: the cpuset in which each task's spread flags needs to be changed | 1200 | * @cs: the cpuset in which each task's spread flags needs to be changed |
1196 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() | 1201 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() |
1197 | * | 1202 | * |
1198 | * Called with cgroup_mutex held | 1203 | * Called with cpuset_mutex held |
1199 | * | 1204 | * |
1200 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, | 1205 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, |
1201 | * calling callback functions for each. | 1206 | * calling callback functions for each. |
@@ -1220,7 +1225,7 @@ static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap) | |||
1220 | * cs: the cpuset to update | 1225 | * cs: the cpuset to update |
1221 | * turning_on: whether the flag is being set or cleared | 1226 | * turning_on: whether the flag is being set or cleared |
1222 | * | 1227 | * |
1223 | * Call with cgroup_mutex held. | 1228 | * Call with cpuset_mutex held. |
1224 | */ | 1229 | */ |
1225 | 1230 | ||
1226 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, | 1231 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, |
@@ -1260,7 +1265,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, | |||
1260 | mutex_unlock(&callback_mutex); | 1265 | mutex_unlock(&callback_mutex); |
1261 | 1266 | ||
1262 | if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) | 1267 | if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) |
1263 | async_rebuild_sched_domains(); | 1268 | rebuild_sched_domains_locked(); |
1264 | 1269 | ||
1265 | if (spread_flag_changed) | 1270 | if (spread_flag_changed) |
1266 | update_tasks_flags(cs, &heap); | 1271 | update_tasks_flags(cs, &heap); |
@@ -1368,24 +1373,18 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1368 | return val; | 1373 | return val; |
1369 | } | 1374 | } |
1370 | 1375 | ||
1371 | /* | 1376 | /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ |
1372 | * Protected by cgroup_lock. The nodemasks must be stored globally because | ||
1373 | * dynamically allocating them is not allowed in can_attach, and they must | ||
1374 | * persist until attach. | ||
1375 | */ | ||
1376 | static cpumask_var_t cpus_attach; | ||
1377 | static nodemask_t cpuset_attach_nodemask_from; | ||
1378 | static nodemask_t cpuset_attach_nodemask_to; | ||
1379 | |||
1380 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | ||
1381 | static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | 1377 | static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
1382 | { | 1378 | { |
1383 | struct cpuset *cs = cgroup_cs(cgrp); | 1379 | struct cpuset *cs = cgroup_cs(cgrp); |
1384 | struct task_struct *task; | 1380 | struct task_struct *task; |
1385 | int ret; | 1381 | int ret; |
1386 | 1382 | ||
1383 | mutex_lock(&cpuset_mutex); | ||
1384 | |||
1385 | ret = -ENOSPC; | ||
1387 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 1386 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
1388 | return -ENOSPC; | 1387 | goto out_unlock; |
1389 | 1388 | ||
1390 | cgroup_taskset_for_each(task, cgrp, tset) { | 1389 | cgroup_taskset_for_each(task, cgrp, tset) { |
1391 | /* | 1390 | /* |
@@ -1397,25 +1396,45 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
1397 | * set_cpus_allowed_ptr() on all attached tasks before | 1396 | * set_cpus_allowed_ptr() on all attached tasks before |
1398 | * cpus_allowed may be changed. | 1397 | * cpus_allowed may be changed. |
1399 | */ | 1398 | */ |
1399 | ret = -EINVAL; | ||
1400 | if (task->flags & PF_THREAD_BOUND) | 1400 | if (task->flags & PF_THREAD_BOUND) |
1401 | return -EINVAL; | 1401 | goto out_unlock; |
1402 | if ((ret = security_task_setscheduler(task))) | 1402 | ret = security_task_setscheduler(task); |
1403 | return ret; | 1403 | if (ret) |
1404 | goto out_unlock; | ||
1404 | } | 1405 | } |
1405 | 1406 | ||
1406 | /* prepare for attach */ | 1407 | /* |
1407 | if (cs == &top_cpuset) | 1408 | * Mark attach is in progress. This makes validate_change() fail |
1408 | cpumask_copy(cpus_attach, cpu_possible_mask); | 1409 | * changes which zero cpus/mems_allowed. |
1409 | else | 1410 | */ |
1410 | guarantee_online_cpus(cs, cpus_attach); | 1411 | cs->attach_in_progress++; |
1411 | 1412 | ret = 0; | |
1412 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); | 1413 | out_unlock: |
1414 | mutex_unlock(&cpuset_mutex); | ||
1415 | return ret; | ||
1416 | } | ||
1413 | 1417 | ||
1414 | return 0; | 1418 | static void cpuset_cancel_attach(struct cgroup *cgrp, |
1419 | struct cgroup_taskset *tset) | ||
1420 | { | ||
1421 | mutex_lock(&cpuset_mutex); | ||
1422 | cgroup_cs(cgrp)->attach_in_progress--; | ||
1423 | mutex_unlock(&cpuset_mutex); | ||
1415 | } | 1424 | } |
1416 | 1425 | ||
1426 | /* | ||
1427 | * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach() | ||
1428 | * but we can't allocate it dynamically there. Define it global and | ||
1429 | * allocate from cpuset_init(). | ||
1430 | */ | ||
1431 | static cpumask_var_t cpus_attach; | ||
1432 | |||
1417 | static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | 1433 | static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
1418 | { | 1434 | { |
1435 | /* static bufs protected by cpuset_mutex */ | ||
1436 | static nodemask_t cpuset_attach_nodemask_from; | ||
1437 | static nodemask_t cpuset_attach_nodemask_to; | ||
1419 | struct mm_struct *mm; | 1438 | struct mm_struct *mm; |
1420 | struct task_struct *task; | 1439 | struct task_struct *task; |
1421 | struct task_struct *leader = cgroup_taskset_first(tset); | 1440 | struct task_struct *leader = cgroup_taskset_first(tset); |
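The ->attach_in_progress counter added here closes a race: cpuset_can_attach() bumps it under cpuset_mutex so that validate_change() will refuse to empty cpus_allowed/mems_allowed while an attach is pending, and either cpuset_cancel_attach() or cpuset_attach() drops it again. A sketch of the protocol, condensed rather than quoted from the patch (example_may_empty_masks() is hypothetical):

        /* cpuset_can_attach():    lock cpuset_mutex; run checks; cs->attach_in_progress++; unlock */
        /* cpuset_cancel_attach(): lock cpuset_mutex; cs->attach_in_progress--; unlock             */
        /* cpuset_attach():        lock cpuset_mutex; move tasks; cs->attach_in_progress--;        */
        /*                         schedule hotplug propagation if @cs went empty; unlock          */

        static bool example_may_empty_masks(struct cpuset *cs)
        {
                lockdep_assert_held(&cpuset_mutex);
                /* mirrors the new check in validate_change() */
                return !cgroup_task_count(cs->css.cgroup) && !cs->attach_in_progress;
        }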
@@ -1423,6 +1442,16 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
1423 | struct cpuset *cs = cgroup_cs(cgrp); | 1442 | struct cpuset *cs = cgroup_cs(cgrp); |
1424 | struct cpuset *oldcs = cgroup_cs(oldcgrp); | 1443 | struct cpuset *oldcs = cgroup_cs(oldcgrp); |
1425 | 1444 | ||
1445 | mutex_lock(&cpuset_mutex); | ||
1446 | |||
1447 | /* prepare for attach */ | ||
1448 | if (cs == &top_cpuset) | ||
1449 | cpumask_copy(cpus_attach, cpu_possible_mask); | ||
1450 | else | ||
1451 | guarantee_online_cpus(cs, cpus_attach); | ||
1452 | |||
1453 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); | ||
1454 | |||
1426 | cgroup_taskset_for_each(task, cgrp, tset) { | 1455 | cgroup_taskset_for_each(task, cgrp, tset) { |
1427 | /* | 1456 | /* |
1428 | * can_attach beforehand should guarantee that this doesn't | 1457 | * can_attach beforehand should guarantee that this doesn't |
@@ -1448,6 +1477,18 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
1448 | &cpuset_attach_nodemask_to); | 1477 | &cpuset_attach_nodemask_to); |
1449 | mmput(mm); | 1478 | mmput(mm); |
1450 | } | 1479 | } |
1480 | |||
1481 | cs->attach_in_progress--; | ||
1482 | |||
1483 | /* | ||
1484 | * We may have raced with CPU/memory hotunplug. Trigger hotplug | ||
1485 | * propagation if @cs doesn't have any CPU or memory. It will move | ||
1486 | * the newly added tasks to the nearest parent which can execute. | ||
1487 | */ | ||
1488 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | ||
1489 | schedule_cpuset_propagate_hotplug(cs); | ||
1490 | |||
1491 | mutex_unlock(&cpuset_mutex); | ||
1451 | } | 1492 | } |
1452 | 1493 | ||
1453 | /* The various types of files and directories in a cpuset file system */ | 1494 | /* The various types of files and directories in a cpuset file system */ |
@@ -1469,12 +1510,13 @@ typedef enum { | |||
1469 | 1510 | ||
1470 | static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) | 1511 | static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) |
1471 | { | 1512 | { |
1472 | int retval = 0; | ||
1473 | struct cpuset *cs = cgroup_cs(cgrp); | 1513 | struct cpuset *cs = cgroup_cs(cgrp); |
1474 | cpuset_filetype_t type = cft->private; | 1514 | cpuset_filetype_t type = cft->private; |
1515 | int retval = -ENODEV; | ||
1475 | 1516 | ||
1476 | if (!cgroup_lock_live_group(cgrp)) | 1517 | mutex_lock(&cpuset_mutex); |
1477 | return -ENODEV; | 1518 | if (!is_cpuset_online(cs)) |
1519 | goto out_unlock; | ||
1478 | 1520 | ||
1479 | switch (type) { | 1521 | switch (type) { |
1480 | case FILE_CPU_EXCLUSIVE: | 1522 | case FILE_CPU_EXCLUSIVE: |
@@ -1508,18 +1550,20 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) | |||
1508 | retval = -EINVAL; | 1550 | retval = -EINVAL; |
1509 | break; | 1551 | break; |
1510 | } | 1552 | } |
1511 | cgroup_unlock(); | 1553 | out_unlock: |
1554 | mutex_unlock(&cpuset_mutex); | ||
1512 | return retval; | 1555 | return retval; |
1513 | } | 1556 | } |
1514 | 1557 | ||
1515 | static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) | 1558 | static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) |
1516 | { | 1559 | { |
1517 | int retval = 0; | ||
1518 | struct cpuset *cs = cgroup_cs(cgrp); | 1560 | struct cpuset *cs = cgroup_cs(cgrp); |
1519 | cpuset_filetype_t type = cft->private; | 1561 | cpuset_filetype_t type = cft->private; |
1562 | int retval = -ENODEV; | ||
1520 | 1563 | ||
1521 | if (!cgroup_lock_live_group(cgrp)) | 1564 | mutex_lock(&cpuset_mutex); |
1522 | return -ENODEV; | 1565 | if (!is_cpuset_online(cs)) |
1566 | goto out_unlock; | ||
1523 | 1567 | ||
1524 | switch (type) { | 1568 | switch (type) { |
1525 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | 1569 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: |
@@ -1529,7 +1573,8 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) | |||
1529 | retval = -EINVAL; | 1573 | retval = -EINVAL; |
1530 | break; | 1574 | break; |
1531 | } | 1575 | } |
1532 | cgroup_unlock(); | 1576 | out_unlock: |
1577 | mutex_unlock(&cpuset_mutex); | ||
1533 | return retval; | 1578 | return retval; |
1534 | } | 1579 | } |
1535 | 1580 | ||
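All three write handlers now follow the same guard: take cpuset_mutex, bail out with -ENODEV if the cpuset has already gone offline, and leave through a single out_unlock label. A condensed sketch of that pattern (example_write_u64() is hypothetical):

        static int example_write_u64(struct cgroup *cgrp, u64 val)
        {
                struct cpuset *cs = cgroup_cs(cgrp);
                int retval = -ENODEV;

                mutex_lock(&cpuset_mutex);
                if (!is_cpuset_online(cs))      /* replaces cgroup_lock_live_group() */
                        goto out_unlock;

                retval = 0;                     /* ... apply @val to @cs here ... */
        out_unlock:
                mutex_unlock(&cpuset_mutex);
                return retval;
        }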
@@ -1539,17 +1584,36 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) | |||
1539 | static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | 1584 | static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, |
1540 | const char *buf) | 1585 | const char *buf) |
1541 | { | 1586 | { |
1542 | int retval = 0; | ||
1543 | struct cpuset *cs = cgroup_cs(cgrp); | 1587 | struct cpuset *cs = cgroup_cs(cgrp); |
1544 | struct cpuset *trialcs; | 1588 | struct cpuset *trialcs; |
1589 | int retval = -ENODEV; | ||
1590 | |||
1591 | /* | ||
1592 | * CPU or memory hotunplug may leave @cs w/o any execution | ||
1593 | * resources, in which case the hotplug code asynchronously updates | ||
1594 | * configuration and transfers all tasks to the nearest ancestor | ||
1595 | * which can execute. | ||
1596 | * | ||
1597 | * As writes to "cpus" or "mems" may restore @cs's execution | ||
1598 | * resources, wait for the previously scheduled operations before | ||
1599 | * proceeding, so that we don't end up repeatedly removing tasks added | ||
1600 | * after execution capability is restored. | ||
1601 | * | ||
1602 | * Flushing cpuset_hotplug_work is enough to synchronize against | ||
1603 | * hotplug handling; however, cpuset_attach() may schedule | ||
1604 | * propagation work directly. Flush the workqueue too. | ||
1605 | */ | ||
1606 | flush_work(&cpuset_hotplug_work); | ||
1607 | flush_workqueue(cpuset_propagate_hotplug_wq); | ||
1545 | 1608 | ||
1546 | if (!cgroup_lock_live_group(cgrp)) | 1609 | mutex_lock(&cpuset_mutex); |
1547 | return -ENODEV; | 1610 | if (!is_cpuset_online(cs)) |
1611 | goto out_unlock; | ||
1548 | 1612 | ||
1549 | trialcs = alloc_trial_cpuset(cs); | 1613 | trialcs = alloc_trial_cpuset(cs); |
1550 | if (!trialcs) { | 1614 | if (!trialcs) { |
1551 | retval = -ENOMEM; | 1615 | retval = -ENOMEM; |
1552 | goto out; | 1616 | goto out_unlock; |
1553 | } | 1617 | } |
1554 | 1618 | ||
1555 | switch (cft->private) { | 1619 | switch (cft->private) { |
@@ -1565,8 +1629,8 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | |||
1565 | } | 1629 | } |
1566 | 1630 | ||
1567 | free_trial_cpuset(trialcs); | 1631 | free_trial_cpuset(trialcs); |
1568 | out: | 1632 | out_unlock: |
1569 | cgroup_unlock(); | 1633 | mutex_unlock(&cpuset_mutex); |
1570 | return retval; | 1634 | return retval; |
1571 | } | 1635 | } |
1572 | 1636 | ||
@@ -1790,15 +1854,12 @@ static struct cftype files[] = { | |||
1790 | 1854 | ||
1791 | static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) | 1855 | static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) |
1792 | { | 1856 | { |
1793 | struct cgroup *parent_cg = cont->parent; | 1857 | struct cpuset *cs; |
1794 | struct cgroup *tmp_cg; | ||
1795 | struct cpuset *parent, *cs; | ||
1796 | 1858 | ||
1797 | if (!parent_cg) | 1859 | if (!cont->parent) |
1798 | return &top_cpuset.css; | 1860 | return &top_cpuset.css; |
1799 | parent = cgroup_cs(parent_cg); | ||
1800 | 1861 | ||
1801 | cs = kmalloc(sizeof(*cs), GFP_KERNEL); | 1862 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); |
1802 | if (!cs) | 1863 | if (!cs) |
1803 | return ERR_PTR(-ENOMEM); | 1864 | return ERR_PTR(-ENOMEM); |
1804 | if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) { | 1865 | if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) { |
@@ -1806,22 +1867,38 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) | |||
1806 | return ERR_PTR(-ENOMEM); | 1867 | return ERR_PTR(-ENOMEM); |
1807 | } | 1868 | } |
1808 | 1869 | ||
1809 | cs->flags = 0; | ||
1810 | if (is_spread_page(parent)) | ||
1811 | set_bit(CS_SPREAD_PAGE, &cs->flags); | ||
1812 | if (is_spread_slab(parent)) | ||
1813 | set_bit(CS_SPREAD_SLAB, &cs->flags); | ||
1814 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); | 1870 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); |
1815 | cpumask_clear(cs->cpus_allowed); | 1871 | cpumask_clear(cs->cpus_allowed); |
1816 | nodes_clear(cs->mems_allowed); | 1872 | nodes_clear(cs->mems_allowed); |
1817 | fmeter_init(&cs->fmeter); | 1873 | fmeter_init(&cs->fmeter); |
1874 | INIT_WORK(&cs->hotplug_work, cpuset_propagate_hotplug_workfn); | ||
1818 | cs->relax_domain_level = -1; | 1875 | cs->relax_domain_level = -1; |
1819 | 1876 | ||
1820 | cs->parent = parent; | 1877 | return &cs->css; |
1878 | } | ||
1879 | |||
1880 | static int cpuset_css_online(struct cgroup *cgrp) | ||
1881 | { | ||
1882 | struct cpuset *cs = cgroup_cs(cgrp); | ||
1883 | struct cpuset *parent = parent_cs(cs); | ||
1884 | struct cpuset *tmp_cs; | ||
1885 | struct cgroup *pos_cg; | ||
1886 | |||
1887 | if (!parent) | ||
1888 | return 0; | ||
1889 | |||
1890 | mutex_lock(&cpuset_mutex); | ||
1891 | |||
1892 | set_bit(CS_ONLINE, &cs->flags); | ||
1893 | if (is_spread_page(parent)) | ||
1894 | set_bit(CS_SPREAD_PAGE, &cs->flags); | ||
1895 | if (is_spread_slab(parent)) | ||
1896 | set_bit(CS_SPREAD_SLAB, &cs->flags); | ||
1897 | |||
1821 | number_of_cpusets++; | 1898 | number_of_cpusets++; |
1822 | 1899 | ||
1823 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cont->flags)) | 1900 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags)) |
1824 | goto skip_clone; | 1901 | goto out_unlock; |
1825 | 1902 | ||
1826 | /* | 1903 | /* |
1827 | * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is | 1904 | * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is |
@@ -1836,35 +1913,49 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) | |||
1836 | * changed to grant parent->cpus_allowed-sibling_cpus_exclusive | 1913 | * changed to grant parent->cpus_allowed-sibling_cpus_exclusive |
1837 | * (and likewise for mems) to the new cgroup. | 1914 | * (and likewise for mems) to the new cgroup. |
1838 | */ | 1915 | */ |
1839 | list_for_each_entry(tmp_cg, &parent_cg->children, sibling) { | 1916 | rcu_read_lock(); |
1840 | struct cpuset *tmp_cs = cgroup_cs(tmp_cg); | 1917 | cpuset_for_each_child(tmp_cs, pos_cg, parent) { |
1841 | 1918 | if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) { | |
1842 | if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) | 1919 | rcu_read_unlock(); |
1843 | goto skip_clone; | 1920 | goto out_unlock; |
1921 | } | ||
1844 | } | 1922 | } |
1923 | rcu_read_unlock(); | ||
1845 | 1924 | ||
1846 | mutex_lock(&callback_mutex); | 1925 | mutex_lock(&callback_mutex); |
1847 | cs->mems_allowed = parent->mems_allowed; | 1926 | cs->mems_allowed = parent->mems_allowed; |
1848 | cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); | 1927 | cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); |
1849 | mutex_unlock(&callback_mutex); | 1928 | mutex_unlock(&callback_mutex); |
1850 | skip_clone: | 1929 | out_unlock: |
1851 | return &cs->css; | 1930 | mutex_unlock(&cpuset_mutex); |
1931 | return 0; | ||
1932 | } | ||
1933 | |||
1934 | static void cpuset_css_offline(struct cgroup *cgrp) | ||
1935 | { | ||
1936 | struct cpuset *cs = cgroup_cs(cgrp); | ||
1937 | |||
1938 | mutex_lock(&cpuset_mutex); | ||
1939 | |||
1940 | if (is_sched_load_balance(cs)) | ||
1941 | update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); | ||
1942 | |||
1943 | number_of_cpusets--; | ||
1944 | clear_bit(CS_ONLINE, &cs->flags); | ||
1945 | |||
1946 | mutex_unlock(&cpuset_mutex); | ||
1852 | } | 1947 | } |
1853 | 1948 | ||
1854 | /* | 1949 | /* |
1855 | * If the cpuset being removed has its flag 'sched_load_balance' | 1950 | * If the cpuset being removed has its flag 'sched_load_balance' |
1856 | * enabled, then simulate turning sched_load_balance off, which | 1951 | * enabled, then simulate turning sched_load_balance off, which |
1857 | * will call async_rebuild_sched_domains(). | 1952 | * will call rebuild_sched_domains_locked(). |
1858 | */ | 1953 | */ |
1859 | 1954 | ||
1860 | static void cpuset_css_free(struct cgroup *cont) | 1955 | static void cpuset_css_free(struct cgroup *cont) |
1861 | { | 1956 | { |
1862 | struct cpuset *cs = cgroup_cs(cont); | 1957 | struct cpuset *cs = cgroup_cs(cont); |
1863 | 1958 | ||
1864 | if (is_sched_load_balance(cs)) | ||
1865 | update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); | ||
1866 | |||
1867 | number_of_cpusets--; | ||
1868 | free_cpumask_var(cs->cpus_allowed); | 1959 | free_cpumask_var(cs->cpus_allowed); |
1869 | kfree(cs); | 1960 | kfree(cs); |
1870 | } | 1961 | } |
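Parent-dependent initialization (spread flags, the clone-children copy, the number_of_cpusets count) moves from ->css_alloc to the new ->css_online, and teardown that touches shared state moves from ->css_free to ->css_offline. A heavily condensed sketch of the split (the spread/clone handling and the load-balance shutdown are omitted here; see the callbacks above for the full versions):

        static int example_css_online(struct cgroup *cgrp)
        {
                struct cpuset *cs = cgroup_cs(cgrp);

                mutex_lock(&cpuset_mutex);
                set_bit(CS_ONLINE, &cs->flags);         /* now visible to the iterators */
                number_of_cpusets++;
                mutex_unlock(&cpuset_mutex);
                return 0;
        }

        static void example_css_offline(struct cgroup *cgrp)
        {
                struct cpuset *cs = cgroup_cs(cgrp);

                mutex_lock(&cpuset_mutex);
                number_of_cpusets--;
                clear_bit(CS_ONLINE, &cs->flags);       /* iterators stop seeing @cs */
                mutex_unlock(&cpuset_mutex);
        }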
@@ -1872,8 +1963,11 @@ static void cpuset_css_free(struct cgroup *cont) | |||
1872 | struct cgroup_subsys cpuset_subsys = { | 1963 | struct cgroup_subsys cpuset_subsys = { |
1873 | .name = "cpuset", | 1964 | .name = "cpuset", |
1874 | .css_alloc = cpuset_css_alloc, | 1965 | .css_alloc = cpuset_css_alloc, |
1966 | .css_online = cpuset_css_online, | ||
1967 | .css_offline = cpuset_css_offline, | ||
1875 | .css_free = cpuset_css_free, | 1968 | .css_free = cpuset_css_free, |
1876 | .can_attach = cpuset_can_attach, | 1969 | .can_attach = cpuset_can_attach, |
1970 | .cancel_attach = cpuset_cancel_attach, | ||
1877 | .attach = cpuset_attach, | 1971 | .attach = cpuset_attach, |
1878 | .subsys_id = cpuset_subsys_id, | 1972 | .subsys_id = cpuset_subsys_id, |
1879 | .base_cftypes = files, | 1973 | .base_cftypes = files, |
@@ -1924,7 +2018,9 @@ static void cpuset_do_move_task(struct task_struct *tsk, | |||
1924 | { | 2018 | { |
1925 | struct cgroup *new_cgroup = scan->data; | 2019 | struct cgroup *new_cgroup = scan->data; |
1926 | 2020 | ||
2021 | cgroup_lock(); | ||
1927 | cgroup_attach_task(new_cgroup, tsk); | 2022 | cgroup_attach_task(new_cgroup, tsk); |
2023 | cgroup_unlock(); | ||
1928 | } | 2024 | } |
1929 | 2025 | ||
1930 | /** | 2026 | /** |
@@ -1932,7 +2028,7 @@ static void cpuset_do_move_task(struct task_struct *tsk, | |||
1932 | * @from: cpuset in which the tasks currently reside | 2028 | * @from: cpuset in which the tasks currently reside |
1933 | * @to: cpuset to which the tasks will be moved | 2029 | * @to: cpuset to which the tasks will be moved |
1934 | * | 2030 | * |
1935 | * Called with cgroup_mutex held | 2031 | * Called with cpuset_mutex held |
1936 | * callback_mutex must not be held, as cpuset_attach() will take it. | 2032 | * callback_mutex must not be held, as cpuset_attach() will take it. |
1937 | * | 2033 | * |
1938 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, | 2034 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, |
@@ -1959,169 +2055,200 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to) | |||
1959 | * removing that CPU or node from all cpusets. If this removes the | 2055 | * removing that CPU or node from all cpusets. If this removes the |
1960 | * last CPU or node from a cpuset, then move the tasks in the empty | 2056 | * last CPU or node from a cpuset, then move the tasks in the empty |
1961 | * cpuset to its next-highest non-empty parent. | 2057 | * cpuset to its next-highest non-empty parent. |
1962 | * | ||
1963 | * Called with cgroup_mutex held | ||
1964 | * callback_mutex must not be held, as cpuset_attach() will take it. | ||
1965 | */ | 2058 | */ |
1966 | static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | 2059 | static void remove_tasks_in_empty_cpuset(struct cpuset *cs) |
1967 | { | 2060 | { |
1968 | struct cpuset *parent; | 2061 | struct cpuset *parent; |
1969 | 2062 | ||
1970 | /* | 2063 | /* |
1971 | * The cgroup's css_sets list is in use if there are tasks | ||
1972 | * in the cpuset; the list is empty if there are none; | ||
1973 | * the cs->css.refcnt seems always 0. | ||
1974 | */ | ||
1975 | if (list_empty(&cs->css.cgroup->css_sets)) | ||
1976 | return; | ||
1977 | |||
1978 | /* | ||
1979 | * Find its next-highest non-empty parent, (top cpuset | 2064 | * Find its next-highest non-empty parent, (top cpuset |
1980 | * has online cpus, so can't be empty). | 2065 | * has online cpus, so can't be empty). |
1981 | */ | 2066 | */ |
1982 | parent = cs->parent; | 2067 | parent = parent_cs(cs); |
1983 | while (cpumask_empty(parent->cpus_allowed) || | 2068 | while (cpumask_empty(parent->cpus_allowed) || |
1984 | nodes_empty(parent->mems_allowed)) | 2069 | nodes_empty(parent->mems_allowed)) |
1985 | parent = parent->parent; | 2070 | parent = parent_cs(parent); |
1986 | 2071 | ||
1987 | move_member_tasks_to_cpuset(cs, parent); | 2072 | move_member_tasks_to_cpuset(cs, parent); |
1988 | } | 2073 | } |
1989 | 2074 | ||
1990 | /* | 2075 | /** |
1991 | * Helper function to traverse cpusets. | 2076 | * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset |
1992 | * It can be used to walk the cpuset tree from top to bottom, completing | 2077 | * @cs: cpuset in interest |
1993 | * one layer before dropping down to the next (thus always processing a | 2078 | * |
1994 | * node before any of its children). | 2079 | * Compare @cs's cpu and mem masks against top_cpuset and if some have gone |
2080 | * offline, update @cs accordingly. If @cs ends up with no CPU or memory, | ||
2081 | * all its tasks are moved to the nearest ancestor with both resources. | ||
1995 | */ | 2082 | */ |
1996 | static struct cpuset *cpuset_next(struct list_head *queue) | 2083 | static void cpuset_propagate_hotplug_workfn(struct work_struct *work) |
1997 | { | 2084 | { |
1998 | struct cpuset *cp; | 2085 | static cpumask_t off_cpus; |
1999 | struct cpuset *child; /* scans child cpusets of cp */ | 2086 | static nodemask_t off_mems, tmp_mems; |
2000 | struct cgroup *cont; | 2087 | struct cpuset *cs = container_of(work, struct cpuset, hotplug_work); |
2088 | bool is_empty; | ||
2001 | 2089 | ||
2002 | if (list_empty(queue)) | 2090 | mutex_lock(&cpuset_mutex); |
2003 | return NULL; | 2091 | |
2092 | cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); | ||
2093 | nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); | ||
2004 | 2094 | ||
2005 | cp = list_first_entry(queue, struct cpuset, stack_list); | 2095 | /* remove offline cpus from @cs */ |
2006 | list_del(queue->next); | 2096 | if (!cpumask_empty(&off_cpus)) { |
2007 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { | 2097 | mutex_lock(&callback_mutex); |
2008 | child = cgroup_cs(cont); | 2098 | cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); |
2009 | list_add_tail(&child->stack_list, queue); | 2099 | mutex_unlock(&callback_mutex); |
2100 | update_tasks_cpumask(cs, NULL); | ||
2101 | } | ||
2102 | |||
2103 | /* remove offline mems from @cs */ | ||
2104 | if (!nodes_empty(off_mems)) { | ||
2105 | tmp_mems = cs->mems_allowed; | ||
2106 | mutex_lock(&callback_mutex); | ||
2107 | nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); | ||
2108 | mutex_unlock(&callback_mutex); | ||
2109 | update_tasks_nodemask(cs, &tmp_mems, NULL); | ||
2010 | } | 2110 | } |
2011 | 2111 | ||
2012 | return cp; | 2112 | is_empty = cpumask_empty(cs->cpus_allowed) || |
2113 | nodes_empty(cs->mems_allowed); | ||
2114 | |||
2115 | mutex_unlock(&cpuset_mutex); | ||
2116 | |||
2117 | /* | ||
2118 | * If @cs became empty, move tasks to the nearest ancestor with | ||
2119 | * execution resources. This is full cgroup operation which will | ||
2120 | * also call back into cpuset. Should be done outside any lock. | ||
2121 | */ | ||
2122 | if (is_empty) | ||
2123 | remove_tasks_in_empty_cpuset(cs); | ||
2124 | |||
2125 | /* the following may free @cs, should be the last operation */ | ||
2126 | css_put(&cs->css); | ||
2013 | } | 2127 | } |
2014 | 2128 | ||
2129 | /** | ||
2130 | * schedule_cpuset_propagate_hotplug - schedule hotplug propagation to a cpuset | ||
2131 | * @cs: cpuset of interest | ||
2132 | * | ||
2133 | * Schedule cpuset_propagate_hotplug_workfn() which will update CPU and | ||
2134 | * memory masks according to top_cpuset. | ||
2135 | */ | ||
2136 | static void schedule_cpuset_propagate_hotplug(struct cpuset *cs) | ||
2137 | { | ||
2138 | /* | ||
2139 | * Pin @cs. The refcnt will be released when the work item | ||
2140 | * finishes executing. | ||
2141 | */ | ||
2142 | if (!css_tryget(&cs->css)) | ||
2143 | return; | ||
2015 | 2144 | ||
2016 | /* | 2145 | /* |
2017 | * Walk the specified cpuset subtree upon a hotplug operation (CPU/Memory | 2146 | * Queue @cs->hotplug_work. If already pending, lose the css ref. |
2018 | * online/offline) and update the cpusets accordingly. | 2147 | * cpuset_propagate_hotplug_wq is ordered and propagation will |
2019 | * For regular CPU/Mem hotplug, look for empty cpusets; the tasks of such | 2148 | * happen in the order this function is called. |
2020 | * cpuset must be moved to a parent cpuset. | 2149 | */ |
2150 | if (!queue_work(cpuset_propagate_hotplug_wq, &cs->hotplug_work)) | ||
2151 | css_put(&cs->css); | ||
2152 | } | ||
2153 | |||
2154 | /** | ||
2155 | * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset | ||
2021 | * | 2156 | * |
2022 | * Called with cgroup_mutex held. We take callback_mutex to modify | 2157 | * This function is called after either CPU or memory configuration has |
2023 | * cpus_allowed and mems_allowed. | 2158 | * changed and updates cpuset accordingly. The top_cpuset is always |
2159 | * synchronized to cpu_active_mask and N_MEMORY, which is necessary in | ||
2160 | * order to make cpusets transparent (of no effect) on systems that are | ||
2161 | * actively using CPU hotplug but making no active use of cpusets. | ||
2024 | * | 2162 | * |
2025 | * This walk processes the tree from top to bottom, completing one layer | 2163 | * Non-root cpusets are only affected by offlining. If any CPUs or memory |
2026 | * before dropping down to the next. It always processes a node before | 2164 | * nodes have been taken down, cpuset_propagate_hotplug() is invoked on all |
2027 | * any of its children. | 2165 | * descendants. |
2028 | * | 2166 | * |
2029 | * In the case of memory hot-unplug, it will remove nodes from N_MEMORY | 2167 | * Note that CPU offlining during suspend is ignored. We don't modify |
2030 | * if all present pages from a node are offlined. | 2168 | * cpusets across suspend/resume cycles at all. |
2031 | */ | 2169 | */ |
2032 | static void | 2170 | static void cpuset_hotplug_workfn(struct work_struct *work) |
2033 | scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event) | ||
2034 | { | 2171 | { |
2035 | LIST_HEAD(queue); | 2172 | static cpumask_t new_cpus, tmp_cpus; |
2036 | struct cpuset *cp; /* scans cpusets being updated */ | 2173 | static nodemask_t new_mems, tmp_mems; |
2037 | static nodemask_t oldmems; /* protected by cgroup_mutex */ | 2174 | bool cpus_updated, mems_updated; |
2175 | bool cpus_offlined, mems_offlined; | ||
2038 | 2176 | ||
2039 | list_add_tail((struct list_head *)&root->stack_list, &queue); | 2177 | mutex_lock(&cpuset_mutex); |
2040 | 2178 | ||
2041 | switch (event) { | 2179 | /* fetch the available cpus/mems and find out which changed how */ |
2042 | case CPUSET_CPU_OFFLINE: | 2180 | cpumask_copy(&new_cpus, cpu_active_mask); |
2043 | while ((cp = cpuset_next(&queue)) != NULL) { | 2181 | new_mems = node_states[N_MEMORY]; |
2044 | 2182 | ||
2045 | /* Continue past cpusets with all cpus online */ | 2183 | cpus_updated = !cpumask_equal(top_cpuset.cpus_allowed, &new_cpus); |
2046 | if (cpumask_subset(cp->cpus_allowed, cpu_active_mask)) | 2184 | cpus_offlined = cpumask_andnot(&tmp_cpus, top_cpuset.cpus_allowed, |
2047 | continue; | 2185 | &new_cpus); |
2048 | 2186 | ||
2049 | /* Remove offline cpus from this cpuset. */ | 2187 | mems_updated = !nodes_equal(top_cpuset.mems_allowed, new_mems); |
2050 | mutex_lock(&callback_mutex); | 2188 | nodes_andnot(tmp_mems, top_cpuset.mems_allowed, new_mems); |
2051 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, | 2189 | mems_offlined = !nodes_empty(tmp_mems); |
2052 | cpu_active_mask); | ||
2053 | mutex_unlock(&callback_mutex); | ||
2054 | 2190 | ||
2055 | /* Move tasks from the empty cpuset to a parent */ | 2191 | /* synchronize cpus_allowed to cpu_active_mask */ |
2056 | if (cpumask_empty(cp->cpus_allowed)) | 2192 | if (cpus_updated) { |
2057 | remove_tasks_in_empty_cpuset(cp); | 2193 | mutex_lock(&callback_mutex); |
2058 | else | 2194 | cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); |
2059 | update_tasks_cpumask(cp, NULL); | 2195 | mutex_unlock(&callback_mutex); |
2060 | } | 2196 | /* we don't mess with cpumasks of tasks in top_cpuset */ |
2061 | break; | 2197 | } |
2062 | 2198 | ||
2063 | case CPUSET_MEM_OFFLINE: | 2199 | /* synchronize mems_allowed to N_MEMORY */ |
2064 | while ((cp = cpuset_next(&queue)) != NULL) { | 2200 | if (mems_updated) { |
2201 | tmp_mems = top_cpuset.mems_allowed; | ||
2202 | mutex_lock(&callback_mutex); | ||
2203 | top_cpuset.mems_allowed = new_mems; | ||
2204 | mutex_unlock(&callback_mutex); | ||
2205 | update_tasks_nodemask(&top_cpuset, &tmp_mems, NULL); | ||
2206 | } | ||
2065 | 2207 | ||
2066 | /* Continue past cpusets with all mems online */ | 2208 | /* if cpus or mems went down, we need to propagate to descendants */ |
2067 | if (nodes_subset(cp->mems_allowed, | 2209 | if (cpus_offlined || mems_offlined) { |
2068 | node_states[N_MEMORY])) | 2210 | struct cpuset *cs; |
2069 | continue; | 2211 | struct cgroup *pos_cgrp; |
2070 | 2212 | ||
2071 | oldmems = cp->mems_allowed; | 2213 | rcu_read_lock(); |
2214 | cpuset_for_each_descendant_pre(cs, pos_cgrp, &top_cpuset) | ||
2215 | schedule_cpuset_propagate_hotplug(cs); | ||
2216 | rcu_read_unlock(); | ||
2217 | } | ||
2072 | 2218 | ||
2073 | /* Remove offline mems from this cpuset. */ | 2219 | mutex_unlock(&cpuset_mutex); |
2074 | mutex_lock(&callback_mutex); | ||
2075 | nodes_and(cp->mems_allowed, cp->mems_allowed, | ||
2076 | node_states[N_MEMORY]); | ||
2077 | mutex_unlock(&callback_mutex); | ||
2078 | 2220 | ||
2079 | /* Move tasks from the empty cpuset to a parent */ | 2221 | /* wait for propagations to finish */ |
2080 | if (nodes_empty(cp->mems_allowed)) | 2222 | flush_workqueue(cpuset_propagate_hotplug_wq); |
2081 | remove_tasks_in_empty_cpuset(cp); | 2223 | |
2082 | else | 2224 | /* rebuild sched domains if cpus_allowed has changed */ |
2083 | update_tasks_nodemask(cp, &oldmems, NULL); | 2225 | if (cpus_updated) { |
2084 | } | 2226 | struct sched_domain_attr *attr; |
2227 | cpumask_var_t *doms; | ||
2228 | int ndoms; | ||
2229 | |||
2230 | mutex_lock(&cpuset_mutex); | ||
2231 | ndoms = generate_sched_domains(&doms, &attr); | ||
2232 | mutex_unlock(&cpuset_mutex); | ||
2233 | |||
2234 | partition_sched_domains(ndoms, doms, attr); | ||
2085 | } | 2235 | } |
2086 | } | 2236 | } |
2087 | 2237 | ||
2088 | /* | ||
2089 | * The top_cpuset tracks what CPUs and Memory Nodes are online, | ||
2090 | * period. This is necessary in order to make cpusets transparent | ||
2091 | * (of no effect) on systems that are actively using CPU hotplug | ||
2092 | * but making no active use of cpusets. | ||
2093 | * | ||
2094 | * The only exception to this is suspend/resume, where we don't | ||
2095 | * modify cpusets at all. | ||
2096 | * | ||
2097 | * This routine ensures that top_cpuset.cpus_allowed tracks | ||
2098 | * cpu_active_mask on each CPU hotplug (cpuhp) event. | ||
2099 | * | ||
2100 | * Called within get_online_cpus(). Needs to call cgroup_lock() | ||
2101 | * before calling generate_sched_domains(). | ||
2102 | * | ||
2103 | * @cpu_online: Indicates whether this is a CPU online event (true) or | ||
2104 | * a CPU offline event (false). | ||
2105 | */ | ||
2106 | void cpuset_update_active_cpus(bool cpu_online) | 2238 | void cpuset_update_active_cpus(bool cpu_online) |
2107 | { | 2239 | { |
2108 | struct sched_domain_attr *attr; | 2240 | /* |
2109 | cpumask_var_t *doms; | 2241 | * We're inside cpu hotplug critical region which usually nests |
2110 | int ndoms; | 2242 | * inside cgroup synchronization. Bounce actual hotplug processing |
2111 | 2243 | * to a work item to avoid reverse locking order. | |
2112 | cgroup_lock(); | 2244 | * |
2113 | mutex_lock(&callback_mutex); | 2245 | * We still need to do partition_sched_domains() synchronously; |
2114 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); | 2246 | * otherwise, the scheduler will get confused and put tasks to the |
2115 | mutex_unlock(&callback_mutex); | 2247 | * dead CPU. Fall back to the default single domain. |
2116 | 2248 | * cpuset_hotplug_workfn() will rebuild it as necessary. | |
2117 | if (!cpu_online) | 2249 | */ |
2118 | scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_CPU_OFFLINE); | 2250 | partition_sched_domains(1, NULL, NULL); |
2119 | 2251 | schedule_work(&cpuset_hotplug_work); | |
2120 | ndoms = generate_sched_domains(&doms, &attr); | ||
2121 | cgroup_unlock(); | ||
2122 | |||
2123 | /* Have scheduler rebuild the domains */ | ||
2124 | partition_sched_domains(ndoms, doms, attr); | ||
2125 | } | 2252 | } |
2126 | 2253 | ||
2127 | #ifdef CONFIG_MEMORY_HOTPLUG | 2254 | #ifdef CONFIG_MEMORY_HOTPLUG |
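The comment added to cpuset_update_active_cpus() above is the heart of this cpuset rework: the hotplug callback runs inside the CPU-hotplug critical section, so it cannot take the cgroup/cpuset locks directly and instead bounces the heavy lifting to a work item. A minimal sketch of that defer-to-workqueue pattern, with illustrative names rather than the real cpuset symbols:

    #include <linux/workqueue.h>

    static void hotplug_workfn(struct work_struct *work)
    {
            /* process context, outside the hotplug critical section:
             * safe to take the subsystem mutex and do the slow work */
    }
    static DECLARE_WORK(hotplug_work, hotplug_workfn);

    static void hotplug_callback(void)
    {
            /* called with the hotplug lock held: do only what must be
             * synchronous here, then defer the rest */
            schedule_work(&hotplug_work);
    }

Callers that must wait for the deferred part can then use flush_work()/flush_workqueue(), which is exactly what the new cpuset code does before rebuilding sched domains.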
@@ -2133,29 +2260,7 @@ void cpuset_update_active_cpus(bool cpu_online) | |||
2133 | static int cpuset_track_online_nodes(struct notifier_block *self, | 2260 | static int cpuset_track_online_nodes(struct notifier_block *self, |
2134 | unsigned long action, void *arg) | 2261 | unsigned long action, void *arg) |
2135 | { | 2262 | { |
2136 | static nodemask_t oldmems; /* protected by cgroup_mutex */ | 2263 | schedule_work(&cpuset_hotplug_work); |
2137 | |||
2138 | cgroup_lock(); | ||
2139 | switch (action) { | ||
2140 | case MEM_ONLINE: | ||
2141 | oldmems = top_cpuset.mems_allowed; | ||
2142 | mutex_lock(&callback_mutex); | ||
2143 | top_cpuset.mems_allowed = node_states[N_MEMORY]; | ||
2144 | mutex_unlock(&callback_mutex); | ||
2145 | update_tasks_nodemask(&top_cpuset, &oldmems, NULL); | ||
2146 | break; | ||
2147 | case MEM_OFFLINE: | ||
2148 | /* | ||
2149 | * needn't update top_cpuset.mems_allowed explicitly because | ||
2150 | * scan_cpusets_upon_hotplug() will update it. | ||
2151 | */ | ||
2152 | scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_MEM_OFFLINE); | ||
2153 | break; | ||
2154 | default: | ||
2155 | break; | ||
2156 | } | ||
2157 | cgroup_unlock(); | ||
2158 | |||
2159 | return NOTIFY_OK; | 2264 | return NOTIFY_OK; |
2160 | } | 2265 | } |
2161 | #endif | 2266 | #endif |
@@ -2173,8 +2278,9 @@ void __init cpuset_init_smp(void) | |||
2173 | 2278 | ||
2174 | hotplug_memory_notifier(cpuset_track_online_nodes, 10); | 2279 | hotplug_memory_notifier(cpuset_track_online_nodes, 10); |
2175 | 2280 | ||
2176 | cpuset_wq = create_singlethread_workqueue("cpuset"); | 2281 | cpuset_propagate_hotplug_wq = |
2177 | BUG_ON(!cpuset_wq); | 2282 | alloc_ordered_workqueue("cpuset_hotplug", 0); |
2283 | BUG_ON(!cpuset_propagate_hotplug_wq); | ||
2178 | } | 2284 | } |
2179 | 2285 | ||
2180 | /** | 2286 | /** |
@@ -2273,8 +2379,8 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) | |||
2273 | */ | 2379 | */ |
2274 | static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs) | 2380 | static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs) |
2275 | { | 2381 | { |
2276 | while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent) | 2382 | while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs)) |
2277 | cs = cs->parent; | 2383 | cs = parent_cs(cs); |
2278 | return cs; | 2384 | return cs; |
2279 | } | 2385 | } |
2280 | 2386 | ||
@@ -2412,17 +2518,6 @@ int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) | |||
2412 | } | 2518 | } |
2413 | 2519 | ||
2414 | /** | 2520 | /** |
2415 | * cpuset_unlock - release lock on cpuset changes | ||
2416 | * | ||
2417 | * Undo the lock taken in a previous cpuset_lock() call. | ||
2418 | */ | ||
2419 | |||
2420 | void cpuset_unlock(void) | ||
2421 | { | ||
2422 | mutex_unlock(&callback_mutex); | ||
2423 | } | ||
2424 | |||
2425 | /** | ||
2426 | * cpuset_mem_spread_node() - On which node to begin search for a file page | 2521 | * cpuset_mem_spread_node() - On which node to begin search for a file page |
2427 | * cpuset_slab_spread_node() - On which node to begin search for a slab page | 2522 | * cpuset_slab_spread_node() - On which node to begin search for a slab page |
2428 | * | 2523 | * |
@@ -2511,8 +2606,16 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk) | |||
2511 | 2606 | ||
2512 | dentry = task_cs(tsk)->css.cgroup->dentry; | 2607 | dentry = task_cs(tsk)->css.cgroup->dentry; |
2513 | spin_lock(&cpuset_buffer_lock); | 2608 | spin_lock(&cpuset_buffer_lock); |
2514 | snprintf(cpuset_name, CPUSET_NAME_LEN, | 2609 | |
2515 | dentry ? (const char *)dentry->d_name.name : "/"); | 2610 | if (!dentry) { |
2611 | strcpy(cpuset_name, "/"); | ||
2612 | } else { | ||
2613 | spin_lock(&dentry->d_lock); | ||
2614 | strlcpy(cpuset_name, (const char *)dentry->d_name.name, | ||
2615 | CPUSET_NAME_LEN); | ||
2616 | spin_unlock(&dentry->d_lock); | ||
2617 | } | ||
2618 | |||
2516 | nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, | 2619 | nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, |
2517 | tsk->mems_allowed); | 2620 | tsk->mems_allowed); |
2518 | printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", | 2621 | printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", |
@@ -2560,7 +2663,7 @@ void __cpuset_memory_pressure_bump(void) | |||
2560 | * - Used for /proc/<pid>/cpuset. | 2663 | * - Used for /proc/<pid>/cpuset. |
2561 | * - No need to task_lock(tsk) on this tsk->cpuset reference, as it | 2664 | * - No need to task_lock(tsk) on this tsk->cpuset reference, as it |
2562 | * doesn't really matter if tsk->cpuset changes after we read it, | 2665 | * doesn't really matter if tsk->cpuset changes after we read it, |
2563 | * and we take cgroup_mutex, keeping cpuset_attach() from changing it | 2666 | * and we take cpuset_mutex, keeping cpuset_attach() from changing it |
2564 | * anyway. | 2667 | * anyway. |
2565 | */ | 2668 | */ |
2566 | static int proc_cpuset_show(struct seq_file *m, void *unused_v) | 2669 | static int proc_cpuset_show(struct seq_file *m, void *unused_v) |
@@ -2582,16 +2685,15 @@ static int proc_cpuset_show(struct seq_file *m, void *unused_v) | |||
2582 | if (!tsk) | 2685 | if (!tsk) |
2583 | goto out_free; | 2686 | goto out_free; |
2584 | 2687 | ||
2585 | retval = -EINVAL; | 2688 | rcu_read_lock(); |
2586 | cgroup_lock(); | ||
2587 | css = task_subsys_state(tsk, cpuset_subsys_id); | 2689 | css = task_subsys_state(tsk, cpuset_subsys_id); |
2588 | retval = cgroup_path(css->cgroup, buf, PAGE_SIZE); | 2690 | retval = cgroup_path(css->cgroup, buf, PAGE_SIZE); |
2691 | rcu_read_unlock(); | ||
2589 | if (retval < 0) | 2692 | if (retval < 0) |
2590 | goto out_unlock; | 2693 | goto out_put_task; |
2591 | seq_puts(m, buf); | 2694 | seq_puts(m, buf); |
2592 | seq_putc(m, '\n'); | 2695 | seq_putc(m, '\n'); |
2593 | out_unlock: | 2696 | out_put_task: |
2594 | cgroup_unlock(); | ||
2595 | put_task_struct(tsk); | 2697 | put_task_struct(tsk); |
2596 | out_free: | 2698 | out_free: |
2597 | kfree(buf); | 2699 | kfree(buf); |
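One hunk above, in cpuset_print_task_mems_allowed(), is a classic format-string fix: the old snprintf() used the cpuset's dentry name itself as the format, so a name containing '%' conversions would be interpreted; the new code copies the name under d_lock with strlcpy() instead. A small userspace illustration of the same pitfall (the buffer and name here are made up):

    #include <stdio.h>

    int main(void)
    {
            char buf[64];
            const char *name = "cpuset-%s-%x";   /* hostile-looking name */

            /* wrong: the untrusted name becomes the format string
             *   snprintf(buf, sizeof(buf), name);
             * right: always supply an explicit format */
            snprintf(buf, sizeof(buf), "%s", name);
            puts(buf);
            return 0;
    }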
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 9a61738cefc8..c26278fd4851 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c | |||
@@ -29,6 +29,7 @@ | |||
29 | */ | 29 | */ |
30 | #include <linux/pid_namespace.h> | 30 | #include <linux/pid_namespace.h> |
31 | #include <linux/clocksource.h> | 31 | #include <linux/clocksource.h> |
32 | #include <linux/serial_core.h> | ||
32 | #include <linux/interrupt.h> | 33 | #include <linux/interrupt.h> |
33 | #include <linux/spinlock.h> | 34 | #include <linux/spinlock.h> |
34 | #include <linux/console.h> | 35 | #include <linux/console.h> |
diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h index 3494c28a7e7a..2235967e78b0 100644 --- a/kernel/debug/debug_core.h +++ b/kernel/debug/debug_core.h | |||
@@ -72,6 +72,8 @@ extern int dbg_kdb_mode; | |||
72 | #ifdef CONFIG_KGDB_KDB | 72 | #ifdef CONFIG_KGDB_KDB |
73 | extern int kdb_stub(struct kgdb_state *ks); | 73 | extern int kdb_stub(struct kgdb_state *ks); |
74 | extern int kdb_parse(const char *cmdstr); | 74 | extern int kdb_parse(const char *cmdstr); |
75 | extern int kdb_common_init_state(struct kgdb_state *ks); | ||
76 | extern int kdb_common_deinit_state(void); | ||
75 | #else /* ! CONFIG_KGDB_KDB */ | 77 | #else /* ! CONFIG_KGDB_KDB */ |
76 | static inline int kdb_stub(struct kgdb_state *ks) | 78 | static inline int kdb_stub(struct kgdb_state *ks) |
77 | { | 79 | { |
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index ce615e064482..19d9a578c753 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/kernel.h> | 31 | #include <linux/kernel.h> |
32 | #include <linux/kgdb.h> | 32 | #include <linux/kgdb.h> |
33 | #include <linux/kdb.h> | 33 | #include <linux/kdb.h> |
34 | #include <linux/serial_core.h> | ||
34 | #include <linux/reboot.h> | 35 | #include <linux/reboot.h> |
35 | #include <linux/uaccess.h> | 36 | #include <linux/uaccess.h> |
36 | #include <asm/cacheflush.h> | 37 | #include <asm/cacheflush.h> |
@@ -782,7 +783,10 @@ static void gdb_cmd_query(struct kgdb_state *ks) | |||
782 | len = len / 2; | 783 | len = len / 2; |
783 | remcom_out_buffer[len++] = 0; | 784 | remcom_out_buffer[len++] = 0; |
784 | 785 | ||
786 | kdb_common_init_state(ks); | ||
785 | kdb_parse(remcom_out_buffer); | 787 | kdb_parse(remcom_out_buffer); |
788 | kdb_common_deinit_state(); | ||
789 | |||
786 | strcpy(remcom_out_buffer, "OK"); | 790 | strcpy(remcom_out_buffer, "OK"); |
787 | } | 791 | } |
788 | break; | 792 | break; |
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index 8418c2f8ec5d..70a504601dc3 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c | |||
@@ -486,11 +486,9 @@ static int kdb_bc(int argc, const char **argv) | |||
486 | /* | 486 | /* |
487 | * kdb_ss | 487 | * kdb_ss |
488 | * | 488 | * |
489 | * Process the 'ss' (Single Step) and 'ssb' (Single Step to Branch) | 489 | * Process the 'ss' (Single Step) command. |
490 | * commands. | ||
491 | * | 490 | * |
492 | * ss | 491 | * ss |
493 | * ssb | ||
494 | * | 492 | * |
495 | * Parameters: | 493 | * Parameters: |
496 | * argc Argument count | 494 | * argc Argument count |
@@ -498,35 +496,23 @@ static int kdb_bc(int argc, const char **argv) | |||
498 | * Outputs: | 496 | * Outputs: |
499 | * None. | 497 | * None. |
500 | * Returns: | 498 | * Returns: |
501 | * KDB_CMD_SS[B] for success, a kdb error if failure. | 499 | * KDB_CMD_SS for success, a kdb error if failure. |
502 | * Locking: | 500 | * Locking: |
503 | * None. | 501 | * None. |
504 | * Remarks: | 502 | * Remarks: |
505 | * | 503 | * |
506 | * Set the arch specific option to trigger a debug trap after the next | 504 | * Set the arch specific option to trigger a debug trap after the next |
507 | * instruction. | 505 | * instruction. |
508 | * | ||
509 | * For 'ssb', set the trace flag in the debug trap handler | ||
510 | * after printing the current insn and return directly without | ||
511 | * invoking the kdb command processor, until a branch instruction | ||
512 | * is encountered. | ||
513 | */ | 506 | */ |
514 | 507 | ||
515 | static int kdb_ss(int argc, const char **argv) | 508 | static int kdb_ss(int argc, const char **argv) |
516 | { | 509 | { |
517 | int ssb = 0; | ||
518 | |||
519 | ssb = (strcmp(argv[0], "ssb") == 0); | ||
520 | if (argc != 0) | 510 | if (argc != 0) |
521 | return KDB_ARGCOUNT; | 511 | return KDB_ARGCOUNT; |
522 | /* | 512 | /* |
523 | * Set trace flag and go. | 513 | * Set trace flag and go. |
524 | */ | 514 | */ |
525 | KDB_STATE_SET(DOING_SS); | 515 | KDB_STATE_SET(DOING_SS); |
526 | if (ssb) { | ||
527 | KDB_STATE_SET(DOING_SSB); | ||
528 | return KDB_CMD_SSB; | ||
529 | } | ||
530 | return KDB_CMD_SS; | 516 | return KDB_CMD_SS; |
531 | } | 517 | } |
532 | 518 | ||
@@ -561,8 +547,6 @@ void __init kdb_initbptab(void) | |||
561 | 547 | ||
562 | kdb_register_repeat("ss", kdb_ss, "", | 548 | kdb_register_repeat("ss", kdb_ss, "", |
563 | "Single Step", 1, KDB_REPEAT_NO_ARGS); | 549 | "Single Step", 1, KDB_REPEAT_NO_ARGS); |
564 | kdb_register_repeat("ssb", kdb_ss, "", | ||
565 | "Single step to branch/call", 0, KDB_REPEAT_NO_ARGS); | ||
566 | /* | 550 | /* |
567 | * Architecture dependent initialization. | 551 | * Architecture dependent initialization. |
568 | */ | 552 | */ |
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index be7b33b73d30..328d18ef31e4 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c | |||
@@ -34,6 +34,22 @@ EXPORT_SYMBOL_GPL(kdb_poll_idx); | |||
34 | 34 | ||
35 | static struct kgdb_state *kdb_ks; | 35 | static struct kgdb_state *kdb_ks; |
36 | 36 | ||
37 | int kdb_common_init_state(struct kgdb_state *ks) | ||
38 | { | ||
39 | kdb_initial_cpu = atomic_read(&kgdb_active); | ||
40 | kdb_current_task = kgdb_info[ks->cpu].task; | ||
41 | kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo; | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | int kdb_common_deinit_state(void) | ||
46 | { | ||
47 | kdb_initial_cpu = -1; | ||
48 | kdb_current_task = NULL; | ||
49 | kdb_current_regs = NULL; | ||
50 | return 0; | ||
51 | } | ||
52 | |||
37 | int kdb_stub(struct kgdb_state *ks) | 53 | int kdb_stub(struct kgdb_state *ks) |
38 | { | 54 | { |
39 | int error = 0; | 55 | int error = 0; |
@@ -94,13 +110,10 @@ int kdb_stub(struct kgdb_state *ks) | |||
94 | } | 110 | } |
95 | /* Set initial kdb state variables */ | 111 | /* Set initial kdb state variables */ |
96 | KDB_STATE_CLEAR(KGDB_TRANS); | 112 | KDB_STATE_CLEAR(KGDB_TRANS); |
97 | kdb_initial_cpu = atomic_read(&kgdb_active); | 113 | kdb_common_init_state(ks); |
98 | kdb_current_task = kgdb_info[ks->cpu].task; | ||
99 | kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo; | ||
100 | /* Remove any breakpoints as needed by kdb and clear single step */ | 114 | /* Remove any breakpoints as needed by kdb and clear single step */ |
101 | kdb_bp_remove(); | 115 | kdb_bp_remove(); |
102 | KDB_STATE_CLEAR(DOING_SS); | 116 | KDB_STATE_CLEAR(DOING_SS); |
103 | KDB_STATE_CLEAR(DOING_SSB); | ||
104 | KDB_STATE_SET(PAGER); | 117 | KDB_STATE_SET(PAGER); |
105 | /* zero out any offline cpu data */ | 118 | /* zero out any offline cpu data */ |
106 | for_each_present_cpu(i) { | 119 | for_each_present_cpu(i) { |
@@ -125,9 +138,7 @@ int kdb_stub(struct kgdb_state *ks) | |||
125 | * Upon exit from the kdb main loop set up breakpoints and restart | 138 | * Upon exit from the kdb main loop set up breakpoints and restart |
126 | * the system based on the requested continue state | 139 | * the system based on the requested continue state |
127 | */ | 140 | */ |
128 | kdb_initial_cpu = -1; | 141 | kdb_common_deinit_state(); |
129 | kdb_current_task = NULL; | ||
130 | kdb_current_regs = NULL; | ||
131 | KDB_STATE_CLEAR(PAGER); | 142 | KDB_STATE_CLEAR(PAGER); |
132 | kdbnearsym_cleanup(); | 143 | kdbnearsym_cleanup(); |
133 | if (error == KDB_CMD_KGDB) { | 144 | if (error == KDB_CMD_KGDB) { |
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 8875254120b6..00eb8f7fbf41 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
@@ -124,7 +124,7 @@ static kdbmsg_t kdbmsgs[] = { | |||
124 | }; | 124 | }; |
125 | #undef KDBMSG | 125 | #undef KDBMSG |
126 | 126 | ||
127 | static const int __nkdb_err = sizeof(kdbmsgs) / sizeof(kdbmsg_t); | 127 | static const int __nkdb_err = ARRAY_SIZE(kdbmsgs); |
128 | 128 | ||
129 | 129 | ||
130 | /* | 130 | /* |
@@ -175,7 +175,7 @@ static char *__env[] = { | |||
175 | (char *)0, | 175 | (char *)0, |
176 | }; | 176 | }; |
177 | 177 | ||
178 | static const int __nenv = (sizeof(__env) / sizeof(char *)); | 178 | static const int __nenv = ARRAY_SIZE(__env); |
179 | 179 | ||
180 | struct task_struct *kdb_curr_task(int cpu) | 180 | struct task_struct *kdb_curr_task(int cpu) |
181 | { | 181 | { |
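The two hunks above replace open-coded sizeof arithmetic with ARRAY_SIZE(), which reads better and, in the kernel's definition, refuses to compile when handed a pointer instead of a true array. A freestanding C equivalent (without that extra type check), for illustration:

    #include <stdio.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static const char *msgs[] = { "ok", "bad args", "not found" };

    int main(void)
    {
            printf("%zu entries\n", ARRAY_SIZE(msgs));   /* prints 3 */
            return 0;
    }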
@@ -681,34 +681,50 @@ static int kdb_defcmd(int argc, const char **argv) | |||
681 | } | 681 | } |
682 | if (argc != 3) | 682 | if (argc != 3) |
683 | return KDB_ARGCOUNT; | 683 | return KDB_ARGCOUNT; |
684 | defcmd_set = kmalloc((defcmd_set_count + 1) * sizeof(*defcmd_set), | 684 | if (in_dbg_master()) { |
685 | GFP_KDB); | 685 | kdb_printf("Command only available during kdb_init()\n"); |
686 | if (!defcmd_set) { | ||
687 | kdb_printf("Could not allocate new defcmd_set entry for %s\n", | ||
688 | argv[1]); | ||
689 | defcmd_set = save_defcmd_set; | ||
690 | return KDB_NOTIMP; | 686 | return KDB_NOTIMP; |
691 | } | 687 | } |
688 | defcmd_set = kmalloc((defcmd_set_count + 1) * sizeof(*defcmd_set), | ||
689 | GFP_KDB); | ||
690 | if (!defcmd_set) | ||
691 | goto fail_defcmd; | ||
692 | memcpy(defcmd_set, save_defcmd_set, | 692 | memcpy(defcmd_set, save_defcmd_set, |
693 | defcmd_set_count * sizeof(*defcmd_set)); | 693 | defcmd_set_count * sizeof(*defcmd_set)); |
694 | kfree(save_defcmd_set); | ||
695 | s = defcmd_set + defcmd_set_count; | 694 | s = defcmd_set + defcmd_set_count; |
696 | memset(s, 0, sizeof(*s)); | 695 | memset(s, 0, sizeof(*s)); |
697 | s->usable = 1; | 696 | s->usable = 1; |
698 | s->name = kdb_strdup(argv[1], GFP_KDB); | 697 | s->name = kdb_strdup(argv[1], GFP_KDB); |
698 | if (!s->name) | ||
699 | goto fail_name; | ||
699 | s->usage = kdb_strdup(argv[2], GFP_KDB); | 700 | s->usage = kdb_strdup(argv[2], GFP_KDB); |
701 | if (!s->usage) | ||
702 | goto fail_usage; | ||
700 | s->help = kdb_strdup(argv[3], GFP_KDB); | 703 | s->help = kdb_strdup(argv[3], GFP_KDB); |
704 | if (!s->help) | ||
705 | goto fail_help; | ||
701 | if (s->usage[0] == '"') { | 706 | if (s->usage[0] == '"') { |
702 | strcpy(s->usage, s->usage+1); | 707 | strcpy(s->usage, argv[2]+1); |
703 | s->usage[strlen(s->usage)-1] = '\0'; | 708 | s->usage[strlen(s->usage)-1] = '\0'; |
704 | } | 709 | } |
705 | if (s->help[0] == '"') { | 710 | if (s->help[0] == '"') { |
706 | strcpy(s->help, s->help+1); | 711 | strcpy(s->help, argv[3]+1); |
707 | s->help[strlen(s->help)-1] = '\0'; | 712 | s->help[strlen(s->help)-1] = '\0'; |
708 | } | 713 | } |
709 | ++defcmd_set_count; | 714 | ++defcmd_set_count; |
710 | defcmd_in_progress = 1; | 715 | defcmd_in_progress = 1; |
716 | kfree(save_defcmd_set); | ||
711 | return 0; | 717 | return 0; |
718 | fail_help: | ||
719 | kfree(s->usage); | ||
720 | fail_usage: | ||
721 | kfree(s->name); | ||
722 | fail_name: | ||
723 | kfree(defcmd_set); | ||
724 | fail_defcmd: | ||
725 | kdb_printf("Could not allocate new defcmd_set entry for %s\n", argv[1]); | ||
726 | defcmd_set = save_defcmd_set; | ||
727 | return KDB_NOTIMP; | ||
712 | } | 728 | } |
713 | 729 | ||
714 | /* | 730 | /* |
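The kdb_defcmd() rework above converts ad-hoc cleanup into the usual kernel goto-unwind ladder: each allocation gets a label named after the step that failed, and a failure jumps to the label that frees everything allocated so far, in reverse order. A compact standalone example of the shape (names are generic, not kdb's):

    #include <stdlib.h>
    #include <string.h>

    struct cmd { char *name; char *usage; char *help; };

    static struct cmd *cmd_create(const char *n, const char *u, const char *h)
    {
            struct cmd *c = malloc(sizeof(*c));

            if (!c)
                    goto fail_cmd;
            c->name = strdup(n);
            if (!c->name)
                    goto fail_name;
            c->usage = strdup(u);
            if (!c->usage)
                    goto fail_usage;
            c->help = strdup(h);
            if (!c->help)
                    goto fail_help;
            return c;

    fail_help:
            free(c->usage);    /* fall through frees the earlier pieces */
    fail_usage:
            free(c->name);
    fail_name:
            free(c);
    fail_cmd:
            return NULL;
    }

    int main(void)
    {
            struct cmd *c = cmd_create("md", "<vaddr>", "Display Memory Contents");

            if (c) {
                    free(c->help);
                    free(c->usage);
                    free(c->name);
                    free(c);
            }
            return 0;
    }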
@@ -1112,7 +1128,6 @@ void kdb_set_current_task(struct task_struct *p) | |||
1112 | * KDB_CMD_GO User typed 'go'. | 1128 | * KDB_CMD_GO User typed 'go'. |
1113 | * KDB_CMD_CPU User switched to another cpu. | 1129 | * KDB_CMD_CPU User switched to another cpu. |
1114 | * KDB_CMD_SS Single step. | 1130 | * KDB_CMD_SS Single step. |
1115 | * KDB_CMD_SSB Single step until branch. | ||
1116 | */ | 1131 | */ |
1117 | static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, | 1132 | static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, |
1118 | kdb_dbtrap_t db_result) | 1133 | kdb_dbtrap_t db_result) |
@@ -1151,14 +1166,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, | |||
1151 | kdb_printf("due to Debug @ " kdb_machreg_fmt "\n", | 1166 | kdb_printf("due to Debug @ " kdb_machreg_fmt "\n", |
1152 | instruction_pointer(regs)); | 1167 | instruction_pointer(regs)); |
1153 | break; | 1168 | break; |
1154 | case KDB_DB_SSB: | ||
1155 | /* | ||
1156 | * In the midst of ssb command. Just return. | ||
1157 | */ | ||
1158 | KDB_DEBUG_STATE("kdb_local 3", reason); | ||
1159 | return KDB_CMD_SSB; /* Continue with SSB command */ | ||
1160 | |||
1161 | break; | ||
1162 | case KDB_DB_SS: | 1169 | case KDB_DB_SS: |
1163 | break; | 1170 | break; |
1164 | case KDB_DB_SSBPT: | 1171 | case KDB_DB_SSBPT: |
@@ -1281,7 +1288,6 @@ do_full_getstr: | |||
1281 | if (diag == KDB_CMD_GO | 1288 | if (diag == KDB_CMD_GO |
1282 | || diag == KDB_CMD_CPU | 1289 | || diag == KDB_CMD_CPU |
1283 | || diag == KDB_CMD_SS | 1290 | || diag == KDB_CMD_SS |
1284 | || diag == KDB_CMD_SSB | ||
1285 | || diag == KDB_CMD_KGDB) | 1291 | || diag == KDB_CMD_KGDB) |
1286 | break; | 1292 | break; |
1287 | 1293 | ||
@@ -1368,12 +1374,6 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error, | |||
1368 | break; | 1374 | break; |
1369 | } | 1375 | } |
1370 | 1376 | ||
1371 | if (result == KDB_CMD_SSB) { | ||
1372 | KDB_STATE_SET(DOING_SS); | ||
1373 | KDB_STATE_SET(DOING_SSB); | ||
1374 | break; | ||
1375 | } | ||
1376 | |||
1377 | if (result == KDB_CMD_KGDB) { | 1377 | if (result == KDB_CMD_KGDB) { |
1378 | if (!KDB_STATE(DOING_KGDB)) | 1378 | if (!KDB_STATE(DOING_KGDB)) |
1379 | kdb_printf("Entering please attach debugger " | 1379 | kdb_printf("Entering please attach debugger " |
@@ -2350,69 +2350,6 @@ static int kdb_pid(int argc, const char **argv) | |||
2350 | return 0; | 2350 | return 0; |
2351 | } | 2351 | } |
2352 | 2352 | ||
2353 | /* | ||
2354 | * kdb_ll - This function implements the 'll' command which follows a | ||
2355 | * linked list and executes an arbitrary command for each | ||
2356 | * element. | ||
2357 | */ | ||
2358 | static int kdb_ll(int argc, const char **argv) | ||
2359 | { | ||
2360 | int diag = 0; | ||
2361 | unsigned long addr; | ||
2362 | long offset = 0; | ||
2363 | unsigned long va; | ||
2364 | unsigned long linkoffset; | ||
2365 | int nextarg; | ||
2366 | const char *command; | ||
2367 | |||
2368 | if (argc != 3) | ||
2369 | return KDB_ARGCOUNT; | ||
2370 | |||
2371 | nextarg = 1; | ||
2372 | diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL); | ||
2373 | if (diag) | ||
2374 | return diag; | ||
2375 | |||
2376 | diag = kdbgetularg(argv[2], &linkoffset); | ||
2377 | if (diag) | ||
2378 | return diag; | ||
2379 | |||
2380 | /* | ||
2381 | * Using the starting address as | ||
2382 | * the first element in the list, and assuming that | ||
2383 | * the list ends with a null pointer. | ||
2384 | */ | ||
2385 | |||
2386 | va = addr; | ||
2387 | command = kdb_strdup(argv[3], GFP_KDB); | ||
2388 | if (!command) { | ||
2389 | kdb_printf("%s: cannot duplicate command\n", __func__); | ||
2390 | return 0; | ||
2391 | } | ||
2392 | /* Recursive use of kdb_parse, do not use argv after this point */ | ||
2393 | argv = NULL; | ||
2394 | |||
2395 | while (va) { | ||
2396 | char buf[80]; | ||
2397 | |||
2398 | if (KDB_FLAG(CMD_INTERRUPT)) | ||
2399 | goto out; | ||
2400 | |||
2401 | sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va); | ||
2402 | diag = kdb_parse(buf); | ||
2403 | if (diag) | ||
2404 | goto out; | ||
2405 | |||
2406 | addr = va + linkoffset; | ||
2407 | if (kdb_getword(&va, addr, sizeof(va))) | ||
2408 | goto out; | ||
2409 | } | ||
2410 | |||
2411 | out: | ||
2412 | kfree(command); | ||
2413 | return diag; | ||
2414 | } | ||
2415 | |||
2416 | static int kdb_kgdb(int argc, const char **argv) | 2353 | static int kdb_kgdb(int argc, const char **argv) |
2417 | { | 2354 | { |
2418 | return KDB_CMD_KGDB; | 2355 | return KDB_CMD_KGDB; |
@@ -2430,11 +2367,15 @@ static int kdb_help(int argc, const char **argv) | |||
2430 | kdb_printf("-----------------------------" | 2367 | kdb_printf("-----------------------------" |
2431 | "-----------------------------\n"); | 2368 | "-----------------------------\n"); |
2432 | for_each_kdbcmd(kt, i) { | 2369 | for_each_kdbcmd(kt, i) { |
2433 | if (kt->cmd_name) | 2370 | char *space = ""; |
2434 | kdb_printf("%-15.15s %-20.20s %s\n", kt->cmd_name, | ||
2435 | kt->cmd_usage, kt->cmd_help); | ||
2436 | if (KDB_FLAG(CMD_INTERRUPT)) | 2371 | if (KDB_FLAG(CMD_INTERRUPT)) |
2437 | return 0; | 2372 | return 0; |
2373 | if (!kt->cmd_name) | ||
2374 | continue; | ||
2375 | if (strlen(kt->cmd_usage) > 20) | ||
2376 | space = "\n "; | ||
2377 | kdb_printf("%-15.15s %-20s%s%s\n", kt->cmd_name, | ||
2378 | kt->cmd_usage, space, kt->cmd_help); | ||
2438 | } | 2379 | } |
2439 | return 0; | 2380 | return 0; |
2440 | } | 2381 | } |
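The kdb_help() change above keeps the three help columns aligned by pushing the description onto an indented continuation line whenever the usage string overflows its 20-character field. The same idea in plain C, with commands taken from the table below as sample data:

    #include <stdio.h>
    #include <string.h>

    static void print_row(const char *name, const char *usage, const char *help)
    {
            /* 36 spaces lines the wrapped help up under its column */
            const char *sep = strlen(usage) > 20 ?
                    "\n                                    " : "";

            printf("%-15.15s %-20s%s%s\n", name, usage, sep, help);
    }

    int main(void)
    {
            print_row("btp", "<pid>", "Display stack for process <pid>");
            print_row("bta", "[D|R|S|T|C|Z|E|U|I|M|A]",
                      "Backtrace all processes matching state flag");
            return 0;
    }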
@@ -2739,7 +2680,7 @@ int kdb_register_repeat(char *cmd, | |||
2739 | (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new)); | 2680 | (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new)); |
2740 | kfree(kdb_commands); | 2681 | kfree(kdb_commands); |
2741 | } | 2682 | } |
2742 | memset(new + kdb_max_commands, 0, | 2683 | memset(new + kdb_max_commands - KDB_BASE_CMD_MAX, 0, |
2743 | kdb_command_extend * sizeof(*new)); | 2684 | kdb_command_extend * sizeof(*new)); |
2744 | kdb_commands = new; | 2685 | kdb_commands = new; |
2745 | kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX; | 2686 | kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX; |
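The one-line memset fix above is about where the "new" tail of a grown array starts: the copied entries occupy only (kdb_max_commands - KDB_BASE_CMD_MAX) slots of the freshly allocated buffer, so zeroing must begin at that offset, not at kdb_max_commands. A generic grow-and-zero helper showing the arithmetic (names are illustrative):

    #include <stdlib.h>
    #include <string.h>

    /* grow 'old' from old_n to old_n + extra elements of 'size' bytes,
     * zero-filling only the newly added tail */
    static void *grow_zeroed(void *old, size_t old_n, size_t extra, size_t size)
    {
            char *buf = malloc((old_n + extra) * size);

            if (!buf)
                    return NULL;
            memcpy(buf, old, old_n * size);
            memset(buf + old_n * size, 0, extra * size); /* offset in the NEW buffer */
            free(old);
            return buf;
    }

    int main(void)
    {
            int *a = calloc(4, sizeof(*a));

            if (a)
                    a = grow_zeroed(a, 4, 8, sizeof(*a));
            free(a);
            return 0;
    }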
@@ -2843,15 +2784,13 @@ static void __init kdb_inittab(void) | |||
2843 | "Stack traceback", 1, KDB_REPEAT_NONE); | 2784 | "Stack traceback", 1, KDB_REPEAT_NONE); |
2844 | kdb_register_repeat("btp", kdb_bt, "<pid>", | 2785 | kdb_register_repeat("btp", kdb_bt, "<pid>", |
2845 | "Display stack for process <pid>", 0, KDB_REPEAT_NONE); | 2786 | "Display stack for process <pid>", 0, KDB_REPEAT_NONE); |
2846 | kdb_register_repeat("bta", kdb_bt, "[DRSTCZEUIMA]", | 2787 | kdb_register_repeat("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]", |
2847 | "Display stack all processes", 0, KDB_REPEAT_NONE); | 2788 | "Backtrace all processes matching state flag", 0, KDB_REPEAT_NONE); |
2848 | kdb_register_repeat("btc", kdb_bt, "", | 2789 | kdb_register_repeat("btc", kdb_bt, "", |
2849 | "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE); | 2790 | "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE); |
2850 | kdb_register_repeat("btt", kdb_bt, "<vaddr>", | 2791 | kdb_register_repeat("btt", kdb_bt, "<vaddr>", |
2851 | "Backtrace process given its struct task address", 0, | 2792 | "Backtrace process given its struct task address", 0, |
2852 | KDB_REPEAT_NONE); | 2793 | KDB_REPEAT_NONE); |
2853 | kdb_register_repeat("ll", kdb_ll, "<first-element> <linkoffset> <cmd>", | ||
2854 | "Execute cmd for each element in linked list", 0, KDB_REPEAT_NONE); | ||
2855 | kdb_register_repeat("env", kdb_env, "", | 2794 | kdb_register_repeat("env", kdb_env, "", |
2856 | "Show environment variables", 0, KDB_REPEAT_NONE); | 2795 | "Show environment variables", 0, KDB_REPEAT_NONE); |
2857 | kdb_register_repeat("set", kdb_set, "", | 2796 | kdb_register_repeat("set", kdb_set, "", |
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 392ec6a25844..7afd3c8c41d5 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h | |||
@@ -19,7 +19,6 @@ | |||
19 | #define KDB_CMD_GO (-1001) | 19 | #define KDB_CMD_GO (-1001) |
20 | #define KDB_CMD_CPU (-1002) | 20 | #define KDB_CMD_CPU (-1002) |
21 | #define KDB_CMD_SS (-1003) | 21 | #define KDB_CMD_SS (-1003) |
22 | #define KDB_CMD_SSB (-1004) | ||
23 | #define KDB_CMD_KGDB (-1005) | 22 | #define KDB_CMD_KGDB (-1005) |
24 | 23 | ||
25 | /* Internal debug flags */ | 24 | /* Internal debug flags */ |
@@ -125,8 +124,6 @@ extern int kdb_state; | |||
125 | * kdb control */ | 124 | * kdb control */ |
126 | #define KDB_STATE_HOLD_CPU 0x00000010 /* Hold this cpu inside kdb */ | 125 | #define KDB_STATE_HOLD_CPU 0x00000010 /* Hold this cpu inside kdb */ |
127 | #define KDB_STATE_DOING_SS 0x00000020 /* Doing ss command */ | 126 | #define KDB_STATE_DOING_SS 0x00000020 /* Doing ss command */ |
128 | #define KDB_STATE_DOING_SSB 0x00000040 /* Doing ssb command, | ||
129 | * DOING_SS is also set */ | ||
130 | #define KDB_STATE_SSBPT 0x00000080 /* Install breakpoint | 127 | #define KDB_STATE_SSBPT 0x00000080 /* Install breakpoint |
131 | * after one ss, independent of | 128 | * after one ss, independent of |
132 | * DOING_SS */ | 129 | * DOING_SS */ |
@@ -191,7 +188,6 @@ extern void kdb_bp_remove(void); | |||
191 | typedef enum { | 188 | typedef enum { |
192 | KDB_DB_BPT, /* Breakpoint */ | 189 | KDB_DB_BPT, /* Breakpoint */ |
193 | KDB_DB_SS, /* Single-step trap */ | 190 | KDB_DB_SS, /* Single-step trap */ |
194 | KDB_DB_SSB, /* Single step to branch */ | ||
195 | KDB_DB_SSBPT, /* Single step over breakpoint */ | 191 | KDB_DB_SSBPT, /* Single step over breakpoint */ |
196 | KDB_DB_NOBPT /* Spurious breakpoint */ | 192 | KDB_DB_NOBPT /* Spurious breakpoint */ |
197 | } kdb_dbtrap_t; | 193 | } kdb_dbtrap_t; |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 5c75791d7269..59412d037eed 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -3691,7 +3691,7 @@ unlock: | |||
3691 | 3691 | ||
3692 | static int perf_fasync(int fd, struct file *filp, int on) | 3692 | static int perf_fasync(int fd, struct file *filp, int on) |
3693 | { | 3693 | { |
3694 | struct inode *inode = filp->f_path.dentry->d_inode; | 3694 | struct inode *inode = file_inode(filp); |
3695 | struct perf_event *event = filp->private_data; | 3695 | struct perf_event *event = filp->private_data; |
3696 | int retval; | 3696 | int retval; |
3697 | 3697 | ||
@@ -4434,12 +4434,15 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
4434 | if (ctxn < 0) | 4434 | if (ctxn < 0) |
4435 | goto next; | 4435 | goto next; |
4436 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | 4436 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); |
4437 | if (ctx) | ||
4438 | perf_event_task_ctx(ctx, task_event); | ||
4437 | } | 4439 | } |
4438 | if (ctx) | ||
4439 | perf_event_task_ctx(ctx, task_event); | ||
4440 | next: | 4440 | next: |
4441 | put_cpu_ptr(pmu->pmu_cpu_context); | 4441 | put_cpu_ptr(pmu->pmu_cpu_context); |
4442 | } | 4442 | } |
4443 | if (task_event->task_ctx) | ||
4444 | perf_event_task_ctx(task_event->task_ctx, task_event); | ||
4445 | |||
4443 | rcu_read_unlock(); | 4446 | rcu_read_unlock(); |
4444 | } | 4447 | } |
4445 | 4448 | ||
@@ -5126,7 +5129,6 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
5126 | { | 5129 | { |
5127 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | 5130 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
5128 | struct perf_event *event; | 5131 | struct perf_event *event; |
5129 | struct hlist_node *node; | ||
5130 | struct hlist_head *head; | 5132 | struct hlist_head *head; |
5131 | 5133 | ||
5132 | rcu_read_lock(); | 5134 | rcu_read_lock(); |
@@ -5134,7 +5136,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
5134 | if (!head) | 5136 | if (!head) |
5135 | goto end; | 5137 | goto end; |
5136 | 5138 | ||
5137 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 5139 | hlist_for_each_entry_rcu(event, head, hlist_entry) { |
5138 | if (perf_swevent_match(event, type, event_id, data, regs)) | 5140 | if (perf_swevent_match(event, type, event_id, data, regs)) |
5139 | perf_swevent_event(event, nr, data, regs); | 5141 | perf_swevent_event(event, nr, data, regs); |
5140 | } | 5142 | } |
@@ -5419,7 +5421,6 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | |||
5419 | { | 5421 | { |
5420 | struct perf_sample_data data; | 5422 | struct perf_sample_data data; |
5421 | struct perf_event *event; | 5423 | struct perf_event *event; |
5422 | struct hlist_node *node; | ||
5423 | 5424 | ||
5424 | struct perf_raw_record raw = { | 5425 | struct perf_raw_record raw = { |
5425 | .size = entry_size, | 5426 | .size = entry_size, |
@@ -5429,7 +5430,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | |||
5429 | perf_sample_data_init(&data, addr, 0); | 5430 | perf_sample_data_init(&data, addr, 0); |
5430 | data.raw = &raw; | 5431 | data.raw = &raw; |
5431 | 5432 | ||
5432 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 5433 | hlist_for_each_entry_rcu(event, head, hlist_entry) { |
5433 | if (perf_tp_event_match(event, &data, regs)) | 5434 | if (perf_tp_event_match(event, &data, regs)) |
5434 | perf_swevent_event(event, count, &data, regs); | 5435 | perf_swevent_event(event, count, &data, regs); |
5435 | } | 5436 | } |
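The two perf hunks just above follow a tree-wide API change: hlist_for_each_entry_rcu() no longer takes a separate struct hlist_node cursor, the entry pointer itself is the loop variable. A hedged kernel-style sketch of the new form (the struct and field names are invented for illustration):

    /* requires <linux/rculist.h>; caller must hold rcu_read_lock() */
    struct waiter {
            int                     cpu;
            struct hlist_node       node;
    };

    static void visit_waiters(struct hlist_head *head)
    {
            struct waiter *w;

            /* no separate "struct hlist_node *n" cursor any more */
            hlist_for_each_entry_rcu(w, head, node)
                    pr_info("waiter on cpu %d\n", w->cpu);
    }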
@@ -5649,6 +5650,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) | |||
5649 | event->attr.sample_period = NSEC_PER_SEC / freq; | 5650 | event->attr.sample_period = NSEC_PER_SEC / freq; |
5650 | hwc->sample_period = event->attr.sample_period; | 5651 | hwc->sample_period = event->attr.sample_period; |
5651 | local64_set(&hwc->period_left, hwc->sample_period); | 5652 | local64_set(&hwc->period_left, hwc->sample_period); |
5653 | hwc->last_period = hwc->sample_period; | ||
5652 | event->attr.freq = 0; | 5654 | event->attr.freq = 0; |
5653 | } | 5655 | } |
5654 | } | 5656 | } |
@@ -5965,13 +5967,9 @@ int perf_pmu_register(struct pmu *pmu, char *name, int type) | |||
5965 | pmu->name = name; | 5967 | pmu->name = name; |
5966 | 5968 | ||
5967 | if (type < 0) { | 5969 | if (type < 0) { |
5968 | int err = idr_pre_get(&pmu_idr, GFP_KERNEL); | 5970 | type = idr_alloc(&pmu_idr, pmu, PERF_TYPE_MAX, 0, GFP_KERNEL); |
5969 | if (!err) | 5971 | if (type < 0) { |
5970 | goto free_pdc; | 5972 | ret = type; |
5971 | |||
5972 | err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type); | ||
5973 | if (err) { | ||
5974 | ret = err; | ||
5975 | goto free_pdc; | 5973 | goto free_pdc; |
5976 | } | 5974 | } |
5977 | } | 5975 | } |
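The pmu registration hunk above moves from the old two-call IDR API (idr_pre_get() followed by idr_get_new_above()) to idr_alloc(), which takes the id range and GFP flags directly and returns either the new id or a negative errno. A distilled sketch of the new call, with an invented wrapper name:

    /* requires <linux/idr.h> */
    static DEFINE_IDR(type_idr);

    /* allocate a dynamic id starting at 64; an 'end' of 0 means no upper
     * bound, and the return value is the id or a negative errno */
    static int alloc_type_id(void *owner)
    {
            return idr_alloc(&type_idr, owner, 64, 0, GFP_KERNEL);
    }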
diff --git a/kernel/exit.c b/kernel/exit.c index 7dd20408707c..51e485ca9935 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/tsacct_kern.h> | 20 | #include <linux/tsacct_kern.h> |
21 | #include <linux/file.h> | 21 | #include <linux/file.h> |
22 | #include <linux/fdtable.h> | 22 | #include <linux/fdtable.h> |
23 | #include <linux/freezer.h> | ||
23 | #include <linux/binfmts.h> | 24 | #include <linux/binfmts.h> |
24 | #include <linux/nsproxy.h> | 25 | #include <linux/nsproxy.h> |
25 | #include <linux/pid_namespace.h> | 26 | #include <linux/pid_namespace.h> |
@@ -31,7 +32,6 @@ | |||
31 | #include <linux/mempolicy.h> | 32 | #include <linux/mempolicy.h> |
32 | #include <linux/taskstats_kern.h> | 33 | #include <linux/taskstats_kern.h> |
33 | #include <linux/delayacct.h> | 34 | #include <linux/delayacct.h> |
34 | #include <linux/freezer.h> | ||
35 | #include <linux/cgroup.h> | 35 | #include <linux/cgroup.h> |
36 | #include <linux/syscalls.h> | 36 | #include <linux/syscalls.h> |
37 | #include <linux/signal.h> | 37 | #include <linux/signal.h> |
@@ -485,7 +485,7 @@ static void exit_mm(struct task_struct * tsk) | |||
485 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 485 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
486 | if (!self.task) /* see coredump_finish() */ | 486 | if (!self.task) /* see coredump_finish() */ |
487 | break; | 487 | break; |
488 | schedule(); | 488 | freezable_schedule(); |
489 | } | 489 | } |
490 | __set_task_state(tsk, TASK_RUNNING); | 490 | __set_task_state(tsk, TASK_RUNNING); |
491 | down_read(&mm->mmap_sem); | 491 | down_read(&mm->mmap_sem); |
@@ -835,7 +835,7 @@ void do_exit(long code) | |||
835 | /* | 835 | /* |
836 | * Make sure we are holding no locks: | 836 | * Make sure we are holding no locks: |
837 | */ | 837 | */ |
838 | debug_check_no_locks_held(tsk); | 838 | debug_check_no_locks_held(); |
839 | /* | 839 | /* |
840 | * We can do this unlocked here. The futex code uses this flag | 840 | * We can do this unlocked here. The futex code uses this flag |
841 | * just to verify whether the pi state cleanup has been done | 841 | * just to verify whether the pi state cleanup has been done |
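The exit_mm() change above swaps schedule() for freezable_schedule() so a task parked waiting for a coredump no longer blocks system suspend. In this generation of the kernel the helper in include/linux/freezer.h is essentially the following (quoted from memory, so treat it as an approximation):

    static inline void freezable_schedule(void)
    {
            freezer_do_not_count();   /* freezer may proceed without us */
            schedule();
            freezer_count();          /* rejoin, freezing here if asked to */
    }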
diff --git a/kernel/fork.c b/kernel/fork.c index 4133876d8cd2..1766d324d5e3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -413,7 +413,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
413 | tmp->vm_next = tmp->vm_prev = NULL; | 413 | tmp->vm_next = tmp->vm_prev = NULL; |
414 | file = tmp->vm_file; | 414 | file = tmp->vm_file; |
415 | if (file) { | 415 | if (file) { |
416 | struct inode *inode = file->f_path.dentry->d_inode; | 416 | struct inode *inode = file_inode(file); |
417 | struct address_space *mapping = file->f_mapping; | 417 | struct address_space *mapping = file->f_mapping; |
418 | 418 | ||
419 | get_file(file); | 419 | get_file(file); |
@@ -1141,6 +1141,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1141 | if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) | 1141 | if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) |
1142 | return ERR_PTR(-EINVAL); | 1142 | return ERR_PTR(-EINVAL); |
1143 | 1143 | ||
1144 | if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) | ||
1145 | return ERR_PTR(-EINVAL); | ||
1146 | |||
1144 | /* | 1147 | /* |
1145 | * Thread groups must share signals as well, and detached threads | 1148 | * Thread groups must share signals as well, and detached threads |
1146 | * can only be started up within the thread group. | 1149 | * can only be started up within the thread group. |
@@ -1807,7 +1810,7 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1807 | * If unsharing a user namespace must also unshare the thread. | 1810 | * If unsharing a user namespace must also unshare the thread. |
1808 | */ | 1811 | */ |
1809 | if (unshare_flags & CLONE_NEWUSER) | 1812 | if (unshare_flags & CLONE_NEWUSER) |
1810 | unshare_flags |= CLONE_THREAD; | 1813 | unshare_flags |= CLONE_THREAD | CLONE_FS; |
1811 | /* | 1814 | /* |
1812 | * If unsharing a pid namespace must also unshare the thread. | 1815 | * If unsharing a pid namespace must also unshare the thread. |
1813 | */ | 1816 | */ |
@@ -1861,10 +1864,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1861 | exit_sem(current); | 1864 | exit_sem(current); |
1862 | } | 1865 | } |
1863 | 1866 | ||
1864 | if (new_nsproxy) { | 1867 | if (new_nsproxy) |
1865 | switch_task_namespaces(current, new_nsproxy); | 1868 | switch_task_namespaces(current, new_nsproxy); |
1866 | new_nsproxy = NULL; | ||
1867 | } | ||
1868 | 1869 | ||
1869 | task_lock(current); | 1870 | task_lock(current); |
1870 | 1871 | ||
@@ -1894,9 +1895,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1894 | } | 1895 | } |
1895 | } | 1896 | } |
1896 | 1897 | ||
1897 | if (new_nsproxy) | ||
1898 | put_nsproxy(new_nsproxy); | ||
1899 | |||
1900 | bad_unshare_cleanup_cred: | 1898 | bad_unshare_cleanup_cred: |
1901 | if (new_cred) | 1899 | if (new_cred) |
1902 | put_cred(new_cred); | 1900 | put_cred(new_cred); |
diff --git a/kernel/futex.c b/kernel/futex.c index 9618b6e9fb36..b26dcfc02c94 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -223,10 +223,11 @@ static void drop_futex_key_refs(union futex_key *key) | |||
223 | * @rw: mapping needs to be read/write (values: VERIFY_READ, | 223 | * @rw: mapping needs to be read/write (values: VERIFY_READ, |
224 | * VERIFY_WRITE) | 224 | * VERIFY_WRITE) |
225 | * | 225 | * |
226 | * Returns a negative error code or 0 | 226 | * Return: a negative error code or 0 |
227 | * | ||
227 | * The key words are stored in *key on success. | 228 | * The key words are stored in *key on success. |
228 | * | 229 | * |
229 | * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode, | 230 | * For shared mappings, it's (page->index, file_inode(vma->vm_file), |
230 | * offset_within_page). For private mappings, it's (uaddr, current->mm). | 231 | * offset_within_page). For private mappings, it's (uaddr, current->mm). |
231 | * We can usually work out the index without swapping in the page. | 232 | * We can usually work out the index without swapping in the page. |
232 | * | 233 | * |
@@ -705,9 +706,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
705 | * be "current" except in the case of requeue pi. | 706 | * be "current" except in the case of requeue pi. |
706 | * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) | 707 | * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) |
707 | * | 708 | * |
708 | * Returns: | 709 | * Return: |
709 | * 0 - ready to wait | 710 | * 0 - ready to wait; |
710 | * 1 - acquired the lock | 711 | * 1 - acquired the lock; |
711 | * <0 - error | 712 | * <0 - error |
712 | * | 713 | * |
713 | * The hb->lock and futex_key refs shall be held by the caller. | 714 | * The hb->lock and futex_key refs shall be held by the caller. |
@@ -1191,9 +1192,9 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, | |||
1191 | * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. | 1192 | * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. |
1192 | * hb1 and hb2 must be held by the caller. | 1193 | * hb1 and hb2 must be held by the caller. |
1193 | * | 1194 | * |
1194 | * Returns: | 1195 | * Return: |
1195 | * 0 - failed to acquire the lock atomicly | 1196 | * 0 - failed to acquire the lock atomically; |
1196 | * 1 - acquired the lock | 1197 | * 1 - acquired the lock; |
1197 | * <0 - error | 1198 | * <0 - error |
1198 | */ | 1199 | */ |
1199 | static int futex_proxy_trylock_atomic(u32 __user *pifutex, | 1200 | static int futex_proxy_trylock_atomic(u32 __user *pifutex, |
@@ -1254,8 +1255,8 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
1254 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire | 1255 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire |
1255 | * uaddr2 atomically on behalf of the top waiter. | 1256 | * uaddr2 atomically on behalf of the top waiter. |
1256 | * | 1257 | * |
1257 | * Returns: | 1258 | * Return: |
1258 | * >=0 - on success, the number of tasks requeued or woken | 1259 | * >=0 - on success, the number of tasks requeued or woken; |
1259 | * <0 - on error | 1260 | * <0 - on error |
1260 | */ | 1261 | */ |
1261 | static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | 1262 | static int futex_requeue(u32 __user *uaddr1, unsigned int flags, |
@@ -1536,8 +1537,8 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | |||
1536 | * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must | 1537 | * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must |
1537 | * be paired with exactly one earlier call to queue_me(). | 1538 | * be paired with exactly one earlier call to queue_me(). |
1538 | * | 1539 | * |
1539 | * Returns: | 1540 | * Return: |
1540 | * 1 - if the futex_q was still queued (and we removed unqueued it) | 1541 | * 1 - if the futex_q was still queued (and we removed unqueued it); |
1541 | * 0 - if the futex_q was already removed by the waking thread | 1542 | * 0 - if the futex_q was already removed by the waking thread |
1542 | */ | 1543 | */ |
1543 | static int unqueue_me(struct futex_q *q) | 1544 | static int unqueue_me(struct futex_q *q) |
@@ -1707,9 +1708,9 @@ static long futex_wait_restart(struct restart_block *restart); | |||
1707 | * the pi_state owner as well as handle race conditions that may allow us to | 1708 | * the pi_state owner as well as handle race conditions that may allow us to |
1708 | * acquire the lock. Must be called with the hb lock held. | 1709 | * acquire the lock. Must be called with the hb lock held. |
1709 | * | 1710 | * |
1710 | * Returns: | 1711 | * Return: |
1711 | * 1 - success, lock taken | 1712 | * 1 - success, lock taken; |
1712 | * 0 - success, lock not taken | 1713 | * 0 - success, lock not taken; |
1713 | * <0 - on error (-EFAULT) | 1714 | * <0 - on error (-EFAULT) |
1714 | */ | 1715 | */ |
1715 | static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) | 1716 | static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) |
@@ -1824,8 +1825,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | |||
1824 | * Return with the hb lock held and a q.key reference on success, and unlocked | 1825 | * Return with the hb lock held and a q.key reference on success, and unlocked |
1825 | * with no q.key reference on failure. | 1826 | * with no q.key reference on failure. |
1826 | * | 1827 | * |
1827 | * Returns: | 1828 | * Return: |
1828 | * 0 - uaddr contains val and hb has been locked | 1829 | * 0 - uaddr contains val and hb has been locked; |
1829 | * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked | 1830 | * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked |
1830 | */ | 1831 | */ |
1831 | static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, | 1832 | static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, |
@@ -2203,9 +2204,9 @@ pi_faulted: | |||
2203 | * the wakeup and return the appropriate error code to the caller. Must be | 2204 | * the wakeup and return the appropriate error code to the caller. Must be |
2204 | * called with the hb lock held. | 2205 | * called with the hb lock held. |
2205 | * | 2206 | * |
2206 | * Returns | 2207 | * Return: |
2207 | * 0 - no early wakeup detected | 2208 | * 0 = no early wakeup detected; |
2208 | * <0 - -ETIMEDOUT or -ERESTARTNOINTR | 2209 | * <0 = -ETIMEDOUT or -ERESTARTNOINTR |
2209 | */ | 2210 | */ |
2210 | static inline | 2211 | static inline |
2211 | int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | 2212 | int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, |
@@ -2247,7 +2248,6 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2247 | * @val: the expected value of uaddr | 2248 | * @val: the expected value of uaddr |
2248 | * @abs_time: absolute timeout | 2249 | * @abs_time: absolute timeout |
2249 | * @bitset: 32 bit wakeup bitset set by userspace, defaults to all | 2250 | * @bitset: 32 bit wakeup bitset set by userspace, defaults to all |
2250 | * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) | ||
2251 | * @uaddr2: the pi futex we will take prior to returning to user-space | 2251 | * @uaddr2: the pi futex we will take prior to returning to user-space |
2252 | * | 2252 | * |
2253 | * The caller will wait on uaddr and will be requeued by futex_requeue() to | 2253 | * The caller will wait on uaddr and will be requeued by futex_requeue() to |
@@ -2258,7 +2258,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2258 | * there was a need to. | 2258 | * there was a need to. |
2259 | * | 2259 | * |
2260 | * We call schedule in futex_wait_queue_me() when we enqueue and return there | 2260 | * We call schedule in futex_wait_queue_me() when we enqueue and return there |
2261 | * via the following: | 2261 | * via the following-- |
2262 | * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() | 2262 | * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() |
2263 | * 2) wakeup on uaddr2 after a requeue | 2263 | * 2) wakeup on uaddr2 after a requeue |
2264 | * 3) signal | 2264 | * 3) signal |
@@ -2276,8 +2276,8 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2276 | * | 2276 | * |
2277 | * If 4 or 7, we cleanup and return with -ETIMEDOUT. | 2277 | * If 4 or 7, we cleanup and return with -ETIMEDOUT. |
2278 | * | 2278 | * |
2279 | * Returns: | 2279 | * Return: |
2280 | * 0 - On success | 2280 | * 0 - On success; |
2281 | * <0 - On error | 2281 | * <0 - On error |
2282 | */ | 2282 | */ |
2283 | static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | 2283 | static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
@@ -2472,8 +2472,6 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, | |||
2472 | if (!futex_cmpxchg_enabled) | 2472 | if (!futex_cmpxchg_enabled) |
2473 | return -ENOSYS; | 2473 | return -ENOSYS; |
2474 | 2474 | ||
2475 | WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); | ||
2476 | |||
2477 | rcu_read_lock(); | 2475 | rcu_read_lock(); |
2478 | 2476 | ||
2479 | ret = -ESRCH; | 2477 | ret = -ESRCH; |
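Most of the futex hunks above are kernel-doc touch-ups, standardizing on a "Return:" section with semicolon-separated cases. For reference, a comment in that style for a hypothetical function (the function and its cases are made up):

    /**
     * futex_example_wait - wait until a futex word changes
     * @uaddr:      userspace address of the futex word
     * @val:        value @uaddr is expected to hold
     *
     * Return:
     * 0 - woken by a matching wakeup;
     * -EWOULDBLOCK - *@uaddr did not contain @val;
     * <0 - other error (-EFAULT, -EINTR, ...)
     */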
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 83e368b005fc..f9f44fd4d34d 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/nsproxy.h> | 11 | #include <linux/nsproxy.h> |
12 | #include <linux/futex.h> | 12 | #include <linux/futex.h> |
13 | #include <linux/ptrace.h> | 13 | #include <linux/ptrace.h> |
14 | #include <linux/syscalls.h> | ||
14 | 15 | ||
15 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
16 | 17 | ||
@@ -116,9 +117,9 @@ void compat_exit_robust_list(struct task_struct *curr) | |||
116 | } | 117 | } |
117 | } | 118 | } |
118 | 119 | ||
119 | asmlinkage long | 120 | COMPAT_SYSCALL_DEFINE2(set_robust_list, |
120 | compat_sys_set_robust_list(struct compat_robust_list_head __user *head, | 121 | struct compat_robust_list_head __user *, head, |
121 | compat_size_t len) | 122 | compat_size_t, len) |
122 | { | 123 | { |
123 | if (!futex_cmpxchg_enabled) | 124 | if (!futex_cmpxchg_enabled) |
124 | return -ENOSYS; | 125 | return -ENOSYS; |
@@ -131,9 +132,9 @@ compat_sys_set_robust_list(struct compat_robust_list_head __user *head, | |||
131 | return 0; | 132 | return 0; |
132 | } | 133 | } |
133 | 134 | ||
134 | asmlinkage long | 135 | COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, |
135 | compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, | 136 | compat_uptr_t __user *, head_ptr, |
136 | compat_size_t __user *len_ptr) | 137 | compat_size_t __user *, len_ptr) |
137 | { | 138 | { |
138 | struct compat_robust_list_head __user *head; | 139 | struct compat_robust_list_head __user *head; |
139 | unsigned long ret; | 140 | unsigned long ret; |
@@ -142,8 +143,6 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, | |||
142 | if (!futex_cmpxchg_enabled) | 143 | if (!futex_cmpxchg_enabled) |
143 | return -ENOSYS; | 144 | return -ENOSYS; |
144 | 145 | ||
145 | WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); | ||
146 | |||
147 | rcu_read_lock(); | 146 | rcu_read_lock(); |
148 | 147 | ||
149 | ret = -ESRCH; | 148 | ret = -ESRCH; |
@@ -172,9 +171,9 @@ err_unlock: | |||
172 | return ret; | 171 | return ret; |
173 | } | 172 | } |
174 | 173 | ||
175 | asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, | 174 | COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, |
176 | struct compat_timespec __user *utime, u32 __user *uaddr2, | 175 | struct compat_timespec __user *, utime, u32 __user *, uaddr2, |
177 | u32 val3) | 176 | u32, val3) |
178 | { | 177 | { |
179 | struct timespec ts; | 178 | struct timespec ts; |
180 | ktime_t t, *tp = NULL; | 179 | ktime_t t, *tp = NULL; |
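The futex_compat.c conversion replaces hand-written "asmlinkage long compat_sys_*" prototypes with the COMPAT_SYSCALL_DEFINEn() macros, which generate the compat entry point and apply the per-architecture argument widening and sign-extension rules in one place. A minimal, hedged usage sketch with a made-up syscall name:

    #include <linux/compat.h>
    #include <linux/syscalls.h>

    /* expands to the compat_sys_example() entry point */
    COMPAT_SYSCALL_DEFINE2(example, compat_uptr_t, uptr, compat_size_t, len)
    {
            return len ? 0 : -EINVAL;
    }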
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index a92028196cc1..d4da55d1fb65 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig | |||
@@ -35,7 +35,7 @@ config GCOV_KERNEL | |||
35 | config GCOV_PROFILE_ALL | 35 | config GCOV_PROFILE_ALL |
36 | bool "Profile entire Kernel" | 36 | bool "Profile entire Kernel" |
37 | depends on GCOV_KERNEL | 37 | depends on GCOV_KERNEL |
38 | depends on SUPERH || S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE | 38 | depends on SUPERH || S390 || X86 || PPC || MICROBLAZE |
39 | default n | 39 | default n |
40 | ---help--- | 40 | ---help--- |
41 | This option activates profiling for the entire kernel. | 41 | This option activates profiling for the entire kernel. |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 4bd4faa6323a..397db02209ed 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -76,7 +76,7 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v) | |||
76 | static ssize_t write_irq_affinity(int type, struct file *file, | 76 | static ssize_t write_irq_affinity(int type, struct file *file, |
77 | const char __user *buffer, size_t count, loff_t *pos) | 77 | const char __user *buffer, size_t count, loff_t *pos) |
78 | { | 78 | { |
79 | unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; | 79 | unsigned int irq = (int)(long)PDE(file_inode(file))->data; |
80 | cpumask_var_t new_value; | 80 | cpumask_var_t new_value; |
81 | int err; | 81 | int err; |
82 | 82 | ||
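Several files in this series (fork.c, events/core.c, irq/proc.c, plus the futex comment) switch from file->f_path.dentry->d_inode to the new file_inode() helper. Conceptually it is just the accessor below; depending on the exact tree it may instead return a cached f_inode pointer, so take this as an approximation rather than the definitive implementation:

    static inline struct inode *file_inode(const struct file *f)
    {
            return f->f_path.dentry->d_inode;
    }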
diff --git a/kernel/kexec.c b/kernel/kexec.c index 5e4bd7864c5d..bddd3d7a74b6 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -54,6 +54,12 @@ struct resource crashk_res = { | |||
54 | .end = 0, | 54 | .end = 0, |
55 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | 55 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
56 | }; | 56 | }; |
57 | struct resource crashk_low_res = { | ||
58 | .name = "Crash kernel low", | ||
59 | .start = 0, | ||
60 | .end = 0, | ||
61 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
62 | }; | ||
57 | 63 | ||
58 | int kexec_should_crash(struct task_struct *p) | 64 | int kexec_should_crash(struct task_struct *p) |
59 | { | 65 | { |
@@ -223,6 +229,8 @@ out: | |||
223 | 229 | ||
224 | } | 230 | } |
225 | 231 | ||
232 | static void kimage_free_page_list(struct list_head *list); | ||
233 | |||
226 | static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, | 234 | static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, |
227 | unsigned long nr_segments, | 235 | unsigned long nr_segments, |
228 | struct kexec_segment __user *segments) | 236 | struct kexec_segment __user *segments) |
@@ -236,8 +244,6 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, | |||
236 | if (result) | 244 | if (result) |
237 | goto out; | 245 | goto out; |
238 | 246 | ||
239 | *rimage = image; | ||
240 | |||
241 | /* | 247 | /* |
242 | * Find a location for the control code buffer, and add it | 248 | * Find a location for the control code buffer, and add it |
243 | * the vector of segments so that it's pages will also be | 249 | * the vector of segments so that it's pages will also be |
@@ -248,22 +254,22 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, | |||
248 | get_order(KEXEC_CONTROL_PAGE_SIZE)); | 254 | get_order(KEXEC_CONTROL_PAGE_SIZE)); |
249 | if (!image->control_code_page) { | 255 | if (!image->control_code_page) { |
250 | printk(KERN_ERR "Could not allocate control_code_buffer\n"); | 256 | printk(KERN_ERR "Could not allocate control_code_buffer\n"); |
251 | goto out; | 257 | goto out_free; |
252 | } | 258 | } |
253 | 259 | ||
254 | image->swap_page = kimage_alloc_control_pages(image, 0); | 260 | image->swap_page = kimage_alloc_control_pages(image, 0); |
255 | if (!image->swap_page) { | 261 | if (!image->swap_page) { |
256 | printk(KERN_ERR "Could not allocate swap buffer\n"); | 262 | printk(KERN_ERR "Could not allocate swap buffer\n"); |
257 | goto out; | 263 | goto out_free; |
258 | } | 264 | } |
259 | 265 | ||
260 | result = 0; | 266 | *rimage = image; |
261 | out: | 267 | return 0; |
262 | if (result == 0) | ||
263 | *rimage = image; | ||
264 | else | ||
265 | kfree(image); | ||
266 | 268 | ||
269 | out_free: | ||
270 | kimage_free_page_list(&image->control_pages); | ||
271 | kfree(image); | ||
272 | out: | ||
267 | return result; | 273 | return result; |
268 | } | 274 | } |
269 | 275 | ||
@@ -310,7 +316,7 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, | |||
310 | mend = mstart + image->segment[i].memsz - 1; | 316 | mend = mstart + image->segment[i].memsz - 1; |
311 | /* Ensure we are within the crash kernel limits */ | 317 | /* Ensure we are within the crash kernel limits */ |
312 | if ((mstart < crashk_res.start) || (mend > crashk_res.end)) | 318 | if ((mstart < crashk_res.start) || (mend > crashk_res.end)) |
313 | goto out; | 319 | goto out_free; |
314 | } | 320 | } |
315 | 321 | ||
316 | /* | 322 | /* |
@@ -323,16 +329,15 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, | |||
323 | get_order(KEXEC_CONTROL_PAGE_SIZE)); | 329 | get_order(KEXEC_CONTROL_PAGE_SIZE)); |
324 | if (!image->control_code_page) { | 330 | if (!image->control_code_page) { |
325 | printk(KERN_ERR "Could not allocate control_code_buffer\n"); | 331 | printk(KERN_ERR "Could not allocate control_code_buffer\n"); |
326 | goto out; | 332 | goto out_free; |
327 | } | 333 | } |
328 | 334 | ||
329 | result = 0; | 335 | *rimage = image; |
330 | out: | 336 | return 0; |
331 | if (result == 0) | ||
332 | *rimage = image; | ||
333 | else | ||
334 | kfree(image); | ||
335 | 337 | ||
338 | out_free: | ||
339 | kfree(image); | ||
340 | out: | ||
336 | return result; | 341 | return result; |
337 | } | 342 | } |
338 | 343 | ||
@@ -497,8 +502,6 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, | |||
497 | 502 | ||
498 | if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) | 503 | if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) |
499 | break; | 504 | break; |
500 | if (hole_end > crashk_res.end) | ||
501 | break; | ||
502 | /* See if I overlap any of the segments */ | 505 | /* See if I overlap any of the segments */ |
503 | for (i = 0; i < image->nr_segments; i++) { | 506 | for (i = 0; i < image->nr_segments; i++) { |
504 | unsigned long mstart, mend; | 507 | unsigned long mstart, mend; |
@@ -1369,10 +1372,11 @@ static int __init parse_crashkernel_simple(char *cmdline, | |||
1369 | * This function is the entry point for command line parsing and should be | 1372 | * This function is the entry point for command line parsing and should be |
1370 | * called from the arch-specific code. | 1373 | * called from the arch-specific code. |
1371 | */ | 1374 | */ |
1372 | int __init parse_crashkernel(char *cmdline, | 1375 | static int __init __parse_crashkernel(char *cmdline, |
1373 | unsigned long long system_ram, | 1376 | unsigned long long system_ram, |
1374 | unsigned long long *crash_size, | 1377 | unsigned long long *crash_size, |
1375 | unsigned long long *crash_base) | 1378 | unsigned long long *crash_base, |
1379 | const char *name) | ||
1376 | { | 1380 | { |
1377 | char *p = cmdline, *ck_cmdline = NULL; | 1381 | char *p = cmdline, *ck_cmdline = NULL; |
1378 | char *first_colon, *first_space; | 1382 | char *first_colon, *first_space; |
@@ -1382,16 +1386,16 @@ int __init parse_crashkernel(char *cmdline, | |||
1382 | *crash_base = 0; | 1386 | *crash_base = 0; |
1383 | 1387 | ||
1384 | /* find crashkernel and use the last one if there are more */ | 1388 | /* find crashkernel and use the last one if there are more */ |
1385 | p = strstr(p, "crashkernel="); | 1389 | p = strstr(p, name); |
1386 | while (p) { | 1390 | while (p) { |
1387 | ck_cmdline = p; | 1391 | ck_cmdline = p; |
1388 | p = strstr(p+1, "crashkernel="); | 1392 | p = strstr(p+1, name); |
1389 | } | 1393 | } |
1390 | 1394 | ||
1391 | if (!ck_cmdline) | 1395 | if (!ck_cmdline) |
1392 | return -EINVAL; | 1396 | return -EINVAL; |
1393 | 1397 | ||
1394 | ck_cmdline += 12; /* strlen("crashkernel=") */ | 1398 | ck_cmdline += strlen(name); |
1395 | 1399 | ||
1396 | /* | 1400 | /* |
1397 | * if the commandline contains a ':', then that's the extended | 1401 | * if the commandline contains a ':', then that's the extended |
@@ -1409,6 +1413,23 @@ int __init parse_crashkernel(char *cmdline, | |||
1409 | return 0; | 1413 | return 0; |
1410 | } | 1414 | } |
1411 | 1415 | ||
1416 | int __init parse_crashkernel(char *cmdline, | ||
1417 | unsigned long long system_ram, | ||
1418 | unsigned long long *crash_size, | ||
1419 | unsigned long long *crash_base) | ||
1420 | { | ||
1421 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1422 | "crashkernel="); | ||
1423 | } | ||
1424 | |||
1425 | int __init parse_crashkernel_low(char *cmdline, | ||
1426 | unsigned long long system_ram, | ||
1427 | unsigned long long *crash_size, | ||
1428 | unsigned long long *crash_base) | ||
1429 | { | ||
1430 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1431 | "crashkernel_low="); | ||
1432 | } | ||
1412 | 1433 | ||
1413 | static void update_vmcoreinfo_note(void) | 1434 | static void update_vmcoreinfo_note(void) |
1414 | { | 1435 | { |
@@ -1490,6 +1511,8 @@ static int __init crash_save_vmcoreinfo_init(void) | |||
1490 | VMCOREINFO_OFFSET(page, _count); | 1511 | VMCOREINFO_OFFSET(page, _count); |
1491 | VMCOREINFO_OFFSET(page, mapping); | 1512 | VMCOREINFO_OFFSET(page, mapping); |
1492 | VMCOREINFO_OFFSET(page, lru); | 1513 | VMCOREINFO_OFFSET(page, lru); |
1514 | VMCOREINFO_OFFSET(page, _mapcount); | ||
1515 | VMCOREINFO_OFFSET(page, private); | ||
1493 | VMCOREINFO_OFFSET(pglist_data, node_zones); | 1516 | VMCOREINFO_OFFSET(pglist_data, node_zones); |
1494 | VMCOREINFO_OFFSET(pglist_data, nr_zones); | 1517 | VMCOREINFO_OFFSET(pglist_data, nr_zones); |
1495 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | 1518 | #ifdef CONFIG_FLAT_NODE_MEM_MAP |
@@ -1512,6 +1535,11 @@ static int __init crash_save_vmcoreinfo_init(void) | |||
1512 | VMCOREINFO_NUMBER(PG_lru); | 1535 | VMCOREINFO_NUMBER(PG_lru); |
1513 | VMCOREINFO_NUMBER(PG_private); | 1536 | VMCOREINFO_NUMBER(PG_private); |
1514 | VMCOREINFO_NUMBER(PG_swapcache); | 1537 | VMCOREINFO_NUMBER(PG_swapcache); |
1538 | VMCOREINFO_NUMBER(PG_slab); | ||
1539 | #ifdef CONFIG_MEMORY_FAILURE | ||
1540 | VMCOREINFO_NUMBER(PG_hwpoison); | ||
1541 | #endif | ||
1542 | VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); | ||
1515 | 1543 | ||
1516 | arch_crash_save_vmcoreinfo(); | 1544 | arch_crash_save_vmcoreinfo(); |
1517 | update_vmcoreinfo_note(); | 1545 | update_vmcoreinfo_note(); |
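
The kexec.c changes above add a second reservation, "Crash kernel low", and a parse_crashkernel_low() wrapper around the generalised __parse_crashkernel() so arch code can pick up a "crashkernel_low=" option alongside "crashkernel=". A hedged sketch of an arch-side caller (function and variable names are illustrative; the real caller is not part of this diff and would also reserve the region and populate crashk_low_res):

#include <linux/init.h>
#include <linux/kexec.h>
#include <linux/printk.h>

static void __init example_reserve_crashkernel_low(char *cmdline,
						   unsigned long long total_ram)
{
	unsigned long long low_size = 0, low_base = 0;

	/* Returns non-zero if "crashkernel_low=" is absent or malformed. */
	if (parse_crashkernel_low(cmdline, total_ram, &low_size, &low_base))
		return;

	if (low_size)
		pr_info("crashkernel low reservation requested: %lluK\n",
			low_size >> 10);
}
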
diff --git a/kernel/kfifo.c b/kernel/kfifo.c deleted file mode 100644 index 59dcf5b81d24..000000000000 --- a/kernel/kfifo.c +++ /dev/null | |||
@@ -1,609 +0,0 @@ | |||
1 | /* | ||
2 | * A generic kernel FIFO implementation | ||
3 | * | ||
4 | * Copyright (C) 2009/2010 Stefani Seibold <stefani@seibold.net> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/export.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/err.h> | ||
26 | #include <linux/log2.h> | ||
27 | #include <linux/uaccess.h> | ||
28 | #include <linux/kfifo.h> | ||
29 | |||
30 | /* | ||
31 | * internal helper to calculate the unused elements in a fifo | ||
32 | */ | ||
33 | static inline unsigned int kfifo_unused(struct __kfifo *fifo) | ||
34 | { | ||
35 | return (fifo->mask + 1) - (fifo->in - fifo->out); | ||
36 | } | ||
37 | |||
38 | int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, | ||
39 | size_t esize, gfp_t gfp_mask) | ||
40 | { | ||
41 | /* | ||
42 | * round down to the next power of 2, since our 'let the indices | ||
43 | * wrap' technique works only in this case. | ||
44 | */ | ||
45 | if (!is_power_of_2(size)) | ||
46 | size = rounddown_pow_of_two(size); | ||
47 | |||
48 | fifo->in = 0; | ||
49 | fifo->out = 0; | ||
50 | fifo->esize = esize; | ||
51 | |||
52 | if (size < 2) { | ||
53 | fifo->data = NULL; | ||
54 | fifo->mask = 0; | ||
55 | return -EINVAL; | ||
56 | } | ||
57 | |||
58 | fifo->data = kmalloc(size * esize, gfp_mask); | ||
59 | |||
60 | if (!fifo->data) { | ||
61 | fifo->mask = 0; | ||
62 | return -ENOMEM; | ||
63 | } | ||
64 | fifo->mask = size - 1; | ||
65 | |||
66 | return 0; | ||
67 | } | ||
68 | EXPORT_SYMBOL(__kfifo_alloc); | ||
69 | |||
70 | void __kfifo_free(struct __kfifo *fifo) | ||
71 | { | ||
72 | kfree(fifo->data); | ||
73 | fifo->in = 0; | ||
74 | fifo->out = 0; | ||
75 | fifo->esize = 0; | ||
76 | fifo->data = NULL; | ||
77 | fifo->mask = 0; | ||
78 | } | ||
79 | EXPORT_SYMBOL(__kfifo_free); | ||
80 | |||
81 | int __kfifo_init(struct __kfifo *fifo, void *buffer, | ||
82 | unsigned int size, size_t esize) | ||
83 | { | ||
84 | size /= esize; | ||
85 | |||
86 | if (!is_power_of_2(size)) | ||
87 | size = rounddown_pow_of_two(size); | ||
88 | |||
89 | fifo->in = 0; | ||
90 | fifo->out = 0; | ||
91 | fifo->esize = esize; | ||
92 | fifo->data = buffer; | ||
93 | |||
94 | if (size < 2) { | ||
95 | fifo->mask = 0; | ||
96 | return -EINVAL; | ||
97 | } | ||
98 | fifo->mask = size - 1; | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | EXPORT_SYMBOL(__kfifo_init); | ||
103 | |||
104 | static void kfifo_copy_in(struct __kfifo *fifo, const void *src, | ||
105 | unsigned int len, unsigned int off) | ||
106 | { | ||
107 | unsigned int size = fifo->mask + 1; | ||
108 | unsigned int esize = fifo->esize; | ||
109 | unsigned int l; | ||
110 | |||
111 | off &= fifo->mask; | ||
112 | if (esize != 1) { | ||
113 | off *= esize; | ||
114 | size *= esize; | ||
115 | len *= esize; | ||
116 | } | ||
117 | l = min(len, size - off); | ||
118 | |||
119 | memcpy(fifo->data + off, src, l); | ||
120 | memcpy(fifo->data, src + l, len - l); | ||
121 | /* | ||
122 | * make sure that the data in the fifo is up to date before | ||
123 | * incrementing the fifo->in index counter | ||
124 | */ | ||
125 | smp_wmb(); | ||
126 | } | ||
127 | |||
128 | unsigned int __kfifo_in(struct __kfifo *fifo, | ||
129 | const void *buf, unsigned int len) | ||
130 | { | ||
131 | unsigned int l; | ||
132 | |||
133 | l = kfifo_unused(fifo); | ||
134 | if (len > l) | ||
135 | len = l; | ||
136 | |||
137 | kfifo_copy_in(fifo, buf, len, fifo->in); | ||
138 | fifo->in += len; | ||
139 | return len; | ||
140 | } | ||
141 | EXPORT_SYMBOL(__kfifo_in); | ||
142 | |||
143 | static void kfifo_copy_out(struct __kfifo *fifo, void *dst, | ||
144 | unsigned int len, unsigned int off) | ||
145 | { | ||
146 | unsigned int size = fifo->mask + 1; | ||
147 | unsigned int esize = fifo->esize; | ||
148 | unsigned int l; | ||
149 | |||
150 | off &= fifo->mask; | ||
151 | if (esize != 1) { | ||
152 | off *= esize; | ||
153 | size *= esize; | ||
154 | len *= esize; | ||
155 | } | ||
156 | l = min(len, size - off); | ||
157 | |||
158 | memcpy(dst, fifo->data + off, l); | ||
159 | memcpy(dst + l, fifo->data, len - l); | ||
160 | /* | ||
161 | * make sure that the data is copied before | ||
162 | * incrementing the fifo->out index counter | ||
163 | */ | ||
164 | smp_wmb(); | ||
165 | } | ||
166 | |||
167 | unsigned int __kfifo_out_peek(struct __kfifo *fifo, | ||
168 | void *buf, unsigned int len) | ||
169 | { | ||
170 | unsigned int l; | ||
171 | |||
172 | l = fifo->in - fifo->out; | ||
173 | if (len > l) | ||
174 | len = l; | ||
175 | |||
176 | kfifo_copy_out(fifo, buf, len, fifo->out); | ||
177 | return len; | ||
178 | } | ||
179 | EXPORT_SYMBOL(__kfifo_out_peek); | ||
180 | |||
181 | unsigned int __kfifo_out(struct __kfifo *fifo, | ||
182 | void *buf, unsigned int len) | ||
183 | { | ||
184 | len = __kfifo_out_peek(fifo, buf, len); | ||
185 | fifo->out += len; | ||
186 | return len; | ||
187 | } | ||
188 | EXPORT_SYMBOL(__kfifo_out); | ||
189 | |||
190 | static unsigned long kfifo_copy_from_user(struct __kfifo *fifo, | ||
191 | const void __user *from, unsigned int len, unsigned int off, | ||
192 | unsigned int *copied) | ||
193 | { | ||
194 | unsigned int size = fifo->mask + 1; | ||
195 | unsigned int esize = fifo->esize; | ||
196 | unsigned int l; | ||
197 | unsigned long ret; | ||
198 | |||
199 | off &= fifo->mask; | ||
200 | if (esize != 1) { | ||
201 | off *= esize; | ||
202 | size *= esize; | ||
203 | len *= esize; | ||
204 | } | ||
205 | l = min(len, size - off); | ||
206 | |||
207 | ret = copy_from_user(fifo->data + off, from, l); | ||
208 | if (unlikely(ret)) | ||
209 | ret = DIV_ROUND_UP(ret + len - l, esize); | ||
210 | else { | ||
211 | ret = copy_from_user(fifo->data, from + l, len - l); | ||
212 | if (unlikely(ret)) | ||
213 | ret = DIV_ROUND_UP(ret, esize); | ||
214 | } | ||
215 | /* | ||
216 | * make sure that the data in the fifo is up to date before | ||
217 | * incrementing the fifo->in index counter | ||
218 | */ | ||
219 | smp_wmb(); | ||
220 | *copied = len - ret; | ||
221 | /* return the number of elements which are not copied */ | ||
222 | return ret; | ||
223 | } | ||
224 | |||
225 | int __kfifo_from_user(struct __kfifo *fifo, const void __user *from, | ||
226 | unsigned long len, unsigned int *copied) | ||
227 | { | ||
228 | unsigned int l; | ||
229 | unsigned long ret; | ||
230 | unsigned int esize = fifo->esize; | ||
231 | int err; | ||
232 | |||
233 | if (esize != 1) | ||
234 | len /= esize; | ||
235 | |||
236 | l = kfifo_unused(fifo); | ||
237 | if (len > l) | ||
238 | len = l; | ||
239 | |||
240 | ret = kfifo_copy_from_user(fifo, from, len, fifo->in, copied); | ||
241 | if (unlikely(ret)) { | ||
242 | len -= ret; | ||
243 | err = -EFAULT; | ||
244 | } else | ||
245 | err = 0; | ||
246 | fifo->in += len; | ||
247 | return err; | ||
248 | } | ||
249 | EXPORT_SYMBOL(__kfifo_from_user); | ||
250 | |||
251 | static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to, | ||
252 | unsigned int len, unsigned int off, unsigned int *copied) | ||
253 | { | ||
254 | unsigned int l; | ||
255 | unsigned long ret; | ||
256 | unsigned int size = fifo->mask + 1; | ||
257 | unsigned int esize = fifo->esize; | ||
258 | |||
259 | off &= fifo->mask; | ||
260 | if (esize != 1) { | ||
261 | off *= esize; | ||
262 | size *= esize; | ||
263 | len *= esize; | ||
264 | } | ||
265 | l = min(len, size - off); | ||
266 | |||
267 | ret = copy_to_user(to, fifo->data + off, l); | ||
268 | if (unlikely(ret)) | ||
269 | ret = DIV_ROUND_UP(ret + len - l, esize); | ||
270 | else { | ||
271 | ret = copy_to_user(to + l, fifo->data, len - l); | ||
272 | if (unlikely(ret)) | ||
273 | ret = DIV_ROUND_UP(ret, esize); | ||
274 | } | ||
275 | /* | ||
276 | * make sure that the data is copied before | ||
277 | * incrementing the fifo->out index counter | ||
278 | */ | ||
279 | smp_wmb(); | ||
280 | *copied = len - ret; | ||
281 | /* return the number of elements which are not copied */ | ||
282 | return ret; | ||
283 | } | ||
284 | |||
285 | int __kfifo_to_user(struct __kfifo *fifo, void __user *to, | ||
286 | unsigned long len, unsigned int *copied) | ||
287 | { | ||
288 | unsigned int l; | ||
289 | unsigned long ret; | ||
290 | unsigned int esize = fifo->esize; | ||
291 | int err; | ||
292 | |||
293 | if (esize != 1) | ||
294 | len /= esize; | ||
295 | |||
296 | l = fifo->in - fifo->out; | ||
297 | if (len > l) | ||
298 | len = l; | ||
299 | ret = kfifo_copy_to_user(fifo, to, len, fifo->out, copied); | ||
300 | if (unlikely(ret)) { | ||
301 | len -= ret; | ||
302 | err = -EFAULT; | ||
303 | } else | ||
304 | err = 0; | ||
305 | fifo->out += len; | ||
306 | return err; | ||
307 | } | ||
308 | EXPORT_SYMBOL(__kfifo_to_user); | ||
309 | |||
310 | static int setup_sgl_buf(struct scatterlist *sgl, void *buf, | ||
311 | int nents, unsigned int len) | ||
312 | { | ||
313 | int n; | ||
314 | unsigned int l; | ||
315 | unsigned int off; | ||
316 | struct page *page; | ||
317 | |||
318 | if (!nents) | ||
319 | return 0; | ||
320 | |||
321 | if (!len) | ||
322 | return 0; | ||
323 | |||
324 | n = 0; | ||
325 | page = virt_to_page(buf); | ||
326 | off = offset_in_page(buf); | ||
327 | l = 0; | ||
328 | |||
329 | while (len >= l + PAGE_SIZE - off) { | ||
330 | struct page *npage; | ||
331 | |||
332 | l += PAGE_SIZE; | ||
333 | buf += PAGE_SIZE; | ||
334 | npage = virt_to_page(buf); | ||
335 | if (page_to_phys(page) != page_to_phys(npage) - l) { | ||
336 | sg_set_page(sgl, page, l - off, off); | ||
337 | sgl = sg_next(sgl); | ||
338 | if (++n == nents || sgl == NULL) | ||
339 | return n; | ||
340 | page = npage; | ||
341 | len -= l - off; | ||
342 | l = off = 0; | ||
343 | } | ||
344 | } | ||
345 | sg_set_page(sgl, page, len, off); | ||
346 | return n + 1; | ||
347 | } | ||
348 | |||
349 | static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl, | ||
350 | int nents, unsigned int len, unsigned int off) | ||
351 | { | ||
352 | unsigned int size = fifo->mask + 1; | ||
353 | unsigned int esize = fifo->esize; | ||
354 | unsigned int l; | ||
355 | unsigned int n; | ||
356 | |||
357 | off &= fifo->mask; | ||
358 | if (esize != 1) { | ||
359 | off *= esize; | ||
360 | size *= esize; | ||
361 | len *= esize; | ||
362 | } | ||
363 | l = min(len, size - off); | ||
364 | |||
365 | n = setup_sgl_buf(sgl, fifo->data + off, nents, l); | ||
366 | n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l); | ||
367 | |||
368 | return n; | ||
369 | } | ||
370 | |||
371 | unsigned int __kfifo_dma_in_prepare(struct __kfifo *fifo, | ||
372 | struct scatterlist *sgl, int nents, unsigned int len) | ||
373 | { | ||
374 | unsigned int l; | ||
375 | |||
376 | l = kfifo_unused(fifo); | ||
377 | if (len > l) | ||
378 | len = l; | ||
379 | |||
380 | return setup_sgl(fifo, sgl, nents, len, fifo->in); | ||
381 | } | ||
382 | EXPORT_SYMBOL(__kfifo_dma_in_prepare); | ||
383 | |||
384 | unsigned int __kfifo_dma_out_prepare(struct __kfifo *fifo, | ||
385 | struct scatterlist *sgl, int nents, unsigned int len) | ||
386 | { | ||
387 | unsigned int l; | ||
388 | |||
389 | l = fifo->in - fifo->out; | ||
390 | if (len > l) | ||
391 | len = l; | ||
392 | |||
393 | return setup_sgl(fifo, sgl, nents, len, fifo->out); | ||
394 | } | ||
395 | EXPORT_SYMBOL(__kfifo_dma_out_prepare); | ||
396 | |||
397 | unsigned int __kfifo_max_r(unsigned int len, size_t recsize) | ||
398 | { | ||
399 | unsigned int max = (1 << (recsize << 3)) - 1; | ||
400 | |||
401 | if (len > max) | ||
402 | return max; | ||
403 | return len; | ||
404 | } | ||
405 | EXPORT_SYMBOL(__kfifo_max_r); | ||
406 | |||
407 | #define __KFIFO_PEEK(data, out, mask) \ | ||
408 | ((data)[(out) & (mask)]) | ||
409 | /* | ||
410 | * __kfifo_peek_n internal helper function for determining the length of | ||
411 | * the next record in the fifo | ||
412 | */ | ||
413 | static unsigned int __kfifo_peek_n(struct __kfifo *fifo, size_t recsize) | ||
414 | { | ||
415 | unsigned int l; | ||
416 | unsigned int mask = fifo->mask; | ||
417 | unsigned char *data = fifo->data; | ||
418 | |||
419 | l = __KFIFO_PEEK(data, fifo->out, mask); | ||
420 | |||
421 | if (--recsize) | ||
422 | l |= __KFIFO_PEEK(data, fifo->out + 1, mask) << 8; | ||
423 | |||
424 | return l; | ||
425 | } | ||
426 | |||
427 | #define __KFIFO_POKE(data, in, mask, val) \ | ||
428 | ( \ | ||
429 | (data)[(in) & (mask)] = (unsigned char)(val) \ | ||
430 | ) | ||
431 | |||
432 | /* | ||
433 | * __kfifo_poke_n internal helper function for storing the length of | ||
434 | * the record into the fifo | ||
435 | */ | ||
436 | static void __kfifo_poke_n(struct __kfifo *fifo, unsigned int n, size_t recsize) | ||
437 | { | ||
438 | unsigned int mask = fifo->mask; | ||
439 | unsigned char *data = fifo->data; | ||
440 | |||
441 | __KFIFO_POKE(data, fifo->in, mask, n); | ||
442 | |||
443 | if (recsize > 1) | ||
444 | __KFIFO_POKE(data, fifo->in + 1, mask, n >> 8); | ||
445 | } | ||
446 | |||
447 | unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize) | ||
448 | { | ||
449 | return __kfifo_peek_n(fifo, recsize); | ||
450 | } | ||
451 | EXPORT_SYMBOL(__kfifo_len_r); | ||
452 | |||
453 | unsigned int __kfifo_in_r(struct __kfifo *fifo, const void *buf, | ||
454 | unsigned int len, size_t recsize) | ||
455 | { | ||
456 | if (len + recsize > kfifo_unused(fifo)) | ||
457 | return 0; | ||
458 | |||
459 | __kfifo_poke_n(fifo, len, recsize); | ||
460 | |||
461 | kfifo_copy_in(fifo, buf, len, fifo->in + recsize); | ||
462 | fifo->in += len + recsize; | ||
463 | return len; | ||
464 | } | ||
465 | EXPORT_SYMBOL(__kfifo_in_r); | ||
466 | |||
467 | static unsigned int kfifo_out_copy_r(struct __kfifo *fifo, | ||
468 | void *buf, unsigned int len, size_t recsize, unsigned int *n) | ||
469 | { | ||
470 | *n = __kfifo_peek_n(fifo, recsize); | ||
471 | |||
472 | if (len > *n) | ||
473 | len = *n; | ||
474 | |||
475 | kfifo_copy_out(fifo, buf, len, fifo->out + recsize); | ||
476 | return len; | ||
477 | } | ||
478 | |||
479 | unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, | ||
480 | unsigned int len, size_t recsize) | ||
481 | { | ||
482 | unsigned int n; | ||
483 | |||
484 | if (fifo->in == fifo->out) | ||
485 | return 0; | ||
486 | |||
487 | return kfifo_out_copy_r(fifo, buf, len, recsize, &n); | ||
488 | } | ||
489 | EXPORT_SYMBOL(__kfifo_out_peek_r); | ||
490 | |||
491 | unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf, | ||
492 | unsigned int len, size_t recsize) | ||
493 | { | ||
494 | unsigned int n; | ||
495 | |||
496 | if (fifo->in == fifo->out) | ||
497 | return 0; | ||
498 | |||
499 | len = kfifo_out_copy_r(fifo, buf, len, recsize, &n); | ||
500 | fifo->out += n + recsize; | ||
501 | return len; | ||
502 | } | ||
503 | EXPORT_SYMBOL(__kfifo_out_r); | ||
504 | |||
505 | void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize) | ||
506 | { | ||
507 | unsigned int n; | ||
508 | |||
509 | n = __kfifo_peek_n(fifo, recsize); | ||
510 | fifo->out += n + recsize; | ||
511 | } | ||
512 | EXPORT_SYMBOL(__kfifo_skip_r); | ||
513 | |||
514 | int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, | ||
515 | unsigned long len, unsigned int *copied, size_t recsize) | ||
516 | { | ||
517 | unsigned long ret; | ||
518 | |||
519 | len = __kfifo_max_r(len, recsize); | ||
520 | |||
521 | if (len + recsize > kfifo_unused(fifo)) { | ||
522 | *copied = 0; | ||
523 | return 0; | ||
524 | } | ||
525 | |||
526 | __kfifo_poke_n(fifo, len, recsize); | ||
527 | |||
528 | ret = kfifo_copy_from_user(fifo, from, len, fifo->in + recsize, copied); | ||
529 | if (unlikely(ret)) { | ||
530 | *copied = 0; | ||
531 | return -EFAULT; | ||
532 | } | ||
533 | fifo->in += len + recsize; | ||
534 | return 0; | ||
535 | } | ||
536 | EXPORT_SYMBOL(__kfifo_from_user_r); | ||
537 | |||
538 | int __kfifo_to_user_r(struct __kfifo *fifo, void __user *to, | ||
539 | unsigned long len, unsigned int *copied, size_t recsize) | ||
540 | { | ||
541 | unsigned long ret; | ||
542 | unsigned int n; | ||
543 | |||
544 | if (fifo->in == fifo->out) { | ||
545 | *copied = 0; | ||
546 | return 0; | ||
547 | } | ||
548 | |||
549 | n = __kfifo_peek_n(fifo, recsize); | ||
550 | if (len > n) | ||
551 | len = n; | ||
552 | |||
553 | ret = kfifo_copy_to_user(fifo, to, len, fifo->out + recsize, copied); | ||
554 | if (unlikely(ret)) { | ||
555 | *copied = 0; | ||
556 | return -EFAULT; | ||
557 | } | ||
558 | fifo->out += n + recsize; | ||
559 | return 0; | ||
560 | } | ||
561 | EXPORT_SYMBOL(__kfifo_to_user_r); | ||
562 | |||
563 | unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, | ||
564 | struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) | ||
565 | { | ||
566 | if (!nents) | ||
567 | BUG(); | ||
568 | |||
569 | len = __kfifo_max_r(len, recsize); | ||
570 | |||
571 | if (len + recsize > kfifo_unused(fifo)) | ||
572 | return 0; | ||
573 | |||
574 | return setup_sgl(fifo, sgl, nents, len, fifo->in + recsize); | ||
575 | } | ||
576 | EXPORT_SYMBOL(__kfifo_dma_in_prepare_r); | ||
577 | |||
578 | void __kfifo_dma_in_finish_r(struct __kfifo *fifo, | ||
579 | unsigned int len, size_t recsize) | ||
580 | { | ||
581 | len = __kfifo_max_r(len, recsize); | ||
582 | __kfifo_poke_n(fifo, len, recsize); | ||
583 | fifo->in += len + recsize; | ||
584 | } | ||
585 | EXPORT_SYMBOL(__kfifo_dma_in_finish_r); | ||
586 | |||
587 | unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo, | ||
588 | struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) | ||
589 | { | ||
590 | if (!nents) | ||
591 | BUG(); | ||
592 | |||
593 | len = __kfifo_max_r(len, recsize); | ||
594 | |||
595 | if (len + recsize > fifo->in - fifo->out) | ||
596 | return 0; | ||
597 | |||
598 | return setup_sgl(fifo, sgl, nents, len, fifo->out + recsize); | ||
599 | } | ||
600 | EXPORT_SYMBOL(__kfifo_dma_out_prepare_r); | ||
601 | |||
602 | void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize) | ||
603 | { | ||
604 | unsigned int len; | ||
605 | |||
606 | len = __kfifo_peek_n(fifo, recsize); | ||
607 | fifo->out += len + recsize; | ||
608 | } | ||
609 | EXPORT_SYMBOL(__kfifo_dma_out_finish_r); | ||
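
The whole generic FIFO implementation is removed from kernel/ in this patch; the <linux/kfifo.h> interface its callers use is not touched, so the implementation presumably continues to live elsewhere in the tree rather than being dropped outright. For orientation, a small sketch of that unchanged API from a hypothetical module (names and sizes are illustrative):

#include <linux/kfifo.h>
#include <linux/module.h>
#include <linux/printk.h>

/* A fixed-size FIFO of 128 bytes, declared and initialised statically. */
static DEFINE_KFIFO(example_fifo, unsigned char, 128);

static int __init example_init(void)
{
	unsigned char in[4] = { 1, 2, 3, 4 };
	unsigned char out[4];
	unsigned int copied;

	kfifo_in(&example_fifo, in, 4);		   /* enqueue 4 elements       */
	copied = kfifo_out(&example_fifo, out, 4); /* dequeue up to 4 elements */
	pr_info("kfifo example copied %u bytes\n", copied);
	return 0;
}

static void __exit example_exit(void)
{
	kfifo_reset(&example_fifo);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
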
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 550294d58a02..e35be53f6613 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -334,11 +334,10 @@ static inline void reset_kprobe_instance(void) | |||
334 | struct kprobe __kprobes *get_kprobe(void *addr) | 334 | struct kprobe __kprobes *get_kprobe(void *addr) |
335 | { | 335 | { |
336 | struct hlist_head *head; | 336 | struct hlist_head *head; |
337 | struct hlist_node *node; | ||
338 | struct kprobe *p; | 337 | struct kprobe *p; |
339 | 338 | ||
340 | head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; | 339 | head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; |
341 | hlist_for_each_entry_rcu(p, node, head, hlist) { | 340 | hlist_for_each_entry_rcu(p, head, hlist) { |
342 | if (p->addr == addr) | 341 | if (p->addr == addr) |
343 | return p; | 342 | return p; |
344 | } | 343 | } |
@@ -799,7 +798,6 @@ out: | |||
799 | static void __kprobes optimize_all_kprobes(void) | 798 | static void __kprobes optimize_all_kprobes(void) |
800 | { | 799 | { |
801 | struct hlist_head *head; | 800 | struct hlist_head *head; |
802 | struct hlist_node *node; | ||
803 | struct kprobe *p; | 801 | struct kprobe *p; |
804 | unsigned int i; | 802 | unsigned int i; |
805 | 803 | ||
@@ -810,7 +808,7 @@ static void __kprobes optimize_all_kprobes(void) | |||
810 | kprobes_allow_optimization = true; | 808 | kprobes_allow_optimization = true; |
811 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 809 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
812 | head = &kprobe_table[i]; | 810 | head = &kprobe_table[i]; |
813 | hlist_for_each_entry_rcu(p, node, head, hlist) | 811 | hlist_for_each_entry_rcu(p, head, hlist) |
814 | if (!kprobe_disabled(p)) | 812 | if (!kprobe_disabled(p)) |
815 | optimize_kprobe(p); | 813 | optimize_kprobe(p); |
816 | } | 814 | } |
@@ -821,7 +819,6 @@ static void __kprobes optimize_all_kprobes(void) | |||
821 | static void __kprobes unoptimize_all_kprobes(void) | 819 | static void __kprobes unoptimize_all_kprobes(void) |
822 | { | 820 | { |
823 | struct hlist_head *head; | 821 | struct hlist_head *head; |
824 | struct hlist_node *node; | ||
825 | struct kprobe *p; | 822 | struct kprobe *p; |
826 | unsigned int i; | 823 | unsigned int i; |
827 | 824 | ||
@@ -832,7 +829,7 @@ static void __kprobes unoptimize_all_kprobes(void) | |||
832 | kprobes_allow_optimization = false; | 829 | kprobes_allow_optimization = false; |
833 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 830 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
834 | head = &kprobe_table[i]; | 831 | head = &kprobe_table[i]; |
835 | hlist_for_each_entry_rcu(p, node, head, hlist) { | 832 | hlist_for_each_entry_rcu(p, head, hlist) { |
836 | if (!kprobe_disabled(p)) | 833 | if (!kprobe_disabled(p)) |
837 | unoptimize_kprobe(p, false); | 834 | unoptimize_kprobe(p, false); |
838 | } | 835 | } |
@@ -1148,7 +1145,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) | |||
1148 | { | 1145 | { |
1149 | struct kretprobe_instance *ri; | 1146 | struct kretprobe_instance *ri; |
1150 | struct hlist_head *head, empty_rp; | 1147 | struct hlist_head *head, empty_rp; |
1151 | struct hlist_node *node, *tmp; | 1148 | struct hlist_node *tmp; |
1152 | unsigned long hash, flags = 0; | 1149 | unsigned long hash, flags = 0; |
1153 | 1150 | ||
1154 | if (unlikely(!kprobes_initialized)) | 1151 | if (unlikely(!kprobes_initialized)) |
@@ -1159,12 +1156,12 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) | |||
1159 | hash = hash_ptr(tk, KPROBE_HASH_BITS); | 1156 | hash = hash_ptr(tk, KPROBE_HASH_BITS); |
1160 | head = &kretprobe_inst_table[hash]; | 1157 | head = &kretprobe_inst_table[hash]; |
1161 | kretprobe_table_lock(hash, &flags); | 1158 | kretprobe_table_lock(hash, &flags); |
1162 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | 1159 | hlist_for_each_entry_safe(ri, tmp, head, hlist) { |
1163 | if (ri->task == tk) | 1160 | if (ri->task == tk) |
1164 | recycle_rp_inst(ri, &empty_rp); | 1161 | recycle_rp_inst(ri, &empty_rp); |
1165 | } | 1162 | } |
1166 | kretprobe_table_unlock(hash, &flags); | 1163 | kretprobe_table_unlock(hash, &flags); |
1167 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | 1164 | hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { |
1168 | hlist_del(&ri->hlist); | 1165 | hlist_del(&ri->hlist); |
1169 | kfree(ri); | 1166 | kfree(ri); |
1170 | } | 1167 | } |
@@ -1173,9 +1170,9 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) | |||
1173 | static inline void free_rp_inst(struct kretprobe *rp) | 1170 | static inline void free_rp_inst(struct kretprobe *rp) |
1174 | { | 1171 | { |
1175 | struct kretprobe_instance *ri; | 1172 | struct kretprobe_instance *ri; |
1176 | struct hlist_node *pos, *next; | 1173 | struct hlist_node *next; |
1177 | 1174 | ||
1178 | hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) { | 1175 | hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) { |
1179 | hlist_del(&ri->hlist); | 1176 | hlist_del(&ri->hlist); |
1180 | kfree(ri); | 1177 | kfree(ri); |
1181 | } | 1178 | } |
@@ -1185,14 +1182,14 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp) | |||
1185 | { | 1182 | { |
1186 | unsigned long flags, hash; | 1183 | unsigned long flags, hash; |
1187 | struct kretprobe_instance *ri; | 1184 | struct kretprobe_instance *ri; |
1188 | struct hlist_node *pos, *next; | 1185 | struct hlist_node *next; |
1189 | struct hlist_head *head; | 1186 | struct hlist_head *head; |
1190 | 1187 | ||
1191 | /* No race here */ | 1188 | /* No race here */ |
1192 | for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) { | 1189 | for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) { |
1193 | kretprobe_table_lock(hash, &flags); | 1190 | kretprobe_table_lock(hash, &flags); |
1194 | head = &kretprobe_inst_table[hash]; | 1191 | head = &kretprobe_inst_table[hash]; |
1195 | hlist_for_each_entry_safe(ri, pos, next, head, hlist) { | 1192 | hlist_for_each_entry_safe(ri, next, head, hlist) { |
1196 | if (ri->rp == rp) | 1193 | if (ri->rp == rp) |
1197 | ri->rp = NULL; | 1194 | ri->rp = NULL; |
1198 | } | 1195 | } |
@@ -2028,7 +2025,6 @@ static int __kprobes kprobes_module_callback(struct notifier_block *nb, | |||
2028 | { | 2025 | { |
2029 | struct module *mod = data; | 2026 | struct module *mod = data; |
2030 | struct hlist_head *head; | 2027 | struct hlist_head *head; |
2031 | struct hlist_node *node; | ||
2032 | struct kprobe *p; | 2028 | struct kprobe *p; |
2033 | unsigned int i; | 2029 | unsigned int i; |
2034 | int checkcore = (val == MODULE_STATE_GOING); | 2030 | int checkcore = (val == MODULE_STATE_GOING); |
@@ -2045,7 +2041,7 @@ static int __kprobes kprobes_module_callback(struct notifier_block *nb, | |||
2045 | mutex_lock(&kprobe_mutex); | 2041 | mutex_lock(&kprobe_mutex); |
2046 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 2042 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
2047 | head = &kprobe_table[i]; | 2043 | head = &kprobe_table[i]; |
2048 | hlist_for_each_entry_rcu(p, node, head, hlist) | 2044 | hlist_for_each_entry_rcu(p, head, hlist) |
2049 | if (within_module_init((unsigned long)p->addr, mod) || | 2045 | if (within_module_init((unsigned long)p->addr, mod) || |
2050 | (checkcore && | 2046 | (checkcore && |
2051 | within_module_core((unsigned long)p->addr, mod))) { | 2047 | within_module_core((unsigned long)p->addr, mod))) { |
@@ -2192,7 +2188,6 @@ static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v) | |||
2192 | static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) | 2188 | static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) |
2193 | { | 2189 | { |
2194 | struct hlist_head *head; | 2190 | struct hlist_head *head; |
2195 | struct hlist_node *node; | ||
2196 | struct kprobe *p, *kp; | 2191 | struct kprobe *p, *kp; |
2197 | const char *sym = NULL; | 2192 | const char *sym = NULL; |
2198 | unsigned int i = *(loff_t *) v; | 2193 | unsigned int i = *(loff_t *) v; |
@@ -2201,7 +2196,7 @@ static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) | |||
2201 | 2196 | ||
2202 | head = &kprobe_table[i]; | 2197 | head = &kprobe_table[i]; |
2203 | preempt_disable(); | 2198 | preempt_disable(); |
2204 | hlist_for_each_entry_rcu(p, node, head, hlist) { | 2199 | hlist_for_each_entry_rcu(p, head, hlist) { |
2205 | sym = kallsyms_lookup((unsigned long)p->addr, NULL, | 2200 | sym = kallsyms_lookup((unsigned long)p->addr, NULL, |
2206 | &offset, &modname, namebuf); | 2201 | &offset, &modname, namebuf); |
2207 | if (kprobe_aggrprobe(p)) { | 2202 | if (kprobe_aggrprobe(p)) { |
@@ -2236,7 +2231,6 @@ static const struct file_operations debugfs_kprobes_operations = { | |||
2236 | static void __kprobes arm_all_kprobes(void) | 2231 | static void __kprobes arm_all_kprobes(void) |
2237 | { | 2232 | { |
2238 | struct hlist_head *head; | 2233 | struct hlist_head *head; |
2239 | struct hlist_node *node; | ||
2240 | struct kprobe *p; | 2234 | struct kprobe *p; |
2241 | unsigned int i; | 2235 | unsigned int i; |
2242 | 2236 | ||
@@ -2249,7 +2243,7 @@ static void __kprobes arm_all_kprobes(void) | |||
2249 | /* Arming kprobes doesn't optimize kprobe itself */ | 2243 | /* Arming kprobes doesn't optimize kprobe itself */ |
2250 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 2244 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
2251 | head = &kprobe_table[i]; | 2245 | head = &kprobe_table[i]; |
2252 | hlist_for_each_entry_rcu(p, node, head, hlist) | 2246 | hlist_for_each_entry_rcu(p, head, hlist) |
2253 | if (!kprobe_disabled(p)) | 2247 | if (!kprobe_disabled(p)) |
2254 | arm_kprobe(p); | 2248 | arm_kprobe(p); |
2255 | } | 2249 | } |
@@ -2265,7 +2259,6 @@ already_enabled: | |||
2265 | static void __kprobes disarm_all_kprobes(void) | 2259 | static void __kprobes disarm_all_kprobes(void) |
2266 | { | 2260 | { |
2267 | struct hlist_head *head; | 2261 | struct hlist_head *head; |
2268 | struct hlist_node *node; | ||
2269 | struct kprobe *p; | 2262 | struct kprobe *p; |
2270 | unsigned int i; | 2263 | unsigned int i; |
2271 | 2264 | ||
@@ -2282,7 +2275,7 @@ static void __kprobes disarm_all_kprobes(void) | |||
2282 | 2275 | ||
2283 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 2276 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
2284 | head = &kprobe_table[i]; | 2277 | head = &kprobe_table[i]; |
2285 | hlist_for_each_entry_rcu(p, node, head, hlist) { | 2278 | hlist_for_each_entry_rcu(p, head, hlist) { |
2286 | if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) | 2279 | if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) |
2287 | disarm_kprobe(p, false); | 2280 | disarm_kprobe(p, false); |
2288 | } | 2281 | } |
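
Every kprobes hunk above is the same mechanical conversion: the hlist iterator macros no longer take a separate struct hlist_node cursor, so the local "node" variable disappears and the macros lose one argument. A standalone sketch of the post-conversion form on a made-up hash bucket (the struct and function are illustrative, not from kprobes):

#include <linux/list.h>
#include <linux/rculist.h>

struct example_item {
	int key;
	struct hlist_node hlist;
};

/* Caller must hold rcu_read_lock(). The old macro needed an extra cursor:
 * hlist_for_each_entry_rcu(p, node, head, hlist). */
static struct example_item *example_lookup(struct hlist_head *head, int key)
{
	struct example_item *p;

	hlist_for_each_entry_rcu(p, head, hlist) {
		if (p->key == key)
			return p;
	}
	return NULL;
}
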
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 7981e5b2350d..259db207b5d9 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -3190,9 +3190,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3190 | #endif | 3190 | #endif |
3191 | if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { | 3191 | if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { |
3192 | debug_locks_off(); | 3192 | debug_locks_off(); |
3193 | printk("BUG: MAX_LOCK_DEPTH too low!\n"); | 3193 | printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n", |
3194 | curr->lockdep_depth, MAX_LOCK_DEPTH); | ||
3194 | printk("turning off the locking correctness validator.\n"); | 3195 | printk("turning off the locking correctness validator.\n"); |
3196 | |||
3197 | lockdep_print_held_locks(current); | ||
3198 | debug_show_all_locks(); | ||
3195 | dump_stack(); | 3199 | dump_stack(); |
3200 | |||
3196 | return 0; | 3201 | return 0; |
3197 | } | 3202 | } |
3198 | 3203 | ||
@@ -3203,7 +3208,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3203 | } | 3208 | } |
3204 | 3209 | ||
3205 | static int | 3210 | static int |
3206 | print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, | 3211 | print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, |
3207 | unsigned long ip) | 3212 | unsigned long ip) |
3208 | { | 3213 | { |
3209 | if (!debug_locks_off()) | 3214 | if (!debug_locks_off()) |
@@ -3246,7 +3251,7 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, | |||
3246 | return 0; | 3251 | return 0; |
3247 | 3252 | ||
3248 | if (curr->lockdep_depth <= 0) | 3253 | if (curr->lockdep_depth <= 0) |
3249 | return print_unlock_inbalance_bug(curr, lock, ip); | 3254 | return print_unlock_imbalance_bug(curr, lock, ip); |
3250 | 3255 | ||
3251 | return 1; | 3256 | return 1; |
3252 | } | 3257 | } |
@@ -3317,7 +3322,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name, | |||
3317 | goto found_it; | 3322 | goto found_it; |
3318 | prev_hlock = hlock; | 3323 | prev_hlock = hlock; |
3319 | } | 3324 | } |
3320 | return print_unlock_inbalance_bug(curr, lock, ip); | 3325 | return print_unlock_imbalance_bug(curr, lock, ip); |
3321 | 3326 | ||
3322 | found_it: | 3327 | found_it: |
3323 | lockdep_init_map(lock, name, key, 0); | 3328 | lockdep_init_map(lock, name, key, 0); |
@@ -3384,7 +3389,7 @@ lock_release_non_nested(struct task_struct *curr, | |||
3384 | goto found_it; | 3389 | goto found_it; |
3385 | prev_hlock = hlock; | 3390 | prev_hlock = hlock; |
3386 | } | 3391 | } |
3387 | return print_unlock_inbalance_bug(curr, lock, ip); | 3392 | return print_unlock_imbalance_bug(curr, lock, ip); |
3388 | 3393 | ||
3389 | found_it: | 3394 | found_it: |
3390 | if (hlock->instance == lock) | 3395 | if (hlock->instance == lock) |
@@ -4083,7 +4088,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) | |||
4083 | } | 4088 | } |
4084 | EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); | 4089 | EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); |
4085 | 4090 | ||
4086 | static void print_held_locks_bug(struct task_struct *curr) | 4091 | static void print_held_locks_bug(void) |
4087 | { | 4092 | { |
4088 | if (!debug_locks_off()) | 4093 | if (!debug_locks_off()) |
4089 | return; | 4094 | return; |
@@ -4092,22 +4097,21 @@ static void print_held_locks_bug(struct task_struct *curr) | |||
4092 | 4097 | ||
4093 | printk("\n"); | 4098 | printk("\n"); |
4094 | printk("=====================================\n"); | 4099 | printk("=====================================\n"); |
4095 | printk("[ BUG: lock held at task exit time! ]\n"); | 4100 | printk("[ BUG: %s/%d still has locks held! ]\n", |
4101 | current->comm, task_pid_nr(current)); | ||
4096 | print_kernel_ident(); | 4102 | print_kernel_ident(); |
4097 | printk("-------------------------------------\n"); | 4103 | printk("-------------------------------------\n"); |
4098 | printk("%s/%d is exiting with locks still held!\n", | 4104 | lockdep_print_held_locks(current); |
4099 | curr->comm, task_pid_nr(curr)); | ||
4100 | lockdep_print_held_locks(curr); | ||
4101 | |||
4102 | printk("\nstack backtrace:\n"); | 4105 | printk("\nstack backtrace:\n"); |
4103 | dump_stack(); | 4106 | dump_stack(); |
4104 | } | 4107 | } |
4105 | 4108 | ||
4106 | void debug_check_no_locks_held(struct task_struct *task) | 4109 | void debug_check_no_locks_held(void) |
4107 | { | 4110 | { |
4108 | if (unlikely(task->lockdep_depth > 0)) | 4111 | if (unlikely(current->lockdep_depth > 0)) |
4109 | print_held_locks_bug(task); | 4112 | print_held_locks_bug(); |
4110 | } | 4113 | } |
4114 | EXPORT_SYMBOL_GPL(debug_check_no_locks_held); | ||
4111 | 4115 | ||
4112 | void debug_show_all_locks(void) | 4116 | void debug_show_all_locks(void) |
4113 | { | 4117 | { |
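
Besides the richer MAX_LOCK_DEPTH report, the lockdep change turns debug_check_no_locks_held() into an argumentless, exported helper that always inspects current, so it can now be called from modules and from places other than task exit. A minimal sketch of a caller (the surrounding function is hypothetical):

#include <linux/debug_locks.h>

/* Illustrative: warn if the calling task still holds lockdep-tracked locks
 * at a point where it must not. */
static void example_assert_no_locks(void)
{
	/* Old form: debug_check_no_locks_held(current);
	 * the new form takes no argument and checks current itself. */
	debug_check_no_locks_held();
}
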
diff --git a/kernel/module.c b/kernel/module.c index eab08274ec9b..0925c9a71975 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -197,9 +197,10 @@ static inline int strong_try_module_get(struct module *mod) | |||
197 | return -ENOENT; | 197 | return -ENOENT; |
198 | } | 198 | } |
199 | 199 | ||
200 | static inline void add_taint_module(struct module *mod, unsigned flag) | 200 | static inline void add_taint_module(struct module *mod, unsigned flag, |
201 | enum lockdep_ok lockdep_ok) | ||
201 | { | 202 | { |
202 | add_taint(flag); | 203 | add_taint(flag, lockdep_ok); |
203 | mod->taints |= (1U << flag); | 204 | mod->taints |= (1U << flag); |
204 | } | 205 | } |
205 | 206 | ||
@@ -727,7 +728,7 @@ static inline int try_force_unload(unsigned int flags) | |||
727 | { | 728 | { |
728 | int ret = (flags & O_TRUNC); | 729 | int ret = (flags & O_TRUNC); |
729 | if (ret) | 730 | if (ret) |
730 | add_taint(TAINT_FORCED_RMMOD); | 731 | add_taint(TAINT_FORCED_RMMOD, LOCKDEP_NOW_UNRELIABLE); |
731 | return ret; | 732 | return ret; |
732 | } | 733 | } |
733 | #else | 734 | #else |
@@ -1138,7 +1139,7 @@ static int try_to_force_load(struct module *mod, const char *reason) | |||
1138 | if (!test_taint(TAINT_FORCED_MODULE)) | 1139 | if (!test_taint(TAINT_FORCED_MODULE)) |
1139 | printk(KERN_WARNING "%s: %s: kernel tainted.\n", | 1140 | printk(KERN_WARNING "%s: %s: kernel tainted.\n", |
1140 | mod->name, reason); | 1141 | mod->name, reason); |
1141 | add_taint_module(mod, TAINT_FORCED_MODULE); | 1142 | add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_NOW_UNRELIABLE); |
1142 | return 0; | 1143 | return 0; |
1143 | #else | 1144 | #else |
1144 | return -ENOEXEC; | 1145 | return -ENOEXEC; |
@@ -2147,7 +2148,8 @@ static void set_license(struct module *mod, const char *license) | |||
2147 | if (!test_taint(TAINT_PROPRIETARY_MODULE)) | 2148 | if (!test_taint(TAINT_PROPRIETARY_MODULE)) |
2148 | printk(KERN_WARNING "%s: module license '%s' taints " | 2149 | printk(KERN_WARNING "%s: module license '%s' taints " |
2149 | "kernel.\n", mod->name, license); | 2150 | "kernel.\n", mod->name, license); |
2150 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 2151 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE, |
2152 | LOCKDEP_NOW_UNRELIABLE); | ||
2151 | } | 2153 | } |
2152 | } | 2154 | } |
2153 | 2155 | ||
@@ -2539,7 +2541,7 @@ static int copy_module_from_fd(int fd, struct load_info *info) | |||
2539 | if (err) | 2541 | if (err) |
2540 | goto out; | 2542 | goto out; |
2541 | 2543 | ||
2542 | err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); | 2544 | err = vfs_getattr(&file->f_path, &stat); |
2543 | if (err) | 2545 | if (err) |
2544 | goto out; | 2546 | goto out; |
2545 | 2547 | ||
@@ -2700,10 +2702,10 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) | |||
2700 | } | 2702 | } |
2701 | 2703 | ||
2702 | if (!get_modinfo(info, "intree")) | 2704 | if (!get_modinfo(info, "intree")) |
2703 | add_taint_module(mod, TAINT_OOT_MODULE); | 2705 | add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); |
2704 | 2706 | ||
2705 | if (get_modinfo(info, "staging")) { | 2707 | if (get_modinfo(info, "staging")) { |
2706 | add_taint_module(mod, TAINT_CRAP); | 2708 | add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); |
2707 | printk(KERN_WARNING "%s: module is from the staging directory," | 2709 | printk(KERN_WARNING "%s: module is from the staging directory," |
2708 | " the quality is unknown, you have been warned.\n", | 2710 | " the quality is unknown, you have been warned.\n", |
2709 | mod->name); | 2711 | mod->name); |
@@ -2869,15 +2871,17 @@ static int check_module_license_and_versions(struct module *mod) | |||
2869 | * using GPL-only symbols it needs. | 2871 | * using GPL-only symbols it needs. |
2870 | */ | 2872 | */ |
2871 | if (strcmp(mod->name, "ndiswrapper") == 0) | 2873 | if (strcmp(mod->name, "ndiswrapper") == 0) |
2872 | add_taint(TAINT_PROPRIETARY_MODULE); | 2874 | add_taint(TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE); |
2873 | 2875 | ||
2874 | /* driverloader was caught wrongly pretending to be under GPL */ | 2876 | /* driverloader was caught wrongly pretending to be under GPL */ |
2875 | if (strcmp(mod->name, "driverloader") == 0) | 2877 | if (strcmp(mod->name, "driverloader") == 0) |
2876 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 2878 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE, |
2879 | LOCKDEP_NOW_UNRELIABLE); | ||
2877 | 2880 | ||
2878 | /* lve claims to be GPL but upstream won't provide source */ | 2881 | /* lve claims to be GPL but upstream won't provide source */ |
2879 | if (strcmp(mod->name, "lve") == 0) | 2882 | if (strcmp(mod->name, "lve") == 0) |
2880 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 2883 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE, |
2884 | LOCKDEP_NOW_UNRELIABLE); | ||
2881 | 2885 | ||
2882 | #ifdef CONFIG_MODVERSIONS | 2886 | #ifdef CONFIG_MODVERSIONS |
2883 | if ((mod->num_syms && !mod->crcs) | 2887 | if ((mod->num_syms && !mod->crcs) |
@@ -3141,12 +3145,72 @@ static int may_init_module(void) | |||
3141 | return 0; | 3145 | return 0; |
3142 | } | 3146 | } |
3143 | 3147 | ||
3148 | /* | ||
3149 | * We try to place it in the list now to make sure it's unique before | ||
3150 | * we dedicate too many resources; in particular, this avoids temporary | ||
3151 | * percpu memory exhaustion. | ||
3152 | */ | ||
3153 | static int add_unformed_module(struct module *mod) | ||
3154 | { | ||
3155 | int err; | ||
3156 | struct module *old; | ||
3157 | |||
3158 | mod->state = MODULE_STATE_UNFORMED; | ||
3159 | |||
3160 | again: | ||
3161 | mutex_lock(&module_mutex); | ||
3162 | if ((old = find_module_all(mod->name, true)) != NULL) { | ||
3163 | if (old->state == MODULE_STATE_COMING | ||
3164 | || old->state == MODULE_STATE_UNFORMED) { | ||
3165 | /* Wait in case it fails to load. */ | ||
3166 | mutex_unlock(&module_mutex); | ||
3167 | err = wait_event_interruptible(module_wq, | ||
3168 | finished_loading(mod->name)); | ||
3169 | if (err) | ||
3170 | goto out_unlocked; | ||
3171 | goto again; | ||
3172 | } | ||
3173 | err = -EEXIST; | ||
3174 | goto out; | ||
3175 | } | ||
3176 | list_add_rcu(&mod->list, &modules); | ||
3177 | err = 0; | ||
3178 | |||
3179 | out: | ||
3180 | mutex_unlock(&module_mutex); | ||
3181 | out_unlocked: | ||
3182 | return err; | ||
3183 | } | ||
3184 | |||
3185 | static int complete_formation(struct module *mod, struct load_info *info) | ||
3186 | { | ||
3187 | int err; | ||
3188 | |||
3189 | mutex_lock(&module_mutex); | ||
3190 | |||
3191 | /* Find duplicate symbols (must be called under lock). */ | ||
3192 | err = verify_export_symbols(mod); | ||
3193 | if (err < 0) | ||
3194 | goto out; | ||
3195 | |||
3196 | /* This relies on module_mutex for list integrity. */ | ||
3197 | module_bug_finalize(info->hdr, info->sechdrs, mod); | ||
3198 | |||
3199 | /* Mark state as coming so strong_try_module_get() ignores us, | ||
3200 | * but kallsyms etc. can see us. */ | ||
3201 | mod->state = MODULE_STATE_COMING; | ||
3202 | |||
3203 | out: | ||
3204 | mutex_unlock(&module_mutex); | ||
3205 | return err; | ||
3206 | } | ||
3207 | |||
3144 | /* Allocate and load the module: note that size of section 0 is always | 3208 | /* Allocate and load the module: note that size of section 0 is always |
3145 | zero, and we rely on this for optional sections. */ | 3209 | zero, and we rely on this for optional sections. */ |
3146 | static int load_module(struct load_info *info, const char __user *uargs, | 3210 | static int load_module(struct load_info *info, const char __user *uargs, |
3147 | int flags) | 3211 | int flags) |
3148 | { | 3212 | { |
3149 | struct module *mod, *old; | 3213 | struct module *mod; |
3150 | long err; | 3214 | long err; |
3151 | 3215 | ||
3152 | err = module_sig_check(info); | 3216 | err = module_sig_check(info); |
@@ -3164,36 +3228,20 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
3164 | goto free_copy; | 3228 | goto free_copy; |
3165 | } | 3229 | } |
3166 | 3230 | ||
3167 | /* | 3231 | /* Reserve our place in the list. */ |
3168 | * We try to place it in the list now to make sure it's unique | 3232 | err = add_unformed_module(mod); |
3169 | * before we dedicate too many resources. In particular, | 3233 | if (err) |
3170 | * temporary percpu memory exhaustion. | ||
3171 | */ | ||
3172 | mod->state = MODULE_STATE_UNFORMED; | ||
3173 | again: | ||
3174 | mutex_lock(&module_mutex); | ||
3175 | if ((old = find_module_all(mod->name, true)) != NULL) { | ||
3176 | if (old->state == MODULE_STATE_COMING | ||
3177 | || old->state == MODULE_STATE_UNFORMED) { | ||
3178 | /* Wait in case it fails to load. */ | ||
3179 | mutex_unlock(&module_mutex); | ||
3180 | err = wait_event_interruptible(module_wq, | ||
3181 | finished_loading(mod->name)); | ||
3182 | if (err) | ||
3183 | goto free_module; | ||
3184 | goto again; | ||
3185 | } | ||
3186 | err = -EEXIST; | ||
3187 | mutex_unlock(&module_mutex); | ||
3188 | goto free_module; | 3234 | goto free_module; |
3189 | } | ||
3190 | list_add_rcu(&mod->list, &modules); | ||
3191 | mutex_unlock(&module_mutex); | ||
3192 | 3235 | ||
3193 | #ifdef CONFIG_MODULE_SIG | 3236 | #ifdef CONFIG_MODULE_SIG |
3194 | mod->sig_ok = info->sig_ok; | 3237 | mod->sig_ok = info->sig_ok; |
3195 | if (!mod->sig_ok) | 3238 | if (!mod->sig_ok) { |
3196 | add_taint_module(mod, TAINT_FORCED_MODULE); | 3239 | printk_once(KERN_NOTICE |
3240 | "%s: module verification failed: signature and/or" | ||
3241 | " required key missing - tainting kernel\n", | ||
3242 | mod->name); | ||
3243 | add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_STILL_OK); | ||
3244 | } | ||
3197 | #endif | 3245 | #endif |
3198 | 3246 | ||
3199 | /* Now module is in final location, initialize linked lists, etc. */ | 3247 | /* Now module is in final location, initialize linked lists, etc. */ |
@@ -3236,21 +3284,11 @@ again: | |||
3236 | 3284 | ||
3237 | dynamic_debug_setup(info->debug, info->num_debug); | 3285 | dynamic_debug_setup(info->debug, info->num_debug); |
3238 | 3286 | ||
3239 | mutex_lock(&module_mutex); | 3287 | /* Finally it's fully formed, ready to start executing. */ |
3240 | /* Find duplicate symbols (must be called under lock). */ | 3288 | err = complete_formation(mod, info); |
3241 | err = verify_export_symbols(mod); | 3289 | if (err) |
3242 | if (err < 0) | ||
3243 | goto ddebug_cleanup; | 3290 | goto ddebug_cleanup; |
3244 | 3291 | ||
3245 | /* This relies on module_mutex for list integrity. */ | ||
3246 | module_bug_finalize(info->hdr, info->sechdrs, mod); | ||
3247 | |||
3248 | /* Mark state as coming so strong_try_module_get() ignores us, | ||
3249 | * but kallsyms etc. can see us. */ | ||
3250 | mod->state = MODULE_STATE_COMING; | ||
3251 | |||
3252 | mutex_unlock(&module_mutex); | ||
3253 | |||
3254 | /* Module is ready to execute: parsing args may do that. */ | 3292 | /* Module is ready to execute: parsing args may do that. */ |
3255 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, | 3293 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, |
3256 | -32768, 32767, &ddebug_dyndbg_module_param_cb); | 3294 | -32768, 32767, &ddebug_dyndbg_module_param_cb); |
@@ -3274,8 +3312,8 @@ again: | |||
3274 | /* module_bug_cleanup needs module_mutex protection */ | 3312 | /* module_bug_cleanup needs module_mutex protection */ |
3275 | mutex_lock(&module_mutex); | 3313 | mutex_lock(&module_mutex); |
3276 | module_bug_cleanup(mod); | 3314 | module_bug_cleanup(mod); |
3277 | ddebug_cleanup: | ||
3278 | mutex_unlock(&module_mutex); | 3315 | mutex_unlock(&module_mutex); |
3316 | ddebug_cleanup: | ||
3279 | dynamic_debug_remove(info->debug); | 3317 | dynamic_debug_remove(info->debug); |
3280 | synchronize_sched(); | 3318 | synchronize_sched(); |
3281 | kfree(mod->args); | 3319 | kfree(mod->args); |
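
Two API shifts run through the module.c hunks: every add_taint()/add_taint_module() caller now states explicitly whether lockdep remains trustworthy, and copy_module_from_fd() uses the two-argument vfs_getattr() that takes a struct path. A hedged sketch of the latter on an already-open file (the helper is hypothetical):

#include <linux/fs.h>
#include <linux/stat.h>

/* Illustrative: read the size of an open file with the new-style call. */
static int example_file_size(struct file *file, loff_t *size)
{
	struct kstat stat;
	int err;

	/* Old form: vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); */
	err = vfs_getattr(&file->f_path, &stat);
	if (err)
		return err;

	*size = stat.size;
	return 0;
}
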
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 78e2ecb20165..afc0456f227a 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -153,8 +153,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) | |||
153 | goto out; | 153 | goto out; |
154 | } | 154 | } |
155 | 155 | ||
156 | new_ns = create_new_namespaces(flags, tsk, | 156 | new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs); |
157 | task_cred_xxx(tsk, user_ns), tsk->fs); | ||
158 | if (IS_ERR(new_ns)) { | 157 | if (IS_ERR(new_ns)) { |
159 | err = PTR_ERR(new_ns); | 158 | err = PTR_ERR(new_ns); |
160 | goto out; | 159 | goto out; |
@@ -251,7 +250,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) | |||
251 | return PTR_ERR(file); | 250 | return PTR_ERR(file); |
252 | 251 | ||
253 | err = -EINVAL; | 252 | err = -EINVAL; |
254 | ei = PROC_I(file->f_dentry->d_inode); | 253 | ei = PROC_I(file_inode(file)); |
255 | ops = ei->ns_ops; | 254 | ops = ei->ns_ops; |
256 | if (nstype && (ops->type != nstype)) | 255 | if (nstype && (ops->type != nstype)) |
257 | goto out; | 256 | goto out; |
diff --git a/kernel/panic.c b/kernel/panic.c index e1b2822fff97..7c57cc9eee2c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -259,26 +259,19 @@ unsigned long get_taint(void) | |||
259 | return tainted_mask; | 259 | return tainted_mask; |
260 | } | 260 | } |
261 | 261 | ||
262 | void add_taint(unsigned flag) | 262 | /** |
263 | * add_taint: add a taint flag if not already set. | ||
264 | * @flag: one of the TAINT_* constants. | ||
265 | * @lockdep_ok: whether lock debugging is still OK. | ||
266 | * | ||
267 | * If something bad has gone wrong, you'll want @lockdep_ok = LOCKDEP_NOW_UNRELIABLE, | ||
268 | * but for some noteworthy-but-not-corrupting cases it can be LOCKDEP_STILL_OK. | ||
269 | */ | ||
270 | void add_taint(unsigned flag, enum lockdep_ok lockdep_ok) | ||
263 | { | 271 | { |
264 | /* | 272 | if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off()) |
265 | * Can't trust the integrity of the kernel anymore. | 273 | printk(KERN_WARNING |
266 | * We don't call directly debug_locks_off() because the issue | 274 | "Disabling lock debugging due to kernel taint\n"); |
267 | * is not necessarily serious enough to set oops_in_progress to 1 | ||
268 | * Also we want to keep up lockdep for staging/out-of-tree | ||
269 | * development and post-warning case. | ||
270 | */ | ||
271 | switch (flag) { | ||
272 | case TAINT_CRAP: | ||
273 | case TAINT_OOT_MODULE: | ||
274 | case TAINT_WARN: | ||
275 | case TAINT_FIRMWARE_WORKAROUND: | ||
276 | break; | ||
277 | |||
278 | default: | ||
279 | if (__debug_locks_off()) | ||
280 | printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n"); | ||
281 | } | ||
282 | 275 | ||
283 | set_bit(flag, &tainted_mask); | 276 | set_bit(flag, &tainted_mask); |
284 | } | 277 | } |
@@ -421,7 +414,8 @@ static void warn_slowpath_common(const char *file, int line, void *caller, | |||
421 | print_modules(); | 414 | print_modules(); |
422 | dump_stack(); | 415 | dump_stack(); |
423 | print_oops_end_marker(); | 416 | print_oops_end_marker(); |
424 | add_taint(taint); | 417 | /* Just a warning, don't kill lockdep. */ |
418 | add_taint(taint, LOCKDEP_STILL_OK); | ||
425 | } | 419 | } |
426 | 420 | ||
427 | void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) | 421 | void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) |
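
The rewritten add_taint() replaces the hard-coded whitelist of "harmless" taint flags with an explicit argument from each caller: LOCKDEP_NOW_UNRELIABLE switches lock debugging off because the kernel's state can no longer be trusted, while LOCKDEP_STILL_OK keeps lockdep running for merely noteworthy taints. A short sketch of both flavours around a made-up condition:

#include <linux/kernel.h>

static void example_report_quirk(bool corrupts_state)
{
	if (corrupts_state)
		/* State may be corrupted: lockdep results would be suspect. */
		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_NOW_UNRELIABLE);
	else
		/* Noteworthy but harmless: keep lock debugging alive. */
		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
}
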
diff --git a/kernel/pid.c b/kernel/pid.c index f2c6a6825098..047dc6264638 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -350,10 +350,9 @@ void disable_pid_allocation(struct pid_namespace *ns) | |||
350 | 350 | ||
351 | struct pid *find_pid_ns(int nr, struct pid_namespace *ns) | 351 | struct pid *find_pid_ns(int nr, struct pid_namespace *ns) |
352 | { | 352 | { |
353 | struct hlist_node *elem; | ||
354 | struct upid *pnr; | 353 | struct upid *pnr; |
355 | 354 | ||
356 | hlist_for_each_entry_rcu(pnr, elem, | 355 | hlist_for_each_entry_rcu(pnr, |
357 | &pid_hash[pid_hashfn(nr, ns)], pid_chain) | 356 | &pid_hash[pid_hashfn(nr, ns)], pid_chain) |
358 | if (pnr->nr == nr && pnr->ns == ns) | 357 | if (pnr->nr == nr && pnr->ns == ns) |
359 | return container_of(pnr, struct pid, | 358 | return container_of(pnr, struct pid, |
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 10349d5f2ec3..6edbb2c55c22 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -552,24 +552,22 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, | |||
552 | return -EAGAIN; | 552 | return -EAGAIN; |
553 | 553 | ||
554 | spin_lock_init(&new_timer->it_lock); | 554 | spin_lock_init(&new_timer->it_lock); |
555 | retry: | 555 | |
556 | if (unlikely(!idr_pre_get(&posix_timers_id, GFP_KERNEL))) { | 556 | idr_preload(GFP_KERNEL); |
557 | error = -EAGAIN; | ||
558 | goto out; | ||
559 | } | ||
560 | spin_lock_irq(&idr_lock); | 557 | spin_lock_irq(&idr_lock); |
561 | error = idr_get_new(&posix_timers_id, new_timer, &new_timer_id); | 558 | error = idr_alloc(&posix_timers_id, new_timer, 0, 0, GFP_NOWAIT); |
562 | spin_unlock_irq(&idr_lock); | 559 | spin_unlock_irq(&idr_lock); |
563 | if (error) { | 560 | idr_preload_end(); |
564 | if (error == -EAGAIN) | 561 | if (error < 0) { |
565 | goto retry; | ||
566 | /* | 562 | /* |
567 | * Weird looking, but we return EAGAIN if the IDR is | 563 | * Weird looking, but we return EAGAIN if the IDR is |
568 | * full (proper POSIX return value for this) | 564 | * full (proper POSIX return value for this) |
569 | */ | 565 | */ |
570 | error = -EAGAIN; | 566 | if (error == -ENOSPC) |
567 | error = -EAGAIN; | ||
571 | goto out; | 568 | goto out; |
572 | } | 569 | } |
570 | new_timer_id = error; | ||
573 | 571 | ||
574 | it_id_set = IT_ID_SET; | 572 | it_id_set = IT_ID_SET; |
575 | new_timer->it_id = (timer_t) new_timer_id; | 573 | new_timer->it_id = (timer_t) new_timer_id; |
@@ -639,6 +637,13 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) | |||
639 | { | 637 | { |
640 | struct k_itimer *timr; | 638 | struct k_itimer *timr; |
641 | 639 | ||
640 | /* | ||
641 | * timer_t could be any type >= int and we want to make sure any | ||
642 | * @timer_id outside positive int range fails lookup. | ||
643 | */ | ||
644 | if ((unsigned long long)timer_id > INT_MAX) | ||
645 | return NULL; | ||
646 | |||
642 | rcu_read_lock(); | 647 | rcu_read_lock(); |
643 | timr = idr_find(&posix_timers_id, (int)timer_id); | 648 | timr = idr_find(&posix_timers_id, (int)timer_id); |
644 | if (timr) { | 649 | if (timr) { |
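The timer_create() hunk above replaces the old idr_pre_get()/idr_get_new() retry loop with the idr_preload()/idr_alloc() pattern. A hedged, generic sketch of that pattern; example_idr and example_lock are hypothetical.

#include <linux/gfp.h>
#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_IDR(example_idr);
static DEFINE_SPINLOCK(example_lock);

static int example_alloc_id(void *ptr)
{
	int id;

	idr_preload(GFP_KERNEL);	/* may sleep: preallocate outside the lock */
	spin_lock(&example_lock);
	id = idr_alloc(&example_idr, ptr, 0, 0, GFP_NOWAIT);
	spin_unlock(&example_lock);
	idr_preload_end();

	/* >= 0 on success, -ENOSPC when the id space is full, -ENOMEM otherwise. */
	return id;
}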
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index ca304046d9e2..c6422ffeda9a 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c | |||
@@ -66,7 +66,7 @@ static DECLARE_WORK(suspend_work, try_to_suspend); | |||
66 | 66 | ||
67 | void queue_up_suspend_work(void) | 67 | void queue_up_suspend_work(void) |
68 | { | 68 | { |
69 | if (!work_pending(&suspend_work) && autosleep_state > PM_SUSPEND_ON) | 69 | if (autosleep_state > PM_SUSPEND_ON) |
70 | queue_work(autosleep_wq, &suspend_work); | 70 | queue_work(autosleep_wq, &suspend_work); |
71 | } | 71 | } |
72 | 72 | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c index 1c16f9167de1..d77663bfedeb 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -313,7 +313,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, | |||
313 | static suspend_state_t decode_state(const char *buf, size_t n) | 313 | static suspend_state_t decode_state(const char *buf, size_t n) |
314 | { | 314 | { |
315 | #ifdef CONFIG_SUSPEND | 315 | #ifdef CONFIG_SUSPEND |
316 | suspend_state_t state = PM_SUSPEND_STANDBY; | 316 | suspend_state_t state = PM_SUSPEND_MIN; |
317 | const char * const *s; | 317 | const char * const *s; |
318 | #endif | 318 | #endif |
319 | char *p; | 319 | char *p; |
@@ -553,6 +553,30 @@ power_attr(pm_trace_dev_match); | |||
553 | 553 | ||
554 | #endif /* CONFIG_PM_TRACE */ | 554 | #endif /* CONFIG_PM_TRACE */ |
555 | 555 | ||
556 | #ifdef CONFIG_FREEZER | ||
557 | static ssize_t pm_freeze_timeout_show(struct kobject *kobj, | ||
558 | struct kobj_attribute *attr, char *buf) | ||
559 | { | ||
560 | return sprintf(buf, "%u\n", freeze_timeout_msecs); | ||
561 | } | ||
562 | |||
563 | static ssize_t pm_freeze_timeout_store(struct kobject *kobj, | ||
564 | struct kobj_attribute *attr, | ||
565 | const char *buf, size_t n) | ||
566 | { | ||
567 | unsigned long val; | ||
568 | |||
569 | if (kstrtoul(buf, 10, &val)) | ||
570 | return -EINVAL; | ||
571 | |||
572 | freeze_timeout_msecs = val; | ||
573 | return n; | ||
574 | } | ||
575 | |||
576 | power_attr(pm_freeze_timeout); | ||
577 | |||
578 | #endif /* CONFIG_FREEZER*/ | ||
579 | |||
556 | static struct attribute * g[] = { | 580 | static struct attribute * g[] = { |
557 | &state_attr.attr, | 581 | &state_attr.attr, |
558 | #ifdef CONFIG_PM_TRACE | 582 | #ifdef CONFIG_PM_TRACE |
@@ -576,6 +600,9 @@ static struct attribute * g[] = { | |||
576 | &pm_print_times_attr.attr, | 600 | &pm_print_times_attr.attr, |
577 | #endif | 601 | #endif |
578 | #endif | 602 | #endif |
603 | #ifdef CONFIG_FREEZER | ||
604 | &pm_freeze_timeout_attr.attr, | ||
605 | #endif | ||
579 | NULL, | 606 | NULL, |
580 | }; | 607 | }; |
581 | 608 | ||
diff --git a/kernel/power/process.c b/kernel/power/process.c index d5a258b60c6f..98088e0e71e8 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -21,7 +21,7 @@ | |||
21 | /* | 21 | /* |
22 | * Timeout for stopping processes | 22 | * Timeout for stopping processes |
23 | */ | 23 | */ |
24 | #define TIMEOUT (20 * HZ) | 24 | unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC; |
25 | 25 | ||
26 | static int try_to_freeze_tasks(bool user_only) | 26 | static int try_to_freeze_tasks(bool user_only) |
27 | { | 27 | { |
@@ -36,7 +36,7 @@ static int try_to_freeze_tasks(bool user_only) | |||
36 | 36 | ||
37 | do_gettimeofday(&start); | 37 | do_gettimeofday(&start); |
38 | 38 | ||
39 | end_time = jiffies + TIMEOUT; | 39 | end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs); |
40 | 40 | ||
41 | if (!user_only) | 41 | if (!user_only) |
42 | freeze_workqueues_begin(); | 42 | freeze_workqueues_begin(); |
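Taken together, the two hunks above turn the hard-coded 20-second freezer timeout into the tunable freeze_timeout_msecs, exposed through the new pm_freeze_timeout attribute. A hedged userspace sketch, assuming the attribute appears under /sys/power/ like the other power_attr() files.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/power/pm_freeze_timeout", "w");

	if (!f)
		return 1;
	/* Value is in milliseconds: allow the freezer 30 s instead of the default 20 s. */
	fprintf(f, "%u\n", 30000);
	return fclose(f) ? 1 : 0;
}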
diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 9322ff7eaad6..587dddeebf15 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c | |||
@@ -359,8 +359,7 @@ void pm_qos_update_request(struct pm_qos_request *req, | |||
359 | return; | 359 | return; |
360 | } | 360 | } |
361 | 361 | ||
362 | if (delayed_work_pending(&req->work)) | 362 | cancel_delayed_work_sync(&req->work); |
363 | cancel_delayed_work_sync(&req->work); | ||
364 | 363 | ||
365 | if (new_value != req->node.prio) | 364 | if (new_value != req->node.prio) |
366 | pm_qos_update_target( | 365 | pm_qos_update_target( |
@@ -386,8 +385,7 @@ void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value, | |||
386 | "%s called for unknown object.", __func__)) | 385 | "%s called for unknown object.", __func__)) |
387 | return; | 386 | return; |
388 | 387 | ||
389 | if (delayed_work_pending(&req->work)) | 388 | cancel_delayed_work_sync(&req->work); |
390 | cancel_delayed_work_sync(&req->work); | ||
391 | 389 | ||
392 | if (new_value != req->node.prio) | 390 | if (new_value != req->node.prio) |
393 | pm_qos_update_target( | 391 | pm_qos_update_target( |
@@ -416,8 +414,7 @@ void pm_qos_remove_request(struct pm_qos_request *req) | |||
416 | return; | 414 | return; |
417 | } | 415 | } |
418 | 416 | ||
419 | if (delayed_work_pending(&req->work)) | 417 | cancel_delayed_work_sync(&req->work); |
420 | cancel_delayed_work_sync(&req->work); | ||
421 | 418 | ||
422 | pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, | 419 | pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, |
423 | &req->node, PM_QOS_REMOVE_REQ, | 420 | &req->node, PM_QOS_REMOVE_REQ, |
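These three hunks drop the delayed_work_pending() pre-check: cancel_delayed_work_sync() already copes with idle work, and the pre-check is racy anyway. A hedged sketch of the resulting idiom; the teardown helper is hypothetical.

#include <linux/printk.h>
#include <linux/workqueue.h>

static void example_teardown(struct delayed_work *dw)
{
	/* Safe to call unconditionally: it always waits for a running callback
	 * to finish and returns true only if the work item was still pending. */
	if (cancel_delayed_work_sync(dw))
		pr_debug("cancelled a pending delayed work item\n");
}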
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c8b7446b27df..d4feda084a3a 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -30,12 +30,38 @@ | |||
30 | #include "power.h" | 30 | #include "power.h" |
31 | 31 | ||
32 | const char *const pm_states[PM_SUSPEND_MAX] = { | 32 | const char *const pm_states[PM_SUSPEND_MAX] = { |
33 | [PM_SUSPEND_FREEZE] = "freeze", | ||
33 | [PM_SUSPEND_STANDBY] = "standby", | 34 | [PM_SUSPEND_STANDBY] = "standby", |
34 | [PM_SUSPEND_MEM] = "mem", | 35 | [PM_SUSPEND_MEM] = "mem", |
35 | }; | 36 | }; |
36 | 37 | ||
37 | static const struct platform_suspend_ops *suspend_ops; | 38 | static const struct platform_suspend_ops *suspend_ops; |
38 | 39 | ||
40 | static bool need_suspend_ops(suspend_state_t state) | ||
41 | { | ||
42 | return !!(state > PM_SUSPEND_FREEZE); | ||
43 | } | ||
44 | |||
45 | static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); | ||
46 | static bool suspend_freeze_wake; | ||
47 | |||
48 | static void freeze_begin(void) | ||
49 | { | ||
50 | suspend_freeze_wake = false; | ||
51 | } | ||
52 | |||
53 | static void freeze_enter(void) | ||
54 | { | ||
55 | wait_event(suspend_freeze_wait_head, suspend_freeze_wake); | ||
56 | } | ||
57 | |||
58 | void freeze_wake(void) | ||
59 | { | ||
60 | suspend_freeze_wake = true; | ||
61 | wake_up(&suspend_freeze_wait_head); | ||
62 | } | ||
63 | EXPORT_SYMBOL_GPL(freeze_wake); | ||
64 | |||
39 | /** | 65 | /** |
40 | * suspend_set_ops - Set the global suspend method table. | 66 | * suspend_set_ops - Set the global suspend method table. |
41 | * @ops: Suspend operations to use. | 67 | * @ops: Suspend operations to use. |
@@ -50,8 +76,11 @@ EXPORT_SYMBOL_GPL(suspend_set_ops); | |||
50 | 76 | ||
51 | bool valid_state(suspend_state_t state) | 77 | bool valid_state(suspend_state_t state) |
52 | { | 78 | { |
79 | if (state == PM_SUSPEND_FREEZE) | ||
80 | return true; | ||
53 | /* | 81 | /* |
54 | * All states need lowlevel support and need to be valid to the lowlevel | 82 | * PM_SUSPEND_STANDBY and PM_SUSPEND_MEMORY states need lowlevel |
83 | * support and need to be valid to the lowlevel | ||
55 | * implementation, no valid callback implies that none are valid. | 84 | * implementation, no valid callback implies that none are valid. |
56 | */ | 85 | */ |
57 | return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); | 86 | return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); |
@@ -89,11 +118,11 @@ static int suspend_test(int level) | |||
89 | * hibernation). Run suspend notifiers, allocate the "suspend" console and | 118 | * hibernation). Run suspend notifiers, allocate the "suspend" console and |
90 | * freeze processes. | 119 | * freeze processes. |
91 | */ | 120 | */ |
92 | static int suspend_prepare(void) | 121 | static int suspend_prepare(suspend_state_t state) |
93 | { | 122 | { |
94 | int error; | 123 | int error; |
95 | 124 | ||
96 | if (!suspend_ops || !suspend_ops->enter) | 125 | if (need_suspend_ops(state) && (!suspend_ops || !suspend_ops->enter)) |
97 | return -EPERM; | 126 | return -EPERM; |
98 | 127 | ||
99 | pm_prepare_console(); | 128 | pm_prepare_console(); |
@@ -137,7 +166,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |||
137 | { | 166 | { |
138 | int error; | 167 | int error; |
139 | 168 | ||
140 | if (suspend_ops->prepare) { | 169 | if (need_suspend_ops(state) && suspend_ops->prepare) { |
141 | error = suspend_ops->prepare(); | 170 | error = suspend_ops->prepare(); |
142 | if (error) | 171 | if (error) |
143 | goto Platform_finish; | 172 | goto Platform_finish; |
@@ -149,12 +178,23 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |||
149 | goto Platform_finish; | 178 | goto Platform_finish; |
150 | } | 179 | } |
151 | 180 | ||
152 | if (suspend_ops->prepare_late) { | 181 | if (need_suspend_ops(state) && suspend_ops->prepare_late) { |
153 | error = suspend_ops->prepare_late(); | 182 | error = suspend_ops->prepare_late(); |
154 | if (error) | 183 | if (error) |
155 | goto Platform_wake; | 184 | goto Platform_wake; |
156 | } | 185 | } |
157 | 186 | ||
187 | /* | ||
188 | * PM_SUSPEND_FREEZE equals | ||
189 | * frozen processes + suspended devices + idle processors. | ||
190 | * Thus we should invoke freeze_enter() soon after | ||
191 | * all the devices are suspended. | ||
192 | */ | ||
193 | if (state == PM_SUSPEND_FREEZE) { | ||
194 | freeze_enter(); | ||
195 | goto Platform_wake; | ||
196 | } | ||
197 | |||
158 | if (suspend_test(TEST_PLATFORM)) | 198 | if (suspend_test(TEST_PLATFORM)) |
159 | goto Platform_wake; | 199 | goto Platform_wake; |
160 | 200 | ||
@@ -182,13 +222,13 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |||
182 | enable_nonboot_cpus(); | 222 | enable_nonboot_cpus(); |
183 | 223 | ||
184 | Platform_wake: | 224 | Platform_wake: |
185 | if (suspend_ops->wake) | 225 | if (need_suspend_ops(state) && suspend_ops->wake) |
186 | suspend_ops->wake(); | 226 | suspend_ops->wake(); |
187 | 227 | ||
188 | dpm_resume_start(PMSG_RESUME); | 228 | dpm_resume_start(PMSG_RESUME); |
189 | 229 | ||
190 | Platform_finish: | 230 | Platform_finish: |
191 | if (suspend_ops->finish) | 231 | if (need_suspend_ops(state) && suspend_ops->finish) |
192 | suspend_ops->finish(); | 232 | suspend_ops->finish(); |
193 | 233 | ||
194 | return error; | 234 | return error; |
@@ -203,11 +243,11 @@ int suspend_devices_and_enter(suspend_state_t state) | |||
203 | int error; | 243 | int error; |
204 | bool wakeup = false; | 244 | bool wakeup = false; |
205 | 245 | ||
206 | if (!suspend_ops) | 246 | if (need_suspend_ops(state) && !suspend_ops) |
207 | return -ENOSYS; | 247 | return -ENOSYS; |
208 | 248 | ||
209 | trace_machine_suspend(state); | 249 | trace_machine_suspend(state); |
210 | if (suspend_ops->begin) { | 250 | if (need_suspend_ops(state) && suspend_ops->begin) { |
211 | error = suspend_ops->begin(state); | 251 | error = suspend_ops->begin(state); |
212 | if (error) | 252 | if (error) |
213 | goto Close; | 253 | goto Close; |
@@ -226,7 +266,7 @@ int suspend_devices_and_enter(suspend_state_t state) | |||
226 | 266 | ||
227 | do { | 267 | do { |
228 | error = suspend_enter(state, &wakeup); | 268 | error = suspend_enter(state, &wakeup); |
229 | } while (!error && !wakeup | 269 | } while (!error && !wakeup && need_suspend_ops(state) |
230 | && suspend_ops->suspend_again && suspend_ops->suspend_again()); | 270 | && suspend_ops->suspend_again && suspend_ops->suspend_again()); |
231 | 271 | ||
232 | Resume_devices: | 272 | Resume_devices: |
@@ -236,13 +276,13 @@ int suspend_devices_and_enter(suspend_state_t state) | |||
236 | ftrace_start(); | 276 | ftrace_start(); |
237 | resume_console(); | 277 | resume_console(); |
238 | Close: | 278 | Close: |
239 | if (suspend_ops->end) | 279 | if (need_suspend_ops(state) && suspend_ops->end) |
240 | suspend_ops->end(); | 280 | suspend_ops->end(); |
241 | trace_machine_suspend(PWR_EVENT_EXIT); | 281 | trace_machine_suspend(PWR_EVENT_EXIT); |
242 | return error; | 282 | return error; |
243 | 283 | ||
244 | Recover_platform: | 284 | Recover_platform: |
245 | if (suspend_ops->recover) | 285 | if (need_suspend_ops(state) && suspend_ops->recover) |
246 | suspend_ops->recover(); | 286 | suspend_ops->recover(); |
247 | goto Resume_devices; | 287 | goto Resume_devices; |
248 | } | 288 | } |
@@ -278,12 +318,15 @@ static int enter_state(suspend_state_t state) | |||
278 | if (!mutex_trylock(&pm_mutex)) | 318 | if (!mutex_trylock(&pm_mutex)) |
279 | return -EBUSY; | 319 | return -EBUSY; |
280 | 320 | ||
321 | if (state == PM_SUSPEND_FREEZE) | ||
322 | freeze_begin(); | ||
323 | |||
281 | printk(KERN_INFO "PM: Syncing filesystems ... "); | 324 | printk(KERN_INFO "PM: Syncing filesystems ... "); |
282 | sys_sync(); | 325 | sys_sync(); |
283 | printk("done.\n"); | 326 | printk("done.\n"); |
284 | 327 | ||
285 | pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); | 328 | pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); |
286 | error = suspend_prepare(); | 329 | error = suspend_prepare(state); |
287 | if (error) | 330 | if (error) |
288 | goto Unlock; | 331 | goto Unlock; |
289 | 332 | ||
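In the new PM_SUSPEND_FREEZE ("freeze") state, suspend_enter() parks in freeze_enter() until something calls freeze_wake(). A hedged sketch of the intended caller, a wakeup-capable device's interrupt handler; the device wiring is hypothetical.

#include <linux/interrupt.h>
#include <linux/suspend.h>

static irqreturn_t example_wakeup_irq(int irq, void *dev_id)
{
	/* Lets freeze_enter()'s wait_event() return, so devices are resumed. */
	freeze_wake();
	return IRQ_HANDLED;
}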
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index 25596e450ac7..9b2a1d58558d 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c | |||
@@ -112,7 +112,7 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) | |||
112 | rtc_set_alarm(rtc, &alm); | 112 | rtc_set_alarm(rtc, &alm); |
113 | } | 113 | } |
114 | 114 | ||
115 | static int __init has_wakealarm(struct device *dev, void *name_ptr) | 115 | static int __init has_wakealarm(struct device *dev, const void *data) |
116 | { | 116 | { |
117 | struct rtc_device *candidate = to_rtc_device(dev); | 117 | struct rtc_device *candidate = to_rtc_device(dev); |
118 | 118 | ||
@@ -121,7 +121,6 @@ static int __init has_wakealarm(struct device *dev, void *name_ptr) | |||
121 | if (!device_may_wakeup(candidate->dev.parent)) | 121 | if (!device_may_wakeup(candidate->dev.parent)) |
122 | return 0; | 122 | return 0; |
123 | 123 | ||
124 | *(const char **)name_ptr = dev_name(dev); | ||
125 | return 1; | 124 | return 1; |
126 | } | 125 | } |
127 | 126 | ||
@@ -159,8 +158,8 @@ static int __init test_suspend(void) | |||
159 | static char warn_no_rtc[] __initdata = | 158 | static char warn_no_rtc[] __initdata = |
160 | KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; | 159 | KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; |
161 | 160 | ||
162 | char *pony = NULL; | ||
163 | struct rtc_device *rtc = NULL; | 161 | struct rtc_device *rtc = NULL; |
162 | struct device *dev; | ||
164 | 163 | ||
165 | /* PM is initialized by now; is that state testable? */ | 164 | /* PM is initialized by now; is that state testable? */ |
166 | if (test_state == PM_SUSPEND_ON) | 165 | if (test_state == PM_SUSPEND_ON) |
@@ -171,9 +170,9 @@ static int __init test_suspend(void) | |||
171 | } | 170 | } |
172 | 171 | ||
173 | /* RTCs have initialized by now too ... can we use one? */ | 172 | /* RTCs have initialized by now too ... can we use one? */ |
174 | class_find_device(rtc_class, NULL, &pony, has_wakealarm); | 173 | dev = class_find_device(rtc_class, NULL, NULL, has_wakealarm); |
175 | if (pony) | 174 | if (dev) |
176 | rtc = rtc_class_open(pony); | 175 | rtc = rtc_class_open(dev_name(dev)); |
177 | if (!rtc) { | 176 | if (!rtc) { |
178 | printk(warn_no_rtc); | 177 | printk(warn_no_rtc); |
179 | goto done; | 178 | goto done; |
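class_find_device() now returns the matched struct device * directly and its match callback takes const void *data, which is why the "pony" out-parameter disappears above. A hedged sketch of the new calling convention; the class pointer and the match-by-name criterion are hypothetical.

#include <linux/device.h>
#include <linux/string.h>

static int match_by_name(struct device *dev, const void *data)
{
	return sysfs_streq(dev_name(dev), data);
}

/* Returns a device with an elevated reference count, or NULL; drop it with
 * put_device() when done. */
static struct device *example_find(struct class *cls, const char *name)
{
	return class_find_device(cls, NULL, name, match_by_name);
}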
diff --git a/kernel/printk.c b/kernel/printk.c index f24633afa46a..0b31715f335a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -88,6 +88,12 @@ static DEFINE_SEMAPHORE(console_sem); | |||
88 | struct console *console_drivers; | 88 | struct console *console_drivers; |
89 | EXPORT_SYMBOL_GPL(console_drivers); | 89 | EXPORT_SYMBOL_GPL(console_drivers); |
90 | 90 | ||
91 | #ifdef CONFIG_LOCKDEP | ||
92 | static struct lockdep_map console_lock_dep_map = { | ||
93 | .name = "console_lock" | ||
94 | }; | ||
95 | #endif | ||
96 | |||
91 | /* | 97 | /* |
92 | * This is used for debugging the mess that is the VT code by | 98 | * This is used for debugging the mess that is the VT code by |
93 | * keeping track if we have the console semaphore held. It's | 99 | * keeping track if we have the console semaphore held. It's |
@@ -1919,6 +1925,7 @@ void console_lock(void) | |||
1919 | return; | 1925 | return; |
1920 | console_locked = 1; | 1926 | console_locked = 1; |
1921 | console_may_schedule = 1; | 1927 | console_may_schedule = 1; |
1928 | mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_); | ||
1922 | } | 1929 | } |
1923 | EXPORT_SYMBOL(console_lock); | 1930 | EXPORT_SYMBOL(console_lock); |
1924 | 1931 | ||
@@ -1940,6 +1947,7 @@ int console_trylock(void) | |||
1940 | } | 1947 | } |
1941 | console_locked = 1; | 1948 | console_locked = 1; |
1942 | console_may_schedule = 0; | 1949 | console_may_schedule = 0; |
1950 | mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_); | ||
1943 | return 1; | 1951 | return 1; |
1944 | } | 1952 | } |
1945 | EXPORT_SYMBOL(console_trylock); | 1953 | EXPORT_SYMBOL(console_trylock); |
@@ -2102,6 +2110,7 @@ skip: | |||
2102 | local_irq_restore(flags); | 2110 | local_irq_restore(flags); |
2103 | } | 2111 | } |
2104 | console_locked = 0; | 2112 | console_locked = 0; |
2113 | mutex_release(&console_lock_dep_map, 1, _RET_IP_); | ||
2105 | 2114 | ||
2106 | /* Release the exclusive_console once it is used */ | 2115 | /* Release the exclusive_console once it is used */ |
2107 | if (unlikely(exclusive_console)) | 2116 | if (unlikely(exclusive_console)) |
diff --git a/kernel/relay.c b/kernel/relay.c index e8cd2027abbd..01ab081ac53a 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -1139,7 +1139,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, | |||
1139 | if (!desc->count) | 1139 | if (!desc->count) |
1140 | return 0; | 1140 | return 0; |
1141 | 1141 | ||
1142 | mutex_lock(&filp->f_path.dentry->d_inode->i_mutex); | 1142 | mutex_lock(&file_inode(filp)->i_mutex); |
1143 | do { | 1143 | do { |
1144 | if (!relay_file_read_avail(buf, *ppos)) | 1144 | if (!relay_file_read_avail(buf, *ppos)) |
1145 | break; | 1145 | break; |
@@ -1159,7 +1159,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, | |||
1159 | *ppos = relay_file_read_end_pos(buf, read_start, ret); | 1159 | *ppos = relay_file_read_end_pos(buf, read_start, ret); |
1160 | } | 1160 | } |
1161 | } while (desc->count && ret); | 1161 | } while (desc->count && ret); |
1162 | mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); | 1162 | mutex_unlock(&file_inode(filp)->i_mutex); |
1163 | 1163 | ||
1164 | return desc->written; | 1164 | return desc->written; |
1165 | } | 1165 | } |
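Several hunks in this series (the namespace-fd one at the top and relay_file_read_subbufs() here) replace open-coded f_path.dentry->d_inode chains with the new file_inode() helper. A trivial hedged sketch:

#include <linux/fs.h>

static loff_t example_file_size(struct file *filp)
{
	/* Was: filp->f_path.dentry->d_inode */
	struct inode *inode = file_inode(filp);

	return i_size_read(inode);
}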
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index 0984a21076a3..64de5f8b0c9e 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c | |||
@@ -35,6 +35,7 @@ static inline void autogroup_destroy(struct kref *kref) | |||
35 | ag->tg->rt_se = NULL; | 35 | ag->tg->rt_se = NULL; |
36 | ag->tg->rt_rq = NULL; | 36 | ag->tg->rt_rq = NULL; |
37 | #endif | 37 | #endif |
38 | sched_offline_group(ag->tg); | ||
38 | sched_destroy_group(ag->tg); | 39 | sched_destroy_group(ag->tg); |
39 | } | 40 | } |
40 | 41 | ||
@@ -76,6 +77,8 @@ static inline struct autogroup *autogroup_create(void) | |||
76 | if (IS_ERR(tg)) | 77 | if (IS_ERR(tg)) |
77 | goto out_free; | 78 | goto out_free; |
78 | 79 | ||
80 | sched_online_group(tg, &root_task_group); | ||
81 | |||
79 | kref_init(&ag->kref); | 82 | kref_init(&ag->kref); |
80 | init_rwsem(&ag->lock); | 83 | init_rwsem(&ag->lock); |
81 | ag->id = atomic_inc_return(&autogroup_seq_nr); | 84 | ag->id = atomic_inc_return(&autogroup_seq_nr); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 03d7784b7bd2..7f12624a393c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -1132,18 +1132,28 @@ EXPORT_SYMBOL_GPL(kick_process); | |||
1132 | */ | 1132 | */ |
1133 | static int select_fallback_rq(int cpu, struct task_struct *p) | 1133 | static int select_fallback_rq(int cpu, struct task_struct *p) |
1134 | { | 1134 | { |
1135 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); | 1135 | int nid = cpu_to_node(cpu); |
1136 | const struct cpumask *nodemask = NULL; | ||
1136 | enum { cpuset, possible, fail } state = cpuset; | 1137 | enum { cpuset, possible, fail } state = cpuset; |
1137 | int dest_cpu; | 1138 | int dest_cpu; |
1138 | 1139 | ||
1139 | /* Look for allowed, online CPU in same node. */ | 1140 | /* |
1140 | for_each_cpu(dest_cpu, nodemask) { | 1141 | * If the node that the cpu is on has been offlined, cpu_to_node() |
1141 | if (!cpu_online(dest_cpu)) | 1142 | * will return -1. There is no cpu on the node, and we should |
1142 | continue; | 1143 | * select the cpu on the other node. |
1143 | if (!cpu_active(dest_cpu)) | 1144 | */ |
1144 | continue; | 1145 | if (nid != -1) { |
1145 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | 1146 | nodemask = cpumask_of_node(nid); |
1146 | return dest_cpu; | 1147 | |
1148 | /* Look for allowed, online CPU in same node. */ | ||
1149 | for_each_cpu(dest_cpu, nodemask) { | ||
1150 | if (!cpu_online(dest_cpu)) | ||
1151 | continue; | ||
1152 | if (!cpu_active(dest_cpu)) | ||
1153 | continue; | ||
1154 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | ||
1155 | return dest_cpu; | ||
1156 | } | ||
1147 | } | 1157 | } |
1148 | 1158 | ||
1149 | for (;;) { | 1159 | for (;;) { |
@@ -1742,9 +1752,8 @@ EXPORT_SYMBOL_GPL(preempt_notifier_unregister); | |||
1742 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) | 1752 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) |
1743 | { | 1753 | { |
1744 | struct preempt_notifier *notifier; | 1754 | struct preempt_notifier *notifier; |
1745 | struct hlist_node *node; | ||
1746 | 1755 | ||
1747 | hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) | 1756 | hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) |
1748 | notifier->ops->sched_in(notifier, raw_smp_processor_id()); | 1757 | notifier->ops->sched_in(notifier, raw_smp_processor_id()); |
1749 | } | 1758 | } |
1750 | 1759 | ||
@@ -1753,9 +1762,8 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, | |||
1753 | struct task_struct *next) | 1762 | struct task_struct *next) |
1754 | { | 1763 | { |
1755 | struct preempt_notifier *notifier; | 1764 | struct preempt_notifier *notifier; |
1756 | struct hlist_node *node; | ||
1757 | 1765 | ||
1758 | hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) | 1766 | hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) |
1759 | notifier->ops->sched_out(notifier, next); | 1767 | notifier->ops->sched_out(notifier, next); |
1760 | } | 1768 | } |
1761 | 1769 | ||
@@ -1969,11 +1977,10 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
1969 | } | 1977 | } |
1970 | 1978 | ||
1971 | /* | 1979 | /* |
1972 | * nr_running, nr_uninterruptible and nr_context_switches: | 1980 | * nr_running and nr_context_switches: |
1973 | * | 1981 | * |
1974 | * externally visible scheduler statistics: current number of runnable | 1982 | * externally visible scheduler statistics: current number of runnable |
1975 | * threads, current number of uninterruptible-sleeping threads, total | 1983 | * threads, total number of context switches performed since bootup. |
1976 | * number of context switches performed since bootup. | ||
1977 | */ | 1984 | */ |
1978 | unsigned long nr_running(void) | 1985 | unsigned long nr_running(void) |
1979 | { | 1986 | { |
@@ -1985,23 +1992,6 @@ unsigned long nr_running(void) | |||
1985 | return sum; | 1992 | return sum; |
1986 | } | 1993 | } |
1987 | 1994 | ||
1988 | unsigned long nr_uninterruptible(void) | ||
1989 | { | ||
1990 | unsigned long i, sum = 0; | ||
1991 | |||
1992 | for_each_possible_cpu(i) | ||
1993 | sum += cpu_rq(i)->nr_uninterruptible; | ||
1994 | |||
1995 | /* | ||
1996 | * Since we read the counters lockless, it might be slightly | ||
1997 | * inaccurate. Do not allow it to go below zero though: | ||
1998 | */ | ||
1999 | if (unlikely((long)sum < 0)) | ||
2000 | sum = 0; | ||
2001 | |||
2002 | return sum; | ||
2003 | } | ||
2004 | |||
2005 | unsigned long long nr_context_switches(void) | 1995 | unsigned long long nr_context_switches(void) |
2006 | { | 1996 | { |
2007 | int i; | 1997 | int i; |
@@ -2786,7 +2776,7 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
2786 | if (irqs_disabled()) | 2776 | if (irqs_disabled()) |
2787 | print_irqtrace_events(prev); | 2777 | print_irqtrace_events(prev); |
2788 | dump_stack(); | 2778 | dump_stack(); |
2789 | add_taint(TAINT_WARN); | 2779 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
2790 | } | 2780 | } |
2791 | 2781 | ||
2792 | /* | 2782 | /* |
@@ -3268,7 +3258,8 @@ void complete_all(struct completion *x) | |||
3268 | EXPORT_SYMBOL(complete_all); | 3258 | EXPORT_SYMBOL(complete_all); |
3269 | 3259 | ||
3270 | static inline long __sched | 3260 | static inline long __sched |
3271 | do_wait_for_common(struct completion *x, long timeout, int state) | 3261 | do_wait_for_common(struct completion *x, |
3262 | long (*action)(long), long timeout, int state) | ||
3272 | { | 3263 | { |
3273 | if (!x->done) { | 3264 | if (!x->done) { |
3274 | DECLARE_WAITQUEUE(wait, current); | 3265 | DECLARE_WAITQUEUE(wait, current); |
@@ -3281,7 +3272,7 @@ do_wait_for_common(struct completion *x, long timeout, int state) | |||
3281 | } | 3272 | } |
3282 | __set_current_state(state); | 3273 | __set_current_state(state); |
3283 | spin_unlock_irq(&x->wait.lock); | 3274 | spin_unlock_irq(&x->wait.lock); |
3284 | timeout = schedule_timeout(timeout); | 3275 | timeout = action(timeout); |
3285 | spin_lock_irq(&x->wait.lock); | 3276 | spin_lock_irq(&x->wait.lock); |
3286 | } while (!x->done && timeout); | 3277 | } while (!x->done && timeout); |
3287 | __remove_wait_queue(&x->wait, &wait); | 3278 | __remove_wait_queue(&x->wait, &wait); |
@@ -3292,17 +3283,30 @@ do_wait_for_common(struct completion *x, long timeout, int state) | |||
3292 | return timeout ?: 1; | 3283 | return timeout ?: 1; |
3293 | } | 3284 | } |
3294 | 3285 | ||
3295 | static long __sched | 3286 | static inline long __sched |
3296 | wait_for_common(struct completion *x, long timeout, int state) | 3287 | __wait_for_common(struct completion *x, |
3288 | long (*action)(long), long timeout, int state) | ||
3297 | { | 3289 | { |
3298 | might_sleep(); | 3290 | might_sleep(); |
3299 | 3291 | ||
3300 | spin_lock_irq(&x->wait.lock); | 3292 | spin_lock_irq(&x->wait.lock); |
3301 | timeout = do_wait_for_common(x, timeout, state); | 3293 | timeout = do_wait_for_common(x, action, timeout, state); |
3302 | spin_unlock_irq(&x->wait.lock); | 3294 | spin_unlock_irq(&x->wait.lock); |
3303 | return timeout; | 3295 | return timeout; |
3304 | } | 3296 | } |
3305 | 3297 | ||
3298 | static long __sched | ||
3299 | wait_for_common(struct completion *x, long timeout, int state) | ||
3300 | { | ||
3301 | return __wait_for_common(x, schedule_timeout, timeout, state); | ||
3302 | } | ||
3303 | |||
3304 | static long __sched | ||
3305 | wait_for_common_io(struct completion *x, long timeout, int state) | ||
3306 | { | ||
3307 | return __wait_for_common(x, io_schedule_timeout, timeout, state); | ||
3308 | } | ||
3309 | |||
3306 | /** | 3310 | /** |
3307 | * wait_for_completion: - waits for completion of a task | 3311 | * wait_for_completion: - waits for completion of a task |
3308 | * @x: holds the state of this particular completion | 3312 | * @x: holds the state of this particular completion |
@@ -3339,6 +3343,39 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout) | |||
3339 | EXPORT_SYMBOL(wait_for_completion_timeout); | 3343 | EXPORT_SYMBOL(wait_for_completion_timeout); |
3340 | 3344 | ||
3341 | /** | 3345 | /** |
3346 | * wait_for_completion_io: - waits for completion of a task | ||
3347 | * @x: holds the state of this particular completion | ||
3348 | * | ||
3349 | * This waits to be signaled for completion of a specific task. It is NOT | ||
3350 | * interruptible and there is no timeout. The caller is accounted as waiting | ||
3351 | * for IO. | ||
3352 | */ | ||
3353 | void __sched wait_for_completion_io(struct completion *x) | ||
3354 | { | ||
3355 | wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); | ||
3356 | } | ||
3357 | EXPORT_SYMBOL(wait_for_completion_io); | ||
3358 | |||
3359 | /** | ||
3360 | * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout) | ||
3361 | * @x: holds the state of this particular completion | ||
3362 | * @timeout: timeout value in jiffies | ||
3363 | * | ||
3364 | * This waits for either a completion of a specific task to be signaled or for a | ||
3365 | * specified timeout to expire. The timeout is in jiffies. It is not | ||
3366 | * interruptible. The caller is accounted as waiting for IO. | ||
3367 | * | ||
3368 | * The return value is 0 if timed out, and positive (at least 1, or number of | ||
3369 | * jiffies left till timeout) if completed. | ||
3370 | */ | ||
3371 | unsigned long __sched | ||
3372 | wait_for_completion_io_timeout(struct completion *x, unsigned long timeout) | ||
3373 | { | ||
3374 | return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE); | ||
3375 | } | ||
3376 | EXPORT_SYMBOL(wait_for_completion_io_timeout); | ||
3377 | |||
3378 | /** | ||
3342 | * wait_for_completion_interruptible: - waits for completion of a task (w/intr) | 3379 | * wait_for_completion_interruptible: - waits for completion of a task (w/intr) |
3343 | * @x: holds the state of this particular completion | 3380 | * @x: holds the state of this particular completion |
3344 | * | 3381 | * |
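The new wait_for_completion_io() and wait_for_completion_io_timeout() variants behave like their plain counterparts but sleep via io_schedule_timeout(), so the waiter is charged as iowait. A hedged sketch of a block-style driver waiting on a transfer; struct example_request is hypothetical.

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

struct example_request {
	struct completion done;
};

static int example_wait_for_transfer(struct example_request *rq)
{
	unsigned long left;

	/* Same semantics as wait_for_completion_timeout(), accounted as iowait. */
	left = wait_for_completion_io_timeout(&rq->done, msecs_to_jiffies(5000));
	return left ? 0 : -ETIMEDOUT;
}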
@@ -4364,7 +4401,10 @@ EXPORT_SYMBOL(yield); | |||
4364 | * It's the caller's job to ensure that the target task struct | 4401 | * It's the caller's job to ensure that the target task struct |
4365 | * can't go away on us before we can do any checks. | 4402 | * can't go away on us before we can do any checks. |
4366 | * | 4403 | * |
4367 | * Returns true if we indeed boosted the target task. | 4404 | * Returns: |
4405 | * true (>0) if we indeed boosted the target task. | ||
4406 | * false (0) if we failed to boost the target. | ||
4407 | * -ESRCH if there's no task to yield to. | ||
4368 | */ | 4408 | */ |
4369 | bool __sched yield_to(struct task_struct *p, bool preempt) | 4409 | bool __sched yield_to(struct task_struct *p, bool preempt) |
4370 | { | 4410 | { |
@@ -4378,6 +4418,15 @@ bool __sched yield_to(struct task_struct *p, bool preempt) | |||
4378 | 4418 | ||
4379 | again: | 4419 | again: |
4380 | p_rq = task_rq(p); | 4420 | p_rq = task_rq(p); |
4421 | /* | ||
4422 | * If we're the only runnable task on the rq and target rq also | ||
4423 | * has only one task, there's absolutely no point in yielding. | ||
4424 | */ | ||
4425 | if (rq->nr_running == 1 && p_rq->nr_running == 1) { | ||
4426 | yielded = -ESRCH; | ||
4427 | goto out_irq; | ||
4428 | } | ||
4429 | |||
4381 | double_rq_lock(rq, p_rq); | 4430 | double_rq_lock(rq, p_rq); |
4382 | while (task_rq(p) != p_rq) { | 4431 | while (task_rq(p) != p_rq) { |
4383 | double_rq_unlock(rq, p_rq); | 4432 | double_rq_unlock(rq, p_rq); |
@@ -4385,13 +4434,13 @@ again: | |||
4385 | } | 4434 | } |
4386 | 4435 | ||
4387 | if (!curr->sched_class->yield_to_task) | 4436 | if (!curr->sched_class->yield_to_task) |
4388 | goto out; | 4437 | goto out_unlock; |
4389 | 4438 | ||
4390 | if (curr->sched_class != p->sched_class) | 4439 | if (curr->sched_class != p->sched_class) |
4391 | goto out; | 4440 | goto out_unlock; |
4392 | 4441 | ||
4393 | if (task_running(p_rq, p) || p->state) | 4442 | if (task_running(p_rq, p) || p->state) |
4394 | goto out; | 4443 | goto out_unlock; |
4395 | 4444 | ||
4396 | yielded = curr->sched_class->yield_to_task(rq, p, preempt); | 4445 | yielded = curr->sched_class->yield_to_task(rq, p, preempt); |
4397 | if (yielded) { | 4446 | if (yielded) { |
@@ -4404,11 +4453,12 @@ again: | |||
4404 | resched_task(p_rq->curr); | 4453 | resched_task(p_rq->curr); |
4405 | } | 4454 | } |
4406 | 4455 | ||
4407 | out: | 4456 | out_unlock: |
4408 | double_rq_unlock(rq, p_rq); | 4457 | double_rq_unlock(rq, p_rq); |
4458 | out_irq: | ||
4409 | local_irq_restore(flags); | 4459 | local_irq_restore(flags); |
4410 | 4460 | ||
4411 | if (yielded) | 4461 | if (yielded > 0) |
4412 | schedule(); | 4462 | schedule(); |
4413 | 4463 | ||
4414 | return yielded; | 4464 | return yielded; |
@@ -7161,7 +7211,6 @@ static void free_sched_group(struct task_group *tg) | |||
7161 | struct task_group *sched_create_group(struct task_group *parent) | 7211 | struct task_group *sched_create_group(struct task_group *parent) |
7162 | { | 7212 | { |
7163 | struct task_group *tg; | 7213 | struct task_group *tg; |
7164 | unsigned long flags; | ||
7165 | 7214 | ||
7166 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | 7215 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); |
7167 | if (!tg) | 7216 | if (!tg) |
@@ -7173,6 +7222,17 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
7173 | if (!alloc_rt_sched_group(tg, parent)) | 7222 | if (!alloc_rt_sched_group(tg, parent)) |
7174 | goto err; | 7223 | goto err; |
7175 | 7224 | ||
7225 | return tg; | ||
7226 | |||
7227 | err: | ||
7228 | free_sched_group(tg); | ||
7229 | return ERR_PTR(-ENOMEM); | ||
7230 | } | ||
7231 | |||
7232 | void sched_online_group(struct task_group *tg, struct task_group *parent) | ||
7233 | { | ||
7234 | unsigned long flags; | ||
7235 | |||
7176 | spin_lock_irqsave(&task_group_lock, flags); | 7236 | spin_lock_irqsave(&task_group_lock, flags); |
7177 | list_add_rcu(&tg->list, &task_groups); | 7237 | list_add_rcu(&tg->list, &task_groups); |
7178 | 7238 | ||
@@ -7182,12 +7242,6 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
7182 | INIT_LIST_HEAD(&tg->children); | 7242 | INIT_LIST_HEAD(&tg->children); |
7183 | list_add_rcu(&tg->siblings, &parent->children); | 7243 | list_add_rcu(&tg->siblings, &parent->children); |
7184 | spin_unlock_irqrestore(&task_group_lock, flags); | 7244 | spin_unlock_irqrestore(&task_group_lock, flags); |
7185 | |||
7186 | return tg; | ||
7187 | |||
7188 | err: | ||
7189 | free_sched_group(tg); | ||
7190 | return ERR_PTR(-ENOMEM); | ||
7191 | } | 7245 | } |
7192 | 7246 | ||
7193 | /* rcu callback to free various structures associated with a task group */ | 7247 | /* rcu callback to free various structures associated with a task group */ |
@@ -7200,6 +7254,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp) | |||
7200 | /* Destroy runqueue etc associated with a task group */ | 7254 | /* Destroy runqueue etc associated with a task group */ |
7201 | void sched_destroy_group(struct task_group *tg) | 7255 | void sched_destroy_group(struct task_group *tg) |
7202 | { | 7256 | { |
7257 | /* wait for possible concurrent references to cfs_rqs complete */ | ||

7258 | call_rcu(&tg->rcu, free_sched_group_rcu); | ||
7259 | } | ||
7260 | |||
7261 | void sched_offline_group(struct task_group *tg) | ||
7262 | { | ||
7203 | unsigned long flags; | 7263 | unsigned long flags; |
7204 | int i; | 7264 | int i; |
7205 | 7265 | ||
@@ -7211,9 +7271,6 @@ void sched_destroy_group(struct task_group *tg) | |||
7211 | list_del_rcu(&tg->list); | 7271 | list_del_rcu(&tg->list); |
7212 | list_del_rcu(&tg->siblings); | 7272 | list_del_rcu(&tg->siblings); |
7213 | spin_unlock_irqrestore(&task_group_lock, flags); | 7273 | spin_unlock_irqrestore(&task_group_lock, flags); |
7214 | |||
7215 | /* wait for possible concurrent references to cfs_rqs complete */ | ||
7216 | call_rcu(&tg->rcu, free_sched_group_rcu); | ||
7217 | } | 7274 | } |
7218 | 7275 | ||
7219 | /* change task's runqueue when it moves between groups. | 7276 | /* change task's runqueue when it moves between groups. |
@@ -7584,6 +7641,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp) | |||
7584 | return &tg->css; | 7641 | return &tg->css; |
7585 | } | 7642 | } |
7586 | 7643 | ||
7644 | static int cpu_cgroup_css_online(struct cgroup *cgrp) | ||
7645 | { | ||
7646 | struct task_group *tg = cgroup_tg(cgrp); | ||
7647 | struct task_group *parent; | ||
7648 | |||
7649 | if (!cgrp->parent) | ||
7650 | return 0; | ||
7651 | |||
7652 | parent = cgroup_tg(cgrp->parent); | ||
7653 | sched_online_group(tg, parent); | ||
7654 | return 0; | ||
7655 | } | ||
7656 | |||
7587 | static void cpu_cgroup_css_free(struct cgroup *cgrp) | 7657 | static void cpu_cgroup_css_free(struct cgroup *cgrp) |
7588 | { | 7658 | { |
7589 | struct task_group *tg = cgroup_tg(cgrp); | 7659 | struct task_group *tg = cgroup_tg(cgrp); |
@@ -7591,6 +7661,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp) | |||
7591 | sched_destroy_group(tg); | 7661 | sched_destroy_group(tg); |
7592 | } | 7662 | } |
7593 | 7663 | ||
7664 | static void cpu_cgroup_css_offline(struct cgroup *cgrp) | ||
7665 | { | ||
7666 | struct task_group *tg = cgroup_tg(cgrp); | ||
7667 | |||
7668 | sched_offline_group(tg); | ||
7669 | } | ||
7670 | |||
7594 | static int cpu_cgroup_can_attach(struct cgroup *cgrp, | 7671 | static int cpu_cgroup_can_attach(struct cgroup *cgrp, |
7595 | struct cgroup_taskset *tset) | 7672 | struct cgroup_taskset *tset) |
7596 | { | 7673 | { |
@@ -7946,6 +8023,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
7946 | .name = "cpu", | 8023 | .name = "cpu", |
7947 | .css_alloc = cpu_cgroup_css_alloc, | 8024 | .css_alloc = cpu_cgroup_css_alloc, |
7948 | .css_free = cpu_cgroup_css_free, | 8025 | .css_free = cpu_cgroup_css_free, |
8026 | .css_online = cpu_cgroup_css_online, | ||
8027 | .css_offline = cpu_cgroup_css_offline, | ||
7949 | .can_attach = cpu_cgroup_can_attach, | 8028 | .can_attach = cpu_cgroup_can_attach, |
7950 | .attach = cpu_cgroup_attach, | 8029 | .attach = cpu_cgroup_attach, |
7951 | .exit = cpu_cgroup_exit, | 8030 | .exit = cpu_cgroup_exit, |
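sched_create_group()/sched_destroy_group() are split so that allocation and RCU-deferred freeing are separate from making the group visible on the task_groups list, mirroring the cgroup css_alloc/css_online split used above. A hedged sketch of the pairing; this only compiles inside kernel/sched/, where the internal sched.h declares struct task_group and these helpers.

#include <linux/err.h>
#include "sched.h"	/* scheduler-internal: struct task_group and the group helpers */

static struct task_group *example_make_group(struct task_group *parent)
{
	struct task_group *tg = sched_create_group(parent);	/* allocate runqueues */

	if (IS_ERR(tg))
		return tg;
	sched_online_group(tg, parent);		/* link into the task_groups list */
	return tg;
}

static void example_remove_group(struct task_group *tg)
{
	sched_offline_group(tg);	/* unlink; no new references can be taken */
	sched_destroy_group(tg);	/* free after an RCU grace period */
}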
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 9857329ed280..ed12cbb135f4 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
@@ -604,7 +604,7 @@ static unsigned long long vtime_delta(struct task_struct *tsk) | |||
604 | { | 604 | { |
605 | unsigned long long clock; | 605 | unsigned long long clock; |
606 | 606 | ||
607 | clock = sched_clock(); | 607 | clock = local_clock(); |
608 | if (clock < tsk->vtime_snap) | 608 | if (clock < tsk->vtime_snap) |
609 | return 0; | 609 | return 0; |
610 | 610 | ||
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 7ae4c4c5420e..75024a673520 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c | |||
@@ -110,13 +110,6 @@ static char *task_group_path(struct task_group *tg) | |||
110 | if (autogroup_path(tg, group_path, PATH_MAX)) | 110 | if (autogroup_path(tg, group_path, PATH_MAX)) |
111 | return group_path; | 111 | return group_path; |
112 | 112 | ||
113 | /* | ||
114 | * May be NULL if the underlying cgroup isn't fully-created yet | ||
115 | */ | ||
116 | if (!tg->css.cgroup) { | ||
117 | group_path[0] = '\0'; | ||
118 | return group_path; | ||
119 | } | ||
120 | cgroup_path(tg->css.cgroup, group_path, PATH_MAX); | 113 | cgroup_path(tg->css.cgroup, group_path, PATH_MAX); |
121 | return group_path; | 114 | return group_path; |
122 | } | 115 | } |
@@ -269,11 +262,11 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
269 | { | 262 | { |
270 | unsigned int freq = cpu_khz ? : 1; | 263 | unsigned int freq = cpu_khz ? : 1; |
271 | 264 | ||
272 | SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n", | 265 | SEQ_printf(m, "cpu#%d, %u.%03u MHz\n", |
273 | cpu, freq / 1000, (freq % 1000)); | 266 | cpu, freq / 1000, (freq % 1000)); |
274 | } | 267 | } |
275 | #else | 268 | #else |
276 | SEQ_printf(m, "\ncpu#%d\n", cpu); | 269 | SEQ_printf(m, "cpu#%d\n", cpu); |
277 | #endif | 270 | #endif |
278 | 271 | ||
279 | #define P(x) \ | 272 | #define P(x) \ |
@@ -330,6 +323,7 @@ do { \ | |||
330 | print_rq(m, rq, cpu); | 323 | print_rq(m, rq, cpu); |
331 | rcu_read_unlock(); | 324 | rcu_read_unlock(); |
332 | spin_unlock_irqrestore(&sched_debug_lock, flags); | 325 | spin_unlock_irqrestore(&sched_debug_lock, flags); |
326 | SEQ_printf(m, "\n"); | ||
333 | } | 327 | } |
334 | 328 | ||
335 | static const char *sched_tunable_scaling_names[] = { | 329 | static const char *sched_tunable_scaling_names[] = { |
@@ -338,11 +332,10 @@ static const char *sched_tunable_scaling_names[] = { | |||
338 | "linear" | 332 | "linear" |
339 | }; | 333 | }; |
340 | 334 | ||
341 | static int sched_debug_show(struct seq_file *m, void *v) | 335 | static void sched_debug_header(struct seq_file *m) |
342 | { | 336 | { |
343 | u64 ktime, sched_clk, cpu_clk; | 337 | u64 ktime, sched_clk, cpu_clk; |
344 | unsigned long flags; | 338 | unsigned long flags; |
345 | int cpu; | ||
346 | 339 | ||
347 | local_irq_save(flags); | 340 | local_irq_save(flags); |
348 | ktime = ktime_to_ns(ktime_get()); | 341 | ktime = ktime_to_ns(ktime_get()); |
@@ -384,33 +377,101 @@ static int sched_debug_show(struct seq_file *m, void *v) | |||
384 | #undef PN | 377 | #undef PN |
385 | #undef P | 378 | #undef P |
386 | 379 | ||
387 | SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling", | 380 | SEQ_printf(m, " .%-40s: %d (%s)\n", |
381 | "sysctl_sched_tunable_scaling", | ||
388 | sysctl_sched_tunable_scaling, | 382 | sysctl_sched_tunable_scaling, |
389 | sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); | 383 | sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); |
384 | SEQ_printf(m, "\n"); | ||
385 | } | ||
390 | 386 | ||
391 | for_each_online_cpu(cpu) | 387 | static int sched_debug_show(struct seq_file *m, void *v) |
392 | print_cpu(m, cpu); | 388 | { |
389 | int cpu = (unsigned long)(v - 2); | ||
393 | 390 | ||
394 | SEQ_printf(m, "\n"); | 391 | if (cpu != -1) |
392 | print_cpu(m, cpu); | ||
393 | else | ||
394 | sched_debug_header(m); | ||
395 | 395 | ||
396 | return 0; | 396 | return 0; |
397 | } | 397 | } |
398 | 398 | ||
399 | void sysrq_sched_debug_show(void) | 399 | void sysrq_sched_debug_show(void) |
400 | { | 400 | { |
401 | sched_debug_show(NULL, NULL); | 401 | int cpu; |
402 | |||
403 | sched_debug_header(NULL); | ||
404 | for_each_online_cpu(cpu) | ||
405 | print_cpu(NULL, cpu); | ||
406 | |||
407 | } | ||
408 | |||
409 | /* | ||
410 | * This iterator needs some explanation. | ||
411 | * It returns 1 for the header position. | ||
412 | * This means 2 is cpu 0. | ||
413 | * In a hotplugged system some cpus, including cpu 0, may be missing so we have | ||
414 | * to use cpumask_* to iterate over the cpus. | ||
415 | */ | ||
416 | static void *sched_debug_start(struct seq_file *file, loff_t *offset) | ||
417 | { | ||
418 | unsigned long n = *offset; | ||
419 | |||
420 | if (n == 0) | ||
421 | return (void *) 1; | ||
422 | |||
423 | n--; | ||
424 | |||
425 | if (n > 0) | ||
426 | n = cpumask_next(n - 1, cpu_online_mask); | ||
427 | else | ||
428 | n = cpumask_first(cpu_online_mask); | ||
429 | |||
430 | *offset = n + 1; | ||
431 | |||
432 | if (n < nr_cpu_ids) | ||
433 | return (void *)(unsigned long)(n + 2); | ||
434 | return NULL; | ||
435 | } | ||
436 | |||
437 | static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset) | ||
438 | { | ||
439 | (*offset)++; | ||
440 | return sched_debug_start(file, offset); | ||
441 | } | ||
442 | |||
443 | static void sched_debug_stop(struct seq_file *file, void *data) | ||
444 | { | ||
445 | } | ||
446 | |||
447 | static const struct seq_operations sched_debug_sops = { | ||
448 | .start = sched_debug_start, | ||
449 | .next = sched_debug_next, | ||
450 | .stop = sched_debug_stop, | ||
451 | .show = sched_debug_show, | ||
452 | }; | ||
453 | |||
454 | static int sched_debug_release(struct inode *inode, struct file *file) | ||
455 | { | ||
456 | seq_release(inode, file); | ||
457 | |||
458 | return 0; | ||
402 | } | 459 | } |
403 | 460 | ||
404 | static int sched_debug_open(struct inode *inode, struct file *filp) | 461 | static int sched_debug_open(struct inode *inode, struct file *filp) |
405 | { | 462 | { |
406 | return single_open(filp, sched_debug_show, NULL); | 463 | int ret = 0; |
464 | |||
465 | ret = seq_open(filp, &sched_debug_sops); | ||
466 | |||
467 | return ret; | ||
407 | } | 468 | } |
408 | 469 | ||
409 | static const struct file_operations sched_debug_fops = { | 470 | static const struct file_operations sched_debug_fops = { |
410 | .open = sched_debug_open, | 471 | .open = sched_debug_open, |
411 | .read = seq_read, | 472 | .read = seq_read, |
412 | .llseek = seq_lseek, | 473 | .llseek = seq_lseek, |
413 | .release = single_release, | 474 | .release = sched_debug_release, |
414 | }; | 475 | }; |
415 | 476 | ||
416 | static int __init init_sched_debug_procfs(void) | 477 | static int __init init_sched_debug_procfs(void) |
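Both this file and sched/stats.c below move from single_open(), which built one big buffer covering every cpu, to a proper seq_file iterator that emits a header record at position 1 and cpu n at position n + 2, skipping hotplug holes with cpumask_next(). A hedged, generic sketch of that pattern; all names are hypothetical.

#include <linux/cpumask.h>
#include <linux/seq_file.h>

static void *example_start(struct seq_file *m, loff_t *pos)
{
	unsigned long n = *pos;

	if (n == 0)
		return (void *)1;			/* header record */

	n = (n > 1) ? cpumask_next(n - 2, cpu_online_mask)
		    : cpumask_first(cpu_online_mask);
	*pos = n + 1;
	return (n < nr_cpu_ids) ? (void *)(n + 2) : NULL;
}

static void *example_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return example_start(m, pos);
}

static void example_stop(struct seq_file *m, void *v)
{
}

static int example_show(struct seq_file *m, void *v)
{
	if (v == (void *)1)
		seq_puts(m, "header line\n");
	else
		seq_printf(m, "cpu%lu: per-cpu stats would go here\n",
			   (unsigned long)v - 2);
	return 0;
}

static const struct seq_operations example_sops = {
	.start = example_start,
	.next  = example_next,
	.stop  = example_stop,
	.show  = example_show,
};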
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 903ffa9e8872..e036eda1a9c9 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c | |||
@@ -21,14 +21,17 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
21 | if (mask_str == NULL) | 21 | if (mask_str == NULL) |
22 | return -ENOMEM; | 22 | return -ENOMEM; |
23 | 23 | ||
24 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | 24 | if (v == (void *)1) { |
25 | seq_printf(seq, "timestamp %lu\n", jiffies); | 25 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); |
26 | for_each_online_cpu(cpu) { | 26 | seq_printf(seq, "timestamp %lu\n", jiffies); |
27 | struct rq *rq = cpu_rq(cpu); | 27 | } else { |
28 | struct rq *rq; | ||
28 | #ifdef CONFIG_SMP | 29 | #ifdef CONFIG_SMP |
29 | struct sched_domain *sd; | 30 | struct sched_domain *sd; |
30 | int dcount = 0; | 31 | int dcount = 0; |
31 | #endif | 32 | #endif |
33 | cpu = (unsigned long)(v - 2); | ||
34 | rq = cpu_rq(cpu); | ||
32 | 35 | ||
33 | /* runqueue-specific stats */ | 36 | /* runqueue-specific stats */ |
34 | seq_printf(seq, | 37 | seq_printf(seq, |
@@ -77,30 +80,66 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
77 | return 0; | 80 | return 0; |
78 | } | 81 | } |
79 | 82 | ||
80 | static int schedstat_open(struct inode *inode, struct file *file) | 83 | /* |
84 | * This iterator needs some explanation. | ||
85 | * It returns 1 for the header position. | ||
86 | * This means 2 is cpu 0. | ||
87 | * In a hotplugged system some cpus, including cpu 0, may be missing so we have | ||
88 | * to use cpumask_* to iterate over the cpus. | ||
89 | */ | ||
90 | static void *schedstat_start(struct seq_file *file, loff_t *offset) | ||
81 | { | 91 | { |
82 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | 92 | unsigned long n = *offset; |
83 | char *buf = kmalloc(size, GFP_KERNEL); | ||
84 | struct seq_file *m; | ||
85 | int res; | ||
86 | 93 | ||
87 | if (!buf) | 94 | if (n == 0) |
88 | return -ENOMEM; | 95 | return (void *) 1; |
89 | res = single_open(file, show_schedstat, NULL); | 96 | |
90 | if (!res) { | 97 | n--; |
91 | m = file->private_data; | 98 | |
92 | m->buf = buf; | 99 | if (n > 0) |
93 | m->size = size; | 100 | n = cpumask_next(n - 1, cpu_online_mask); |
94 | } else | 101 | else |
95 | kfree(buf); | 102 | n = cpumask_first(cpu_online_mask); |
96 | return res; | 103 | |
104 | *offset = n + 1; | ||
105 | |||
106 | if (n < nr_cpu_ids) | ||
107 | return (void *)(unsigned long)(n + 2); | ||
108 | return NULL; | ||
109 | } | ||
110 | |||
111 | static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset) | ||
112 | { | ||
113 | (*offset)++; | ||
114 | return schedstat_start(file, offset); | ||
115 | } | ||
116 | |||
117 | static void schedstat_stop(struct seq_file *file, void *data) | ||
118 | { | ||
119 | } | ||
120 | |||
121 | static const struct seq_operations schedstat_sops = { | ||
122 | .start = schedstat_start, | ||
123 | .next = schedstat_next, | ||
124 | .stop = schedstat_stop, | ||
125 | .show = show_schedstat, | ||
126 | }; | ||
127 | |||
128 | static int schedstat_open(struct inode *inode, struct file *file) | ||
129 | { | ||
130 | return seq_open(file, &schedstat_sops); | ||
97 | } | 131 | } |
98 | 132 | ||
133 | static int schedstat_release(struct inode *inode, struct file *file) | ||
134 | { | ||
135 | return 0; | ||
136 | }; | ||
137 | |||
99 | static const struct file_operations proc_schedstat_operations = { | 138 | static const struct file_operations proc_schedstat_operations = { |
100 | .open = schedstat_open, | 139 | .open = schedstat_open, |
101 | .read = seq_read, | 140 | .read = seq_read, |
102 | .llseek = seq_lseek, | 141 | .llseek = seq_lseek, |
103 | .release = single_release, | 142 | .release = schedstat_release, |
104 | }; | 143 | }; |
105 | 144 | ||
106 | static int __init proc_schedstat_init(void) | 145 | static int __init proc_schedstat_init(void) |
diff --git a/kernel/signal.c b/kernel/signal.c index 7f82adbad480..dd72567767d9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -485,6 +485,9 @@ flush_signal_handlers(struct task_struct *t, int force_default) | |||
485 | if (force_default || ka->sa.sa_handler != SIG_IGN) | 485 | if (force_default || ka->sa.sa_handler != SIG_IGN) |
486 | ka->sa.sa_handler = SIG_DFL; | 486 | ka->sa.sa_handler = SIG_DFL; |
487 | ka->sa.sa_flags = 0; | 487 | ka->sa.sa_flags = 0; |
488 | #ifdef __ARCH_HAS_SA_RESTORER | ||
489 | ka->sa.sa_restorer = NULL; | ||
490 | #endif | ||
488 | sigemptyset(&ka->sa.sa_mask); | 491 | sigemptyset(&ka->sa.sa_mask); |
489 | ka++; | 492 | ka++; |
490 | } | 493 | } |
@@ -1157,11 +1160,11 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
1157 | static void print_fatal_signal(int signr) | 1160 | static void print_fatal_signal(int signr) |
1158 | { | 1161 | { |
1159 | struct pt_regs *regs = signal_pt_regs(); | 1162 | struct pt_regs *regs = signal_pt_regs(); |
1160 | printk("%s/%d: potentially unexpected fatal signal %d.\n", | 1163 | printk(KERN_INFO "%s/%d: potentially unexpected fatal signal %d.\n", |
1161 | current->comm, task_pid_nr(current), signr); | 1164 | current->comm, task_pid_nr(current), signr); |
1162 | 1165 | ||
1163 | #if defined(__i386__) && !defined(__arch_um__) | 1166 | #if defined(__i386__) && !defined(__arch_um__) |
1164 | printk("code at %08lx: ", regs->ip); | 1167 | printk(KERN_INFO "code at %08lx: ", regs->ip); |
1165 | { | 1168 | { |
1166 | int i; | 1169 | int i; |
1167 | for (i = 0; i < 16; i++) { | 1170 | for (i = 0; i < 16; i++) { |
@@ -1169,11 +1172,11 @@ static void print_fatal_signal(int signr) | |||
1169 | 1172 | ||
1170 | if (get_user(insn, (unsigned char *)(regs->ip + i))) | 1173 | if (get_user(insn, (unsigned char *)(regs->ip + i))) |
1171 | break; | 1174 | break; |
1172 | printk("%02x ", insn); | 1175 | printk(KERN_CONT "%02x ", insn); |
1173 | } | 1176 | } |
1174 | } | 1177 | } |
1178 | printk(KERN_CONT "\n"); | ||
1175 | #endif | 1179 | #endif |
1176 | printk("\n"); | ||
1177 | preempt_disable(); | 1180 | preempt_disable(); |
1178 | show_regs(regs); | 1181 | show_regs(regs); |
1179 | preempt_enable(); | 1182 | preempt_enable(); |
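print_fatal_signal() now tags its byte-dump fragments with KERN_CONT so the log keeps them on one record instead of splitting each byte onto its own line. A hedged minimal sketch of the idiom:

#include <linux/printk.h>

static void example_dump_code(const unsigned char *buf, int len)
{
	int i;

	printk(KERN_INFO "code bytes:");
	for (i = 0; i < len; i++)
		printk(KERN_CONT " %02x", buf[i]);
	printk(KERN_CONT "\n");
}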
@@ -2399,6 +2402,15 @@ void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, | |||
2399 | tracehook_signal_handler(sig, info, ka, regs, stepping); | 2402 | tracehook_signal_handler(sig, info, ka, regs, stepping); |
2400 | } | 2403 | } |
2401 | 2404 | ||
2405 | void signal_setup_done(int failed, struct ksignal *ksig, int stepping) | ||
2406 | { | ||
2407 | if (failed) | ||
2408 | force_sigsegv(ksig->sig, current); | ||
2409 | else | ||
2410 | signal_delivered(ksig->sig, &ksig->info, &ksig->ka, | ||
2411 | signal_pt_regs(), stepping); | ||
2412 | } | ||
2413 | |||
2402 | /* | 2414 | /* |
2403 | * It could be that complete_signal() picked us to notify about the | 2415 | * It could be that complete_signal() picked us to notify about the |
2404 | * group-wide signal. Other threads should be notified now to take | 2416 | * group-wide signal. Other threads should be notified now to take |
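signal_setup_done() folds the common "force SIGSEGV on frame-setup failure, otherwise call signal_delivered()" tail shared by every architecture into one helper. A hedged sketch of how an arch signal path might use it, assuming struct ksignal comes from <linux/signal.h> in this tree; setup_example_frame() is a hypothetical stand-in for the arch frame writer.

#include <linux/ptrace.h>
#include <linux/signal.h>

/* Hypothetical arch helper: writes the user-space signal frame, nonzero on fault. */
static int setup_example_frame(struct ksignal *ksig, struct pt_regs *regs)
{
	return 0;
}

static void example_handle_signal(struct ksignal *ksig, struct pt_regs *regs,
				  int stepping)
{
	int failed = setup_example_frame(ksig, regs);

	/* Forces SIGSEGV on failure, otherwise finishes delivery bookkeeping. */
	signal_setup_done(failed, ksig, stepping);
}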
@@ -2616,40 +2628,95 @@ SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset, | |||
2616 | return 0; | 2628 | return 0; |
2617 | } | 2629 | } |
2618 | 2630 | ||
2619 | long do_sigpending(void __user *set, unsigned long sigsetsize) | 2631 | #ifdef CONFIG_COMPAT |
2632 | COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, | ||
2633 | compat_sigset_t __user *, oset, compat_size_t, sigsetsize) | ||
2620 | { | 2634 | { |
2621 | long error = -EINVAL; | 2635 | #ifdef __BIG_ENDIAN |
2622 | sigset_t pending; | 2636 | sigset_t old_set = current->blocked; |
2637 | |||
2638 | /* XXX: Don't preclude handling different sized sigset_t's. */ | ||
2639 | if (sigsetsize != sizeof(sigset_t)) | ||
2640 | return -EINVAL; | ||
2641 | |||
2642 | if (nset) { | ||
2643 | compat_sigset_t new32; | ||
2644 | sigset_t new_set; | ||
2645 | int error; | ||
2646 | if (copy_from_user(&new32, nset, sizeof(compat_sigset_t))) | ||
2647 | return -EFAULT; | ||
2648 | |||
2649 | sigset_from_compat(&new_set, &new32); | ||
2650 | sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); | ||
2651 | |||
2652 | error = sigprocmask(how, &new_set, NULL); | ||
2653 | if (error) | ||
2654 | return error; | ||
2655 | } | ||
2656 | if (oset) { | ||
2657 | compat_sigset_t old32; | ||
2658 | sigset_to_compat(&old32, &old_set); | ||
2659 | if (copy_to_user(oset, &old32, sizeof(compat_sigset_t))) | ||
2660 | return -EFAULT; | ||
2661 | } | ||
2662 | return 0; | ||
2663 | #else | ||
2664 | return sys_rt_sigprocmask(how, (sigset_t __user *)nset, | ||
2665 | (sigset_t __user *)oset, sigsetsize); | ||
2666 | #endif | ||
2667 | } | ||
2668 | #endif | ||
2623 | 2669 | ||
2670 | static int do_sigpending(void *set, unsigned long sigsetsize) | ||
2671 | { | ||
2624 | if (sigsetsize > sizeof(sigset_t)) | 2672 | if (sigsetsize > sizeof(sigset_t)) |
2625 | goto out; | 2673 | return -EINVAL; |
2626 | 2674 | ||
2627 | spin_lock_irq(¤t->sighand->siglock); | 2675 | spin_lock_irq(¤t->sighand->siglock); |
2628 | sigorsets(&pending, ¤t->pending.signal, | 2676 | sigorsets(set, ¤t->pending.signal, |
2629 | ¤t->signal->shared_pending.signal); | 2677 | ¤t->signal->shared_pending.signal); |
2630 | spin_unlock_irq(¤t->sighand->siglock); | 2678 | spin_unlock_irq(¤t->sighand->siglock); |
2631 | 2679 | ||
2632 | /* Outside the lock because only this thread touches it. */ | 2680 | /* Outside the lock because only this thread touches it. */ |
2633 | sigandsets(&pending, ¤t->blocked, &pending); | 2681 | sigandsets(set, ¤t->blocked, set); |
2634 | 2682 | return 0; | |
2635 | error = -EFAULT; | ||
2636 | if (!copy_to_user(set, &pending, sigsetsize)) | ||
2637 | error = 0; | ||
2638 | |||
2639 | out: | ||
2640 | return error; | ||
2641 | } | 2683 | } |
2642 | 2684 | ||
2643 | /** | 2685 | /** |
2644 | * sys_rt_sigpending - examine a pending signal that has been raised | 2686 | * sys_rt_sigpending - examine a pending signal that has been raised |
2645 | * while blocked | 2687 | * while blocked |
2646 | * @set: stores pending signals | 2688 | * @uset: stores pending signals |
2647 | * @sigsetsize: size of sigset_t type or larger | 2689 | * @sigsetsize: size of sigset_t type or larger |
2648 | */ | 2690 | */ |
2649 | SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize) | 2691 | SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize) |
2650 | { | 2692 | { |
2651 | return do_sigpending(set, sigsetsize); | 2693 | sigset_t set; |
2694 | int err = do_sigpending(&set, sigsetsize); | ||
2695 | if (!err && copy_to_user(uset, &set, sigsetsize)) | ||
2696 | err = -EFAULT; | ||
2697 | return err; | ||
2698 | } | ||
2699 | |||
2700 | #ifdef CONFIG_COMPAT | ||
2701 | COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, | ||
2702 | compat_size_t, sigsetsize) | ||
2703 | { | ||
2704 | #ifdef __BIG_ENDIAN | ||
2705 | sigset_t set; | ||
2706 | int err = do_sigpending(&set, sigsetsize); | ||
2707 | if (!err) { | ||
2708 | compat_sigset_t set32; | ||
2709 | sigset_to_compat(&set32, &set); | ||
2710 | /* we can get here only if sigsetsize <= sizeof(set) */ | ||
2711 | if (copy_to_user(uset, &set32, sigsetsize)) | ||
2712 | err = -EFAULT; | ||
2713 | } | ||
2714 | return err; | ||
2715 | #else | ||
2716 | return sys_rt_sigpending((sigset_t __user *)uset, sigsetsize); | ||
2717 | #endif | ||
2652 | } | 2718 | } |
2719 | #endif | ||
2653 | 2720 | ||
2654 | #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER | 2721 | #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER |
2655 | 2722 | ||
@@ -2927,6 +2994,23 @@ SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) | |||
2927 | return do_tkill(0, pid, sig); | 2994 | return do_tkill(0, pid, sig); |
2928 | } | 2995 | } |
2929 | 2996 | ||
2997 | static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info) | ||
2998 | { | ||
2999 | /* Not even root can pretend to send signals from the kernel. | ||
3000 | * Nor can they impersonate a kill()/tgkill(), which adds source info. | ||
3001 | */ | ||
3002 | if ((info->si_code >= 0 || info->si_code == SI_TKILL) && | ||
3003 | (task_pid_vnr(current) != pid)) { | ||
3004 | /* We used to allow any < 0 si_code */ | ||
3005 | WARN_ON_ONCE(info->si_code < 0); | ||
3006 | return -EPERM; | ||
3007 | } | ||
3008 | info->si_signo = sig; | ||
3009 | |||
3010 | /* POSIX.1b doesn't mention process groups. */ | ||
3011 | return kill_proc_info(sig, info, pid); | ||
3012 | } | ||
3013 | |||
2930 | /** | 3014 | /** |
2931 | * sys_rt_sigqueueinfo - send signal information to a signal | 3015 | * sys_rt_sigqueueinfo - send signal information to a signal |
2932 | * @pid: the PID of the thread | 3016 | * @pid: the PID of the thread |
@@ -2937,25 +3021,26 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, | |||
2937 | siginfo_t __user *, uinfo) | 3021 | siginfo_t __user *, uinfo) |
2938 | { | 3022 | { |
2939 | siginfo_t info; | 3023 | siginfo_t info; |
2940 | |||
2941 | if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) | 3024 | if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) |
2942 | return -EFAULT; | 3025 | return -EFAULT; |
3026 | return do_rt_sigqueueinfo(pid, sig, &info); | ||
3027 | } | ||
2943 | 3028 | ||
2944 | /* Not even root can pretend to send signals from the kernel. | 3029 | #ifdef CONFIG_COMPAT |
2945 | * Nor can they impersonate a kill()/tgkill(), which adds source info. | 3030 | COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, |
2946 | */ | 3031 | compat_pid_t, pid, |
2947 | if (info.si_code >= 0 || info.si_code == SI_TKILL) { | 3032 | int, sig, |
2948 | /* We used to allow any < 0 si_code */ | 3033 | struct compat_siginfo __user *, uinfo) |
2949 | WARN_ON_ONCE(info.si_code < 0); | 3034 | { |
2950 | return -EPERM; | 3035 | siginfo_t info; |
2951 | } | 3036 | int ret = copy_siginfo_from_user32(&info, uinfo); |
2952 | info.si_signo = sig; | 3037 | if (unlikely(ret)) |
2953 | 3038 | return ret; | |
2954 | /* POSIX.1b doesn't mention process groups. */ | 3039 | return do_rt_sigqueueinfo(pid, sig, &info); |
2955 | return kill_proc_info(sig, &info, pid); | ||
2956 | } | 3040 | } |
3041 | #endif | ||
2957 | 3042 | ||
2958 | long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) | 3043 | static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) |
2959 | { | 3044 | { |
2960 | /* This is only valid for single tasks */ | 3045 | /* This is only valid for single tasks */ |
2961 | if (pid <= 0 || tgid <= 0) | 3046 | if (pid <= 0 || tgid <= 0) |
@@ -2964,7 +3049,8 @@ long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) | |||
2964 | /* Not even root can pretend to send signals from the kernel. | 3049 | /* Not even root can pretend to send signals from the kernel. |
2965 | * Nor can they impersonate a kill()/tgkill(), which adds source info. | 3050 | * Nor can they impersonate a kill()/tgkill(), which adds source info. |
2966 | */ | 3051 | */ |
2967 | if (info->si_code >= 0 || info->si_code == SI_TKILL) { | 3052 | if (((info->si_code >= 0 || info->si_code == SI_TKILL)) && |
3053 | (task_pid_vnr(current) != pid)) { | ||
2968 | /* We used to allow any < 0 si_code */ | 3054 | /* We used to allow any < 0 si_code */ |
2969 | WARN_ON_ONCE(info->si_code < 0); | 3055 | WARN_ON_ONCE(info->si_code < 0); |
2970 | return -EPERM; | 3056 | return -EPERM; |
@@ -2985,6 +3071,21 @@ SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, | |||
2985 | return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); | 3071 | return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); |
2986 | } | 3072 | } |
2987 | 3073 | ||
3074 | #ifdef CONFIG_COMPAT | ||
3075 | COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, | ||
3076 | compat_pid_t, tgid, | ||
3077 | compat_pid_t, pid, | ||
3078 | int, sig, | ||
3079 | struct compat_siginfo __user *, uinfo) | ||
3080 | { | ||
3081 | siginfo_t info; | ||
3082 | |||
3083 | if (copy_siginfo_from_user32(&info, uinfo)) | ||
3084 | return -EFAULT; | ||
3085 | return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); | ||
3086 | } | ||
3087 | #endif | ||
3088 | |||
2988 | int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) | 3089 | int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) |
2989 | { | 3090 | { |
2990 | struct task_struct *t = current; | 3091 | struct task_struct *t = current; |
@@ -3030,7 +3131,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) | |||
3030 | return 0; | 3131 | return 0; |
3031 | } | 3132 | } |
3032 | 3133 | ||
3033 | int | 3134 | static int |
3034 | do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp) | 3135 | do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp) |
3035 | { | 3136 | { |
3036 | stack_t oss; | 3137 | stack_t oss; |
@@ -3095,12 +3196,10 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s | |||
3095 | out: | 3196 | out: |
3096 | return error; | 3197 | return error; |
3097 | } | 3198 | } |
3098 | #ifdef CONFIG_GENERIC_SIGALTSTACK | ||
3099 | SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) | 3199 | SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) |
3100 | { | 3200 | { |
3101 | return do_sigaltstack(uss, uoss, current_user_stack_pointer()); | 3201 | return do_sigaltstack(uss, uoss, current_user_stack_pointer()); |
3102 | } | 3202 | } |
3103 | #endif | ||
3104 | 3203 | ||
3105 | int restore_altstack(const stack_t __user *uss) | 3204 | int restore_altstack(const stack_t __user *uss) |
3106 | { | 3205 | { |
@@ -3118,7 +3217,6 @@ int __save_altstack(stack_t __user *uss, unsigned long sp) | |||
3118 | } | 3217 | } |
3119 | 3218 | ||
3120 | #ifdef CONFIG_COMPAT | 3219 | #ifdef CONFIG_COMPAT |
3121 | #ifdef CONFIG_GENERIC_SIGALTSTACK | ||
3122 | COMPAT_SYSCALL_DEFINE2(sigaltstack, | 3220 | COMPAT_SYSCALL_DEFINE2(sigaltstack, |
3123 | const compat_stack_t __user *, uss_ptr, | 3221 | const compat_stack_t __user *, uss_ptr, |
3124 | compat_stack_t __user *, uoss_ptr) | 3222 | compat_stack_t __user *, uoss_ptr) |
@@ -3168,7 +3266,6 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) | |||
3168 | __put_user(t->sas_ss_size, &uss->ss_size); | 3266 | __put_user(t->sas_ss_size, &uss->ss_size); |
3169 | } | 3267 | } |
3170 | #endif | 3268 | #endif |
3171 | #endif | ||
3172 | 3269 | ||
3173 | #ifdef __ARCH_WANT_SYS_SIGPENDING | 3270 | #ifdef __ARCH_WANT_SYS_SIGPENDING |
3174 | 3271 | ||
@@ -3178,7 +3275,7 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) | |||
3178 | */ | 3275 | */ |
3179 | SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) | 3276 | SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) |
3180 | { | 3277 | { |
3181 | return do_sigpending(set, sizeof(*set)); | 3278 | return sys_rt_sigpending((sigset_t __user *)set, sizeof(old_sigset_t)); |
3182 | } | 3279 | } |
3183 | 3280 | ||
3184 | #endif | 3281 | #endif |
@@ -3234,7 +3331,7 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, | |||
3234 | } | 3331 | } |
3235 | #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ | 3332 | #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ |
3236 | 3333 | ||
3237 | #ifdef __ARCH_WANT_SYS_RT_SIGACTION | 3334 | #ifndef CONFIG_ODD_RT_SIGACTION |
3238 | /** | 3335 | /** |
3239 | * sys_rt_sigaction - alter an action taken by a process | 3336 | * sys_rt_sigaction - alter an action taken by a process |
3240 | * @sig: signal to be sent | 3337 | * @sig: signal to be sent |
@@ -3268,7 +3365,132 @@ SYSCALL_DEFINE4(rt_sigaction, int, sig, | |||
3268 | out: | 3365 | out: |
3269 | return ret; | 3366 | return ret; |
3270 | } | 3367 | } |
3271 | #endif /* __ARCH_WANT_SYS_RT_SIGACTION */ | 3368 | #ifdef CONFIG_COMPAT |
3369 | COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, | ||
3370 | const struct compat_sigaction __user *, act, | ||
3371 | struct compat_sigaction __user *, oact, | ||
3372 | compat_size_t, sigsetsize) | ||
3373 | { | ||
3374 | struct k_sigaction new_ka, old_ka; | ||
3375 | compat_sigset_t mask; | ||
3376 | #ifdef __ARCH_HAS_SA_RESTORER | ||
3377 | compat_uptr_t restorer; | ||
3378 | #endif | ||
3379 | int ret; | ||
3380 | |||
3381 | /* XXX: Don't preclude handling different sized sigset_t's. */ | ||
3382 | if (sigsetsize != sizeof(compat_sigset_t)) | ||
3383 | return -EINVAL; | ||
3384 | |||
3385 | if (act) { | ||
3386 | compat_uptr_t handler; | ||
3387 | ret = get_user(handler, &act->sa_handler); | ||
3388 | new_ka.sa.sa_handler = compat_ptr(handler); | ||
3389 | #ifdef __ARCH_HAS_SA_RESTORER | ||
3390 | ret |= get_user(restorer, &act->sa_restorer); | ||
3391 | new_ka.sa.sa_restorer = compat_ptr(restorer); | ||
3392 | #endif | ||
3393 | ret |= copy_from_user(&mask, &act->sa_mask, sizeof(mask)); | ||
3394 | ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); | ||
3395 | if (ret) | ||
3396 | return -EFAULT; | ||
3397 | sigset_from_compat(&new_ka.sa.sa_mask, &mask); | ||
3398 | } | ||
3399 | |||
3400 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
3401 | if (!ret && oact) { | ||
3402 | sigset_to_compat(&mask, &old_ka.sa.sa_mask); | ||
3403 | ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), | ||
3404 | &oact->sa_handler); | ||
3405 | ret |= copy_to_user(&oact->sa_mask, &mask, sizeof(mask)); | ||
3406 | ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | ||
3407 | #ifdef __ARCH_HAS_SA_RESTORER | ||
3408 | ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), | ||
3409 | &oact->sa_restorer); | ||
3410 | #endif | ||
3411 | } | ||
3412 | return ret; | ||
3413 | } | ||
3414 | #endif | ||
3415 | #endif /* !CONFIG_ODD_RT_SIGACTION */ | ||
3416 | |||
3417 | #ifdef CONFIG_OLD_SIGACTION | ||
3418 | SYSCALL_DEFINE3(sigaction, int, sig, | ||
3419 | const struct old_sigaction __user *, act, | ||
3420 | struct old_sigaction __user *, oact) | ||
3421 | { | ||
3422 | struct k_sigaction new_ka, old_ka; | ||
3423 | int ret; | ||
3424 | |||
3425 | if (act) { | ||
3426 | old_sigset_t mask; | ||
3427 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | ||
3428 | __get_user(new_ka.sa.sa_handler, &act->sa_handler) || | ||
3429 | __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || | ||
3430 | __get_user(new_ka.sa.sa_flags, &act->sa_flags) || | ||
3431 | __get_user(mask, &act->sa_mask)) | ||
3432 | return -EFAULT; | ||
3433 | #ifdef __ARCH_HAS_KA_RESTORER | ||
3434 | new_ka.ka_restorer = NULL; | ||
3435 | #endif | ||
3436 | siginitset(&new_ka.sa.sa_mask, mask); | ||
3437 | } | ||
3438 | |||
3439 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
3440 | |||
3441 | if (!ret && oact) { | ||
3442 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | ||
3443 | __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || | ||
3444 | __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || | ||
3445 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || | ||
3446 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) | ||
3447 | return -EFAULT; | ||
3448 | } | ||
3449 | |||
3450 | return ret; | ||
3451 | } | ||
3452 | #endif | ||
3453 | #ifdef CONFIG_COMPAT_OLD_SIGACTION | ||
3454 | COMPAT_SYSCALL_DEFINE3(sigaction, int, sig, | ||
3455 | const struct compat_old_sigaction __user *, act, | ||
3456 | struct compat_old_sigaction __user *, oact) | ||
3457 | { | ||
3458 | struct k_sigaction new_ka, old_ka; | ||
3459 | int ret; | ||
3460 | compat_old_sigset_t mask; | ||
3461 | compat_uptr_t handler, restorer; | ||
3462 | |||
3463 | if (act) { | ||
3464 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | ||
3465 | __get_user(handler, &act->sa_handler) || | ||
3466 | __get_user(restorer, &act->sa_restorer) || | ||
3467 | __get_user(new_ka.sa.sa_flags, &act->sa_flags) || | ||
3468 | __get_user(mask, &act->sa_mask)) | ||
3469 | return -EFAULT; | ||
3470 | |||
3471 | #ifdef __ARCH_HAS_KA_RESTORER | ||
3472 | new_ka.ka_restorer = NULL; | ||
3473 | #endif | ||
3474 | new_ka.sa.sa_handler = compat_ptr(handler); | ||
3475 | new_ka.sa.sa_restorer = compat_ptr(restorer); | ||
3476 | siginitset(&new_ka.sa.sa_mask, mask); | ||
3477 | } | ||
3478 | |||
3479 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
3480 | |||
3481 | if (!ret && oact) { | ||
3482 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | ||
3483 | __put_user(ptr_to_compat(old_ka.sa.sa_handler), | ||
3484 | &oact->sa_handler) || | ||
3485 | __put_user(ptr_to_compat(old_ka.sa.sa_restorer), | ||
3486 | &oact->sa_restorer) || | ||
3487 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || | ||
3488 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) | ||
3489 | return -EFAULT; | ||
3490 | } | ||
3491 | return ret; | ||
3492 | } | ||
3493 | #endif | ||
3272 | 3494 | ||
3273 | #ifdef __ARCH_WANT_SYS_SGETMASK | 3495 | #ifdef __ARCH_WANT_SYS_SGETMASK |
3274 | 3496 | ||
@@ -3336,7 +3558,6 @@ int sigsuspend(sigset_t *set) | |||
3336 | return -ERESTARTNOHAND; | 3558 | return -ERESTARTNOHAND; |
3337 | } | 3559 | } |
3338 | 3560 | ||
3339 | #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND | ||
3340 | /** | 3561 | /** |
3341 | * sys_rt_sigsuspend - replace the signal mask for a value with the | 3562 | * sys_rt_sigsuspend - replace the signal mask for a value with the |
3342 | * @unewset value until a signal is received | 3563 | * @unewset value until a signal is received |
@@ -3355,7 +3576,45 @@ SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) | |||
3355 | return -EFAULT; | 3576 | return -EFAULT; |
3356 | return sigsuspend(&newset); | 3577 | return sigsuspend(&newset); |
3357 | } | 3578 | } |
3358 | #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ | 3579 | |
3580 | #ifdef CONFIG_COMPAT | ||
3581 | COMPAT_SYSCALL_DEFINE2(rt_sigsuspend, compat_sigset_t __user *, unewset, compat_size_t, sigsetsize) | ||
3582 | { | ||
3583 | #ifdef __BIG_ENDIAN | ||
3584 | sigset_t newset; | ||
3585 | compat_sigset_t newset32; | ||
3586 | |||
3587 | /* XXX: Don't preclude handling different sized sigset_t's. */ | ||
3588 | if (sigsetsize != sizeof(sigset_t)) | ||
3589 | return -EINVAL; | ||
3590 | |||
3591 | if (copy_from_user(&newset32, unewset, sizeof(compat_sigset_t))) | ||
3592 | return -EFAULT; | ||
3593 | sigset_from_compat(&newset, &newset32); | ||
3594 | return sigsuspend(&newset); | ||
3595 | #else | ||
3596 | /* on little-endian bitmaps don't care about granularity */ | ||
3597 | return sys_rt_sigsuspend((sigset_t __user *)unewset, sigsetsize); | ||
3598 | #endif | ||
3599 | } | ||
3600 | #endif | ||
3601 | |||
3602 | #ifdef CONFIG_OLD_SIGSUSPEND | ||
3603 | SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask) | ||
3604 | { | ||
3605 | sigset_t blocked; | ||
3606 | siginitset(&blocked, mask); | ||
3607 | return sigsuspend(&blocked); | ||
3608 | } | ||
3609 | #endif | ||
3610 | #ifdef CONFIG_OLD_SIGSUSPEND3 | ||
3611 | SYSCALL_DEFINE3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask) | ||
3612 | { | ||
3613 | sigset_t blocked; | ||
3614 | siginitset(&blocked, mask); | ||
3615 | return sigsuspend(&blocked); | ||
3616 | } | ||
3617 | #endif | ||
3359 | 3618 | ||
3360 | __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) | 3619 | __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) |
3361 | { | 3620 | { |
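
The signal.c hunks above pull the compat entry points (rt_sigprocmask, rt_sigpending, rt_sigqueueinfo, rt_tgsigqueueinfo, rt_sigaction, sigsuspend) into generic code. For the mask-passing calls the little-endian branches simply forward to the native syscall, while the big-endian branches must recombine the two 32-bit halves of the compat mask before use (and split them again on the way out); the sigaction wrappers always convert field by field. Below is a minimal userspace sketch of that recombination, assuming stand-in types: the two-word compat set and the 64-bit native set are illustrative, not the kernel's definitions.

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for compat_sigset_t (two 32-bit words) and a 64-bit
     * native sigset_t; layouts are illustrative only. */
    struct compat_set_demo { uint32_t sig[2]; };
    struct native_set_demo { uint64_t sig[1]; };

    /* Rough equivalent of sigset_from_compat() for the 64-bit, 2-word
     * case: sig[0] is defined as the low word, so on big-endian a plain
     * memcpy would swap the halves, hence the explicit shift-and-or. */
    static void set_from_compat_demo(struct native_set_demo *d,
                                     const struct compat_set_demo *c)
    {
        d->sig[0] = ((uint64_t)c->sig[1] << 32) | c->sig[0];
    }

    int main(void)
    {
        struct compat_set_demo c = { { 0x00000005u, 0x00000001u } };
        struct native_set_demo d;

        set_from_compat_demo(&d, &c);
        printf("native mask: 0x%016llx\n", (unsigned long long)d.sig[0]);
        return 0;
    }

The __BIG_ENDIAN branches above do this conversion (and the reverse via sigset_to_compat) around the shared helpers, while the little-endian branches cast the user pointer and forward to the native syscall.
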
diff --git a/kernel/smp.c b/kernel/smp.c
index 69f38bd98b42..8e451f3ff51b 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -16,22 +16,12 @@ | |||
16 | #include "smpboot.h" | 16 | #include "smpboot.h" |
17 | 17 | ||
18 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS | 18 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS |
19 | static struct { | ||
20 | struct list_head queue; | ||
21 | raw_spinlock_t lock; | ||
22 | } call_function __cacheline_aligned_in_smp = | ||
23 | { | ||
24 | .queue = LIST_HEAD_INIT(call_function.queue), | ||
25 | .lock = __RAW_SPIN_LOCK_UNLOCKED(call_function.lock), | ||
26 | }; | ||
27 | |||
28 | enum { | 19 | enum { |
29 | CSD_FLAG_LOCK = 0x01, | 20 | CSD_FLAG_LOCK = 0x01, |
30 | }; | 21 | }; |
31 | 22 | ||
32 | struct call_function_data { | 23 | struct call_function_data { |
33 | struct call_single_data csd; | 24 | struct call_single_data __percpu *csd; |
34 | atomic_t refs; | ||
35 | cpumask_var_t cpumask; | 25 | cpumask_var_t cpumask; |
36 | cpumask_var_t cpumask_ipi; | 26 | cpumask_var_t cpumask_ipi; |
37 | }; | 27 | }; |
@@ -60,6 +50,11 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
60 | if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, | 50 | if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, |
61 | cpu_to_node(cpu))) | 51 | cpu_to_node(cpu))) |
62 | return notifier_from_errno(-ENOMEM); | 52 | return notifier_from_errno(-ENOMEM); |
53 | cfd->csd = alloc_percpu(struct call_single_data); | ||
54 | if (!cfd->csd) { | ||
55 | free_cpumask_var(cfd->cpumask); | ||
56 | return notifier_from_errno(-ENOMEM); | ||
57 | } | ||
63 | break; | 58 | break; |
64 | 59 | ||
65 | #ifdef CONFIG_HOTPLUG_CPU | 60 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -70,6 +65,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
70 | case CPU_DEAD_FROZEN: | 65 | case CPU_DEAD_FROZEN: |
71 | free_cpumask_var(cfd->cpumask); | 66 | free_cpumask_var(cfd->cpumask); |
72 | free_cpumask_var(cfd->cpumask_ipi); | 67 | free_cpumask_var(cfd->cpumask_ipi); |
68 | free_percpu(cfd->csd); | ||
73 | break; | 69 | break; |
74 | #endif | 70 | #endif |
75 | }; | 71 | }; |
@@ -171,85 +167,6 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) | |||
171 | } | 167 | } |
172 | 168 | ||
173 | /* | 169 | /* |
174 | * Invoked by arch to handle an IPI for call function. Must be called with | ||
175 | * interrupts disabled. | ||
176 | */ | ||
177 | void generic_smp_call_function_interrupt(void) | ||
178 | { | ||
179 | struct call_function_data *data; | ||
180 | int cpu = smp_processor_id(); | ||
181 | |||
182 | /* | ||
183 | * Shouldn't receive this interrupt on a cpu that is not yet online. | ||
184 | */ | ||
185 | WARN_ON_ONCE(!cpu_online(cpu)); | ||
186 | |||
187 | /* | ||
188 | * Ensure entry is visible on call_function_queue after we have | ||
189 | * entered the IPI. See comment in smp_call_function_many. | ||
190 | * If we don't have this, then we may miss an entry on the list | ||
191 | * and never get another IPI to process it. | ||
192 | */ | ||
193 | smp_mb(); | ||
194 | |||
195 | /* | ||
196 | * It's ok to use list_for_each_rcu() here even though we may | ||
197 | * delete 'pos', since list_del_rcu() doesn't clear ->next | ||
198 | */ | ||
199 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { | ||
200 | int refs; | ||
201 | smp_call_func_t func; | ||
202 | |||
203 | /* | ||
204 | * Since we walk the list without any locks, we might | ||
205 | * see an entry that was completed, removed from the | ||
206 | * list and is in the process of being reused. | ||
207 | * | ||
208 | * We must check that the cpu is in the cpumask before | ||
209 | * checking the refs, and both must be set before | ||
210 | * executing the callback on this cpu. | ||
211 | */ | ||
212 | |||
213 | if (!cpumask_test_cpu(cpu, data->cpumask)) | ||
214 | continue; | ||
215 | |||
216 | smp_rmb(); | ||
217 | |||
218 | if (atomic_read(&data->refs) == 0) | ||
219 | continue; | ||
220 | |||
221 | func = data->csd.func; /* save for later warn */ | ||
222 | func(data->csd.info); | ||
223 | |||
224 | /* | ||
225 | * If the cpu mask is not still set then func enabled | ||
226 | * interrupts (BUG), and this cpu took another smp call | ||
227 | * function interrupt and executed func(info) twice | ||
228 | * on this cpu. That nested execution decremented refs. | ||
229 | */ | ||
230 | if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) { | ||
231 | WARN(1, "%pf enabled interrupts and double executed\n", func); | ||
232 | continue; | ||
233 | } | ||
234 | |||
235 | refs = atomic_dec_return(&data->refs); | ||
236 | WARN_ON(refs < 0); | ||
237 | |||
238 | if (refs) | ||
239 | continue; | ||
240 | |||
241 | WARN_ON(!cpumask_empty(data->cpumask)); | ||
242 | |||
243 | raw_spin_lock(&call_function.lock); | ||
244 | list_del_rcu(&data->csd.list); | ||
245 | raw_spin_unlock(&call_function.lock); | ||
246 | |||
247 | csd_unlock(&data->csd); | ||
248 | } | ||
249 | |||
250 | } | ||
251 | |||
252 | /* | ||
253 | * Invoked by arch to handle an IPI for call function single. Must be | 170 | * Invoked by arch to handle an IPI for call function single. Must be |
254 | * called from the arch with interrupts disabled. | 171 | * called from the arch with interrupts disabled. |
255 | */ | 172 | */ |
@@ -453,8 +370,7 @@ void smp_call_function_many(const struct cpumask *mask, | |||
453 | smp_call_func_t func, void *info, bool wait) | 370 | smp_call_func_t func, void *info, bool wait) |
454 | { | 371 | { |
455 | struct call_function_data *data; | 372 | struct call_function_data *data; |
456 | unsigned long flags; | 373 | int cpu, next_cpu, this_cpu = smp_processor_id(); |
457 | int refs, cpu, next_cpu, this_cpu = smp_processor_id(); | ||
458 | 374 | ||
459 | /* | 375 | /* |
460 | * Can deadlock when called with interrupts disabled. | 376 | * Can deadlock when called with interrupts disabled. |
@@ -486,50 +402,13 @@ void smp_call_function_many(const struct cpumask *mask, | |||
486 | } | 402 | } |
487 | 403 | ||
488 | data = &__get_cpu_var(cfd_data); | 404 | data = &__get_cpu_var(cfd_data); |
489 | csd_lock(&data->csd); | ||
490 | |||
491 | /* This BUG_ON verifies our reuse assertions and can be removed */ | ||
492 | BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask)); | ||
493 | |||
494 | /* | ||
495 | * The global call function queue list add and delete are protected | ||
496 | * by a lock, but the list is traversed without any lock, relying | ||
497 | * on the rcu list add and delete to allow safe concurrent traversal. | ||
498 | * We reuse the call function data without waiting for any grace | ||
499 | * period after some other cpu removes it from the global queue. | ||
500 | * This means a cpu might find our data block as it is being | ||
501 | * filled out. | ||
502 | * | ||
503 | * We hold off the interrupt handler on the other cpu by | ||
504 | * ordering our writes to the cpu mask vs our setting of the | ||
505 | * refs counter. We assert only the cpu owning the data block | ||
506 | * will set a bit in cpumask, and each bit will only be cleared | ||
507 | * by the subject cpu. Each cpu must first find its bit is | ||
508 | * set and then check that refs is set indicating the element is | ||
509 | * ready to be processed, otherwise it must skip the entry. | ||
510 | * | ||
511 | * On the previous iteration refs was set to 0 by another cpu. | ||
512 | * To avoid the use of transitivity, set the counter to 0 here | ||
513 | * so the wmb will pair with the rmb in the interrupt handler. | ||
514 | */ | ||
515 | atomic_set(&data->refs, 0); /* convert 3rd to 1st party write */ | ||
516 | |||
517 | data->csd.func = func; | ||
518 | data->csd.info = info; | ||
519 | 405 | ||
520 | /* Ensure 0 refs is visible before mask. Also orders func and info */ | ||
521 | smp_wmb(); | ||
522 | |||
523 | /* We rely on the "and" being processed before the store */ | ||
524 | cpumask_and(data->cpumask, mask, cpu_online_mask); | 406 | cpumask_and(data->cpumask, mask, cpu_online_mask); |
525 | cpumask_clear_cpu(this_cpu, data->cpumask); | 407 | cpumask_clear_cpu(this_cpu, data->cpumask); |
526 | refs = cpumask_weight(data->cpumask); | ||
527 | 408 | ||
528 | /* Some callers race with other cpus changing the passed mask */ | 409 | /* Some callers race with other cpus changing the passed mask */ |
529 | if (unlikely(!refs)) { | 410 | if (unlikely(!cpumask_weight(data->cpumask))) |
530 | csd_unlock(&data->csd); | ||
531 | return; | 411 | return; |
532 | } | ||
533 | 412 | ||
534 | /* | 413 | /* |
535 | * After we put an entry into the list, data->cpumask | 414 | * After we put an entry into the list, data->cpumask |
@@ -537,34 +416,32 @@ void smp_call_function_many(const struct cpumask *mask, | |||
537 | * a SMP function call, so data->cpumask will be zero. | 416 | * a SMP function call, so data->cpumask will be zero. |
538 | */ | 417 | */ |
539 | cpumask_copy(data->cpumask_ipi, data->cpumask); | 418 | cpumask_copy(data->cpumask_ipi, data->cpumask); |
540 | raw_spin_lock_irqsave(&call_function.lock, flags); | ||
541 | /* | ||
542 | * Place entry at the _HEAD_ of the list, so that any cpu still | ||
543 | * observing the entry in generic_smp_call_function_interrupt() | ||
544 | * will not miss any other list entries: | ||
545 | */ | ||
546 | list_add_rcu(&data->csd.list, &call_function.queue); | ||
547 | /* | ||
548 | * We rely on the wmb() in list_add_rcu to complete our writes | ||
549 | * to the cpumask before this write to refs, which indicates | ||
550 | * data is on the list and is ready to be processed. | ||
551 | */ | ||
552 | atomic_set(&data->refs, refs); | ||
553 | raw_spin_unlock_irqrestore(&call_function.lock, flags); | ||
554 | 419 | ||
555 | /* | 420 | for_each_cpu(cpu, data->cpumask) { |
556 | * Make the list addition visible before sending the ipi. | 421 | struct call_single_data *csd = per_cpu_ptr(data->csd, cpu); |
557 | * (IPIs must obey or appear to obey normal Linux cache | 422 | struct call_single_queue *dst = |
558 | * coherency rules -- see comment in generic_exec_single). | 423 | &per_cpu(call_single_queue, cpu); |
559 | */ | 424 | unsigned long flags; |
560 | smp_mb(); | 425 | |
426 | csd_lock(csd); | ||
427 | csd->func = func; | ||
428 | csd->info = info; | ||
429 | |||
430 | raw_spin_lock_irqsave(&dst->lock, flags); | ||
431 | list_add_tail(&csd->list, &dst->list); | ||
432 | raw_spin_unlock_irqrestore(&dst->lock, flags); | ||
433 | } | ||
561 | 434 | ||
562 | /* Send a message to all CPUs in the map */ | 435 | /* Send a message to all CPUs in the map */ |
563 | arch_send_call_function_ipi_mask(data->cpumask_ipi); | 436 | arch_send_call_function_ipi_mask(data->cpumask_ipi); |
564 | 437 | ||
565 | /* Optionally wait for the CPUs to complete */ | 438 | if (wait) { |
566 | if (wait) | 439 | for_each_cpu(cpu, data->cpumask) { |
567 | csd_lock_wait(&data->csd); | 440 | struct call_single_data *csd = |
441 | per_cpu_ptr(data->csd, cpu); | ||
442 | csd_lock_wait(csd); | ||
443 | } | ||
444 | } | ||
568 | } | 445 | } |
569 | EXPORT_SYMBOL(smp_call_function_many); | 446 | EXPORT_SYMBOL(smp_call_function_many); |
570 | 447 | ||
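
The smp.c rewrite above removes the single global call_function queue, its refcount and the RCU list walk in the IPI handler, and instead allocates one call_single_data slot per CPU at hotplug time; smp_call_function_many() now locks and queues one csd onto each destination's existing call_single_queue and, when wait is set, waits on each csd individually. A rough userspace model of that per-destination queuing, with hypothetical names and fixed-size arrays standing in for the per-cpu allocations and lists:

    #include <stdio.h>

    #define NR_CPUS_DEMO 4

    /* Stand-in for struct call_single_data. */
    struct csd_demo {
        void (*func)(void *info);
        void *info;
        int locked;                          /* CSD_FLAG_LOCK analogue */
    };

    /* csd[src][dst]: one slot per (requesting cpu, target cpu) pair,
     * mirroring alloc_percpu(struct call_single_data) per source cpu. */
    static struct csd_demo csd[NR_CPUS_DEMO][NR_CPUS_DEMO];

    /* Per-destination FIFO standing in for call_single_queue. */
    static struct csd_demo *queue[NR_CPUS_DEMO][NR_CPUS_DEMO];
    static int queue_len[NR_CPUS_DEMO];

    static void say_hello(void *info)
    {
        printf("callback ran with \"%s\"\n", (const char *)info);
    }

    /* What smp_call_function_many() now does: fill and queue one csd per
     * destination instead of one shared, refcounted entry. */
    static void call_many_demo(int src, void (*func)(void *), void *info)
    {
        for (int dst = 0; dst < NR_CPUS_DEMO; dst++) {
            if (dst == src)
                continue;
            struct csd_demo *c = &csd[src][dst];
            c->func = func;
            c->info = info;
            c->locked = 1;                   /* csd_lock() */
            queue[dst][queue_len[dst]++] = c;
        }
        /* arch_send_call_function_ipi_mask() would be sent here. */
    }

    /* What each destination does in its IPI handler: drain its own list,
     * run the callbacks, release each csd. */
    static void ipi_handler_demo(int dst)
    {
        for (int i = 0; i < queue_len[dst]; i++) {
            struct csd_demo *c = queue[dst][i];
            c->func(c->info);
            c->locked = 0;                   /* csd_unlock() */
        }
        queue_len[dst] = 0;
    }

    int main(void)
    {
        call_many_demo(0, say_hello, "cpu 0 asked");
        for (int dst = 1; dst < NR_CPUS_DEMO; dst++)
            ipi_handler_demo(dst);
        return 0;
    }

Because every queued element belongs to exactly one destination, the cpumask/refcount ordering rules and their memory barriers in the old generic_smp_call_function_interrupt() are no longer needed.
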
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index d4abac261779..8eaed9aa9cf0 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -209,6 +209,8 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp | |||
209 | { | 209 | { |
210 | struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); | 210 | struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); |
211 | 211 | ||
212 | if (ht->pre_unpark) | ||
213 | ht->pre_unpark(cpu); | ||
212 | kthread_unpark(tsk); | 214 | kthread_unpark(tsk); |
213 | } | 215 | } |
214 | 216 | ||
diff --git a/kernel/softirq.c b/kernel/softirq.c
index f5cc25f147a6..14d7758074aa 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -195,21 +195,21 @@ void local_bh_enable_ip(unsigned long ip) | |||
195 | EXPORT_SYMBOL(local_bh_enable_ip); | 195 | EXPORT_SYMBOL(local_bh_enable_ip); |
196 | 196 | ||
197 | /* | 197 | /* |
198 | * We restart softirq processing MAX_SOFTIRQ_RESTART times, | 198 | * We restart softirq processing for at most 2 ms, |
199 | * and we fall back to softirqd after that. | 199 | * and if need_resched() is not set. |
200 | * | 200 | * |
201 | * This number has been established via experimentation. | 201 | * These limits have been established via experimentation. |
202 | * The two things to balance is latency against fairness - | 202 | * The two things to balance is latency against fairness - |
203 | * we want to handle softirqs as soon as possible, but they | 203 | * we want to handle softirqs as soon as possible, but they |
204 | * should not be able to lock up the box. | 204 | * should not be able to lock up the box. |
205 | */ | 205 | */ |
206 | #define MAX_SOFTIRQ_RESTART 10 | 206 | #define MAX_SOFTIRQ_TIME msecs_to_jiffies(2) |
207 | 207 | ||
208 | asmlinkage void __do_softirq(void) | 208 | asmlinkage void __do_softirq(void) |
209 | { | 209 | { |
210 | struct softirq_action *h; | 210 | struct softirq_action *h; |
211 | __u32 pending; | 211 | __u32 pending; |
212 | int max_restart = MAX_SOFTIRQ_RESTART; | 212 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; |
213 | int cpu; | 213 | int cpu; |
214 | unsigned long old_flags = current->flags; | 214 | unsigned long old_flags = current->flags; |
215 | 215 | ||
@@ -264,11 +264,12 @@ restart: | |||
264 | local_irq_disable(); | 264 | local_irq_disable(); |
265 | 265 | ||
266 | pending = local_softirq_pending(); | 266 | pending = local_softirq_pending(); |
267 | if (pending && --max_restart) | 267 | if (pending) { |
268 | goto restart; | 268 | if (time_before(jiffies, end) && !need_resched()) |
269 | goto restart; | ||
269 | 270 | ||
270 | if (pending) | ||
271 | wakeup_softirqd(); | 271 | wakeup_softirqd(); |
272 | } | ||
272 | 273 | ||
273 | lockdep_softirq_exit(); | 274 | lockdep_softirq_exit(); |
274 | 275 | ||
@@ -322,18 +323,10 @@ void irq_enter(void) | |||
322 | 323 | ||
323 | static inline void invoke_softirq(void) | 324 | static inline void invoke_softirq(void) |
324 | { | 325 | { |
325 | if (!force_irqthreads) { | 326 | if (!force_irqthreads) |
326 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED | ||
327 | __do_softirq(); | 327 | __do_softirq(); |
328 | #else | 328 | else |
329 | do_softirq(); | ||
330 | #endif | ||
331 | } else { | ||
332 | __local_bh_disable((unsigned long)__builtin_return_address(0), | ||
333 | SOFTIRQ_OFFSET); | ||
334 | wakeup_softirqd(); | 329 | wakeup_softirqd(); |
335 | __local_bh_enable(SOFTIRQ_OFFSET); | ||
336 | } | ||
337 | } | 330 | } |
338 | 331 | ||
339 | /* | 332 | /* |
@@ -341,9 +334,15 @@ static inline void invoke_softirq(void) | |||
341 | */ | 334 | */ |
342 | void irq_exit(void) | 335 | void irq_exit(void) |
343 | { | 336 | { |
337 | #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED | ||
338 | local_irq_disable(); | ||
339 | #else | ||
340 | WARN_ON_ONCE(!irqs_disabled()); | ||
341 | #endif | ||
342 | |||
344 | account_irq_exit_time(current); | 343 | account_irq_exit_time(current); |
345 | trace_hardirq_exit(); | 344 | trace_hardirq_exit(); |
346 | sub_preempt_count(IRQ_EXIT_OFFSET); | 345 | sub_preempt_count(HARDIRQ_OFFSET); |
347 | if (!in_interrupt() && local_softirq_pending()) | 346 | if (!in_interrupt() && local_softirq_pending()) |
348 | invoke_softirq(); | 347 | invoke_softirq(); |
349 | 348 | ||
@@ -353,7 +352,6 @@ void irq_exit(void) | |||
353 | tick_nohz_irq_exit(); | 352 | tick_nohz_irq_exit(); |
354 | #endif | 353 | #endif |
355 | rcu_irq_exit(); | 354 | rcu_irq_exit(); |
356 | sched_preempt_enable_no_resched(); | ||
357 | } | 355 | } |
358 | 356 | ||
359 | /* | 357 | /* |
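
The softirq.c change above replaces the fixed MAX_SOFTIRQ_RESTART=10 restart counter with a 2 ms time budget and also defers to ksoftirqd when need_resched() is set; the follow-on irq_exit() hunk requires interrupts to be disabled on entry (warning or disabling them depending on __ARCH_IRQ_EXIT_IRQS_DISABLED). A small userspace sketch of the new loop shape, assuming CLOCK_MONOTONIC and a dummy backlog in place of real softirq handlers:

    #include <stdio.h>
    #include <time.h>

    static long long now_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    int main(void)
    {
        /* 2 ms budget, matching MAX_SOFTIRQ_TIME above. */
        long long end = now_ns() + 2 * 1000000LL;
        long pending = 50 * 1000 * 1000;     /* dummy backlog */
        long handled = 0;

        while (pending > 0) {
            pending--;                       /* "handle" one item */
            handled++;
            /* Old code: stop after 10 restarts regardless of elapsed
             * time. New code: stop when the budget is spent (or when
             * need_resched() is set, not modelled here). */
            if (now_ns() >= end)
                break;
        }
        if (pending > 0)
            printf("handled %ld, deferring %ld to the softirq thread\n",
                   handled, pending);
        else
            printf("handled all %ld items within the budget\n", handled);
        return 0;
    }
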
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 95d178c62d5a..c09f2955ae30 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -336,7 +336,7 @@ static struct smp_hotplug_thread cpu_stop_threads = { | |||
336 | .create = cpu_stop_create, | 336 | .create = cpu_stop_create, |
337 | .setup = cpu_stop_unpark, | 337 | .setup = cpu_stop_unpark, |
338 | .park = cpu_stop_park, | 338 | .park = cpu_stop_park, |
339 | .unpark = cpu_stop_unpark, | 339 | .pre_unpark = cpu_stop_unpark, |
340 | .selfparking = true, | 340 | .selfparking = true, |
341 | }; | 341 | }; |
342 | 342 | ||
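
Taken together, the smpboot.c and stop_machine.c hunks above add an optional pre_unpark() callback that runs before kthread_unpark(), and switch cpu_stop_threads from .unpark to .pre_unpark so cpu_stop_unpark() now runs before the parked thread is released rather than afterwards. A compact sketch of that optional-hook pattern, with illustrative names rather than the real smp_hotplug_thread API:

    #include <stdio.h>

    /* Illustrative client descriptor: pre_unpark may be NULL. */
    struct hotplug_client_demo {
        const char *name;
        void (*pre_unpark)(unsigned int cpu);
    };

    static void stopper_enable_demo(unsigned int cpu)
    {
        printf("enabling stopper for cpu %u before unpark\n", cpu);
    }

    /* Rough equivalent of smpboot_unpark_thread(): run the hook first if
     * the client registered one, then release the parked thread. */
    static void unpark_demo(const struct hotplug_client_demo *ht,
                            unsigned int cpu)
    {
        if (ht->pre_unpark)
            ht->pre_unpark(cpu);
        printf("kthread_unpark(%s/%u)\n", ht->name, cpu);
    }

    int main(void)
    {
        struct hotplug_client_demo stopper = {
            .name = "migration",
            .pre_unpark = stopper_enable_demo,
        };
        struct hotplug_client_demo plain = { .name = "ksoftirqd" };

        unpark_demo(&stopper, 1);
        unpark_demo(&plain, 1);
        return 0;
    }
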
diff --git a/kernel/sys.c b/kernel/sys.c
index 265b37690421..81f56445fba9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/syscalls.h> | 47 | #include <linux/syscalls.h> |
48 | #include <linux/kprobes.h> | 48 | #include <linux/kprobes.h> |
49 | #include <linux/user_namespace.h> | 49 | #include <linux/user_namespace.h> |
50 | #include <linux/binfmts.h> | ||
50 | 51 | ||
51 | #include <linux/kmsg_dump.h> | 52 | #include <linux/kmsg_dump.h> |
52 | /* Move somewhere else to avoid recompiling? */ | 53 | /* Move somewhere else to avoid recompiling? */ |
@@ -433,11 +434,12 @@ static DEFINE_MUTEX(reboot_mutex); | |||
433 | SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | 434 | SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, |
434 | void __user *, arg) | 435 | void __user *, arg) |
435 | { | 436 | { |
437 | struct pid_namespace *pid_ns = task_active_pid_ns(current); | ||
436 | char buffer[256]; | 438 | char buffer[256]; |
437 | int ret = 0; | 439 | int ret = 0; |
438 | 440 | ||
439 | /* We only trust the superuser with rebooting the system. */ | 441 | /* We only trust the superuser with rebooting the system. */ |
440 | if (!capable(CAP_SYS_BOOT)) | 442 | if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) |
441 | return -EPERM; | 443 | return -EPERM; |
442 | 444 | ||
443 | /* For safety, we require "magic" arguments. */ | 445 | /* For safety, we require "magic" arguments. */ |
@@ -453,7 +455,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
453 | * pid_namespace, the command is handled by reboot_pid_ns() which will | 455 | * pid_namespace, the command is handled by reboot_pid_ns() which will |
454 | * call do_exit(). | 456 | * call do_exit(). |
455 | */ | 457 | */ |
456 | ret = reboot_pid_ns(task_active_pid_ns(current), cmd); | 458 | ret = reboot_pid_ns(pid_ns, cmd); |
457 | if (ret) | 459 | if (ret) |
458 | return ret; | 460 | return ret; |
459 | 461 | ||
@@ -1792,14 +1794,14 @@ SYSCALL_DEFINE1(umask, int, mask) | |||
1792 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | 1794 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) |
1793 | { | 1795 | { |
1794 | struct fd exe; | 1796 | struct fd exe; |
1795 | struct dentry *dentry; | 1797 | struct inode *inode; |
1796 | int err; | 1798 | int err; |
1797 | 1799 | ||
1798 | exe = fdget(fd); | 1800 | exe = fdget(fd); |
1799 | if (!exe.file) | 1801 | if (!exe.file) |
1800 | return -EBADF; | 1802 | return -EBADF; |
1801 | 1803 | ||
1802 | dentry = exe.file->f_path.dentry; | 1804 | inode = file_inode(exe.file); |
1803 | 1805 | ||
1804 | /* | 1806 | /* |
1805 | * Because the original mm->exe_file points to executable file, make | 1807 | * Because the original mm->exe_file points to executable file, make |
@@ -1807,11 +1809,11 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | |||
1807 | * overall picture. | 1809 | * overall picture. |
1808 | */ | 1810 | */ |
1809 | err = -EACCES; | 1811 | err = -EACCES; |
1810 | if (!S_ISREG(dentry->d_inode->i_mode) || | 1812 | if (!S_ISREG(inode->i_mode) || |
1811 | exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) | 1813 | exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) |
1812 | goto exit; | 1814 | goto exit; |
1813 | 1815 | ||
1814 | err = inode_permission(dentry->d_inode, MAY_EXEC); | 1816 | err = inode_permission(inode, MAY_EXEC); |
1815 | if (err) | 1817 | if (err) |
1816 | goto exit; | 1818 | goto exit; |
1817 | 1819 | ||
@@ -2012,160 +2014,159 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
2012 | 2014 | ||
2013 | error = 0; | 2015 | error = 0; |
2014 | switch (option) { | 2016 | switch (option) { |
2015 | case PR_SET_PDEATHSIG: | 2017 | case PR_SET_PDEATHSIG: |
2016 | if (!valid_signal(arg2)) { | 2018 | if (!valid_signal(arg2)) { |
2017 | error = -EINVAL; | 2019 | error = -EINVAL; |
2018 | break; | ||
2019 | } | ||
2020 | me->pdeath_signal = arg2; | ||
2021 | break; | ||
2022 | case PR_GET_PDEATHSIG: | ||
2023 | error = put_user(me->pdeath_signal, (int __user *)arg2); | ||
2024 | break; | ||
2025 | case PR_GET_DUMPABLE: | ||
2026 | error = get_dumpable(me->mm); | ||
2027 | break; | 2020 | break; |
2028 | case PR_SET_DUMPABLE: | 2021 | } |
2029 | if (arg2 < 0 || arg2 > 1) { | 2022 | me->pdeath_signal = arg2; |
2030 | error = -EINVAL; | 2023 | break; |
2031 | break; | 2024 | case PR_GET_PDEATHSIG: |
2032 | } | 2025 | error = put_user(me->pdeath_signal, (int __user *)arg2); |
2033 | set_dumpable(me->mm, arg2); | 2026 | break; |
2027 | case PR_GET_DUMPABLE: | ||
2028 | error = get_dumpable(me->mm); | ||
2029 | break; | ||
2030 | case PR_SET_DUMPABLE: | ||
2031 | if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) { | ||
2032 | error = -EINVAL; | ||
2034 | break; | 2033 | break; |
2034 | } | ||
2035 | set_dumpable(me->mm, arg2); | ||
2036 | break; | ||
2035 | 2037 | ||
2036 | case PR_SET_UNALIGN: | 2038 | case PR_SET_UNALIGN: |
2037 | error = SET_UNALIGN_CTL(me, arg2); | 2039 | error = SET_UNALIGN_CTL(me, arg2); |
2038 | break; | 2040 | break; |
2039 | case PR_GET_UNALIGN: | 2041 | case PR_GET_UNALIGN: |
2040 | error = GET_UNALIGN_CTL(me, arg2); | 2042 | error = GET_UNALIGN_CTL(me, arg2); |
2041 | break; | 2043 | break; |
2042 | case PR_SET_FPEMU: | 2044 | case PR_SET_FPEMU: |
2043 | error = SET_FPEMU_CTL(me, arg2); | 2045 | error = SET_FPEMU_CTL(me, arg2); |
2044 | break; | 2046 | break; |
2045 | case PR_GET_FPEMU: | 2047 | case PR_GET_FPEMU: |
2046 | error = GET_FPEMU_CTL(me, arg2); | 2048 | error = GET_FPEMU_CTL(me, arg2); |
2047 | break; | 2049 | break; |
2048 | case PR_SET_FPEXC: | 2050 | case PR_SET_FPEXC: |
2049 | error = SET_FPEXC_CTL(me, arg2); | 2051 | error = SET_FPEXC_CTL(me, arg2); |
2050 | break; | 2052 | break; |
2051 | case PR_GET_FPEXC: | 2053 | case PR_GET_FPEXC: |
2052 | error = GET_FPEXC_CTL(me, arg2); | 2054 | error = GET_FPEXC_CTL(me, arg2); |
2053 | break; | 2055 | break; |
2054 | case PR_GET_TIMING: | 2056 | case PR_GET_TIMING: |
2055 | error = PR_TIMING_STATISTICAL; | 2057 | error = PR_TIMING_STATISTICAL; |
2056 | break; | 2058 | break; |
2057 | case PR_SET_TIMING: | 2059 | case PR_SET_TIMING: |
2058 | if (arg2 != PR_TIMING_STATISTICAL) | 2060 | if (arg2 != PR_TIMING_STATISTICAL) |
2059 | error = -EINVAL; | 2061 | error = -EINVAL; |
2060 | break; | 2062 | break; |
2061 | case PR_SET_NAME: | 2063 | case PR_SET_NAME: |
2062 | comm[sizeof(me->comm)-1] = 0; | 2064 | comm[sizeof(me->comm) - 1] = 0; |
2063 | if (strncpy_from_user(comm, (char __user *)arg2, | 2065 | if (strncpy_from_user(comm, (char __user *)arg2, |
2064 | sizeof(me->comm) - 1) < 0) | 2066 | sizeof(me->comm) - 1) < 0) |
2065 | return -EFAULT; | 2067 | return -EFAULT; |
2066 | set_task_comm(me, comm); | 2068 | set_task_comm(me, comm); |
2067 | proc_comm_connector(me); | 2069 | proc_comm_connector(me); |
2068 | break; | 2070 | break; |
2069 | case PR_GET_NAME: | 2071 | case PR_GET_NAME: |
2070 | get_task_comm(comm, me); | 2072 | get_task_comm(comm, me); |
2071 | if (copy_to_user((char __user *)arg2, comm, | 2073 | if (copy_to_user((char __user *)arg2, comm, sizeof(comm))) |
2072 | sizeof(comm))) | 2074 | return -EFAULT; |
2073 | return -EFAULT; | 2075 | break; |
2074 | break; | 2076 | case PR_GET_ENDIAN: |
2075 | case PR_GET_ENDIAN: | 2077 | error = GET_ENDIAN(me, arg2); |
2076 | error = GET_ENDIAN(me, arg2); | 2078 | break; |
2077 | break; | 2079 | case PR_SET_ENDIAN: |
2078 | case PR_SET_ENDIAN: | 2080 | error = SET_ENDIAN(me, arg2); |
2079 | error = SET_ENDIAN(me, arg2); | 2081 | break; |
2080 | break; | 2082 | case PR_GET_SECCOMP: |
2081 | case PR_GET_SECCOMP: | 2083 | error = prctl_get_seccomp(); |
2082 | error = prctl_get_seccomp(); | 2084 | break; |
2083 | break; | 2085 | case PR_SET_SECCOMP: |
2084 | case PR_SET_SECCOMP: | 2086 | error = prctl_set_seccomp(arg2, (char __user *)arg3); |
2085 | error = prctl_set_seccomp(arg2, (char __user *)arg3); | 2087 | break; |
2086 | break; | 2088 | case PR_GET_TSC: |
2087 | case PR_GET_TSC: | 2089 | error = GET_TSC_CTL(arg2); |
2088 | error = GET_TSC_CTL(arg2); | 2090 | break; |
2089 | break; | 2091 | case PR_SET_TSC: |
2090 | case PR_SET_TSC: | 2092 | error = SET_TSC_CTL(arg2); |
2091 | error = SET_TSC_CTL(arg2); | 2093 | break; |
2092 | break; | 2094 | case PR_TASK_PERF_EVENTS_DISABLE: |
2093 | case PR_TASK_PERF_EVENTS_DISABLE: | 2095 | error = perf_event_task_disable(); |
2094 | error = perf_event_task_disable(); | 2096 | break; |
2095 | break; | 2097 | case PR_TASK_PERF_EVENTS_ENABLE: |
2096 | case PR_TASK_PERF_EVENTS_ENABLE: | 2098 | error = perf_event_task_enable(); |
2097 | error = perf_event_task_enable(); | 2099 | break; |
2098 | break; | 2100 | case PR_GET_TIMERSLACK: |
2099 | case PR_GET_TIMERSLACK: | 2101 | error = current->timer_slack_ns; |
2100 | error = current->timer_slack_ns; | 2102 | break; |
2101 | break; | 2103 | case PR_SET_TIMERSLACK: |
2102 | case PR_SET_TIMERSLACK: | 2104 | if (arg2 <= 0) |
2103 | if (arg2 <= 0) | 2105 | current->timer_slack_ns = |
2104 | current->timer_slack_ns = | ||
2105 | current->default_timer_slack_ns; | 2106 | current->default_timer_slack_ns; |
2106 | else | 2107 | else |
2107 | current->timer_slack_ns = arg2; | 2108 | current->timer_slack_ns = arg2; |
2108 | break; | 2109 | break; |
2109 | case PR_MCE_KILL: | 2110 | case PR_MCE_KILL: |
2110 | if (arg4 | arg5) | 2111 | if (arg4 | arg5) |
2111 | return -EINVAL; | 2112 | return -EINVAL; |
2112 | switch (arg2) { | 2113 | switch (arg2) { |
2113 | case PR_MCE_KILL_CLEAR: | 2114 | case PR_MCE_KILL_CLEAR: |
2114 | if (arg3 != 0) | 2115 | if (arg3 != 0) |
2115 | return -EINVAL; | ||
2116 | current->flags &= ~PF_MCE_PROCESS; | ||
2117 | break; | ||
2118 | case PR_MCE_KILL_SET: | ||
2119 | current->flags |= PF_MCE_PROCESS; | ||
2120 | if (arg3 == PR_MCE_KILL_EARLY) | ||
2121 | current->flags |= PF_MCE_EARLY; | ||
2122 | else if (arg3 == PR_MCE_KILL_LATE) | ||
2123 | current->flags &= ~PF_MCE_EARLY; | ||
2124 | else if (arg3 == PR_MCE_KILL_DEFAULT) | ||
2125 | current->flags &= | ||
2126 | ~(PF_MCE_EARLY|PF_MCE_PROCESS); | ||
2127 | else | ||
2128 | return -EINVAL; | ||
2129 | break; | ||
2130 | default: | ||
2131 | return -EINVAL; | 2116 | return -EINVAL; |
2132 | } | 2117 | current->flags &= ~PF_MCE_PROCESS; |
2133 | break; | 2118 | break; |
2134 | case PR_MCE_KILL_GET: | 2119 | case PR_MCE_KILL_SET: |
2135 | if (arg2 | arg3 | arg4 | arg5) | 2120 | current->flags |= PF_MCE_PROCESS; |
2136 | return -EINVAL; | 2121 | if (arg3 == PR_MCE_KILL_EARLY) |
2137 | if (current->flags & PF_MCE_PROCESS) | 2122 | current->flags |= PF_MCE_EARLY; |
2138 | error = (current->flags & PF_MCE_EARLY) ? | 2123 | else if (arg3 == PR_MCE_KILL_LATE) |
2139 | PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; | 2124 | current->flags &= ~PF_MCE_EARLY; |
2125 | else if (arg3 == PR_MCE_KILL_DEFAULT) | ||
2126 | current->flags &= | ||
2127 | ~(PF_MCE_EARLY|PF_MCE_PROCESS); | ||
2140 | else | 2128 | else |
2141 | error = PR_MCE_KILL_DEFAULT; | ||
2142 | break; | ||
2143 | case PR_SET_MM: | ||
2144 | error = prctl_set_mm(arg2, arg3, arg4, arg5); | ||
2145 | break; | ||
2146 | case PR_GET_TID_ADDRESS: | ||
2147 | error = prctl_get_tid_address(me, (int __user **)arg2); | ||
2148 | break; | ||
2149 | case PR_SET_CHILD_SUBREAPER: | ||
2150 | me->signal->is_child_subreaper = !!arg2; | ||
2151 | break; | ||
2152 | case PR_GET_CHILD_SUBREAPER: | ||
2153 | error = put_user(me->signal->is_child_subreaper, | ||
2154 | (int __user *) arg2); | ||
2155 | break; | ||
2156 | case PR_SET_NO_NEW_PRIVS: | ||
2157 | if (arg2 != 1 || arg3 || arg4 || arg5) | ||
2158 | return -EINVAL; | 2129 | return -EINVAL; |
2159 | |||
2160 | current->no_new_privs = 1; | ||
2161 | break; | 2130 | break; |
2162 | case PR_GET_NO_NEW_PRIVS: | ||
2163 | if (arg2 || arg3 || arg4 || arg5) | ||
2164 | return -EINVAL; | ||
2165 | return current->no_new_privs ? 1 : 0; | ||
2166 | default: | 2131 | default: |
2167 | error = -EINVAL; | 2132 | return -EINVAL; |
2168 | break; | 2133 | } |
2134 | break; | ||
2135 | case PR_MCE_KILL_GET: | ||
2136 | if (arg2 | arg3 | arg4 | arg5) | ||
2137 | return -EINVAL; | ||
2138 | if (current->flags & PF_MCE_PROCESS) | ||
2139 | error = (current->flags & PF_MCE_EARLY) ? | ||
2140 | PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; | ||
2141 | else | ||
2142 | error = PR_MCE_KILL_DEFAULT; | ||
2143 | break; | ||
2144 | case PR_SET_MM: | ||
2145 | error = prctl_set_mm(arg2, arg3, arg4, arg5); | ||
2146 | break; | ||
2147 | case PR_GET_TID_ADDRESS: | ||
2148 | error = prctl_get_tid_address(me, (int __user **)arg2); | ||
2149 | break; | ||
2150 | case PR_SET_CHILD_SUBREAPER: | ||
2151 | me->signal->is_child_subreaper = !!arg2; | ||
2152 | break; | ||
2153 | case PR_GET_CHILD_SUBREAPER: | ||
2154 | error = put_user(me->signal->is_child_subreaper, | ||
2155 | (int __user *)arg2); | ||
2156 | break; | ||
2157 | case PR_SET_NO_NEW_PRIVS: | ||
2158 | if (arg2 != 1 || arg3 || arg4 || arg5) | ||
2159 | return -EINVAL; | ||
2160 | |||
2161 | current->no_new_privs = 1; | ||
2162 | break; | ||
2163 | case PR_GET_NO_NEW_PRIVS: | ||
2164 | if (arg2 || arg3 || arg4 || arg5) | ||
2165 | return -EINVAL; | ||
2166 | return current->no_new_privs ? 1 : 0; | ||
2167 | default: | ||
2168 | error = -EINVAL; | ||
2169 | break; | ||
2169 | } | 2170 | } |
2170 | return error; | 2171 | return error; |
2171 | } | 2172 | } |
@@ -2184,11 +2185,6 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, | |||
2184 | 2185 | ||
2185 | char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; | 2186 | char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; |
2186 | 2187 | ||
2187 | static void argv_cleanup(struct subprocess_info *info) | ||
2188 | { | ||
2189 | argv_free(info->argv); | ||
2190 | } | ||
2191 | |||
2192 | static int __orderly_poweroff(void) | 2188 | static int __orderly_poweroff(void) |
2193 | { | 2189 | { |
2194 | int argc; | 2190 | int argc; |
@@ -2208,9 +2204,8 @@ static int __orderly_poweroff(void) | |||
2208 | } | 2204 | } |
2209 | 2205 | ||
2210 | ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, | 2206 | ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, |
2211 | NULL, argv_cleanup, NULL); | 2207 | NULL, NULL, NULL); |
2212 | if (ret == -ENOMEM) | 2208 | argv_free(argv); |
2213 | argv_free(argv); | ||
2214 | 2209 | ||
2215 | return ret; | 2210 | return ret; |
2216 | } | 2211 | } |
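
In the sys.c hunks above, __orderly_poweroff() no longer registers an argv_cleanup callback with call_usermodehelper_fns(); since UMH_WAIT_EXEC waits until the helper has been exec'ed, the argument vector is no longer referenced once the call returns, so the caller now frees argv unconditionally instead of only on -ENOMEM. A userspace sketch of that ownership rule, with a hypothetical run_helper_demo() standing in for the usermode-helper call:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical stand-in for call_usermodehelper_fns(..., UMH_WAIT_EXEC,
     * NULL, NULL, NULL): it may fail, and it never takes ownership of argv
     * now that no cleanup callback is passed. */
    static int run_helper_demo(char **argv)
    {
        printf("would exec %s\n", argv[0]);
        return 0;
    }

    int main(void)
    {
        char *argv[] = { strdup("/sbin/poweroff"), NULL };
        int ret;

        if (!argv[0])
            return 1;
        ret = run_helper_demo(argv);
        /* Unconditional cleanup: the old code freed argv only when
         * ret == -ENOMEM and relied on the callback otherwise. */
        free(argv[0]);
        return ret ? 1 : 0;
    }
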
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4fc9be955c71..afc1dc60f3f8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -105,7 +105,6 @@ extern char core_pattern[]; | |||
105 | extern unsigned int core_pipe_limit; | 105 | extern unsigned int core_pipe_limit; |
106 | #endif | 106 | #endif |
107 | extern int pid_max; | 107 | extern int pid_max; |
108 | extern int min_free_kbytes; | ||
109 | extern int pid_max_min, pid_max_max; | 108 | extern int pid_max_min, pid_max_max; |
110 | extern int sysctl_drop_caches; | 109 | extern int sysctl_drop_caches; |
111 | extern int percpu_pagelist_fraction; | 110 | extern int percpu_pagelist_fraction; |
@@ -158,14 +157,20 @@ extern int sysctl_tsb_ratio; | |||
158 | 157 | ||
159 | #ifdef __hppa__ | 158 | #ifdef __hppa__ |
160 | extern int pwrsw_enabled; | 159 | extern int pwrsw_enabled; |
160 | #endif | ||
161 | |||
162 | #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW | ||
161 | extern int unaligned_enabled; | 163 | extern int unaligned_enabled; |
162 | #endif | 164 | #endif |
163 | 165 | ||
164 | #ifdef CONFIG_IA64 | 166 | #ifdef CONFIG_IA64 |
165 | extern int no_unaligned_warning; | ||
166 | extern int unaligned_dump_stack; | 167 | extern int unaligned_dump_stack; |
167 | #endif | 168 | #endif |
168 | 169 | ||
170 | #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN | ||
171 | extern int no_unaligned_warning; | ||
172 | #endif | ||
173 | |||
169 | #ifdef CONFIG_PROC_SYSCTL | 174 | #ifdef CONFIG_PROC_SYSCTL |
170 | static int proc_do_cad_pid(struct ctl_table *table, int write, | 175 | static int proc_do_cad_pid(struct ctl_table *table, int write, |
171 | void __user *buffer, size_t *lenp, loff_t *ppos); | 176 | void __user *buffer, size_t *lenp, loff_t *ppos); |
@@ -553,6 +558,8 @@ static struct ctl_table kern_table[] = { | |||
553 | .mode = 0644, | 558 | .mode = 0644, |
554 | .proc_handler = proc_dointvec, | 559 | .proc_handler = proc_dointvec, |
555 | }, | 560 | }, |
561 | #endif | ||
562 | #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW | ||
556 | { | 563 | { |
557 | .procname = "unaligned-trap", | 564 | .procname = "unaligned-trap", |
558 | .data = &unaligned_enabled, | 565 | .data = &unaligned_enabled, |
@@ -919,7 +926,7 @@ static struct ctl_table kern_table[] = { | |||
919 | .proc_handler = proc_doulongvec_minmax, | 926 | .proc_handler = proc_doulongvec_minmax, |
920 | }, | 927 | }, |
921 | #endif | 928 | #endif |
922 | #ifdef CONFIG_IA64 | 929 | #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN |
923 | { | 930 | { |
924 | .procname = "ignore-unaligned-usertrap", | 931 | .procname = "ignore-unaligned-usertrap", |
925 | .data = &no_unaligned_warning, | 932 | .data = &no_unaligned_warning, |
@@ -927,6 +934,8 @@ static struct ctl_table kern_table[] = { | |||
927 | .mode = 0644, | 934 | .mode = 0644, |
928 | .proc_handler = proc_dointvec, | 935 | .proc_handler = proc_dointvec, |
929 | }, | 936 | }, |
937 | #endif | ||
938 | #ifdef CONFIG_IA64 | ||
930 | { | 939 | { |
931 | .procname = "unaligned-dump-stack", | 940 | .procname = "unaligned-dump-stack", |
932 | .data = &unaligned_dump_stack, | 941 | .data = &unaligned_dump_stack, |
@@ -2014,7 +2023,7 @@ static int proc_taint(struct ctl_table *table, int write, | |||
2014 | int i; | 2023 | int i; |
2015 | for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) { | 2024 | for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) { |
2016 | if ((tmptaint >> i) & 1) | 2025 | if ((tmptaint >> i) & 1) |
2017 | add_taint(i); | 2026 | add_taint(i, LOCKDEP_STILL_OK); |
2018 | } | 2027 | } |
2019 | } | 2028 | } |
2020 | 2029 | ||
@@ -2091,7 +2100,7 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, | |||
2091 | static void validate_coredump_safety(void) | 2100 | static void validate_coredump_safety(void) |
2092 | { | 2101 | { |
2093 | #ifdef CONFIG_COREDUMP | 2102 | #ifdef CONFIG_COREDUMP |
2094 | if (suid_dumpable == SUID_DUMPABLE_SAFE && | 2103 | if (suid_dumpable == SUID_DUMP_ROOT && |
2095 | core_pattern[0] != '/' && core_pattern[0] != '|') { | 2104 | core_pattern[0] != '/' && core_pattern[0] != '|') { |
2096 | printk(KERN_WARNING "Unsafe core_pattern used with "\ | 2105 | printk(KERN_WARNING "Unsafe core_pattern used with "\ |
2097 | "suid_dumpable=2. Pipe handler or fully qualified "\ | 2106 | "suid_dumpable=2. Pipe handler or fully qualified "\ |
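
The sysctl.c hunks above stop keying the unaligned-access knobs off architecture symbols (__hppa__, CONFIG_IA64) and instead guard each ctl_table entry with its own CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW / CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN option, so any architecture can opt in. A tiny sketch of the pattern, using placeholder DEMO_* macros in place of the Kconfig symbols:

    #include <stdio.h>

    /* Build with e.g. cc -DDEMO_ARCH_UNALIGN_ALLOW to compile an entry in. */
    struct ctl_entry_demo { const char *procname; int *data; };

    static int unaligned_enabled_demo;
    static int no_unaligned_warning_demo;

    static struct ctl_entry_demo table_demo[] = {
    #ifdef DEMO_ARCH_UNALIGN_ALLOW
        { "unaligned-trap", &unaligned_enabled_demo },
    #endif
    #ifdef DEMO_ARCH_UNALIGN_NO_WARN
        { "ignore-unaligned-usertrap", &no_unaligned_warning_demo },
    #endif
        { NULL, NULL },                      /* table terminator */
    };

    int main(void)
    {
        for (struct ctl_entry_demo *e = table_demo; e->procname; e++)
            printf("registered: %s\n", e->procname);
        return 0;
    }
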
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 5a6384450501..ebf72358e86a 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -387,7 +387,6 @@ static const struct bin_table bin_net_ipv4_table[] = { | |||
387 | { CTL_INT, NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" }, | 387 | { CTL_INT, NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" }, |
388 | { CTL_INT, NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" }, | 388 | { CTL_INT, NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" }, |
389 | { CTL_STR, NET_TCP_CONG_CONTROL, "tcp_congestion_control" }, | 389 | { CTL_STR, NET_TCP_CONG_CONTROL, "tcp_congestion_control" }, |
390 | { CTL_INT, NET_TCP_ABC, "tcp_abc" }, | ||
391 | { CTL_INT, NET_TCP_MTU_PROBING, "tcp_mtu_probing" }, | 390 | { CTL_INT, NET_TCP_MTU_PROBING, "tcp_mtu_probing" }, |
392 | { CTL_INT, NET_TCP_BASE_MSS, "tcp_base_mss" }, | 391 | { CTL_INT, NET_TCP_BASE_MSS, "tcp_base_mss" }, |
393 | { CTL_INT, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" }, | 392 | { CTL_INT, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" }, |
@@ -971,7 +970,6 @@ out: | |||
971 | static ssize_t bin_intvec(struct file *file, | 970 | static ssize_t bin_intvec(struct file *file, |
972 | void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) | 971 | void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) |
973 | { | 972 | { |
974 | mm_segment_t old_fs = get_fs(); | ||
975 | ssize_t copied = 0; | 973 | ssize_t copied = 0; |
976 | char *buffer; | 974 | char *buffer; |
977 | ssize_t result; | 975 | ssize_t result; |
@@ -984,13 +982,10 @@ static ssize_t bin_intvec(struct file *file, | |||
984 | if (oldval && oldlen) { | 982 | if (oldval && oldlen) { |
985 | unsigned __user *vec = oldval; | 983 | unsigned __user *vec = oldval; |
986 | size_t length = oldlen / sizeof(*vec); | 984 | size_t length = oldlen / sizeof(*vec); |
987 | loff_t pos = 0; | ||
988 | char *str, *end; | 985 | char *str, *end; |
989 | int i; | 986 | int i; |
990 | 987 | ||
991 | set_fs(KERNEL_DS); | 988 | result = kernel_read(file, 0, buffer, BUFSZ - 1); |
992 | result = vfs_read(file, buffer, BUFSZ - 1, &pos); | ||
993 | set_fs(old_fs); | ||
994 | if (result < 0) | 989 | if (result < 0) |
995 | goto out_kfree; | 990 | goto out_kfree; |
996 | 991 | ||
@@ -1017,7 +1012,6 @@ static ssize_t bin_intvec(struct file *file, | |||
1017 | if (newval && newlen) { | 1012 | if (newval && newlen) { |
1018 | unsigned __user *vec = newval; | 1013 | unsigned __user *vec = newval; |
1019 | size_t length = newlen / sizeof(*vec); | 1014 | size_t length = newlen / sizeof(*vec); |
1020 | loff_t pos = 0; | ||
1021 | char *str, *end; | 1015 | char *str, *end; |
1022 | int i; | 1016 | int i; |
1023 | 1017 | ||
@@ -1033,9 +1027,7 @@ static ssize_t bin_intvec(struct file *file, | |||
1033 | str += snprintf(str, end - str, "%lu\t", value); | 1027 | str += snprintf(str, end - str, "%lu\t", value); |
1034 | } | 1028 | } |
1035 | 1029 | ||
1036 | set_fs(KERNEL_DS); | 1030 | result = kernel_write(file, buffer, str - buffer, 0); |
1037 | result = vfs_write(file, buffer, str - buffer, &pos); | ||
1038 | set_fs(old_fs); | ||
1039 | if (result < 0) | 1031 | if (result < 0) |
1040 | goto out_kfree; | 1032 | goto out_kfree; |
1041 | } | 1033 | } |
@@ -1049,7 +1041,6 @@ out: | |||
1049 | static ssize_t bin_ulongvec(struct file *file, | 1041 | static ssize_t bin_ulongvec(struct file *file, |
1050 | void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) | 1042 | void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) |
1051 | { | 1043 | { |
1052 | mm_segment_t old_fs = get_fs(); | ||
1053 | ssize_t copied = 0; | 1044 | ssize_t copied = 0; |
1054 | char *buffer; | 1045 | char *buffer; |
1055 | ssize_t result; | 1046 | ssize_t result; |
@@ -1062,13 +1053,10 @@ static ssize_t bin_ulongvec(struct file *file, | |||
1062 | if (oldval && oldlen) { | 1053 | if (oldval && oldlen) { |
1063 | unsigned long __user *vec = oldval; | 1054 | unsigned long __user *vec = oldval; |
1064 | size_t length = oldlen / sizeof(*vec); | 1055 | size_t length = oldlen / sizeof(*vec); |
1065 | loff_t pos = 0; | ||
1066 | char *str, *end; | 1056 | char *str, *end; |
1067 | int i; | 1057 | int i; |
1068 | 1058 | ||
1069 | set_fs(KERNEL_DS); | 1059 | result = kernel_read(file, 0, buffer, BUFSZ - 1); |
1070 | result = vfs_read(file, buffer, BUFSZ - 1, &pos); | ||
1071 | set_fs(old_fs); | ||
1072 | if (result < 0) | 1060 | if (result < 0) |
1073 | goto out_kfree; | 1061 | goto out_kfree; |
1074 | 1062 | ||
@@ -1095,7 +1083,6 @@ static ssize_t bin_ulongvec(struct file *file, | |||
1095 | if (newval && newlen) { | 1083 | if (newval && newlen) { |
1096 | unsigned long __user *vec = newval; | 1084 | unsigned long __user *vec = newval; |
1097 | size_t length = newlen / sizeof(*vec); | 1085 | size_t length = newlen / sizeof(*vec); |
1098 | loff_t pos = 0; | ||
1099 | char *str, *end; | 1086 | char *str, *end; |
1100 | int i; | 1087 | int i; |
1101 | 1088 | ||
@@ -1111,9 +1098,7 @@ static ssize_t bin_ulongvec(struct file *file, | |||
1111 | str += snprintf(str, end - str, "%lu\t", value); | 1098 | str += snprintf(str, end - str, "%lu\t", value); |
1112 | } | 1099 | } |
1113 | 1100 | ||
1114 | set_fs(KERNEL_DS); | 1101 | result = kernel_write(file, buffer, str - buffer, 0); |
1115 | result = vfs_write(file, buffer, str - buffer, &pos); | ||
1116 | set_fs(old_fs); | ||
1117 | if (result < 0) | 1102 | if (result < 0) |
1118 | goto out_kfree; | 1103 | goto out_kfree; |
1119 | } | 1104 | } |
@@ -1127,19 +1112,15 @@ out: | |||
1127 | static ssize_t bin_uuid(struct file *file, | 1112 | static ssize_t bin_uuid(struct file *file, |
1128 | void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) | 1113 | void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) |
1129 | { | 1114 | { |
1130 | mm_segment_t old_fs = get_fs(); | ||
1131 | ssize_t result, copied = 0; | 1115 | ssize_t result, copied = 0; |
1132 | 1116 | ||
1133 | /* Only supports reads */ | 1117 | /* Only supports reads */ |
1134 | if (oldval && oldlen) { | 1118 | if (oldval && oldlen) { |
1135 | loff_t pos = 0; | ||
1136 | char buf[40], *str = buf; | 1119 | char buf[40], *str = buf; |
1137 | unsigned char uuid[16]; | 1120 | unsigned char uuid[16]; |
1138 | int i; | 1121 | int i; |
1139 | 1122 | ||
1140 | set_fs(KERNEL_DS); | 1123 | result = kernel_read(file, 0, buf, sizeof(buf) - 1); |
1141 | result = vfs_read(file, buf, sizeof(buf) - 1, &pos); | ||
1142 | set_fs(old_fs); | ||
1143 | if (result < 0) | 1124 | if (result < 0) |
1144 | goto out; | 1125 | goto out; |
1145 | 1126 | ||
@@ -1175,18 +1156,14 @@ out: | |||
1175 | static ssize_t bin_dn_node_address(struct file *file, | 1156 | static ssize_t bin_dn_node_address(struct file *file, |
1176 | void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) | 1157 | void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) |
1177 | { | 1158 | { |
1178 | mm_segment_t old_fs = get_fs(); | ||
1179 | ssize_t result, copied = 0; | 1159 | ssize_t result, copied = 0; |
1180 | 1160 | ||
1181 | if (oldval && oldlen) { | 1161 | if (oldval && oldlen) { |
1182 | loff_t pos = 0; | ||
1183 | char buf[15], *nodep; | 1162 | char buf[15], *nodep; |
1184 | unsigned long area, node; | 1163 | unsigned long area, node; |
1185 | __le16 dnaddr; | 1164 | __le16 dnaddr; |
1186 | 1165 | ||
1187 | set_fs(KERNEL_DS); | 1166 | result = kernel_read(file, 0, buf, sizeof(buf) - 1); |
1188 | result = vfs_read(file, buf, sizeof(buf) - 1, &pos); | ||
1189 | set_fs(old_fs); | ||
1190 | if (result < 0) | 1167 | if (result < 0) |
1191 | goto out; | 1168 | goto out; |
1192 | 1169 | ||
@@ -1194,9 +1171,10 @@ static ssize_t bin_dn_node_address(struct file *file, | |||
1194 | 1171 | ||
1195 | /* Convert the decnet address to binary */ | 1172 | /* Convert the decnet address to binary */ |
1196 | result = -EIO; | 1173 | result = -EIO; |
1197 | nodep = strchr(buf, '.') + 1; | 1174 | nodep = strchr(buf, '.'); |
1198 | if (!nodep) | 1175 | if (!nodep) |
1199 | goto out; | 1176 | goto out; |
1177 | ++nodep; | ||
1200 | 1178 | ||
1201 | area = simple_strtoul(buf, NULL, 10); | 1179 | area = simple_strtoul(buf, NULL, 10); |
1202 | node = simple_strtoul(nodep, NULL, 10); | 1180 | node = simple_strtoul(nodep, NULL, 10); |
@@ -1215,7 +1193,6 @@ static ssize_t bin_dn_node_address(struct file *file, | |||
1215 | } | 1193 | } |
1216 | 1194 | ||
1217 | if (newval && newlen) { | 1195 | if (newval && newlen) { |
1218 | loff_t pos = 0; | ||
1219 | __le16 dnaddr; | 1196 | __le16 dnaddr; |
1220 | char buf[15]; | 1197 | char buf[15]; |
1221 | int len; | 1198 | int len; |
@@ -1232,9 +1209,7 @@ static ssize_t bin_dn_node_address(struct file *file, | |||
1232 | le16_to_cpu(dnaddr) >> 10, | 1209 | le16_to_cpu(dnaddr) >> 10, |
1233 | le16_to_cpu(dnaddr) & 0x3ff); | 1210 | le16_to_cpu(dnaddr) & 0x3ff); |
1234 | 1211 | ||
1235 | set_fs(KERNEL_DS); | 1212 | result = kernel_write(file, buf, len, 0); |
1236 | result = vfs_write(file, buf, len, &pos); | ||
1237 | set_fs(old_fs); | ||
1238 | if (result < 0) | 1213 | if (result < 0) |
1239 | goto out; | 1214 | goto out; |
1240 | } | 1215 | } |
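The hunks above all make the same conversion: instead of widening the address limit with set_fs(KERNEL_DS) so that vfs_read()/vfs_write() will accept a kernel buffer, the code calls kernel_read()/kernel_write(), which take the kernel buffer and the file offset directly. A minimal sketch of the resulting read pattern, using the same call shape as in the hunk (offset, buffer, length); this is an illustration only, with error handling reduced to the essentials:

	/* Sketch only: read a /proc/sys file into a kernel buffer and
	 * NUL-terminate it so the caller can parse it as text, much as the
	 * bin_*() helpers above go on to do. */
	static ssize_t read_sysctl_text(struct file *file, char *buf, size_t size)
	{
		ssize_t n;

		n = kernel_read(file, 0, buf, size - 1);  /* offset 0, kernel buffer */
		if (n < 0)
			return n;
		buf[n] = '\0';
		return n;
	}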
diff --git a/kernel/time.c b/kernel/time.c index c2a27dd93142..f8342a41efa6 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -240,7 +240,7 @@ EXPORT_SYMBOL(current_fs_time); | |||
240 | * Avoid unnecessary multiplications/divisions in the | 240 | * Avoid unnecessary multiplications/divisions in the |
241 | * two most common HZ cases: | 241 | * two most common HZ cases: |
242 | */ | 242 | */ |
243 | inline unsigned int jiffies_to_msecs(const unsigned long j) | 243 | unsigned int jiffies_to_msecs(const unsigned long j) |
244 | { | 244 | { |
245 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) | 245 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) |
246 | return (MSEC_PER_SEC / HZ) * j; | 246 | return (MSEC_PER_SEC / HZ) * j; |
@@ -256,7 +256,7 @@ inline unsigned int jiffies_to_msecs(const unsigned long j) | |||
256 | } | 256 | } |
257 | EXPORT_SYMBOL(jiffies_to_msecs); | 257 | EXPORT_SYMBOL(jiffies_to_msecs); |
258 | 258 | ||
259 | inline unsigned int jiffies_to_usecs(const unsigned long j) | 259 | unsigned int jiffies_to_usecs(const unsigned long j) |
260 | { | 260 | { |
261 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) | 261 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) |
262 | return (USEC_PER_SEC / HZ) * j; | 262 | return (USEC_PER_SEC / HZ) * j; |
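Dropping the inline keyword above does not change the arithmetic: when HZ divides evenly into MSEC_PER_SEC (HZ = 100, 250, 1000, ...) the conversion stays a single multiply. A small self-contained check of that fast path (illustrative only; HZ values that do not divide evenly fall back, outside this hunk, to the HZ_TO_MSEC_* constants generated by kernel/timeconst.bc further down):

	/* Illustrative only: the "MSEC_PER_SEC % HZ == 0" fast path above. */
	#include <assert.h>

	#define MSEC_PER_SEC 1000U

	static unsigned int j_to_ms(unsigned long j, unsigned int hz)
	{
		assert(MSEC_PER_SEC % hz == 0);      /* precondition of this branch */
		return (MSEC_PER_SEC / hz) * j;      /* HZ=1000 -> j, HZ=250 -> 4*j */
	}

	int main(void)
	{
		assert(j_to_ms(250, 250) == 1000);   /* 250 ticks at HZ=250  == 1 s    */
		assert(j_to_ms(100, 1000) == 100);   /* 100 ticks at HZ=1000 == 100 ms */
		return 0;
	}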
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 30b6de0d977c..c6d6400ee137 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -339,6 +339,7 @@ void clockevents_config_and_register(struct clock_event_device *dev, | |||
339 | clockevents_config(dev, freq); | 339 | clockevents_config(dev, freq); |
340 | clockevents_register_device(dev); | 340 | clockevents_register_device(dev); |
341 | } | 341 | } |
342 | EXPORT_SYMBOL_GPL(clockevents_config_and_register); | ||
342 | 343 | ||
343 | /** | 344 | /** |
344 | * clockevents_update_freq - Update frequency and reprogram a clock event device. | 345 | * clockevents_update_freq - Update frequency and reprogram a clock event device. |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index b10a42bb0165..072bb066bb7d 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -23,7 +23,7 @@ | |||
23 | * NTP timekeeping variables: | 23 | * NTP timekeeping variables: |
24 | */ | 24 | */ |
25 | 25 | ||
26 | DEFINE_SPINLOCK(ntp_lock); | 26 | DEFINE_RAW_SPINLOCK(ntp_lock); |
27 | 27 | ||
28 | 28 | ||
29 | /* USER_HZ period (usecs): */ | 29 | /* USER_HZ period (usecs): */ |
@@ -348,7 +348,7 @@ void ntp_clear(void) | |||
348 | { | 348 | { |
349 | unsigned long flags; | 349 | unsigned long flags; |
350 | 350 | ||
351 | spin_lock_irqsave(&ntp_lock, flags); | 351 | raw_spin_lock_irqsave(&ntp_lock, flags); |
352 | 352 | ||
353 | time_adjust = 0; /* stop active adjtime() */ | 353 | time_adjust = 0; /* stop active adjtime() */ |
354 | time_status |= STA_UNSYNC; | 354 | time_status |= STA_UNSYNC; |
@@ -362,7 +362,7 @@ void ntp_clear(void) | |||
362 | 362 | ||
363 | /* Clear PPS state variables */ | 363 | /* Clear PPS state variables */ |
364 | pps_clear(); | 364 | pps_clear(); |
365 | spin_unlock_irqrestore(&ntp_lock, flags); | 365 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
366 | 366 | ||
367 | } | 367 | } |
368 | 368 | ||
@@ -372,9 +372,9 @@ u64 ntp_tick_length(void) | |||
372 | unsigned long flags; | 372 | unsigned long flags; |
373 | s64 ret; | 373 | s64 ret; |
374 | 374 | ||
375 | spin_lock_irqsave(&ntp_lock, flags); | 375 | raw_spin_lock_irqsave(&ntp_lock, flags); |
376 | ret = tick_length; | 376 | ret = tick_length; |
377 | spin_unlock_irqrestore(&ntp_lock, flags); | 377 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
378 | return ret; | 378 | return ret; |
379 | } | 379 | } |
380 | 380 | ||
@@ -395,7 +395,7 @@ int second_overflow(unsigned long secs) | |||
395 | int leap = 0; | 395 | int leap = 0; |
396 | unsigned long flags; | 396 | unsigned long flags; |
397 | 397 | ||
398 | spin_lock_irqsave(&ntp_lock, flags); | 398 | raw_spin_lock_irqsave(&ntp_lock, flags); |
399 | 399 | ||
400 | /* | 400 | /* |
401 | * Leap second processing. If in leap-insert state at the end of the | 401 | * Leap second processing. If in leap-insert state at the end of the |
@@ -479,7 +479,7 @@ int second_overflow(unsigned long secs) | |||
479 | time_adjust = 0; | 479 | time_adjust = 0; |
480 | 480 | ||
481 | out: | 481 | out: |
482 | spin_unlock_irqrestore(&ntp_lock, flags); | 482 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
483 | 483 | ||
484 | return leap; | 484 | return leap; |
485 | } | 485 | } |
@@ -672,7 +672,7 @@ int do_adjtimex(struct timex *txc) | |||
672 | 672 | ||
673 | getnstimeofday(&ts); | 673 | getnstimeofday(&ts); |
674 | 674 | ||
675 | spin_lock_irq(&ntp_lock); | 675 | raw_spin_lock_irq(&ntp_lock); |
676 | 676 | ||
677 | if (txc->modes & ADJ_ADJTIME) { | 677 | if (txc->modes & ADJ_ADJTIME) { |
678 | long save_adjust = time_adjust; | 678 | long save_adjust = time_adjust; |
@@ -714,7 +714,7 @@ int do_adjtimex(struct timex *txc) | |||
714 | /* fill PPS status fields */ | 714 | /* fill PPS status fields */ |
715 | pps_fill_timex(txc); | 715 | pps_fill_timex(txc); |
716 | 716 | ||
717 | spin_unlock_irq(&ntp_lock); | 717 | raw_spin_unlock_irq(&ntp_lock); |
718 | 718 | ||
719 | txc->time.tv_sec = ts.tv_sec; | 719 | txc->time.tv_sec = ts.tv_sec; |
720 | txc->time.tv_usec = ts.tv_nsec; | 720 | txc->time.tv_usec = ts.tv_nsec; |
@@ -912,7 +912,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
912 | 912 | ||
913 | pts_norm = pps_normalize_ts(*phase_ts); | 913 | pts_norm = pps_normalize_ts(*phase_ts); |
914 | 914 | ||
915 | spin_lock_irqsave(&ntp_lock, flags); | 915 | raw_spin_lock_irqsave(&ntp_lock, flags); |
916 | 916 | ||
917 | /* clear the error bits, they will be set again if needed */ | 917 | /* clear the error bits, they will be set again if needed */ |
918 | time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); | 918 | time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); |
@@ -925,7 +925,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
925 | * just start the frequency interval */ | 925 | * just start the frequency interval */ |
926 | if (unlikely(pps_fbase.tv_sec == 0)) { | 926 | if (unlikely(pps_fbase.tv_sec == 0)) { |
927 | pps_fbase = *raw_ts; | 927 | pps_fbase = *raw_ts; |
928 | spin_unlock_irqrestore(&ntp_lock, flags); | 928 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
929 | return; | 929 | return; |
930 | } | 930 | } |
931 | 931 | ||
@@ -940,7 +940,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
940 | time_status |= STA_PPSJITTER; | 940 | time_status |= STA_PPSJITTER; |
941 | /* restart the frequency calibration interval */ | 941 | /* restart the frequency calibration interval */ |
942 | pps_fbase = *raw_ts; | 942 | pps_fbase = *raw_ts; |
943 | spin_unlock_irqrestore(&ntp_lock, flags); | 943 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
944 | pr_err("hardpps: PPSJITTER: bad pulse\n"); | 944 | pr_err("hardpps: PPSJITTER: bad pulse\n"); |
945 | return; | 945 | return; |
946 | } | 946 | } |
@@ -957,7 +957,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
957 | 957 | ||
958 | hardpps_update_phase(pts_norm.nsec); | 958 | hardpps_update_phase(pts_norm.nsec); |
959 | 959 | ||
960 | spin_unlock_irqrestore(&ntp_lock, flags); | 960 | raw_spin_unlock_irqrestore(&ntp_lock, flags); |
961 | } | 961 | } |
962 | EXPORT_SYMBOL(hardpps); | 962 | EXPORT_SYMBOL(hardpps); |
963 | 963 | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 314b9ee07edf..a19a39952c1b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -554,6 +554,7 @@ void tick_nohz_idle_enter(void) | |||
554 | 554 | ||
555 | local_irq_enable(); | 555 | local_irq_enable(); |
556 | } | 556 | } |
557 | EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); | ||
557 | 558 | ||
558 | /** | 559 | /** |
559 | * tick_nohz_irq_exit - update next tick event from interrupt exit | 560 | * tick_nohz_irq_exit - update next tick event from interrupt exit |
@@ -685,6 +686,7 @@ void tick_nohz_idle_exit(void) | |||
685 | 686 | ||
686 | local_irq_enable(); | 687 | local_irq_enable(); |
687 | } | 688 | } |
689 | EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); | ||
688 | 690 | ||
689 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) | 691 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) |
690 | { | 692 | { |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1e35515a875e..9a0bc98fbe1d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -138,6 +138,20 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) | |||
138 | } | 138 | } |
139 | 139 | ||
140 | /* Timekeeper helper functions. */ | 140 | /* Timekeeper helper functions. */ |
141 | |||
142 | #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET | ||
143 | u32 (*arch_gettimeoffset)(void); | ||
144 | |||
145 | u32 get_arch_timeoffset(void) | ||
146 | { | ||
147 | if (likely(arch_gettimeoffset)) | ||
148 | return arch_gettimeoffset(); | ||
149 | return 0; | ||
150 | } | ||
151 | #else | ||
152 | static inline u32 get_arch_timeoffset(void) { return 0; } | ||
153 | #endif | ||
154 | |||
141 | static inline s64 timekeeping_get_ns(struct timekeeper *tk) | 155 | static inline s64 timekeeping_get_ns(struct timekeeper *tk) |
142 | { | 156 | { |
143 | cycle_t cycle_now, cycle_delta; | 157 | cycle_t cycle_now, cycle_delta; |
@@ -154,8 +168,8 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk) | |||
154 | nsec = cycle_delta * tk->mult + tk->xtime_nsec; | 168 | nsec = cycle_delta * tk->mult + tk->xtime_nsec; |
155 | nsec >>= tk->shift; | 169 | nsec >>= tk->shift; |
156 | 170 | ||
157 | /* If arch requires, add in gettimeoffset() */ | 171 | /* If arch requires, add in get_arch_timeoffset() */ |
158 | return nsec + arch_gettimeoffset(); | 172 | return nsec + get_arch_timeoffset(); |
159 | } | 173 | } |
160 | 174 | ||
161 | static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) | 175 | static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) |
@@ -174,8 +188,8 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) | |||
174 | /* convert delta to nanoseconds. */ | 188 | /* convert delta to nanoseconds. */ |
175 | nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); | 189 | nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); |
176 | 190 | ||
177 | /* If arch requires, add in gettimeoffset() */ | 191 | /* If arch requires, add in get_arch_timeoffset() */ |
178 | return nsec + arch_gettimeoffset(); | 192 | return nsec + get_arch_timeoffset(); |
179 | } | 193 | } |
180 | 194 | ||
181 | static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); | 195 | static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); |
@@ -257,8 +271,8 @@ static void timekeeping_forward_now(struct timekeeper *tk) | |||
257 | 271 | ||
258 | tk->xtime_nsec += cycle_delta * tk->mult; | 272 | tk->xtime_nsec += cycle_delta * tk->mult; |
259 | 273 | ||
260 | /* If arch requires, add in gettimeoffset() */ | 274 | /* If arch requires, add in get_arch_timeoffset() */ |
261 | tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift; | 275 | tk->xtime_nsec += (u64)get_arch_timeoffset() << tk->shift; |
262 | 276 | ||
263 | tk_normalize_xtime(tk); | 277 | tk_normalize_xtime(tk); |
264 | 278 | ||
diff --git a/kernel/timeconst.bc b/kernel/timeconst.bc new file mode 100644 index 000000000000..511bdf2cafda --- /dev/null +++ b/kernel/timeconst.bc | |||
@@ -0,0 +1,108 @@ | |||
1 | scale=0 | ||
2 | |||
3 | define gcd(a,b) { | ||
4 | auto t; | ||
5 | while (b) { | ||
6 | t = b; | ||
7 | b = a % b; | ||
8 | a = t; | ||
9 | } | ||
10 | return a; | ||
11 | } | ||
12 | |||
13 | /* Division by reciprocal multiplication. */ | ||
14 | define fmul(b,n,d) { | ||
15 | return (2^b*n+d-1)/d; | ||
16 | } | ||
17 | |||
18 | /* Adjustment factor when a ceiling value is used. Use as: | ||
19 | (imul * n) + (fmulxx * n + fadjxx) >> xx) */ | ||
20 | define fadj(b,n,d) { | ||
21 | auto v; | ||
22 | d = d/gcd(n,d); | ||
23 | v = 2^b*(d-1)/d; | ||
24 | return v; | ||
25 | } | ||
26 | |||
27 | /* Compute the appropriate mul/adj values as well as a shift count, | ||
28 | which brings the mul value into the range 2^b-1 <= x < 2^b. Such | ||
29 | a shift value will be correct in the signed integer range and off | ||
30 | by at most one in the upper half of the unsigned range. */ | ||
31 | define fmuls(b,n,d) { | ||
32 | auto s, m; | ||
33 | for (s = 0; 1; s++) { | ||
34 | m = fmul(s,n,d); | ||
35 | if (m >= 2^(b-1)) | ||
36 | return s; | ||
37 | } | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | define timeconst(hz) { | ||
42 | print "/* Automatically generated by kernel/timeconst.bc */\n" | ||
43 | print "/* Time conversion constants for HZ == ", hz, " */\n" | ||
44 | print "\n" | ||
45 | |||
46 | print "#ifndef KERNEL_TIMECONST_H\n" | ||
47 | print "#define KERNEL_TIMECONST_H\n\n" | ||
48 | |||
49 | print "#include <linux/param.h>\n" | ||
50 | print "#include <linux/types.h>\n\n" | ||
51 | |||
52 | print "#if HZ != ", hz, "\n" | ||
53 | print "#error \qkernel/timeconst.h has the wrong HZ value!\q\n" | ||
54 | print "#endif\n\n" | ||
55 | |||
56 | if (hz < 2) { | ||
57 | print "#error Totally bogus HZ value!\n" | ||
58 | } else { | ||
59 | s=fmuls(32,1000,hz) | ||
60 | obase=16 | ||
61 | print "#define HZ_TO_MSEC_MUL32\tU64_C(0x", fmul(s,1000,hz), ")\n" | ||
62 | print "#define HZ_TO_MSEC_ADJ32\tU64_C(0x", fadj(s,1000,hz), ")\n" | ||
63 | obase=10 | ||
64 | print "#define HZ_TO_MSEC_SHR32\t", s, "\n" | ||
65 | |||
66 | s=fmuls(32,hz,1000) | ||
67 | obase=16 | ||
68 | print "#define MSEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000), ")\n" | ||
69 | print "#define MSEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000), ")\n" | ||
70 | obase=10 | ||
71 | print "#define MSEC_TO_HZ_SHR32\t", s, "\n" | ||
72 | |||
73 | obase=10 | ||
74 | cd=gcd(hz,1000) | ||
75 | print "#define HZ_TO_MSEC_NUM\t\t", 1000/cd, "\n" | ||
76 | print "#define HZ_TO_MSEC_DEN\t\t", hz/cd, "\n" | ||
77 | print "#define MSEC_TO_HZ_NUM\t\t", hz/cd, "\n" | ||
78 | print "#define MSEC_TO_HZ_DEN\t\t", 1000/cd, "\n" | ||
79 | print "\n" | ||
80 | |||
81 | s=fmuls(32,1000000,hz) | ||
82 | obase=16 | ||
83 | print "#define HZ_TO_USEC_MUL32\tU64_C(0x", fmul(s,1000000,hz), ")\n" | ||
84 | print "#define HZ_TO_USEC_ADJ32\tU64_C(0x", fadj(s,1000000,hz), ")\n" | ||
85 | obase=10 | ||
86 | print "#define HZ_TO_USEC_SHR32\t", s, "\n" | ||
87 | |||
88 | s=fmuls(32,hz,1000000) | ||
89 | obase=16 | ||
90 | print "#define USEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000000), ")\n" | ||
91 | print "#define USEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000000), ")\n" | ||
92 | obase=10 | ||
93 | print "#define USEC_TO_HZ_SHR32\t", s, "\n" | ||
94 | |||
95 | obase=10 | ||
96 | cd=gcd(hz,1000000) | ||
97 | print "#define HZ_TO_USEC_NUM\t\t", 1000000/cd, "\n" | ||
98 | print "#define HZ_TO_USEC_DEN\t\t", hz/cd, "\n" | ||
99 | print "#define USEC_TO_HZ_NUM\t\t", hz/cd, "\n" | ||
100 | print "#define USEC_TO_HZ_DEN\t\t", 1000000/cd, "\n" | ||
101 | print "\n" | ||
102 | |||
103 | print "#endif /* KERNEL_TIMECONST_H */\n" | ||
104 | } | ||
105 | halt | ||
106 | } | ||
107 | |||
108 | timeconst(hz) | ||
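The script above is the whole trick behind constant-HZ time conversion: for a conversion factor n/d it emits a multiplier MUL32 = ceil(2^s * n / d), a rounding term ADJ32, and a shift s chosen so that (x * MUL32 + ADJ32) >> s matches exact ceiling division of x * n / d, which per the fmuls() comment holds throughout the signed integer range. Below is a standalone C check of that identity for HZ=300, with fmul/fadj/fmuls transcribed from the bc functions above; this is an illustration only, not part of the patch, but for HZ=300 it reproduces the 0x9999999a / 0x1cccccccc / 33 triple visible in the canned table of the deleted perl script further down.

	/* Recompute MSEC_TO_HZ_{MUL32,ADJ32,SHR32} for HZ=300 and verify the
	 * reciprocal multiplication against exact ceiling division.
	 * (gcc/clang: uses the __uint128_t extension for the wide multiply.) */
	#include <stdint.h>
	#include <stdio.h>

	static uint64_t fmul(int s, uint64_t n, uint64_t d)   /* ceil(2^s*n/d) */
	{
		return (uint64_t)((((__uint128_t)n << s) + d - 1) / d);
	}

	static uint64_t fadj(int s, uint64_t n, uint64_t d)   /* rounding term */
	{
		uint64_t g = d, a = n, t;

		while (a) { t = a; a = g % a; g = t; }        /* g = gcd(n, d) */
		d /= g;
		return (uint64_t)((((__uint128_t)(d - 1)) << s) / d);
	}

	int main(void)
	{
		const uint64_t hz = 300, msec = 1000;
		int s = 0;

		while (fmul(s, hz, msec) < (1ULL << 31))      /* fmuls(32, hz, 1000) */
			s++;

		uint64_t mul = fmul(s, hz, msec);
		uint64_t adj = fadj(s, hz, msec);

		for (uint64_t m = 0; m <= 1000000; m++) {
			uint64_t fast  = (uint64_t)(((__uint128_t)m * mul + adj) >> s);
			uint64_t exact = (m * hz + msec - 1) / msec;  /* ceil(m*HZ/1000) */
			if (fast != exact)
				printf("mismatch at m=%llu\n", (unsigned long long)m);
		}
		printf("MUL32=%#llx ADJ32=%#llx SHR32=%d\n",
		       (unsigned long long)mul, (unsigned long long)adj, s);
		return 0;
	}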
diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl deleted file mode 100644 index 3f42652a6a37..000000000000 --- a/kernel/timeconst.pl +++ /dev/null | |||
@@ -1,376 +0,0 @@ | |||
1 | #!/usr/bin/perl | ||
2 | # ----------------------------------------------------------------------- | ||
3 | # | ||
4 | # Copyright 2007-2008 rPath, Inc. - All Rights Reserved | ||
5 | # | ||
6 | # This file is part of the Linux kernel, and is made available under | ||
7 | # the terms of the GNU General Public License version 2 or (at your | ||
8 | # option) any later version; incorporated herein by reference. | ||
9 | # | ||
10 | # ----------------------------------------------------------------------- | ||
11 | # | ||
12 | |||
13 | # | ||
14 | # Usage: timeconst.pl HZ > timeconst.h | ||
15 | # | ||
16 | |||
17 | # Precomputed values for systems without Math::BigInt | ||
18 | # Generated by: | ||
19 | # timeconst.pl --can 24 32 48 64 100 122 128 200 250 256 300 512 1000 1024 1200 | ||
20 | %canned_values = ( | ||
21 | 24 => [ | ||
22 | '0xa6aaaaab','0x2aaaaaa',26, | ||
23 | 125,3, | ||
24 | '0xc49ba5e4','0x1fbe76c8b4',37, | ||
25 | 3,125, | ||
26 | '0xa2c2aaab','0xaaaa',16, | ||
27 | 125000,3, | ||
28 | '0xc9539b89','0x7fffbce4217d',47, | ||
29 | 3,125000, | ||
30 | ], 32 => [ | ||
31 | '0xfa000000','0x6000000',27, | ||
32 | 125,4, | ||
33 | '0x83126e98','0xfdf3b645a',36, | ||
34 | 4,125, | ||
35 | '0xf4240000','0x0',17, | ||
36 | 31250,1, | ||
37 | '0x8637bd06','0x3fff79c842fa',46, | ||
38 | 1,31250, | ||
39 | ], 48 => [ | ||
40 | '0xa6aaaaab','0x6aaaaaa',27, | ||
41 | 125,6, | ||
42 | '0xc49ba5e4','0xfdf3b645a',36, | ||
43 | 6,125, | ||
44 | '0xa2c2aaab','0x15555',17, | ||
45 | 62500,3, | ||
46 | '0xc9539b89','0x3fffbce4217d',46, | ||
47 | 3,62500, | ||
48 | ], 64 => [ | ||
49 | '0xfa000000','0xe000000',28, | ||
50 | 125,8, | ||
51 | '0x83126e98','0x7ef9db22d',35, | ||
52 | 8,125, | ||
53 | '0xf4240000','0x0',18, | ||
54 | 15625,1, | ||
55 | '0x8637bd06','0x1fff79c842fa',45, | ||
56 | 1,15625, | ||
57 | ], 100 => [ | ||
58 | '0xa0000000','0x0',28, | ||
59 | 10,1, | ||
60 | '0xcccccccd','0x733333333',35, | ||
61 | 1,10, | ||
62 | '0x9c400000','0x0',18, | ||
63 | 10000,1, | ||
64 | '0xd1b71759','0x1fff2e48e8a7',45, | ||
65 | 1,10000, | ||
66 | ], 122 => [ | ||
67 | '0x8325c53f','0xfbcda3a',28, | ||
68 | 500,61, | ||
69 | '0xf9db22d1','0x7fbe76c8b',35, | ||
70 | 61,500, | ||
71 | '0x8012e2a0','0x3ef36',18, | ||
72 | 500000,61, | ||
73 | '0xffda4053','0x1ffffbce4217',45, | ||
74 | 61,500000, | ||
75 | ], 128 => [ | ||
76 | '0xfa000000','0x1e000000',29, | ||
77 | 125,16, | ||
78 | '0x83126e98','0x3f7ced916',34, | ||
79 | 16,125, | ||
80 | '0xf4240000','0x40000',19, | ||
81 | 15625,2, | ||
82 | '0x8637bd06','0xfffbce4217d',44, | ||
83 | 2,15625, | ||
84 | ], 200 => [ | ||
85 | '0xa0000000','0x0',29, | ||
86 | 5,1, | ||
87 | '0xcccccccd','0x333333333',34, | ||
88 | 1,5, | ||
89 | '0x9c400000','0x0',19, | ||
90 | 5000,1, | ||
91 | '0xd1b71759','0xfff2e48e8a7',44, | ||
92 | 1,5000, | ||
93 | ], 250 => [ | ||
94 | '0x80000000','0x0',29, | ||
95 | 4,1, | ||
96 | '0x80000000','0x180000000',33, | ||
97 | 1,4, | ||
98 | '0xfa000000','0x0',20, | ||
99 | 4000,1, | ||
100 | '0x83126e98','0x7ff7ced9168',43, | ||
101 | 1,4000, | ||
102 | ], 256 => [ | ||
103 | '0xfa000000','0x3e000000',30, | ||
104 | 125,32, | ||
105 | '0x83126e98','0x1fbe76c8b',33, | ||
106 | 32,125, | ||
107 | '0xf4240000','0xc0000',20, | ||
108 | 15625,4, | ||
109 | '0x8637bd06','0x7ffde7210be',43, | ||
110 | 4,15625, | ||
111 | ], 300 => [ | ||
112 | '0xd5555556','0x2aaaaaaa',30, | ||
113 | 10,3, | ||
114 | '0x9999999a','0x1cccccccc',33, | ||
115 | 3,10, | ||
116 | '0xd0555556','0xaaaaa',20, | ||
117 | 10000,3, | ||
118 | '0x9d495183','0x7ffcb923a29',43, | ||
119 | 3,10000, | ||
120 | ], 512 => [ | ||
121 | '0xfa000000','0x7e000000',31, | ||
122 | 125,64, | ||
123 | '0x83126e98','0xfdf3b645',32, | ||
124 | 64,125, | ||
125 | '0xf4240000','0x1c0000',21, | ||
126 | 15625,8, | ||
127 | '0x8637bd06','0x3ffef39085f',42, | ||
128 | 8,15625, | ||
129 | ], 1000 => [ | ||
130 | '0x80000000','0x0',31, | ||
131 | 1,1, | ||
132 | '0x80000000','0x0',31, | ||
133 | 1,1, | ||
134 | '0xfa000000','0x0',22, | ||
135 | 1000,1, | ||
136 | '0x83126e98','0x1ff7ced9168',41, | ||
137 | 1,1000, | ||
138 | ], 1024 => [ | ||
139 | '0xfa000000','0xfe000000',32, | ||
140 | 125,128, | ||
141 | '0x83126e98','0x7ef9db22',31, | ||
142 | 128,125, | ||
143 | '0xf4240000','0x3c0000',22, | ||
144 | 15625,16, | ||
145 | '0x8637bd06','0x1fff79c842f',41, | ||
146 | 16,15625, | ||
147 | ], 1200 => [ | ||
148 | '0xd5555556','0xd5555555',32, | ||
149 | 5,6, | ||
150 | '0x9999999a','0x66666666',31, | ||
151 | 6,5, | ||
152 | '0xd0555556','0x2aaaaa',22, | ||
153 | 2500,3, | ||
154 | '0x9d495183','0x1ffcb923a29',41, | ||
155 | 3,2500, | ||
156 | ] | ||
157 | ); | ||
158 | |||
159 | $has_bigint = eval 'use Math::BigInt qw(bgcd); 1;'; | ||
160 | |||
161 | sub bint($) | ||
162 | { | ||
163 | my($x) = @_; | ||
164 | return Math::BigInt->new($x); | ||
165 | } | ||
166 | |||
167 | # | ||
168 | # Constants for division by reciprocal multiplication. | ||
169 | # (bits, numerator, denominator) | ||
170 | # | ||
171 | sub fmul($$$) | ||
172 | { | ||
173 | my ($b,$n,$d) = @_; | ||
174 | |||
175 | $n = bint($n); | ||
176 | $d = bint($d); | ||
177 | |||
178 | return scalar (($n << $b)+$d-bint(1))/$d; | ||
179 | } | ||
180 | |||
181 | sub fadj($$$) | ||
182 | { | ||
183 | my($b,$n,$d) = @_; | ||
184 | |||
185 | $n = bint($n); | ||
186 | $d = bint($d); | ||
187 | |||
188 | $d = $d/bgcd($n, $d); | ||
189 | return scalar (($d-bint(1)) << $b)/$d; | ||
190 | } | ||
191 | |||
192 | sub fmuls($$$) { | ||
193 | my($b,$n,$d) = @_; | ||
194 | my($s,$m); | ||
195 | my($thres) = bint(1) << ($b-1); | ||
196 | |||
197 | $n = bint($n); | ||
198 | $d = bint($d); | ||
199 | |||
200 | for ($s = 0; 1; $s++) { | ||
201 | $m = fmul($s,$n,$d); | ||
202 | return $s if ($m >= $thres); | ||
203 | } | ||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | # Generate a hex value if the result fits in 64 bits; | ||
208 | # otherwise skip. | ||
209 | sub bignum_hex($) { | ||
210 | my($x) = @_; | ||
211 | my $s = $x->as_hex(); | ||
212 | |||
213 | return (length($s) > 18) ? undef : $s; | ||
214 | } | ||
215 | |||
216 | # Provides mul, adj, and shr factors for a specific | ||
217 | # (bit, time, hz) combination | ||
218 | sub muladj($$$) { | ||
219 | my($b, $t, $hz) = @_; | ||
220 | my $s = fmuls($b, $t, $hz); | ||
221 | my $m = fmul($s, $t, $hz); | ||
222 | my $a = fadj($s, $t, $hz); | ||
223 | return (bignum_hex($m), bignum_hex($a), $s); | ||
224 | } | ||
225 | |||
226 | # Provides numerator, denominator values | ||
227 | sub numden($$) { | ||
228 | my($n, $d) = @_; | ||
229 | my $g = bgcd($n, $d); | ||
230 | return ($n/$g, $d/$g); | ||
231 | } | ||
232 | |||
233 | # All values for a specific (time, hz) combo | ||
234 | sub conversions($$) { | ||
235 | my ($t, $hz) = @_; | ||
236 | my @val = (); | ||
237 | |||
238 | # HZ_TO_xx | ||
239 | push(@val, muladj(32, $t, $hz)); | ||
240 | push(@val, numden($t, $hz)); | ||
241 | |||
242 | # xx_TO_HZ | ||
243 | push(@val, muladj(32, $hz, $t)); | ||
244 | push(@val, numden($hz, $t)); | ||
245 | |||
246 | return @val; | ||
247 | } | ||
248 | |||
249 | sub compute_values($) { | ||
250 | my($hz) = @_; | ||
251 | my @val = (); | ||
252 | my $s, $m, $a, $g; | ||
253 | |||
254 | if (!$has_bigint) { | ||
255 | die "$0: HZ == $hz not canned and ". | ||
256 | "Math::BigInt not available\n"; | ||
257 | } | ||
258 | |||
259 | # MSEC conversions | ||
260 | push(@val, conversions(1000, $hz)); | ||
261 | |||
262 | # USEC conversions | ||
263 | push(@val, conversions(1000000, $hz)); | ||
264 | |||
265 | return @val; | ||
266 | } | ||
267 | |||
268 | sub outputval($$) | ||
269 | { | ||
270 | my($name, $val) = @_; | ||
271 | my $csuf; | ||
272 | |||
273 | if (defined($val)) { | ||
274 | if ($name !~ /SHR/) { | ||
275 | $val = "U64_C($val)"; | ||
276 | } | ||
277 | printf "#define %-23s %s\n", $name.$csuf, $val.$csuf; | ||
278 | } | ||
279 | } | ||
280 | |||
281 | sub output($@) | ||
282 | { | ||
283 | my($hz, @val) = @_; | ||
284 | my $pfx, $bit, $suf, $s, $m, $a; | ||
285 | |||
286 | print "/* Automatically generated by kernel/timeconst.pl */\n"; | ||
287 | print "/* Conversion constants for HZ == $hz */\n"; | ||
288 | print "\n"; | ||
289 | print "#ifndef KERNEL_TIMECONST_H\n"; | ||
290 | print "#define KERNEL_TIMECONST_H\n"; | ||
291 | print "\n"; | ||
292 | |||
293 | print "#include <linux/param.h>\n"; | ||
294 | print "#include <linux/types.h>\n"; | ||
295 | |||
296 | print "\n"; | ||
297 | print "#if HZ != $hz\n"; | ||
298 | print "#error \"kernel/timeconst.h has the wrong HZ value!\"\n"; | ||
299 | print "#endif\n"; | ||
300 | print "\n"; | ||
301 | |||
302 | foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ', | ||
303 | 'HZ_TO_USEC','USEC_TO_HZ') { | ||
304 | foreach $bit (32) { | ||
305 | foreach $suf ('MUL', 'ADJ', 'SHR') { | ||
306 | outputval("${pfx}_$suf$bit", shift(@val)); | ||
307 | } | ||
308 | } | ||
309 | foreach $suf ('NUM', 'DEN') { | ||
310 | outputval("${pfx}_$suf", shift(@val)); | ||
311 | } | ||
312 | } | ||
313 | |||
314 | print "\n"; | ||
315 | print "#endif /* KERNEL_TIMECONST_H */\n"; | ||
316 | } | ||
317 | |||
318 | # Pretty-print Perl values | ||
319 | sub perlvals(@) { | ||
320 | my $v; | ||
321 | my @l = (); | ||
322 | |||
323 | foreach $v (@_) { | ||
324 | if (!defined($v)) { | ||
325 | push(@l, 'undef'); | ||
326 | } elsif ($v =~ /^0x/) { | ||
327 | push(@l, "\'".$v."\'"); | ||
328 | } else { | ||
329 | push(@l, $v.''); | ||
330 | } | ||
331 | } | ||
332 | return join(',', @l); | ||
333 | } | ||
334 | |||
335 | ($hz) = @ARGV; | ||
336 | |||
337 | # Use this to generate the %canned_values structure | ||
338 | if ($hz eq '--can') { | ||
339 | shift(@ARGV); | ||
340 | @hzlist = sort {$a <=> $b} (@ARGV); | ||
341 | |||
342 | print "# Precomputed values for systems without Math::BigInt\n"; | ||
343 | print "# Generated by:\n"; | ||
344 | print "# timeconst.pl --can ", join(' ', @hzlist), "\n"; | ||
345 | print "\%canned_values = (\n"; | ||
346 | my $pf = "\t"; | ||
347 | foreach $hz (@hzlist) { | ||
348 | my @values = compute_values($hz); | ||
349 | print "$pf$hz => [\n"; | ||
350 | while (scalar(@values)) { | ||
351 | my $bit; | ||
352 | foreach $bit (32) { | ||
353 | my $m = shift(@values); | ||
354 | my $a = shift(@values); | ||
355 | my $s = shift(@values); | ||
356 | print "\t\t", perlvals($m,$a,$s), ",\n"; | ||
357 | } | ||
358 | my $n = shift(@values); | ||
359 | my $d = shift(@values); | ||
360 | print "\t\t", perlvals($n,$d), ",\n"; | ||
361 | } | ||
362 | print "\t]"; | ||
363 | $pf = ', '; | ||
364 | } | ||
365 | print "\n);\n"; | ||
366 | } else { | ||
367 | $hz += 0; # Force to number | ||
368 | if ($hz < 1) { | ||
369 | die "Usage: $0 HZ\n"; | ||
370 | } | ||
371 | |||
372 | $cv = $canned_values{$hz}; | ||
373 | @val = defined($cv) ? @$cv : compute_values($hz); | ||
374 | output($hz, @val); | ||
375 | } | ||
376 | exit 0; | ||
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b516a8e19d51..fc382d6e2765 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -81,21 +81,6 @@ config EVENT_TRACING | |||
81 | select CONTEXT_SWITCH_TRACER | 81 | select CONTEXT_SWITCH_TRACER |
82 | bool | 82 | bool |
83 | 83 | ||
84 | config EVENT_POWER_TRACING_DEPRECATED | ||
85 | depends on EVENT_TRACING | ||
86 | bool "Deprecated power event trace API, to be removed" | ||
87 | default y | ||
88 | help | ||
89 | Provides old power event types: | ||
90 | C-state/idle accounting events: | ||
91 | power:power_start | ||
92 | power:power_end | ||
93 | and old cpufreq accounting event: | ||
94 | power:power_frequency | ||
95 | This is for userspace compatibility | ||
96 | and will vanish after 5 kernel iterations, | ||
97 | namely 3.1. | ||
98 | |||
99 | config CONTEXT_SWITCH_TRACER | 84 | config CONTEXT_SWITCH_TRACER |
100 | bool | 85 | bool |
101 | 86 | ||
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 71259e2b6b61..9e5b8c272eec 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -739,6 +739,12 @@ static void blk_add_trace_rq_complete(void *ignore, | |||
739 | struct request_queue *q, | 739 | struct request_queue *q, |
740 | struct request *rq) | 740 | struct request *rq) |
741 | { | 741 | { |
742 | struct blk_trace *bt = q->blk_trace; | ||
743 | |||
744 | /* if control ever passes through here, it's a request based driver */ | ||
745 | if (unlikely(bt && !bt->rq_based)) | ||
746 | bt->rq_based = true; | ||
747 | |||
742 | blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); | 748 | blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); |
743 | } | 749 | } |
744 | 750 | ||
@@ -774,15 +780,30 @@ static void blk_add_trace_bio_bounce(void *ignore, | |||
774 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); | 780 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); |
775 | } | 781 | } |
776 | 782 | ||
777 | static void blk_add_trace_bio_complete(void *ignore, | 783 | static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error) |
778 | struct request_queue *q, struct bio *bio, | ||
779 | int error) | ||
780 | { | 784 | { |
785 | struct request_queue *q; | ||
786 | struct blk_trace *bt; | ||
787 | |||
788 | if (!bio->bi_bdev) | ||
789 | return; | ||
790 | |||
791 | q = bdev_get_queue(bio->bi_bdev); | ||
792 | bt = q->blk_trace; | ||
793 | |||
794 | /* | ||
795 | * Request based drivers will generate both rq and bio completions. | ||
796 | * Ignore bio ones. | ||
797 | */ | ||
798 | if (likely(!bt) || bt->rq_based) | ||
799 | return; | ||
800 | |||
781 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); | 801 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); |
782 | } | 802 | } |
783 | 803 | ||
784 | static void blk_add_trace_bio_backmerge(void *ignore, | 804 | static void blk_add_trace_bio_backmerge(void *ignore, |
785 | struct request_queue *q, | 805 | struct request_queue *q, |
806 | struct request *rq, | ||
786 | struct bio *bio) | 807 | struct bio *bio) |
787 | { | 808 | { |
788 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); | 809 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); |
@@ -790,6 +811,7 @@ static void blk_add_trace_bio_backmerge(void *ignore, | |||
790 | 811 | ||
791 | static void blk_add_trace_bio_frontmerge(void *ignore, | 812 | static void blk_add_trace_bio_frontmerge(void *ignore, |
792 | struct request_queue *q, | 813 | struct request_queue *q, |
814 | struct request *rq, | ||
793 | struct bio *bio) | 815 | struct bio *bio) |
794 | { | 816 | { |
795 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); | 817 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); |
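The comment in blk_add_trace_bio_complete() above carries the key reasoning: a request-based driver reports completion through the request hook, so the first request completion seen on a queue latches it as rq_based and its bio completions are ignored from then on, keeping each completion from appearing twice in the trace. Reduced to the bare decision (a sketch, not the kernel code; bt->rq_based is the flag set in the rq-complete hook above):

	/* Sketch only: should this bio completion be emitted into the trace? */
	static bool trace_bio_complete(const struct blk_trace *bt)
	{
		if (!bt)
			return false;        /* tracing not active on this queue  */
		return !bt->rq_based;        /* rq-based queues already traced it */
	}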
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e6effd0c40a9..6893d5a2bf08 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -762,7 +762,6 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) | |||
762 | { | 762 | { |
763 | struct ftrace_profile *rec; | 763 | struct ftrace_profile *rec; |
764 | struct hlist_head *hhd; | 764 | struct hlist_head *hhd; |
765 | struct hlist_node *n; | ||
766 | unsigned long key; | 765 | unsigned long key; |
767 | 766 | ||
768 | key = hash_long(ip, ftrace_profile_bits); | 767 | key = hash_long(ip, ftrace_profile_bits); |
@@ -771,7 +770,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) | |||
771 | if (hlist_empty(hhd)) | 770 | if (hlist_empty(hhd)) |
772 | return NULL; | 771 | return NULL; |
773 | 772 | ||
774 | hlist_for_each_entry_rcu(rec, n, hhd, node) { | 773 | hlist_for_each_entry_rcu(rec, hhd, node) { |
775 | if (rec->ip == ip) | 774 | if (rec->ip == ip) |
776 | return rec; | 775 | return rec; |
777 | } | 776 | } |
@@ -1133,7 +1132,6 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) | |||
1133 | unsigned long key; | 1132 | unsigned long key; |
1134 | struct ftrace_func_entry *entry; | 1133 | struct ftrace_func_entry *entry; |
1135 | struct hlist_head *hhd; | 1134 | struct hlist_head *hhd; |
1136 | struct hlist_node *n; | ||
1137 | 1135 | ||
1138 | if (ftrace_hash_empty(hash)) | 1136 | if (ftrace_hash_empty(hash)) |
1139 | return NULL; | 1137 | return NULL; |
@@ -1145,7 +1143,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) | |||
1145 | 1143 | ||
1146 | hhd = &hash->buckets[key]; | 1144 | hhd = &hash->buckets[key]; |
1147 | 1145 | ||
1148 | hlist_for_each_entry_rcu(entry, n, hhd, hlist) { | 1146 | hlist_for_each_entry_rcu(entry, hhd, hlist) { |
1149 | if (entry->ip == ip) | 1147 | if (entry->ip == ip) |
1150 | return entry; | 1148 | return entry; |
1151 | } | 1149 | } |
@@ -1202,7 +1200,7 @@ remove_hash_entry(struct ftrace_hash *hash, | |||
1202 | static void ftrace_hash_clear(struct ftrace_hash *hash) | 1200 | static void ftrace_hash_clear(struct ftrace_hash *hash) |
1203 | { | 1201 | { |
1204 | struct hlist_head *hhd; | 1202 | struct hlist_head *hhd; |
1205 | struct hlist_node *tp, *tn; | 1203 | struct hlist_node *tn; |
1206 | struct ftrace_func_entry *entry; | 1204 | struct ftrace_func_entry *entry; |
1207 | int size = 1 << hash->size_bits; | 1205 | int size = 1 << hash->size_bits; |
1208 | int i; | 1206 | int i; |
@@ -1212,7 +1210,7 @@ static void ftrace_hash_clear(struct ftrace_hash *hash) | |||
1212 | 1210 | ||
1213 | for (i = 0; i < size; i++) { | 1211 | for (i = 0; i < size; i++) { |
1214 | hhd = &hash->buckets[i]; | 1212 | hhd = &hash->buckets[i]; |
1215 | hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) | 1213 | hlist_for_each_entry_safe(entry, tn, hhd, hlist) |
1216 | free_hash_entry(hash, entry); | 1214 | free_hash_entry(hash, entry); |
1217 | } | 1215 | } |
1218 | FTRACE_WARN_ON(hash->count); | 1216 | FTRACE_WARN_ON(hash->count); |
@@ -1275,7 +1273,6 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) | |||
1275 | { | 1273 | { |
1276 | struct ftrace_func_entry *entry; | 1274 | struct ftrace_func_entry *entry; |
1277 | struct ftrace_hash *new_hash; | 1275 | struct ftrace_hash *new_hash; |
1278 | struct hlist_node *tp; | ||
1279 | int size; | 1276 | int size; |
1280 | int ret; | 1277 | int ret; |
1281 | int i; | 1278 | int i; |
@@ -1290,7 +1287,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) | |||
1290 | 1287 | ||
1291 | size = 1 << hash->size_bits; | 1288 | size = 1 << hash->size_bits; |
1292 | for (i = 0; i < size; i++) { | 1289 | for (i = 0; i < size; i++) { |
1293 | hlist_for_each_entry(entry, tp, &hash->buckets[i], hlist) { | 1290 | hlist_for_each_entry(entry, &hash->buckets[i], hlist) { |
1294 | ret = add_hash_entry(new_hash, entry->ip); | 1291 | ret = add_hash_entry(new_hash, entry->ip); |
1295 | if (ret < 0) | 1292 | if (ret < 0) |
1296 | goto free_hash; | 1293 | goto free_hash; |
@@ -1316,7 +1313,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, | |||
1316 | struct ftrace_hash **dst, struct ftrace_hash *src) | 1313 | struct ftrace_hash **dst, struct ftrace_hash *src) |
1317 | { | 1314 | { |
1318 | struct ftrace_func_entry *entry; | 1315 | struct ftrace_func_entry *entry; |
1319 | struct hlist_node *tp, *tn; | 1316 | struct hlist_node *tn; |
1320 | struct hlist_head *hhd; | 1317 | struct hlist_head *hhd; |
1321 | struct ftrace_hash *old_hash; | 1318 | struct ftrace_hash *old_hash; |
1322 | struct ftrace_hash *new_hash; | 1319 | struct ftrace_hash *new_hash; |
@@ -1362,7 +1359,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, | |||
1362 | size = 1 << src->size_bits; | 1359 | size = 1 << src->size_bits; |
1363 | for (i = 0; i < size; i++) { | 1360 | for (i = 0; i < size; i++) { |
1364 | hhd = &src->buckets[i]; | 1361 | hhd = &src->buckets[i]; |
1365 | hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) { | 1362 | hlist_for_each_entry_safe(entry, tn, hhd, hlist) { |
1366 | if (bits > 0) | 1363 | if (bits > 0) |
1367 | key = hash_long(entry->ip, bits); | 1364 | key = hash_long(entry->ip, bits); |
1368 | else | 1365 | else |
@@ -2901,7 +2898,6 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, | |||
2901 | { | 2898 | { |
2902 | struct ftrace_func_probe *entry; | 2899 | struct ftrace_func_probe *entry; |
2903 | struct hlist_head *hhd; | 2900 | struct hlist_head *hhd; |
2904 | struct hlist_node *n; | ||
2905 | unsigned long key; | 2901 | unsigned long key; |
2906 | 2902 | ||
2907 | key = hash_long(ip, FTRACE_HASH_BITS); | 2903 | key = hash_long(ip, FTRACE_HASH_BITS); |
@@ -2917,7 +2913,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, | |||
2917 | * on the hash. rcu_read_lock is too dangerous here. | 2913 | * on the hash. rcu_read_lock is too dangerous here. |
2918 | */ | 2914 | */ |
2919 | preempt_disable_notrace(); | 2915 | preempt_disable_notrace(); |
2920 | hlist_for_each_entry_rcu(entry, n, hhd, node) { | 2916 | hlist_for_each_entry_rcu(entry, hhd, node) { |
2921 | if (entry->ip == ip) | 2917 | if (entry->ip == ip) |
2922 | entry->ops->func(ip, parent_ip, &entry->data); | 2918 | entry->ops->func(ip, parent_ip, &entry->data); |
2923 | } | 2919 | } |
@@ -3068,7 +3064,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
3068 | void *data, int flags) | 3064 | void *data, int flags) |
3069 | { | 3065 | { |
3070 | struct ftrace_func_probe *entry; | 3066 | struct ftrace_func_probe *entry; |
3071 | struct hlist_node *n, *tmp; | 3067 | struct hlist_node *tmp; |
3072 | char str[KSYM_SYMBOL_LEN]; | 3068 | char str[KSYM_SYMBOL_LEN]; |
3073 | int type = MATCH_FULL; | 3069 | int type = MATCH_FULL; |
3074 | int i, len = 0; | 3070 | int i, len = 0; |
@@ -3091,7 +3087,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
3091 | for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { | 3087 | for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { |
3092 | struct hlist_head *hhd = &ftrace_func_hash[i]; | 3088 | struct hlist_head *hhd = &ftrace_func_hash[i]; |
3093 | 3089 | ||
3094 | hlist_for_each_entry_safe(entry, n, tmp, hhd, node) { | 3090 | hlist_for_each_entry_safe(entry, tmp, hhd, node) { |
3095 | 3091 | ||
3096 | /* break up if statements for readability */ | 3092 | /* break up if statements for readability */ |
3097 | if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) | 3093 | if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) |
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index f55fcf61b223..1c71382b283d 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c | |||
@@ -13,8 +13,5 @@ | |||
13 | #define CREATE_TRACE_POINTS | 13 | #define CREATE_TRACE_POINTS |
14 | #include <trace/events/power.h> | 14 | #include <trace/events/power.h> |
15 | 15 | ||
16 | #ifdef EVENT_POWER_TRACING_DEPRECATED | ||
17 | EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); | ||
18 | #endif | ||
19 | EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); | 16 | EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); |
20 | 17 | ||
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7244acde77b0..6989df2ba194 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -178,7 +178,7 @@ void tracing_off_permanent(void) | |||
178 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 178 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
179 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ | 179 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ |
180 | 180 | ||
181 | #if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | 181 | #ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS |
182 | # define RB_FORCE_8BYTE_ALIGNMENT 0 | 182 | # define RB_FORCE_8BYTE_ALIGNMENT 0 |
183 | # define RB_ARCH_ALIGNMENT RB_ALIGNMENT | 183 | # define RB_ARCH_ALIGNMENT RB_ALIGNMENT |
184 | #else | 184 | #else |
@@ -186,6 +186,8 @@ void tracing_off_permanent(void) | |||
186 | # define RB_ARCH_ALIGNMENT 8U | 186 | # define RB_ARCH_ALIGNMENT 8U |
187 | #endif | 187 | #endif |
188 | 188 | ||
189 | #define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT) | ||
190 | |||
189 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ | 191 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ |
190 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 192 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX |
191 | 193 | ||
@@ -334,7 +336,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); | |||
334 | struct buffer_data_page { | 336 | struct buffer_data_page { |
335 | u64 time_stamp; /* page time stamp */ | 337 | u64 time_stamp; /* page time stamp */ |
336 | local_t commit; /* write committed index */ | 338 | local_t commit; /* write committed index */ |
337 | unsigned char data[]; /* data of buffer page */ | 339 | unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */ |
338 | }; | 340 | }; |
339 | 341 | ||
340 | /* | 342 | /* |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 194d79602dc7..697e88d13907 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -739,12 +739,11 @@ static int task_state_char(unsigned long state) | |||
739 | struct trace_event *ftrace_find_event(int type) | 739 | struct trace_event *ftrace_find_event(int type) |
740 | { | 740 | { |
741 | struct trace_event *event; | 741 | struct trace_event *event; |
742 | struct hlist_node *n; | ||
743 | unsigned key; | 742 | unsigned key; |
744 | 743 | ||
745 | key = type & (EVENT_HASHSIZE - 1); | 744 | key = type & (EVENT_HASHSIZE - 1); |
746 | 745 | ||
747 | hlist_for_each_entry(event, n, &event_hash[key], node) { | 746 | hlist_for_each_entry(event, &event_hash[key], node) { |
748 | if (event->type == type) | 747 | if (event->type == type) |
749 | return event; | 748 | return event; |
750 | } | 749 | } |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index d96ba22dabfa..0c05a4592047 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
@@ -192,12 +192,11 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, | |||
192 | static struct tracepoint_entry *get_tracepoint(const char *name) | 192 | static struct tracepoint_entry *get_tracepoint(const char *name) |
193 | { | 193 | { |
194 | struct hlist_head *head; | 194 | struct hlist_head *head; |
195 | struct hlist_node *node; | ||
196 | struct tracepoint_entry *e; | 195 | struct tracepoint_entry *e; |
197 | u32 hash = jhash(name, strlen(name), 0); | 196 | u32 hash = jhash(name, strlen(name), 0); |
198 | 197 | ||
199 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; | 198 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
200 | hlist_for_each_entry(e, node, head, hlist) { | 199 | hlist_for_each_entry(e, head, hlist) { |
201 | if (!strcmp(name, e->name)) | 200 | if (!strcmp(name, e->name)) |
202 | return e; | 201 | return e; |
203 | } | 202 | } |
@@ -211,13 +210,12 @@ static struct tracepoint_entry *get_tracepoint(const char *name) | |||
211 | static struct tracepoint_entry *add_tracepoint(const char *name) | 210 | static struct tracepoint_entry *add_tracepoint(const char *name) |
212 | { | 211 | { |
213 | struct hlist_head *head; | 212 | struct hlist_head *head; |
214 | struct hlist_node *node; | ||
215 | struct tracepoint_entry *e; | 213 | struct tracepoint_entry *e; |
216 | size_t name_len = strlen(name) + 1; | 214 | size_t name_len = strlen(name) + 1; |
217 | u32 hash = jhash(name, name_len-1, 0); | 215 | u32 hash = jhash(name, name_len-1, 0); |
218 | 216 | ||
219 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; | 217 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
220 | hlist_for_each_entry(e, node, head, hlist) { | 218 | hlist_for_each_entry(e, head, hlist) { |
221 | if (!strcmp(name, e->name)) { | 219 | if (!strcmp(name, e->name)) { |
222 | printk(KERN_NOTICE | 220 | printk(KERN_NOTICE |
223 | "tracepoint %s busy\n", name); | 221 | "tracepoint %s busy\n", name); |
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 1744bb80f1fb..394f70b17162 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c | |||
@@ -34,11 +34,11 @@ EXPORT_SYMBOL_GPL(user_return_notifier_unregister); | |||
34 | void fire_user_return_notifiers(void) | 34 | void fire_user_return_notifiers(void) |
35 | { | 35 | { |
36 | struct user_return_notifier *urn; | 36 | struct user_return_notifier *urn; |
37 | struct hlist_node *tmp1, *tmp2; | 37 | struct hlist_node *tmp2; |
38 | struct hlist_head *head; | 38 | struct hlist_head *head; |
39 | 39 | ||
40 | head = &get_cpu_var(return_notifier_list); | 40 | head = &get_cpu_var(return_notifier_list); |
41 | hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link) | 41 | hlist_for_each_entry_safe(urn, tmp2, head, link) |
42 | urn->on_user_return(urn); | 42 | urn->on_user_return(urn); |
43 | put_cpu_var(return_notifier_list); | 43 | put_cpu_var(return_notifier_list); |
44 | } | 44 | } |
diff --git a/kernel/user.c b/kernel/user.c index 33acb5e53a5f..e81978e8c03b 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -47,9 +47,7 @@ struct user_namespace init_user_ns = { | |||
47 | .count = 4294967295U, | 47 | .count = 4294967295U, |
48 | }, | 48 | }, |
49 | }, | 49 | }, |
50 | .kref = { | 50 | .count = ATOMIC_INIT(3), |
51 | .refcount = ATOMIC_INIT(3), | ||
52 | }, | ||
53 | .owner = GLOBAL_ROOT_UID, | 51 | .owner = GLOBAL_ROOT_UID, |
54 | .group = GLOBAL_ROOT_GID, | 52 | .group = GLOBAL_ROOT_GID, |
55 | .proc_inum = PROC_USER_INIT_INO, | 53 | .proc_inum = PROC_USER_INIT_INO, |
@@ -107,9 +105,8 @@ static void uid_hash_remove(struct user_struct *up) | |||
107 | static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent) | 105 | static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent) |
108 | { | 106 | { |
109 | struct user_struct *user; | 107 | struct user_struct *user; |
110 | struct hlist_node *h; | ||
111 | 108 | ||
112 | hlist_for_each_entry(user, h, hashent, uidhash_node) { | 109 | hlist_for_each_entry(user, hashent, uidhash_node) { |
113 | if (uid_eq(user->uid, uid)) { | 110 | if (uid_eq(user->uid, uid)) { |
114 | atomic_inc(&user->__count); | 111 | atomic_inc(&user->__count); |
115 | return user; | 112 | return user; |
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 2b042c42fbc4..b14f4d342043 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/ctype.h> | 22 | #include <linux/ctype.h> |
23 | #include <linux/projid.h> | 23 | #include <linux/projid.h> |
24 | #include <linux/fs_struct.h> | ||
24 | 25 | ||
25 | static struct kmem_cache *user_ns_cachep __read_mostly; | 26 | static struct kmem_cache *user_ns_cachep __read_mostly; |
26 | 27 | ||
@@ -78,7 +79,7 @@ int create_user_ns(struct cred *new) | |||
78 | return ret; | 79 | return ret; |
79 | } | 80 | } |
80 | 81 | ||
81 | kref_init(&ns->kref); | 82 | atomic_set(&ns->count, 1); |
82 | /* Leave the new->user_ns reference with the new user namespace. */ | 83 | /* Leave the new->user_ns reference with the new user namespace. */ |
83 | ns->parent = parent_ns; | 84 | ns->parent = parent_ns; |
84 | ns->owner = owner; | 85 | ns->owner = owner; |
@@ -104,15 +105,16 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) | |||
104 | return create_user_ns(cred); | 105 | return create_user_ns(cred); |
105 | } | 106 | } |
106 | 107 | ||
107 | void free_user_ns(struct kref *kref) | 108 | void free_user_ns(struct user_namespace *ns) |
108 | { | 109 | { |
109 | struct user_namespace *parent, *ns = | 110 | struct user_namespace *parent; |
110 | container_of(kref, struct user_namespace, kref); | ||
111 | 111 | ||
112 | parent = ns->parent; | 112 | do { |
113 | proc_free_inum(ns->proc_inum); | 113 | parent = ns->parent; |
114 | kmem_cache_free(user_ns_cachep, ns); | 114 | proc_free_inum(ns->proc_inum); |
115 | put_user_ns(parent); | 115 | kmem_cache_free(user_ns_cachep, ns); |
116 | ns = parent; | ||
117 | } while (atomic_dec_and_test(&parent->count)); | ||
116 | } | 118 | } |
117 | EXPORT_SYMBOL(free_user_ns); | 119 | EXPORT_SYMBOL(free_user_ns); |
118 | 120 | ||
@@ -519,6 +521,42 @@ struct seq_operations proc_projid_seq_operations = { | |||
519 | .show = projid_m_show, | 521 | .show = projid_m_show, |
520 | }; | 522 | }; |
521 | 523 | ||
524 | static bool mappings_overlap(struct uid_gid_map *new_map, struct uid_gid_extent *extent) | ||
525 | { | ||
526 | u32 upper_first, lower_first, upper_last, lower_last; | ||
527 | unsigned idx; | ||
528 | |||
529 | upper_first = extent->first; | ||
530 | lower_first = extent->lower_first; | ||
531 | upper_last = upper_first + extent->count - 1; | ||
532 | lower_last = lower_first + extent->count - 1; | ||
533 | |||
534 | for (idx = 0; idx < new_map->nr_extents; idx++) { | ||
535 | u32 prev_upper_first, prev_lower_first; | ||
536 | u32 prev_upper_last, prev_lower_last; | ||
537 | struct uid_gid_extent *prev; | ||
538 | |||
539 | prev = &new_map->extent[idx]; | ||
540 | |||
541 | prev_upper_first = prev->first; | ||
542 | prev_lower_first = prev->lower_first; | ||
543 | prev_upper_last = prev_upper_first + prev->count - 1; | ||
544 | prev_lower_last = prev_lower_first + prev->count - 1; | ||
545 | |||
546 | /* Does the upper range intersect a previous extent? */ | ||
547 | if ((prev_upper_first <= upper_last) && | ||
548 | (prev_upper_last >= upper_first)) | ||
549 | return true; | ||
550 | |||
551 | /* Does the lower range intersect a previous extent? */ | ||
552 | if ((prev_lower_first <= lower_last) && | ||
553 | (prev_lower_last >= lower_first)) | ||
554 | return true; | ||
555 | } | ||
556 | return false; | ||
557 | } | ||
558 | |||
559 | |||
522 | static DEFINE_MUTEX(id_map_mutex); | 560 | static DEFINE_MUTEX(id_map_mutex); |
523 | 561 | ||
524 | static ssize_t map_write(struct file *file, const char __user *buf, | 562 | static ssize_t map_write(struct file *file, const char __user *buf, |
@@ -531,7 +569,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, | |||
531 | struct user_namespace *ns = seq->private; | 569 | struct user_namespace *ns = seq->private; |
532 | struct uid_gid_map new_map; | 570 | struct uid_gid_map new_map; |
533 | unsigned idx; | 571 | unsigned idx; |
534 | struct uid_gid_extent *extent, *last = NULL; | 572 | struct uid_gid_extent *extent = NULL; |
535 | unsigned long page = 0; | 573 | unsigned long page = 0; |
536 | char *kbuf, *pos, *next_line; | 574 | char *kbuf, *pos, *next_line; |
537 | ssize_t ret = -EINVAL; | 575 | ssize_t ret = -EINVAL; |
@@ -634,14 +672,11 @@ static ssize_t map_write(struct file *file, const char __user *buf, | |||
634 | if ((extent->lower_first + extent->count) <= extent->lower_first) | 672 | if ((extent->lower_first + extent->count) <= extent->lower_first) |
635 | goto out; | 673 | goto out; |
636 | 674 | ||
637 | /* For now only accept extents that are strictly in order */ | 675 | /* Do the ranges in extent overlap any previous extents? */ |
638 | if (last && | 676 | if (mappings_overlap(&new_map, extent)) |
639 | (((last->first + last->count) > extent->first) || | ||
640 | ((last->lower_first + last->count) > extent->lower_first))) | ||
641 | goto out; | 677 | goto out; |
642 | 678 | ||
643 | new_map.nr_extents++; | 679 | new_map.nr_extents++; |
644 | last = extent; | ||
645 | 680 | ||
646 | /* Fail if the file contains too many extents */ | 681 | /* Fail if the file contains too many extents */ |
647 | if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) && | 682 | if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) && |
@@ -803,6 +838,9 @@ static int userns_install(struct nsproxy *nsproxy, void *ns) | |||
803 | if (atomic_read(¤t->mm->mm_users) > 1) | 838 | if (atomic_read(¤t->mm->mm_users) > 1) |
804 | return -EINVAL; | 839 | return -EINVAL; |
805 | 840 | ||
841 | if (current->fs->users != 1) | ||
842 | return -EINVAL; | ||
843 | |||
806 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) | 844 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) |
807 | return -EPERM; | 845 | return -EPERM; |
808 | 846 | ||
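mappings_overlap() above replaces the old "strictly in order" restriction with an explicit pairwise check, so extents may now arrive in any order as long as neither their upper (namespace-side) nor their lower (parent-side) ranges intersect an earlier extent. The test itself is the standard closed-interval intersection predicate; a minimal illustration follows (not kernel code, and the sample map lines are invented for the example; the rejection mirrors the goto out path in map_write() above):

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* Closed ranges [a_first, a_last] and [b_first, b_last] intersect
	 * exactly when each one starts no later than the other one ends. */
	static bool ranges_intersect(uint32_t a_first, uint32_t a_last,
				     uint32_t b_first, uint32_t b_last)
	{
		return a_first <= b_last && a_last >= b_first;
	}

	int main(void)
	{
		/* "0 100000 1000" then "1000 200000 1000": disjoint, accepted.  */
		assert(!ranges_intersect(0, 999, 1000, 1999));
		/* "0 100000 1000" then "500 300000 1000": upper ranges overlap,
		 * so the second extent would be rejected.                       */
		assert(ranges_intersect(0, 999, 500, 1499));
		return 0;
	}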
diff --git a/kernel/utsname.c b/kernel/utsname.c index 08b197e8c485..a47fc5de3113 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -30,7 +30,7 @@ static struct uts_namespace *create_uts_ns(void) | |||
30 | /* | 30 | /* |
31 | * Clone a new ns copying an original utsname, setting refcount to 1 | 31 | * Clone a new ns copying an original utsname, setting refcount to 1 |
32 | * @old_ns: namespace to clone | 32 | * @old_ns: namespace to clone |
33 | * Return NULL on error (failure to kmalloc), new ns otherwise | 33 | * Return ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise |
34 | */ | 34 | */ |
35 | static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, | 35 | static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, |
36 | struct uts_namespace *old_ns) | 36 | struct uts_namespace *old_ns) |
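The corrected comment matches what the function actually returns; under the ERR_PTR() convention callers test the result with IS_ERR() and extract the code with PTR_ERR() instead of comparing against NULL. A minimal sketch of the caller side (the wrapper function here is illustrative, not the real call site):

    #include <linux/err.h>
    #include <linux/utsname.h>

    /* Illustrative only: how a caller consumes an ERR_PTR()-style return. */
    static long use_clone_result(struct uts_namespace *ns)
    {
            if (IS_ERR(ns))
                    return PTR_ERR(ns);     /* e.g. -ENOMEM */
            /* ... use ns ... */
            return 0;
    }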
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 63da38c2d820..4f69f9a5e221 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c | |||
@@ -15,6 +15,8 @@ | |||
15 | #include <linux/sysctl.h> | 15 | #include <linux/sysctl.h> |
16 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
17 | 17 | ||
18 | #ifdef CONFIG_PROC_SYSCTL | ||
19 | |||
18 | static void *get_uts(ctl_table *table, int write) | 20 | static void *get_uts(ctl_table *table, int write) |
19 | { | 21 | { |
20 | char *which = table->data; | 22 | char *which = table->data; |
@@ -38,7 +40,6 @@ static void put_uts(ctl_table *table, int write, void *which) | |||
38 | up_write(&uts_sem); | 40 | up_write(&uts_sem); |
39 | } | 41 | } |
40 | 42 | ||
41 | #ifdef CONFIG_PROC_SYSCTL | ||
42 | /* | 43 | /* |
43 | * Special case of dostring for the UTS structure. This has locks | 44 | * Special case of dostring for the UTS structure. This has locks |
44 | * to observe. Should this be in kernel/sys.c ???? | 45 | * to observe. Should this be in kernel/sys.c ???? |
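Moving the #ifdef up means get_uts()/put_uts() are only compiled when their sole user, the proc handler, is; otherwise a CONFIG_PROC_SYSCTL=n build warns about defined-but-unused statics. A schematic sketch of the pattern (names illustrative):

    #ifdef CONFIG_PROC_SYSCTL

    /* Helpers that only the proc handler needs sit inside the guard,
     * so they disappear together with their caller when the option is off. */
    static int helper_only_proc_needs(void)
    {
            return 0;
    }

    static int example_proc_handler(void)
    {
            return helper_only_proc_needs();
    }

    #endif /* CONFIG_PROC_SYSCTL */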
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 27689422aa92..4a944676358e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -113,9 +113,9 @@ static int get_softlockup_thresh(void) | |||
113 | * resolution, and we don't need to waste time with a big divide when | 113 | * resolution, and we don't need to waste time with a big divide when |
114 | * 2^30ns == 1.074s. | 114 | * 2^30ns == 1.074s. |
115 | */ | 115 | */ |
116 | static unsigned long get_timestamp(int this_cpu) | 116 | static unsigned long get_timestamp(void) |
117 | { | 117 | { |
118 | return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ | 118 | return local_clock() >> 30LL; /* 2^30 ~= 10^9 */ |
119 | } | 119 | } |
120 | 120 | ||
121 | static void set_sample_period(void) | 121 | static void set_sample_period(void) |
@@ -133,9 +133,7 @@ static void set_sample_period(void) | |||
133 | /* Commands for resetting the watchdog */ | 133 | /* Commands for resetting the watchdog */ |
134 | static void __touch_watchdog(void) | 134 | static void __touch_watchdog(void) |
135 | { | 135 | { |
136 | int this_cpu = smp_processor_id(); | 136 | __this_cpu_write(watchdog_touch_ts, get_timestamp()); |
137 | |||
138 | __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu)); | ||
139 | } | 137 | } |
140 | 138 | ||
141 | void touch_softlockup_watchdog(void) | 139 | void touch_softlockup_watchdog(void) |
@@ -196,7 +194,7 @@ static int is_hardlockup(void) | |||
196 | 194 | ||
197 | static int is_softlockup(unsigned long touch_ts) | 195 | static int is_softlockup(unsigned long touch_ts) |
198 | { | 196 | { |
199 | unsigned long now = get_timestamp(smp_processor_id()); | 197 | unsigned long now = get_timestamp(); |
200 | 198 | ||
201 | /* Warn about unreasonable delays: */ | 199 | /* Warn about unreasonable delays: */ |
202 | if (time_after(now, touch_ts + get_softlockup_thresh())) | 200 | if (time_after(now, touch_ts + get_softlockup_thresh())) |
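local_clock() already reads the clock of the executing CPU, so the explicit cpu argument became redundant; the right shift by 30 keeps the cheap nanoseconds-to-roughly-seconds conversion. A minimal sketch of just that conversion (the function name is illustrative, watchdog state is omitted):

    #include <linux/sched.h>        /* local_clock() */

    /* ~seconds since boot: 2^30 ns ~= 1.074 s, close enough for a
     * lockup-threshold comparison and it avoids a 64-bit divide. */
    static unsigned long coarse_seconds(void)
    {
            return local_clock() >> 30;
    }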
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f4feacad3812..55fac5b991b7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -251,8 +251,8 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); | |||
251 | for ((pool) = &std_worker_pools(cpu)[0]; \ | 251 | for ((pool) = &std_worker_pools(cpu)[0]; \ |
252 | (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++) | 252 | (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++) |
253 | 253 | ||
254 | #define for_each_busy_worker(worker, i, pos, pool) \ | 254 | #define for_each_busy_worker(worker, i, pool) \ |
255 | hash_for_each(pool->busy_hash, i, pos, worker, hentry) | 255 | hash_for_each(pool->busy_hash, i, worker, hentry) |
256 | 256 | ||
257 | static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, | 257 | static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, |
258 | unsigned int sw) | 258 | unsigned int sw) |
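This tracks the hashtable API change in which hash_for_each() and friends dropped the separate struct hlist_node cursor and hand back the containing object directly. A generic sketch of the new iteration style (struct item, items and the key 42 are illustrative, not workqueue identifiers):

    #include <linux/hashtable.h>
    #include <linux/kernel.h>

    struct item {
            int key;
            struct hlist_node hentry;
    };

    static DEFINE_HASHTABLE(items, 6);      /* 2^6 buckets */

    static void walk_items(void)
    {
            struct item *it;
            int bkt;

            /* full walk: loop variable is the object itself, no cursor */
            hash_for_each(items, bkt, it, hentry)
                    pr_info("key=%d\n", it->key);

            /* keyed lookup: only the bucket that 42 hashes to is scanned */
            hash_for_each_possible(items, it, hentry, 42)
                    if (it->key == 42)
                            pr_info("found 42\n");
    }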
@@ -457,11 +457,12 @@ static int worker_pool_assign_id(struct worker_pool *pool) | |||
457 | int ret; | 457 | int ret; |
458 | 458 | ||
459 | mutex_lock(&worker_pool_idr_mutex); | 459 | mutex_lock(&worker_pool_idr_mutex); |
460 | idr_pre_get(&worker_pool_idr, GFP_KERNEL); | 460 | ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL); |
461 | ret = idr_get_new(&worker_pool_idr, pool, &pool->id); | 461 | if (ret >= 0) |
462 | pool->id = ret; | ||
462 | mutex_unlock(&worker_pool_idr_mutex); | 463 | mutex_unlock(&worker_pool_idr_mutex); |
463 | 464 | ||
464 | return ret; | 465 | return ret < 0 ? ret : 0; |
465 | } | 466 | } |
466 | 467 | ||
467 | /* | 468 | /* |
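idr_alloc() folds the old idr_pre_get()/idr_get_new() two-step into one call that returns the new id (>= 0) or a negative errno, which is why the function now copies the result into pool->id on success and reduces its return value to 0 or the error. A hedged sketch of the same pattern with illustrative names (my_idr, assign_id):

    #include <linux/idr.h>
    #include <linux/gfp.h>

    static DEFINE_IDR(my_idr);

    static int assign_id(void *obj, int *out_id)
    {
            int id;

            /* allocates an id and binds obj to it; end == 0 means no upper limit */
            id = idr_alloc(&my_idr, obj, 0, 0, GFP_KERNEL);
            if (id < 0)
                    return id;              /* -ENOMEM, -ENOSPC, ... */
            *out_id = id;
            return 0;
    }

When the allocation has to happen under a spinlock, the same API generation also offers idr_preload()/idr_preload_end(); here the call sits under a mutex, so passing GFP_KERNEL directly is fine.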
@@ -909,9 +910,8 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool, | |||
909 | struct work_struct *work) | 910 | struct work_struct *work) |
910 | { | 911 | { |
911 | struct worker *worker; | 912 | struct worker *worker; |
912 | struct hlist_node *tmp; | ||
913 | 913 | ||
914 | hash_for_each_possible(pool->busy_hash, worker, tmp, hentry, | 914 | hash_for_each_possible(pool->busy_hash, worker, hentry, |
915 | (unsigned long)work) | 915 | (unsigned long)work) |
916 | if (worker->current_work == work && | 916 | if (worker->current_work == work && |
917 | worker->current_func == work->func) | 917 | worker->current_func == work->func) |
@@ -1626,7 +1626,6 @@ static void busy_worker_rebind_fn(struct work_struct *work) | |||
1626 | static void rebind_workers(struct worker_pool *pool) | 1626 | static void rebind_workers(struct worker_pool *pool) |
1627 | { | 1627 | { |
1628 | struct worker *worker, *n; | 1628 | struct worker *worker, *n; |
1629 | struct hlist_node *pos; | ||
1630 | int i; | 1629 | int i; |
1631 | 1630 | ||
1632 | lockdep_assert_held(&pool->assoc_mutex); | 1631 | lockdep_assert_held(&pool->assoc_mutex); |
@@ -1648,7 +1647,7 @@ static void rebind_workers(struct worker_pool *pool) | |||
1648 | } | 1647 | } |
1649 | 1648 | ||
1650 | /* rebind busy workers */ | 1649 | /* rebind busy workers */ |
1651 | for_each_busy_worker(worker, i, pos, pool) { | 1650 | for_each_busy_worker(worker, i, pool) { |
1652 | struct work_struct *rebind_work = &worker->rebind_work; | 1651 | struct work_struct *rebind_work = &worker->rebind_work; |
1653 | struct workqueue_struct *wq; | 1652 | struct workqueue_struct *wq; |
1654 | 1653 | ||
@@ -3423,7 +3422,6 @@ static void wq_unbind_fn(struct work_struct *work) | |||
3423 | int cpu = smp_processor_id(); | 3422 | int cpu = smp_processor_id(); |
3424 | struct worker_pool *pool; | 3423 | struct worker_pool *pool; |
3425 | struct worker *worker; | 3424 | struct worker *worker; |
3426 | struct hlist_node *pos; | ||
3427 | int i; | 3425 | int i; |
3428 | 3426 | ||
3429 | for_each_std_worker_pool(pool, cpu) { | 3427 | for_each_std_worker_pool(pool, cpu) { |
@@ -3442,7 +3440,7 @@ static void wq_unbind_fn(struct work_struct *work) | |||
3442 | list_for_each_entry(worker, &pool->idle_list, entry) | 3440 | list_for_each_entry(worker, &pool->idle_list, entry) |
3443 | worker->flags |= WORKER_UNBOUND; | 3441 | worker->flags |= WORKER_UNBOUND; |
3444 | 3442 | ||
3445 | for_each_busy_worker(worker, i, pos, pool) | 3443 | for_each_busy_worker(worker, i, pool) |
3446 | worker->flags |= WORKER_UNBOUND; | 3444 | worker->flags |= WORKER_UNBOUND; |
3447 | 3445 | ||
3448 | pool->flags |= POOL_DISASSOCIATED; | 3446 | pool->flags |= POOL_DISASSOCIATED; |