Diffstat (limited to 'kernel')
78 files changed, 2083 insertions, 1638 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 5068e2a4e75f..2251882daf53 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
| @@ -124,8 +124,8 @@ config INLINE_SPIN_LOCK_IRQSAVE | |||
| 124 | def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ | 124 | def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ |
| 125 | ARCH_INLINE_SPIN_LOCK_IRQSAVE | 125 | ARCH_INLINE_SPIN_LOCK_IRQSAVE |
| 126 | 126 | ||
| 127 | config INLINE_SPIN_UNLOCK | 127 | config UNINLINE_SPIN_UNLOCK |
| 128 | def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK) | 128 | bool |
| 129 | 129 | ||
| 130 | config INLINE_SPIN_UNLOCK_BH | 130 | config INLINE_SPIN_UNLOCK_BH |
| 131 | def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH | 131 | def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH |
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 24e7cb0ba26a..3f9c97419f02 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
| @@ -36,6 +36,7 @@ config PREEMPT_VOLUNTARY | |||
| 36 | config PREEMPT | 36 | config PREEMPT |
| 37 | bool "Preemptible Kernel (Low-Latency Desktop)" | 37 | bool "Preemptible Kernel (Low-Latency Desktop)" |
| 38 | select PREEMPT_COUNT | 38 | select PREEMPT_COUNT |
| 39 | select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK | ||
| 39 | help | 40 | help |
| 40 | This option reduces the latency of the kernel by making | 41 | This option reduces the latency of the kernel by making |
| 41 | all kernel code (that is not executing in a critical section) | 42 | all kernel code (that is not executing in a critical section) |
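The two Kconfig hunks above invert the option's polarity: instead of INLINE_SPIN_UNLOCK computing when the unlock fast path may be inlined, UNINLINE_SPIN_UNLOCK is a plain bool that PREEMPT selects whenever the architecture does not inline the unlock itself. A minimal sketch of the consumer side, assuming the usual split between the out-of-line lock functions and the API header (the real spinlock code may differ in detail):

    /* Built out of line only when UNINLINE_SPIN_UNLOCK is selected. */
    #ifdef CONFIG_UNINLINE_SPIN_UNLOCK
    void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
    {
            __raw_spin_unlock(lock);        /* release, then re-enable preemption */
    }
    EXPORT_SYMBOL(_raw_spin_unlock);
    #else
    /* Otherwise the API header maps the call straight to the inline helper. */
    #define _raw_spin_unlock(lock)  __raw_spin_unlock(lock)
    #endif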
diff --git a/kernel/Makefile b/kernel/Makefile
index 2d9de86b7e76..cb41b9547c9f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
| @@ -27,7 +27,6 @@ obj-y += power/ | |||
| 27 | 27 | ||
| 28 | obj-$(CONFIG_FREEZER) += freezer.o | 28 | obj-$(CONFIG_FREEZER) += freezer.o |
| 29 | obj-$(CONFIG_PROFILING) += profile.o | 29 | obj-$(CONFIG_PROFILING) += profile.o |
| 30 | obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o | ||
| 31 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 30 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
| 32 | obj-y += time/ | 31 | obj-y += time/ |
| 33 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | 32 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o |
diff --git a/kernel/audit.c b/kernel/audit.c
index bb0eb5bb9a0a..1c7f2c61416b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
| @@ -1418,7 +1418,7 @@ void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) | |||
| 1418 | 1418 | ||
| 1419 | /* This is a helper-function to print the escaped d_path */ | 1419 | /* This is a helper-function to print the escaped d_path */ |
| 1420 | void audit_log_d_path(struct audit_buffer *ab, const char *prefix, | 1420 | void audit_log_d_path(struct audit_buffer *ab, const char *prefix, |
| 1421 | struct path *path) | 1421 | const struct path *path) |
| 1422 | { | 1422 | { |
| 1423 | char *p, *pathname; | 1423 | char *p, *pathname; |
| 1424 | 1424 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c6877fe9a831..ed64ccac67c9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
| @@ -1472,7 +1472,6 @@ static int cgroup_get_rootdir(struct super_block *sb) | |||
| 1472 | 1472 | ||
| 1473 | struct inode *inode = | 1473 | struct inode *inode = |
| 1474 | cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb); | 1474 | cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb); |
| 1475 | struct dentry *dentry; | ||
| 1476 | 1475 | ||
| 1477 | if (!inode) | 1476 | if (!inode) |
| 1478 | return -ENOMEM; | 1477 | return -ENOMEM; |
| @@ -1481,12 +1480,9 @@ static int cgroup_get_rootdir(struct super_block *sb) | |||
| 1481 | inode->i_op = &cgroup_dir_inode_operations; | 1480 | inode->i_op = &cgroup_dir_inode_operations; |
| 1482 | /* directories start off with i_nlink == 2 (for "." entry) */ | 1481 | /* directories start off with i_nlink == 2 (for "." entry) */ |
| 1483 | inc_nlink(inode); | 1482 | inc_nlink(inode); |
| 1484 | dentry = d_alloc_root(inode); | 1483 | sb->s_root = d_make_root(inode); |
| 1485 | if (!dentry) { | 1484 | if (!sb->s_root) |
| 1486 | iput(inode); | ||
| 1487 | return -ENOMEM; | 1485 | return -ENOMEM; |
| 1488 | } | ||
| 1489 | sb->s_root = dentry; | ||
| 1490 | /* for everything else we want ->d_op set */ | 1486 | /* for everything else we want ->d_op set */ |
| 1491 | sb->s_d_op = &cgroup_dops; | 1487 | sb->s_d_op = &cgroup_dops; |
| 1492 | return 0; | 1488 | return 0; |
| @@ -1887,7 +1883,7 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | |||
| 1887 | */ | 1883 | */ |
| 1888 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 1884 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) |
| 1889 | { | 1885 | { |
| 1890 | int retval; | 1886 | int retval = 0; |
| 1891 | struct cgroup_subsys *ss, *failed_ss = NULL; | 1887 | struct cgroup_subsys *ss, *failed_ss = NULL; |
| 1892 | struct cgroup *oldcgrp; | 1888 | struct cgroup *oldcgrp; |
| 1893 | struct cgroupfs_root *root = cgrp->root; | 1889 | struct cgroupfs_root *root = cgrp->root; |
| @@ -4885,9 +4881,9 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) | |||
| 4885 | 4881 | ||
| 4886 | rcu_assign_pointer(id->css, NULL); | 4882 | rcu_assign_pointer(id->css, NULL); |
| 4887 | rcu_assign_pointer(css->id, NULL); | 4883 | rcu_assign_pointer(css->id, NULL); |
| 4888 | write_lock(&ss->id_lock); | 4884 | spin_lock(&ss->id_lock); |
| 4889 | idr_remove(&ss->idr, id->id); | 4885 | idr_remove(&ss->idr, id->id); |
| 4890 | write_unlock(&ss->id_lock); | 4886 | spin_unlock(&ss->id_lock); |
| 4891 | kfree_rcu(id, rcu_head); | 4887 | kfree_rcu(id, rcu_head); |
| 4892 | } | 4888 | } |
| 4893 | EXPORT_SYMBOL_GPL(free_css_id); | 4889 | EXPORT_SYMBOL_GPL(free_css_id); |
| @@ -4913,10 +4909,10 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) | |||
| 4913 | error = -ENOMEM; | 4909 | error = -ENOMEM; |
| 4914 | goto err_out; | 4910 | goto err_out; |
| 4915 | } | 4911 | } |
| 4916 | write_lock(&ss->id_lock); | 4912 | spin_lock(&ss->id_lock); |
| 4917 | /* Don't use 0. allocates an ID of 1-65535 */ | 4913 | /* Don't use 0. allocates an ID of 1-65535 */ |
| 4918 | error = idr_get_new_above(&ss->idr, newid, 1, &myid); | 4914 | error = idr_get_new_above(&ss->idr, newid, 1, &myid); |
| 4919 | write_unlock(&ss->id_lock); | 4915 | spin_unlock(&ss->id_lock); |
| 4920 | 4916 | ||
| 4921 | /* Returns error when there are no free spaces for new ID.*/ | 4917 | /* Returns error when there are no free spaces for new ID.*/ |
| 4922 | if (error) { | 4918 | if (error) { |
| @@ -4931,9 +4927,9 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) | |||
| 4931 | return newid; | 4927 | return newid; |
| 4932 | remove_idr: | 4928 | remove_idr: |
| 4933 | error = -ENOSPC; | 4929 | error = -ENOSPC; |
| 4934 | write_lock(&ss->id_lock); | 4930 | spin_lock(&ss->id_lock); |
| 4935 | idr_remove(&ss->idr, myid); | 4931 | idr_remove(&ss->idr, myid); |
| 4936 | write_unlock(&ss->id_lock); | 4932 | spin_unlock(&ss->id_lock); |
| 4937 | err_out: | 4933 | err_out: |
| 4938 | kfree(newid); | 4934 | kfree(newid); |
| 4939 | return ERR_PTR(error); | 4935 | return ERR_PTR(error); |
| @@ -4945,7 +4941,7 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss, | |||
| 4945 | { | 4941 | { |
| 4946 | struct css_id *newid; | 4942 | struct css_id *newid; |
| 4947 | 4943 | ||
| 4948 | rwlock_init(&ss->id_lock); | 4944 | spin_lock_init(&ss->id_lock); |
| 4949 | idr_init(&ss->idr); | 4945 | idr_init(&ss->idr); |
| 4950 | 4946 | ||
| 4951 | newid = get_new_cssid(ss, 0); | 4947 | newid = get_new_cssid(ss, 0); |
| @@ -5033,6 +5029,8 @@ css_get_next(struct cgroup_subsys *ss, int id, | |||
| 5033 | return NULL; | 5029 | return NULL; |
| 5034 | 5030 | ||
| 5035 | BUG_ON(!ss->use_id); | 5031 | BUG_ON(!ss->use_id); |
| 5032 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 5033 | |||
| 5036 | /* fill start point for scan */ | 5034 | /* fill start point for scan */ |
| 5037 | tmpid = id; | 5035 | tmpid = id; |
| 5038 | while (1) { | 5036 | while (1) { |
| @@ -5040,10 +5038,7 @@ css_get_next(struct cgroup_subsys *ss, int id, | |||
| 5040 | * scan next entry from bitmap(tree), tmpid is updated after | 5038 | * scan next entry from bitmap(tree), tmpid is updated after |
| 5041 | * idr_get_next(). | 5039 | * idr_get_next(). |
| 5042 | */ | 5040 | */ |
| 5043 | read_lock(&ss->id_lock); | ||
| 5044 | tmp = idr_get_next(&ss->idr, &tmpid); | 5041 | tmp = idr_get_next(&ss->idr, &tmpid); |
| 5045 | read_unlock(&ss->id_lock); | ||
| 5046 | |||
| 5047 | if (!tmp) | 5042 | if (!tmp) |
| 5048 | break; | 5043 | break; |
| 5049 | if (tmp->depth >= depth && tmp->stack[depth] == rootid) { | 5044 | if (tmp->depth >= depth && tmp->stack[depth] == rootid) { |
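The css_get_next() hunk above drops the id_lock around the lookup and instead asserts that the caller holds rcu_read_lock(); with the lock converted back to a spinlock, it now only serializes writers of the IDR, while readers rely on the RCU-freed IDR layers. A generic sketch of the caller-side contract this depends on (element type and consumer are hypothetical, not the cgroup API):

    #include <linux/idr.h>
    #include <linux/rcupdate.h>

    /* Walk an IDR locklessly; the caller supplies the RCU read side. */
    static void walk_ids(struct idr *idr)
    {
            struct my_obj *obj;     /* hypothetical element type */
            int id = 0;

            rcu_read_lock();        /* keeps RCU-freed IDR layers valid while we scan */
            while ((obj = idr_get_next(idr, &id)) != NULL) {
                    consume(obj);   /* hypothetical consumer */
                    id++;           /* idr_get_next() returned the lowest entry at >= id */
            }
            rcu_read_unlock();
    }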
diff --git a/kernel/compat.c b/kernel/compat.c
index f346cedfe24d..74ff8498809a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
| @@ -31,11 +31,10 @@ | |||
| 31 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
| 32 | 32 | ||
| 33 | /* | 33 | /* |
| 34 | * Note that the native side is already converted to a timespec, because | 34 | * Get/set struct timeval with struct timespec on the native side |
| 35 | * that's what we want anyway. | ||
| 36 | */ | 35 | */ |
| 37 | static int compat_get_timeval(struct timespec *o, | 36 | static int compat_get_timeval_convert(struct timespec *o, |
| 38 | struct compat_timeval __user *i) | 37 | struct compat_timeval __user *i) |
| 39 | { | 38 | { |
| 40 | long usec; | 39 | long usec; |
| 41 | 40 | ||
| @@ -46,8 +45,8 @@ static int compat_get_timeval(struct timespec *o, | |||
| 46 | return 0; | 45 | return 0; |
| 47 | } | 46 | } |
| 48 | 47 | ||
| 49 | static int compat_put_timeval(struct compat_timeval __user *o, | 48 | static int compat_put_timeval_convert(struct compat_timeval __user *o, |
| 50 | struct timeval *i) | 49 | struct timeval *i) |
| 51 | { | 50 | { |
| 52 | return (put_user(i->tv_sec, &o->tv_sec) || | 51 | return (put_user(i->tv_sec, &o->tv_sec) || |
| 53 | put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0; | 52 | put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0; |
| @@ -117,7 +116,7 @@ asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, | |||
| 117 | if (tv) { | 116 | if (tv) { |
| 118 | struct timeval ktv; | 117 | struct timeval ktv; |
| 119 | do_gettimeofday(&ktv); | 118 | do_gettimeofday(&ktv); |
| 120 | if (compat_put_timeval(tv, &ktv)) | 119 | if (compat_put_timeval_convert(tv, &ktv)) |
| 121 | return -EFAULT; | 120 | return -EFAULT; |
| 122 | } | 121 | } |
| 123 | if (tz) { | 122 | if (tz) { |
| @@ -135,7 +134,7 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, | |||
| 135 | struct timezone ktz; | 134 | struct timezone ktz; |
| 136 | 135 | ||
| 137 | if (tv) { | 136 | if (tv) { |
| 138 | if (compat_get_timeval(&kts, tv)) | 137 | if (compat_get_timeval_convert(&kts, tv)) |
| 139 | return -EFAULT; | 138 | return -EFAULT; |
| 140 | } | 139 | } |
| 141 | if (tz) { | 140 | if (tz) { |
| @@ -146,12 +145,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, | |||
| 146 | return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); | 145 | return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); |
| 147 | } | 146 | } |
| 148 | 147 | ||
| 148 | int get_compat_timeval(struct timeval *tv, const struct compat_timeval __user *ctv) | ||
| 149 | { | ||
| 150 | return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) || | ||
| 151 | __get_user(tv->tv_sec, &ctv->tv_sec) || | ||
| 152 | __get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; | ||
| 153 | } | ||
| 154 | EXPORT_SYMBOL_GPL(get_compat_timeval); | ||
| 155 | |||
| 156 | int put_compat_timeval(const struct timeval *tv, struct compat_timeval __user *ctv) | ||
| 157 | { | ||
| 158 | return (!access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) || | ||
| 159 | __put_user(tv->tv_sec, &ctv->tv_sec) || | ||
| 160 | __put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; | ||
| 161 | } | ||
| 162 | EXPORT_SYMBOL_GPL(put_compat_timeval); | ||
| 163 | |||
| 149 | int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) | 164 | int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) |
| 150 | { | 165 | { |
| 151 | return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) || | 166 | return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) || |
| 152 | __get_user(ts->tv_sec, &cts->tv_sec) || | 167 | __get_user(ts->tv_sec, &cts->tv_sec) || |
| 153 | __get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; | 168 | __get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; |
| 154 | } | 169 | } |
| 170 | EXPORT_SYMBOL_GPL(get_compat_timespec); | ||
| 155 | 171 | ||
| 156 | int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user *cts) | 172 | int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user *cts) |
| 157 | { | 173 | { |
| @@ -161,6 +177,42 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user | |||
| 161 | } | 177 | } |
| 162 | EXPORT_SYMBOL_GPL(put_compat_timespec); | 178 | EXPORT_SYMBOL_GPL(put_compat_timespec); |
| 163 | 179 | ||
| 180 | int compat_get_timeval(struct timeval *tv, const void __user *utv) | ||
| 181 | { | ||
| 182 | if (COMPAT_USE_64BIT_TIME) | ||
| 183 | return copy_from_user(tv, utv, sizeof *tv) ? -EFAULT : 0; | ||
| 184 | else | ||
| 185 | return get_compat_timeval(tv, utv); | ||
| 186 | } | ||
| 187 | EXPORT_SYMBOL_GPL(compat_get_timeval); | ||
| 188 | |||
| 189 | int compat_put_timeval(const struct timeval *tv, void __user *utv) | ||
| 190 | { | ||
| 191 | if (COMPAT_USE_64BIT_TIME) | ||
| 192 | return copy_to_user(utv, tv, sizeof *tv) ? -EFAULT : 0; | ||
| 193 | else | ||
| 194 | return put_compat_timeval(tv, utv); | ||
| 195 | } | ||
| 196 | EXPORT_SYMBOL_GPL(compat_put_timeval); | ||
| 197 | |||
| 198 | int compat_get_timespec(struct timespec *ts, const void __user *uts) | ||
| 199 | { | ||
| 200 | if (COMPAT_USE_64BIT_TIME) | ||
| 201 | return copy_from_user(ts, uts, sizeof *ts) ? -EFAULT : 0; | ||
| 202 | else | ||
| 203 | return get_compat_timespec(ts, uts); | ||
| 204 | } | ||
| 205 | EXPORT_SYMBOL_GPL(compat_get_timespec); | ||
| 206 | |||
| 207 | int compat_put_timespec(const struct timespec *ts, void __user *uts) | ||
| 208 | { | ||
| 209 | if (COMPAT_USE_64BIT_TIME) | ||
| 210 | return copy_to_user(uts, ts, sizeof *ts) ? -EFAULT : 0; | ||
| 211 | else | ||
| 212 | return put_compat_timespec(ts, uts); | ||
| 213 | } | ||
| 214 | EXPORT_SYMBOL_GPL(compat_put_timespec); | ||
| 215 | |||
| 164 | static long compat_nanosleep_restart(struct restart_block *restart) | 216 | static long compat_nanosleep_restart(struct restart_block *restart) |
| 165 | { | 217 | { |
| 166 | struct compat_timespec __user *rmtp; | 218 | struct compat_timespec __user *rmtp; |
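The new compat_get_timeval()/compat_put_timeval() pair (and the timespec equivalents) hide the COMPAT_USE_64BIT_TIME decision from callers, so a compat path no longer cares whether the 32-bit ABI carries 64-bit time values (as on x32). A hypothetical caller might look like this (device type, field and function names are made up for illustration):

    #include <linux/compat.h>
    #include <linux/time.h>

    /* Sketch: a compat ioctl handler copying a timeval from a 32-bit task. */
    static long mydev_compat_set_time(struct my_dev *dev, void __user *argp)
    {
            struct timeval tv;

            if (compat_get_timeval(&tv, argp))      /* handles 64-bit-time compat ABIs too */
                    return -EFAULT;

            dev->last_set_ns = timeval_to_ns(&tv);  /* hypothetical field */
            return 0;
    }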
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 5d575836dba6..14f7070b4ba2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
| @@ -270,11 +270,11 @@ static struct file_system_type cpuset_fs_type = { | |||
| 270 | * are online. If none are online, walk up the cpuset hierarchy | 270 | * are online. If none are online, walk up the cpuset hierarchy |
| 271 | * until we find one that does have some online cpus. If we get | 271 | * until we find one that does have some online cpus. If we get |
| 272 | * all the way to the top and still haven't found any online cpus, | 272 | * all the way to the top and still haven't found any online cpus, |
| 273 | * return cpu_online_map. Or if passed a NULL cs from an exit'ing | 273 | * return cpu_online_mask. Or if passed a NULL cs from an exit'ing |
| 274 | * task, return cpu_online_map. | 274 | * task, return cpu_online_mask. |
| 275 | * | 275 | * |
| 276 | * One way or another, we guarantee to return some non-empty subset | 276 | * One way or another, we guarantee to return some non-empty subset |
| 277 | * of cpu_online_map. | 277 | * of cpu_online_mask. |
| 278 | * | 278 | * |
| 279 | * Call with callback_mutex held. | 279 | * Call with callback_mutex held. |
| 280 | */ | 280 | */ |
| @@ -867,7 +867,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
| 867 | int retval; | 867 | int retval; |
| 868 | int is_load_balanced; | 868 | int is_load_balanced; |
| 869 | 869 | ||
| 870 | /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ | 870 | /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ |
| 871 | if (cs == &top_cpuset) | 871 | if (cs == &top_cpuset) |
| 872 | return -EACCES; | 872 | return -EACCES; |
| 873 | 873 | ||
| @@ -964,7 +964,6 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, | |||
| 964 | { | 964 | { |
| 965 | bool need_loop; | 965 | bool need_loop; |
| 966 | 966 | ||
| 967 | repeat: | ||
| 968 | /* | 967 | /* |
| 969 | * Allow tasks that have access to memory reserves because they have | 968 | * Allow tasks that have access to memory reserves because they have |
| 970 | * been OOM killed to get memory anywhere. | 969 | * been OOM killed to get memory anywhere. |
| @@ -983,45 +982,19 @@ repeat: | |||
| 983 | */ | 982 | */ |
| 984 | need_loop = task_has_mempolicy(tsk) || | 983 | need_loop = task_has_mempolicy(tsk) || |
| 985 | !nodes_intersects(*newmems, tsk->mems_allowed); | 984 | !nodes_intersects(*newmems, tsk->mems_allowed); |
| 986 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); | ||
| 987 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); | ||
| 988 | |||
| 989 | /* | ||
| 990 | * ensure checking ->mems_allowed_change_disable after setting all new | ||
| 991 | * allowed nodes. | ||
| 992 | * | ||
| 993 | * the read-side task can see an nodemask with new allowed nodes and | ||
| 994 | * old allowed nodes. and if it allocates page when cpuset clears newly | ||
| 995 | * disallowed ones continuous, it can see the new allowed bits. | ||
| 996 | * | ||
| 997 | * And if setting all new allowed nodes is after the checking, setting | ||
| 998 | * all new allowed nodes and clearing newly disallowed ones will be done | ||
| 999 | * continuous, and the read-side task may find no node to alloc page. | ||
| 1000 | */ | ||
| 1001 | smp_mb(); | ||
| 1002 | 985 | ||
| 1003 | /* | 986 | if (need_loop) |
| 1004 | * Allocation of memory is very fast, we needn't sleep when waiting | 987 | write_seqcount_begin(&tsk->mems_allowed_seq); |
| 1005 | * for the read-side. | ||
| 1006 | */ | ||
| 1007 | while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) { | ||
| 1008 | task_unlock(tsk); | ||
| 1009 | if (!task_curr(tsk)) | ||
| 1010 | yield(); | ||
| 1011 | goto repeat; | ||
| 1012 | } | ||
| 1013 | 988 | ||
| 1014 | /* | 989 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); |
| 1015 | * ensure checking ->mems_allowed_change_disable before clearing all new | 990 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); |
| 1016 | * disallowed nodes. | ||
| 1017 | * | ||
| 1018 | * if clearing newly disallowed bits before the checking, the read-side | ||
| 1019 | * task may find no node to alloc page. | ||
| 1020 | */ | ||
| 1021 | smp_mb(); | ||
| 1022 | 991 | ||
| 1023 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); | 992 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); |
| 1024 | tsk->mems_allowed = *newmems; | 993 | tsk->mems_allowed = *newmems; |
| 994 | |||
| 995 | if (need_loop) | ||
| 996 | write_seqcount_end(&tsk->mems_allowed_seq); | ||
| 997 | |||
| 1025 | task_unlock(tsk); | 998 | task_unlock(tsk); |
| 1026 | } | 999 | } |
| 1027 | 1000 | ||
| @@ -2176,7 +2149,7 @@ void __init cpuset_init_smp(void) | |||
| 2176 | * | 2149 | * |
| 2177 | * Description: Returns the cpumask_var_t cpus_allowed of the cpuset | 2150 | * Description: Returns the cpumask_var_t cpus_allowed of the cpuset |
| 2178 | * attached to the specified @tsk. Guaranteed to return some non-empty | 2151 | * attached to the specified @tsk. Guaranteed to return some non-empty |
| 2179 | * subset of cpu_online_map, even if this means going outside the | 2152 | * subset of cpu_online_mask, even if this means going outside the |
| 2180 | * tasks cpuset. | 2153 | * tasks cpuset. |
| 2181 | **/ | 2154 | **/ |
| 2182 | 2155 | ||
| @@ -2189,10 +2162,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) | |||
| 2189 | mutex_unlock(&callback_mutex); | 2162 | mutex_unlock(&callback_mutex); |
| 2190 | } | 2163 | } |
| 2191 | 2164 | ||
| 2192 | int cpuset_cpus_allowed_fallback(struct task_struct *tsk) | 2165 | void cpuset_cpus_allowed_fallback(struct task_struct *tsk) |
| 2193 | { | 2166 | { |
| 2194 | const struct cpuset *cs; | 2167 | const struct cpuset *cs; |
| 2195 | int cpu; | ||
| 2196 | 2168 | ||
| 2197 | rcu_read_lock(); | 2169 | rcu_read_lock(); |
| 2198 | cs = task_cs(tsk); | 2170 | cs = task_cs(tsk); |
| @@ -2213,22 +2185,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) | |||
| 2213 | * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary | 2185 | * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary |
| 2214 | * set any mask even if it is not right from task_cs() pov, | 2186 | * set any mask even if it is not right from task_cs() pov, |
| 2215 | * the pending set_cpus_allowed_ptr() will fix things. | 2187 | * the pending set_cpus_allowed_ptr() will fix things. |
| 2188 | * | ||
| 2189 | * select_fallback_rq() will fix things up and set cpu_possible_mask | ||
| 2190 | * if required. | ||
| 2216 | */ | 2191 | */ |
| 2217 | |||
| 2218 | cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask); | ||
| 2219 | if (cpu >= nr_cpu_ids) { | ||
| 2220 | /* | ||
| 2221 | * Either tsk->cpus_allowed is wrong (see above) or it | ||
| 2222 | * is actually empty. The latter case is only possible | ||
| 2223 | * if we are racing with remove_tasks_in_empty_cpuset(). | ||
| 2224 | * Like above we can temporary set any mask and rely on | ||
| 2225 | * set_cpus_allowed_ptr() as synchronization point. | ||
| 2226 | */ | ||
| 2227 | do_set_cpus_allowed(tsk, cpu_possible_mask); | ||
| 2228 | cpu = cpumask_any(cpu_active_mask); | ||
| 2229 | } | ||
| 2230 | |||
| 2231 | return cpu; | ||
| 2232 | } | 2192 | } |
| 2233 | 2193 | ||
| 2234 | void cpuset_init_current_mems_allowed(void) | 2194 | void cpuset_init_current_mems_allowed(void) |
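The cpuset_change_task_nodemask() hunk above replaces the mems_allowed_change_disable counter, the explicit memory barriers and the yield() retry loop with a seqcount: the writer brackets the nodemask update with write_seqcount_begin()/write_seqcount_end() on tsk->mems_allowed_seq, and only when the update could transiently expose an empty mask. Readers then retry instead of stalling the writer. A minimal sketch of the matching read-side pattern (the real reader lives in the cpuset/page-allocator code and may differ in detail):

    #include <linux/seqlock.h>
    #include <linux/sched.h>
    #include <linux/nodemask.h>

    /* Take one consistent snapshot of current->mems_allowed without locking. */
    static nodemask_t read_mems_allowed(void)
    {
            nodemask_t nodes;
            unsigned int seq;

            do {
                    seq = read_seqcount_begin(&current->mems_allowed_seq);
                    nodes = current->mems_allowed;  /* may race with the writer above */
            } while (read_seqcount_retry(&current->mems_allowed_seq, seq));

            return nodes;
    }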
diff --git a/kernel/cred.c b/kernel/cred.c
index 5791612a4045..e70683d9ec32 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/keyctl.h> | 16 | #include <linux/keyctl.h> |
| 17 | #include <linux/init_task.h> | 17 | #include <linux/init_task.h> |
| 18 | #include <linux/security.h> | 18 | #include <linux/security.h> |
| 19 | #include <linux/binfmts.h> | ||
| 19 | #include <linux/cn_proc.h> | 20 | #include <linux/cn_proc.h> |
| 20 | 21 | ||
| 21 | #if 0 | 22 | #if 0 |
| @@ -385,6 +386,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) | |||
| 385 | struct cred *new; | 386 | struct cred *new; |
| 386 | int ret; | 387 | int ret; |
| 387 | 388 | ||
| 389 | p->replacement_session_keyring = NULL; | ||
| 390 | |||
| 388 | if ( | 391 | if ( |
| 389 | #ifdef CONFIG_KEYS | 392 | #ifdef CONFIG_KEYS |
| 390 | !p->cred->thread_keyring && | 393 | !p->cred->thread_keyring && |
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0d7c08784efb..0557f24c6bca 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
| @@ -41,6 +41,7 @@ | |||
| 41 | #include <linux/delay.h> | 41 | #include <linux/delay.h> |
| 42 | #include <linux/sched.h> | 42 | #include <linux/sched.h> |
| 43 | #include <linux/sysrq.h> | 43 | #include <linux/sysrq.h> |
| 44 | #include <linux/reboot.h> | ||
| 44 | #include <linux/init.h> | 45 | #include <linux/init.h> |
| 45 | #include <linux/kgdb.h> | 46 | #include <linux/kgdb.h> |
| 46 | #include <linux/kdb.h> | 47 | #include <linux/kdb.h> |
| @@ -52,7 +53,6 @@ | |||
| 52 | #include <asm/cacheflush.h> | 53 | #include <asm/cacheflush.h> |
| 53 | #include <asm/byteorder.h> | 54 | #include <asm/byteorder.h> |
| 54 | #include <linux/atomic.h> | 55 | #include <linux/atomic.h> |
| 55 | #include <asm/system.h> | ||
| 56 | 56 | ||
| 57 | #include "debug_core.h" | 57 | #include "debug_core.h" |
| 58 | 58 | ||
| @@ -75,6 +75,8 @@ static int exception_level; | |||
| 75 | struct kgdb_io *dbg_io_ops; | 75 | struct kgdb_io *dbg_io_ops; |
| 76 | static DEFINE_SPINLOCK(kgdb_registration_lock); | 76 | static DEFINE_SPINLOCK(kgdb_registration_lock); |
| 77 | 77 | ||
| 78 | /* Action for the reboot notifier, a global so kdb can change it */ | ||
| 79 | static int kgdbreboot; | ||
| 78 | /* kgdb console driver is loaded */ | 80 | /* kgdb console driver is loaded */ |
| 79 | static int kgdb_con_registered; | 81 | static int kgdb_con_registered; |
| 80 | /* determine if kgdb console output should be used */ | 82 | /* determine if kgdb console output should be used */ |
| @@ -96,6 +98,7 @@ static int __init opt_kgdb_con(char *str) | |||
| 96 | early_param("kgdbcon", opt_kgdb_con); | 98 | early_param("kgdbcon", opt_kgdb_con); |
| 97 | 99 | ||
| 98 | module_param(kgdb_use_con, int, 0644); | 100 | module_param(kgdb_use_con, int, 0644); |
| 101 | module_param(kgdbreboot, int, 0644); | ||
| 99 | 102 | ||
| 100 | /* | 103 | /* |
| 101 | * Holds information about breakpoints in a kernel. These breakpoints are | 104 | * Holds information about breakpoints in a kernel. These breakpoints are |
| @@ -157,37 +160,39 @@ early_param("nokgdbroundup", opt_nokgdbroundup); | |||
| 157 | * Weak aliases for breakpoint management, | 160 | * Weak aliases for breakpoint management, |
| 158 | * can be overridden by architectures when needed: | 161 | * can be overridden by architectures when needed: |
| 159 | */ | 162 | */ |
| 160 | int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) | 163 | int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) |
| 161 | { | 164 | { |
| 162 | int err; | 165 | int err; |
| 163 | 166 | ||
| 164 | err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE); | 167 | err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, |
| 168 | BREAK_INSTR_SIZE); | ||
| 165 | if (err) | 169 | if (err) |
| 166 | return err; | 170 | return err; |
| 167 | 171 | err = probe_kernel_write((char *)bpt->bpt_addr, | |
| 168 | return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr, | 172 | arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); |
| 169 | BREAK_INSTR_SIZE); | 173 | return err; |
| 170 | } | 174 | } |
| 171 | 175 | ||
| 172 | int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle) | 176 | int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) |
| 173 | { | 177 | { |
| 174 | return probe_kernel_write((char *)addr, | 178 | return probe_kernel_write((char *)bpt->bpt_addr, |
| 175 | (char *)bundle, BREAK_INSTR_SIZE); | 179 | (char *)bpt->saved_instr, BREAK_INSTR_SIZE); |
| 176 | } | 180 | } |
| 177 | 181 | ||
| 178 | int __weak kgdb_validate_break_address(unsigned long addr) | 182 | int __weak kgdb_validate_break_address(unsigned long addr) |
| 179 | { | 183 | { |
| 180 | char tmp_variable[BREAK_INSTR_SIZE]; | 184 | struct kgdb_bkpt tmp; |
| 181 | int err; | 185 | int err; |
| 182 | /* Validate setting the breakpoint and then removing it. In the | 186 | /* Validate setting the breakpoint and then removing it. If the |
| 183 | * remove fails, the kernel needs to emit a bad message because we | 187 | * remove fails, the kernel needs to emit a bad message because we |
| 184 | * are in deep trouble not being able to put things back the way we | 188 | * are in deep trouble not being able to put things back the way we |
| 185 | * found them. | 189 | * found them. |
| 186 | */ | 190 | */ |
| 187 | err = kgdb_arch_set_breakpoint(addr, tmp_variable); | 191 | tmp.bpt_addr = addr; |
| 192 | err = kgdb_arch_set_breakpoint(&tmp); | ||
| 188 | if (err) | 193 | if (err) |
| 189 | return err; | 194 | return err; |
| 190 | err = kgdb_arch_remove_breakpoint(addr, tmp_variable); | 195 | err = kgdb_arch_remove_breakpoint(&tmp); |
| 191 | if (err) | 196 | if (err) |
| 192 | printk(KERN_ERR "KGDB: Critical breakpoint error, kernel " | 197 | printk(KERN_ERR "KGDB: Critical breakpoint error, kernel " |
| 193 | "memory destroyed at: %lx", addr); | 198 | "memory destroyed at: %lx", addr); |
| @@ -231,7 +236,6 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) | |||
| 231 | */ | 236 | */ |
| 232 | int dbg_activate_sw_breakpoints(void) | 237 | int dbg_activate_sw_breakpoints(void) |
| 233 | { | 238 | { |
| 234 | unsigned long addr; | ||
| 235 | int error; | 239 | int error; |
| 236 | int ret = 0; | 240 | int ret = 0; |
| 237 | int i; | 241 | int i; |
| @@ -240,16 +244,15 @@ int dbg_activate_sw_breakpoints(void) | |||
| 240 | if (kgdb_break[i].state != BP_SET) | 244 | if (kgdb_break[i].state != BP_SET) |
| 241 | continue; | 245 | continue; |
| 242 | 246 | ||
| 243 | addr = kgdb_break[i].bpt_addr; | 247 | error = kgdb_arch_set_breakpoint(&kgdb_break[i]); |
| 244 | error = kgdb_arch_set_breakpoint(addr, | ||
| 245 | kgdb_break[i].saved_instr); | ||
| 246 | if (error) { | 248 | if (error) { |
| 247 | ret = error; | 249 | ret = error; |
| 248 | printk(KERN_INFO "KGDB: BP install failed: %lx", addr); | 250 | printk(KERN_INFO "KGDB: BP install failed: %lx", |
| 251 | kgdb_break[i].bpt_addr); | ||
| 249 | continue; | 252 | continue; |
| 250 | } | 253 | } |
| 251 | 254 | ||
| 252 | kgdb_flush_swbreak_addr(addr); | 255 | kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr); |
| 253 | kgdb_break[i].state = BP_ACTIVE; | 256 | kgdb_break[i].state = BP_ACTIVE; |
| 254 | } | 257 | } |
| 255 | return ret; | 258 | return ret; |
| @@ -298,7 +301,6 @@ int dbg_set_sw_break(unsigned long addr) | |||
| 298 | 301 | ||
| 299 | int dbg_deactivate_sw_breakpoints(void) | 302 | int dbg_deactivate_sw_breakpoints(void) |
| 300 | { | 303 | { |
| 301 | unsigned long addr; | ||
| 302 | int error; | 304 | int error; |
| 303 | int ret = 0; | 305 | int ret = 0; |
| 304 | int i; | 306 | int i; |
| @@ -306,15 +308,14 @@ int dbg_deactivate_sw_breakpoints(void) | |||
| 306 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { | 308 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { |
| 307 | if (kgdb_break[i].state != BP_ACTIVE) | 309 | if (kgdb_break[i].state != BP_ACTIVE) |
| 308 | continue; | 310 | continue; |
| 309 | addr = kgdb_break[i].bpt_addr; | 311 | error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); |
| 310 | error = kgdb_arch_remove_breakpoint(addr, | ||
| 311 | kgdb_break[i].saved_instr); | ||
| 312 | if (error) { | 312 | if (error) { |
| 313 | printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr); | 313 | printk(KERN_INFO "KGDB: BP remove failed: %lx\n", |
| 314 | kgdb_break[i].bpt_addr); | ||
| 314 | ret = error; | 315 | ret = error; |
| 315 | } | 316 | } |
| 316 | 317 | ||
| 317 | kgdb_flush_swbreak_addr(addr); | 318 | kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr); |
| 318 | kgdb_break[i].state = BP_SET; | 319 | kgdb_break[i].state = BP_SET; |
| 319 | } | 320 | } |
| 320 | return ret; | 321 | return ret; |
| @@ -348,7 +349,6 @@ int kgdb_isremovedbreak(unsigned long addr) | |||
| 348 | 349 | ||
| 349 | int dbg_remove_all_break(void) | 350 | int dbg_remove_all_break(void) |
| 350 | { | 351 | { |
| 351 | unsigned long addr; | ||
| 352 | int error; | 352 | int error; |
| 353 | int i; | 353 | int i; |
| 354 | 354 | ||
| @@ -356,12 +356,10 @@ int dbg_remove_all_break(void) | |||
| 356 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { | 356 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { |
| 357 | if (kgdb_break[i].state != BP_ACTIVE) | 357 | if (kgdb_break[i].state != BP_ACTIVE) |
| 358 | goto setundefined; | 358 | goto setundefined; |
| 359 | addr = kgdb_break[i].bpt_addr; | 359 | error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); |
| 360 | error = kgdb_arch_remove_breakpoint(addr, | ||
| 361 | kgdb_break[i].saved_instr); | ||
| 362 | if (error) | 360 | if (error) |
| 363 | printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n", | 361 | printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n", |
| 364 | addr); | 362 | kgdb_break[i].bpt_addr); |
| 365 | setundefined: | 363 | setundefined: |
| 366 | kgdb_break[i].state = BP_UNDEFINED; | 364 | kgdb_break[i].state = BP_UNDEFINED; |
| 367 | } | 365 | } |
| @@ -784,6 +782,33 @@ void __init dbg_late_init(void) | |||
| 784 | kdb_init(KDB_INIT_FULL); | 782 | kdb_init(KDB_INIT_FULL); |
| 785 | } | 783 | } |
| 786 | 784 | ||
| 785 | static int | ||
| 786 | dbg_notify_reboot(struct notifier_block *this, unsigned long code, void *x) | ||
| 787 | { | ||
| 788 | /* | ||
| 789 | * Take the following action on reboot notify depending on value: | ||
| 790 | * 1 == Enter debugger | ||
| 791 | * 0 == [the default] detach debug client | ||
| 792 | * -1 == Do nothing... and use this until the board resets | ||
| 793 | */ | ||
| 794 | switch (kgdbreboot) { | ||
| 795 | case 1: | ||
| 796 | kgdb_breakpoint(); | ||
| 797 | case -1: | ||
| 798 | goto done; | ||
| 799 | } | ||
| 800 | if (!dbg_kdb_mode) | ||
| 801 | gdbstub_exit(code); | ||
| 802 | done: | ||
| 803 | return NOTIFY_DONE; | ||
| 804 | } | ||
| 805 | |||
| 806 | static struct notifier_block dbg_reboot_notifier = { | ||
| 807 | .notifier_call = dbg_notify_reboot, | ||
| 808 | .next = NULL, | ||
| 809 | .priority = INT_MAX, | ||
| 810 | }; | ||
| 811 | |||
| 787 | static void kgdb_register_callbacks(void) | 812 | static void kgdb_register_callbacks(void) |
| 788 | { | 813 | { |
| 789 | if (!kgdb_io_module_registered) { | 814 | if (!kgdb_io_module_registered) { |
| @@ -791,6 +816,7 @@ static void kgdb_register_callbacks(void) | |||
| 791 | kgdb_arch_init(); | 816 | kgdb_arch_init(); |
| 792 | if (!dbg_is_early) | 817 | if (!dbg_is_early) |
| 793 | kgdb_arch_late(); | 818 | kgdb_arch_late(); |
| 819 | register_reboot_notifier(&dbg_reboot_notifier); | ||
| 794 | atomic_notifier_chain_register(&panic_notifier_list, | 820 | atomic_notifier_chain_register(&panic_notifier_list, |
| 795 | &kgdb_panic_event_nb); | 821 | &kgdb_panic_event_nb); |
| 796 | #ifdef CONFIG_MAGIC_SYSRQ | 822 | #ifdef CONFIG_MAGIC_SYSRQ |
| @@ -812,6 +838,7 @@ static void kgdb_unregister_callbacks(void) | |||
| 812 | */ | 838 | */ |
| 813 | if (kgdb_io_module_registered) { | 839 | if (kgdb_io_module_registered) { |
| 814 | kgdb_io_module_registered = 0; | 840 | kgdb_io_module_registered = 0; |
| 841 | unregister_reboot_notifier(&dbg_reboot_notifier); | ||
| 815 | atomic_notifier_chain_unregister(&panic_notifier_list, | 842 | atomic_notifier_chain_unregister(&panic_notifier_list, |
| 816 | &kgdb_panic_event_nb); | 843 | &kgdb_panic_event_nb); |
| 817 | kgdb_arch_exit(); | 844 | kgdb_arch_exit(); |
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index c22d8c28ad84..ce615e064482 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
| @@ -1111,6 +1111,13 @@ void gdbstub_exit(int status) | |||
| 1111 | unsigned char checksum, ch, buffer[3]; | 1111 | unsigned char checksum, ch, buffer[3]; |
| 1112 | int loop; | 1112 | int loop; |
| 1113 | 1113 | ||
| 1114 | if (!kgdb_connected) | ||
| 1115 | return; | ||
| 1116 | kgdb_connected = 0; | ||
| 1117 | |||
| 1118 | if (!dbg_io_ops || dbg_kdb_mode) | ||
| 1119 | return; | ||
| 1120 | |||
| 1114 | buffer[0] = 'W'; | 1121 | buffer[0] = 'W'; |
| 1115 | buffer[1] = hex_asc_hi(status); | 1122 | buffer[1] = hex_asc_hi(status); |
| 1116 | buffer[2] = hex_asc_lo(status); | 1123 | buffer[2] = hex_asc_lo(status); |
| @@ -1129,5 +1136,6 @@ void gdbstub_exit(int status) | |||
| 1129 | dbg_io_ops->write_char(hex_asc_lo(checksum)); | 1136 | dbg_io_ops->write_char(hex_asc_lo(checksum)); |
| 1130 | 1137 | ||
| 1131 | /* make sure the output is flushed, lest the bootloader clobber it */ | 1138 | /* make sure the output is flushed, lest the bootloader clobber it */ |
| 1132 | dbg_io_ops->flush(); | 1139 | if (dbg_io_ops->flush) |
| 1140 | dbg_io_ops->flush(); | ||
| 1133 | } | 1141 | } |
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index 20059ef4459a..8418c2f8ec5d 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
| @@ -153,6 +153,13 @@ static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp) | |||
| 153 | } else { | 153 | } else { |
| 154 | kdb_printf("%s: failed to set breakpoint at 0x%lx\n", | 154 | kdb_printf("%s: failed to set breakpoint at 0x%lx\n", |
| 155 | __func__, bp->bp_addr); | 155 | __func__, bp->bp_addr); |
| 156 | #ifdef CONFIG_DEBUG_RODATA | ||
| 157 | if (!bp->bp_type) { | ||
| 158 | kdb_printf("Software breakpoints are unavailable.\n" | ||
| 159 | " Change the kernel CONFIG_DEBUG_RODATA=n\n" | ||
| 160 | " OR use hw breaks: help bph\n"); | ||
| 161 | } | ||
| 162 | #endif | ||
| 156 | return 1; | 163 | return 1; |
| 157 | } | 164 | } |
| 158 | return 0; | 165 | return 0; |
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 7179eac7b41c..07c9bbb94a0b 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
| @@ -15,7 +15,6 @@ | |||
| 15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
| 16 | #include <linux/kdb.h> | 16 | #include <linux/kdb.h> |
| 17 | #include <linux/nmi.h> | 17 | #include <linux/nmi.h> |
| 18 | #include <asm/system.h> | ||
| 19 | #include "kdb_private.h" | 18 | #include "kdb_private.h" |
| 20 | 19 | ||
| 21 | 20 | ||
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 4802eb5840e1..bb9520f0f6ff 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
| @@ -689,7 +689,7 @@ kdb_printit: | |||
| 689 | if (!dbg_kdb_mode && kgdb_connected) { | 689 | if (!dbg_kdb_mode && kgdb_connected) { |
| 690 | gdbstub_msg_write(kdb_buffer, retlen); | 690 | gdbstub_msg_write(kdb_buffer, retlen); |
| 691 | } else { | 691 | } else { |
| 692 | if (!dbg_io_ops->is_console) { | 692 | if (dbg_io_ops && !dbg_io_ops->is_console) { |
| 693 | len = strlen(kdb_buffer); | 693 | len = strlen(kdb_buffer); |
| 694 | cp = kdb_buffer; | 694 | cp = kdb_buffer; |
| 695 | while (len--) { | 695 | while (len--) { |
| @@ -743,7 +743,7 @@ kdb_printit: | |||
| 743 | kdb_input_flush(); | 743 | kdb_input_flush(); |
| 744 | c = console_drivers; | 744 | c = console_drivers; |
| 745 | 745 | ||
| 746 | if (!dbg_io_ops->is_console) { | 746 | if (dbg_io_ops && !dbg_io_ops->is_console) { |
| 747 | len = strlen(moreprompt); | 747 | len = strlen(moreprompt); |
| 748 | cp = moreprompt; | 748 | cp = moreprompt; |
| 749 | while (len--) { | 749 | while (len--) { |
diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c
index 4bca634975c0..118527aa60ea 100644
--- a/kernel/debug/kdb/kdb_keyboard.c
+++ b/kernel/debug/kdb/kdb_keyboard.c
| @@ -25,6 +25,7 @@ | |||
| 25 | #define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */ | 25 | #define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */ |
| 26 | 26 | ||
| 27 | static int kbd_exists; | 27 | static int kbd_exists; |
| 28 | static int kbd_last_ret; | ||
| 28 | 29 | ||
| 29 | /* | 30 | /* |
| 30 | * Check if the keyboard controller has a keypress for us. | 31 | * Check if the keyboard controller has a keypress for us. |
| @@ -90,8 +91,11 @@ int kdb_get_kbd_char(void) | |||
| 90 | return -1; | 91 | return -1; |
| 91 | } | 92 | } |
| 92 | 93 | ||
| 93 | if ((scancode & 0x80) != 0) | 94 | if ((scancode & 0x80) != 0) { |
| 95 | if (scancode == 0x9c) | ||
| 96 | kbd_last_ret = 0; | ||
| 94 | return -1; | 97 | return -1; |
| 98 | } | ||
| 95 | 99 | ||
| 96 | scancode &= 0x7f; | 100 | scancode &= 0x7f; |
| 97 | 101 | ||
| @@ -178,35 +182,82 @@ int kdb_get_kbd_char(void) | |||
| 178 | return -1; /* ignore unprintables */ | 182 | return -1; /* ignore unprintables */ |
| 179 | } | 183 | } |
| 180 | 184 | ||
| 181 | if ((scancode & 0x7f) == 0x1c) { | 185 | if (scancode == 0x1c) { |
| 182 | /* | 186 | kbd_last_ret = 1; |
| 183 | * enter key. All done. Absorb the release scancode. | 187 | return 13; |
| 184 | */ | 188 | } |
| 189 | |||
| 190 | return keychar & 0xff; | ||
| 191 | } | ||
| 192 | EXPORT_SYMBOL_GPL(kdb_get_kbd_char); | ||
| 193 | |||
| 194 | /* | ||
| 195 | * Best effort cleanup of ENTER break codes on leaving KDB. Called on | ||
| 196 | * exiting KDB, when we know we processed an ENTER or KP ENTER scan | ||
| 197 | * code. | ||
| 198 | */ | ||
| 199 | void kdb_kbd_cleanup_state(void) | ||
| 200 | { | ||
| 201 | int scancode, scanstatus; | ||
| 202 | |||
| 203 | /* | ||
| 204 | * Nothing to clean up, since either | ||
| 205 | * ENTER was never pressed, or has already | ||
| 206 | * gotten cleaned up. | ||
| 207 | */ | ||
| 208 | if (!kbd_last_ret) | ||
| 209 | return; | ||
| 210 | |||
| 211 | kbd_last_ret = 0; | ||
| 212 | /* | ||
| 213 | * Enter key. Need to absorb the break code here, lest it gets | ||
| 214 | * leaked out if we exit KDB as the result of processing 'g'. | ||
| 215 | * | ||
| 216 | * This has several interesting implications: | ||
| 217 | * + Need to handle KP ENTER, which has break code 0xe0 0x9c. | ||
| 218 | * + Need to handle repeat ENTER and repeat KP ENTER. Repeats | ||
| 219 | * only get a break code at the end of the repeated | ||
| 220 | * sequence. This means we can't propagate the repeated key | ||
| 221 | * press, and must swallow it away. | ||
| 222 | * + Need to handle possible PS/2 mouse input. | ||
| 223 | * + Need to handle mashed keys. | ||
| 224 | */ | ||
| 225 | |||
| 226 | while (1) { | ||
| 185 | while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) | 227 | while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) |
| 186 | ; | 228 | cpu_relax(); |
| 187 | 229 | ||
| 188 | /* | 230 | /* |
| 189 | * Fetch the scancode | 231 | * Fetch the scancode. |
| 190 | */ | 232 | */ |
| 191 | scancode = inb(KBD_DATA_REG); | 233 | scancode = inb(KBD_DATA_REG); |
| 192 | scanstatus = inb(KBD_STATUS_REG); | 234 | scanstatus = inb(KBD_STATUS_REG); |
| 193 | 235 | ||
| 194 | while (scanstatus & KBD_STAT_MOUSE_OBF) { | 236 | /* |
| 195 | scancode = inb(KBD_DATA_REG); | 237 | * Skip mouse input. |
| 196 | scanstatus = inb(KBD_STATUS_REG); | 238 | */ |
| 197 | } | 239 | if (scanstatus & KBD_STAT_MOUSE_OBF) |
| 240 | continue; | ||
| 198 | 241 | ||
| 199 | if (scancode != 0x9c) { | 242 | /* |
| 200 | /* | 243 | * If we see 0xe0, this is either a break code for KP |
| 201 | * Wasn't an enter-release, why not? | 244 | * ENTER, or a repeat make for KP ENTER. Either way, |
| 202 | */ | 245 | * since the second byte is equivalent to an ENTER, |
| 203 | kdb_printf("kdb: expected enter got 0x%x status 0x%x\n", | 246 | * skip the 0xe0 and try again. |
| 204 | scancode, scanstatus); | 247 | * |
| 205 | } | 248 | * If we see 0x1c, this must be a repeat ENTER or KP |
| 249 | * ENTER (and we swallowed 0xe0 before). Try again. | ||
| 250 | * | ||
| 251 | * We can also see make and break codes for other keys | ||
| 252 | * mashed before or after pressing ENTER. Thus, if we | ||
| 253 | * see anything other than 0x9c, we have to try again. | ||
| 254 | * | ||
| 255 | * Note, if you held some key as ENTER was depressed, | ||
| 256 | * that break code would get leaked out. | ||
| 257 | */ | ||
| 258 | if (scancode != 0x9c) | ||
| 259 | continue; | ||
| 206 | 260 | ||
| 207 | return 13; | 261 | return; |
| 208 | } | 262 | } |
| 209 | |||
| 210 | return keychar & 0xff; | ||
| 211 | } | 263 | } |
| 212 | EXPORT_SYMBOL_GPL(kdb_get_kbd_char); | ||
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index e2ae7349437f..67b847dfa2bb 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
| @@ -1400,6 +1400,9 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error, | |||
| 1400 | if (KDB_STATE(DOING_SS)) | 1400 | if (KDB_STATE(DOING_SS)) |
| 1401 | KDB_STATE_CLEAR(SSBPT); | 1401 | KDB_STATE_CLEAR(SSBPT); |
| 1402 | 1402 | ||
| 1403 | /* Clean up any keyboard devices before leaving */ | ||
| 1404 | kdb_kbd_cleanup_state(); | ||
| 1405 | |||
| 1403 | return result; | 1406 | return result; |
| 1404 | } | 1407 | } |
| 1405 | 1408 | ||
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index e381d105b40b..47c4e56e513b 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
| @@ -246,6 +246,13 @@ extern void debug_kusage(void); | |||
| 246 | 246 | ||
| 247 | extern void kdb_set_current_task(struct task_struct *); | 247 | extern void kdb_set_current_task(struct task_struct *); |
| 248 | extern struct task_struct *kdb_current_task; | 248 | extern struct task_struct *kdb_current_task; |
| 249 | |||
| 250 | #ifdef CONFIG_KDB_KEYBOARD | ||
| 251 | extern void kdb_kbd_cleanup_state(void); | ||
| 252 | #else /* ! CONFIG_KDB_KEYBOARD */ | ||
| 253 | #define kdb_kbd_cleanup_state() | ||
| 254 | #endif /* ! CONFIG_KDB_KEYBOARD */ | ||
| 255 | |||
| 249 | #ifdef CONFIG_MODULES | 256 | #ifdef CONFIG_MODULES |
| 250 | extern struct list_head *kdb_modules; | 257 | extern struct list_head *kdb_modules; |
| 251 | #endif /* CONFIG_MODULES */ | 258 | #endif /* CONFIG_MODULES */ |
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 7d6fb40d2188..d35cc2d3a4cc 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
| @@ -384,9 +384,9 @@ static int kdb_getphys(void *res, unsigned long addr, size_t size) | |||
| 384 | if (!pfn_valid(pfn)) | 384 | if (!pfn_valid(pfn)) |
| 385 | return 1; | 385 | return 1; |
| 386 | page = pfn_to_page(pfn); | 386 | page = pfn_to_page(pfn); |
| 387 | vaddr = kmap_atomic(page, KM_KDB); | 387 | vaddr = kmap_atomic(page); |
| 388 | memcpy(res, vaddr + (addr & (PAGE_SIZE - 1)), size); | 388 | memcpy(res, vaddr + (addr & (PAGE_SIZE - 1)), size); |
| 389 | kunmap_atomic(vaddr, KM_KDB); | 389 | kunmap_atomic(vaddr); |
| 390 | 390 | ||
| 391 | return 0; | 391 | return 0; |
| 392 | } | 392 | } |
diff --git a/kernel/dma.c b/kernel/dma.c
index 68a2306522c8..6c6262f86c17 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
| @@ -18,7 +18,6 @@ | |||
| 18 | #include <linux/proc_fs.h> | 18 | #include <linux/proc_fs.h> |
| 19 | #include <linux/init.h> | 19 | #include <linux/init.h> |
| 20 | #include <asm/dma.h> | 20 | #include <asm/dma.h> |
| 21 | #include <asm/system.h> | ||
| 22 | 21 | ||
| 23 | 22 | ||
| 24 | 23 | ||
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4b50357914fb..a6a9ec4cd8f5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
| @@ -3348,7 +3348,7 @@ static void calc_timer_values(struct perf_event *event, | |||
| 3348 | *running = ctx_time - event->tstamp_running; | 3348 | *running = ctx_time - event->tstamp_running; |
| 3349 | } | 3349 | } |
| 3350 | 3350 | ||
| 3351 | void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | 3351 | void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) |
| 3352 | { | 3352 | { |
| 3353 | } | 3353 | } |
| 3354 | 3354 | ||
| @@ -3398,7 +3398,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
| 3398 | userpg->time_running = running + | 3398 | userpg->time_running = running + |
| 3399 | atomic64_read(&event->child_total_time_running); | 3399 | atomic64_read(&event->child_total_time_running); |
| 3400 | 3400 | ||
| 3401 | perf_update_user_clock(userpg, now); | 3401 | arch_perf_update_userpage(userpg, now); |
| 3402 | 3402 | ||
| 3403 | barrier(); | 3403 | barrier(); |
| 3404 | ++userpg->lock; | 3404 | ++userpg->lock; |
| @@ -7116,6 +7116,13 @@ void __init perf_event_init(void) | |||
| 7116 | 7116 | ||
| 7117 | /* do not patch jump label more than once per second */ | 7117 | /* do not patch jump label more than once per second */ |
| 7118 | jump_label_rate_limit(&perf_sched_events, HZ); | 7118 | jump_label_rate_limit(&perf_sched_events, HZ); |
| 7119 | |||
| 7120 | /* | ||
| 7121 | * Build time assertion that we keep the data_head at the intended | ||
| 7122 | * location. IOW, validation we got the __reserved[] size right. | ||
| 7123 | */ | ||
| 7124 | BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head)) | ||
| 7125 | != 1024); | ||
| 7119 | } | 7126 | } |
| 7120 | 7127 | ||
| 7121 | static int __init perf_event_sysfs_init(void) | 7128 | static int __init perf_event_sysfs_init(void) |
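The BUILD_BUG_ON() added to perf_event_init() pins the user-visible mmap control page: data_head must stay at offset 1024, i.e. the __reserved[] padding in struct perf_event_mmap_page has to be resized correctly whenever fields are added in front of it. The same compile-time trick works for any exported layout; a generic sketch with a hypothetical structure:

    #include <linux/bug.h>
    #include <linux/stddef.h>

    /* Sketch: fail the build if a user-visible field ever moves. */
    struct abi_page {
            unsigned long   version;
            unsigned char   __reserved[1024 - sizeof(unsigned long)];
            unsigned long   data_head;      /* userspace polls this at a fixed offset */
    };

    static inline void abi_layout_check(void)
    {
            BUILD_BUG_ON(offsetof(struct abi_page, data_head) != 1024);
    }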
diff --git a/kernel/exit.c b/kernel/exit.c
index ce5f758f40bd..d8bd3b425fa7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
| @@ -52,6 +52,7 @@ | |||
| 52 | #include <linux/hw_breakpoint.h> | 52 | #include <linux/hw_breakpoint.h> |
| 53 | #include <linux/oom.h> | 53 | #include <linux/oom.h> |
| 54 | #include <linux/writeback.h> | 54 | #include <linux/writeback.h> |
| 55 | #include <linux/shm.h> | ||
| 55 | 56 | ||
| 56 | #include <asm/uaccess.h> | 57 | #include <asm/uaccess.h> |
| 57 | #include <asm/unistd.h> | 58 | #include <asm/unistd.h> |
| @@ -424,7 +425,7 @@ void daemonize(const char *name, ...) | |||
| 424 | */ | 425 | */ |
| 425 | exit_mm(current); | 426 | exit_mm(current); |
| 426 | /* | 427 | /* |
| 427 | * We don't want to have TIF_FREEZE set if the system-wide hibernation | 428 | * We don't want to get frozen, in case system-wide hibernation |
| 428 | * or suspend transition begins right now. | 429 | * or suspend transition begins right now. |
| 429 | */ | 430 | */ |
| 430 | current->flags |= (PF_NOFREEZE | PF_KTHREAD); | 431 | current->flags |= (PF_NOFREEZE | PF_KTHREAD); |
| @@ -473,7 +474,7 @@ static void close_files(struct files_struct * files) | |||
| 473 | i = j * __NFDBITS; | 474 | i = j * __NFDBITS; |
| 474 | if (i >= fdt->max_fds) | 475 | if (i >= fdt->max_fds) |
| 475 | break; | 476 | break; |
| 476 | set = fdt->open_fds->fds_bits[j++]; | 477 | set = fdt->open_fds[j++]; |
| 477 | while (set) { | 478 | while (set) { |
| 478 | if (set & 1) { | 479 | if (set & 1) { |
| 479 | struct file * file = xchg(&fdt->fd[i], NULL); | 480 | struct file * file = xchg(&fdt->fd[i], NULL); |
| @@ -686,11 +687,11 @@ static void exit_mm(struct task_struct * tsk) | |||
| 686 | } | 687 | } |
| 687 | 688 | ||
| 688 | /* | 689 | /* |
| 689 | * When we die, we re-parent all our children. | 690 | * When we die, we re-parent all our children, and try to: |
| 690 | * Try to give them to another thread in our thread | 691 | * 1. give them to another thread in our thread group, if such a member exists |
| 691 | * group, and if no such member exists, give it to | 692 | * 2. give it to the first ancestor process which prctl'd itself as a |
| 692 | * the child reaper process (ie "init") in our pid | 693 | * child_subreaper for its children (like a service manager) |
| 693 | * space. | 694 | * 3. give it to the init process (PID 1) in our pid namespace |
| 694 | */ | 695 | */ |
| 695 | static struct task_struct *find_new_reaper(struct task_struct *father) | 696 | static struct task_struct *find_new_reaper(struct task_struct *father) |
| 696 | __releases(&tasklist_lock) | 697 | __releases(&tasklist_lock) |
| @@ -710,8 +711,11 @@ static struct task_struct *find_new_reaper(struct task_struct *father) | |||
| 710 | 711 | ||
| 711 | if (unlikely(pid_ns->child_reaper == father)) { | 712 | if (unlikely(pid_ns->child_reaper == father)) { |
| 712 | write_unlock_irq(&tasklist_lock); | 713 | write_unlock_irq(&tasklist_lock); |
| 713 | if (unlikely(pid_ns == &init_pid_ns)) | 714 | if (unlikely(pid_ns == &init_pid_ns)) { |
| 714 | panic("Attempted to kill init!"); | 715 | panic("Attempted to kill init! exitcode=0x%08x\n", |
| 716 | father->signal->group_exit_code ?: | ||
| 717 | father->exit_code); | ||
| 718 | } | ||
| 715 | 719 | ||
| 716 | zap_pid_ns_processes(pid_ns); | 720 | zap_pid_ns_processes(pid_ns); |
| 717 | write_lock_irq(&tasklist_lock); | 721 | write_lock_irq(&tasklist_lock); |
| @@ -721,6 +725,29 @@ static struct task_struct *find_new_reaper(struct task_struct *father) | |||
| 721 | * forget_original_parent() must move them somewhere. | 725 | * forget_original_parent() must move them somewhere. |
| 722 | */ | 726 | */ |
| 723 | pid_ns->child_reaper = init_pid_ns.child_reaper; | 727 | pid_ns->child_reaper = init_pid_ns.child_reaper; |
| 728 | } else if (father->signal->has_child_subreaper) { | ||
| 729 | struct task_struct *reaper; | ||
| 730 | |||
| 731 | /* | ||
| 732 | * Find the first ancestor marked as child_subreaper. | ||
| 733 | * Note that the code below checks same_thread_group(reaper, | ||
| 734 | * pid_ns->child_reaper). This is what we need to DTRT in a | ||
| 735 | * PID namespace. However we still need the check above, see | ||
| 736 | * http://marc.info/?l=linux-kernel&m=131385460420380 | ||
| 737 | */ | ||
| 738 | for (reaper = father->real_parent; | ||
| 739 | reaper != &init_task; | ||
| 740 | reaper = reaper->real_parent) { | ||
| 741 | if (same_thread_group(reaper, pid_ns->child_reaper)) | ||
| 742 | break; | ||
| 743 | if (!reaper->signal->is_child_subreaper) | ||
| 744 | continue; | ||
| 745 | thread = reaper; | ||
| 746 | do { | ||
| 747 | if (!(thread->flags & PF_EXITING)) | ||
| 748 | return reaper; | ||
| 749 | } while_each_thread(reaper, thread); | ||
| 750 | } | ||
| 724 | } | 751 | } |
| 725 | 752 | ||
| 726 | return pid_ns->child_reaper; | 753 | return pid_ns->child_reaper; |
| @@ -934,7 +961,7 @@ void do_exit(long code) | |||
| 934 | acct_update_integrals(tsk); | 961 | acct_update_integrals(tsk); |
| 935 | /* sync mm's RSS info before statistics gathering */ | 962 | /* sync mm's RSS info before statistics gathering */ |
| 936 | if (tsk->mm) | 963 | if (tsk->mm) |
| 937 | sync_mm_rss(tsk, tsk->mm); | 964 | sync_mm_rss(tsk->mm); |
| 938 | group_dead = atomic_dec_and_test(&tsk->signal->live); | 965 | group_dead = atomic_dec_and_test(&tsk->signal->live); |
| 939 | if (group_dead) { | 966 | if (group_dead) { |
| 940 | hrtimer_cancel(&tsk->signal->real_timer); | 967 | hrtimer_cancel(&tsk->signal->real_timer); |
diff --git a/kernel/fork.c b/kernel/fork.c
index c4f38a849436..b9372a0bff18 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
| @@ -193,6 +193,7 @@ void __put_task_struct(struct task_struct *tsk) | |||
| 193 | WARN_ON(atomic_read(&tsk->usage)); | 193 | WARN_ON(atomic_read(&tsk->usage)); |
| 194 | WARN_ON(tsk == current); | 194 | WARN_ON(tsk == current); |
| 195 | 195 | ||
| 196 | security_task_free(tsk); | ||
| 196 | exit_creds(tsk); | 197 | exit_creds(tsk); |
| 197 | delayacct_tsk_free(tsk); | 198 | delayacct_tsk_free(tsk); |
| 198 | put_signal_struct(tsk->signal); | 199 | put_signal_struct(tsk->signal); |
| @@ -355,7 +356,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
| 355 | charge = 0; | 356 | charge = 0; |
| 356 | if (mpnt->vm_flags & VM_ACCOUNT) { | 357 | if (mpnt->vm_flags & VM_ACCOUNT) { |
| 357 | unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; | 358 | unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; |
| 358 | if (security_vm_enough_memory(len)) | 359 | if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ |
| 359 | goto fail_nomem; | 360 | goto fail_nomem; |
| 360 | charge = len; | 361 | charge = len; |
| 361 | } | 362 | } |
| @@ -511,6 +512,23 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) | |||
| 511 | return NULL; | 512 | return NULL; |
| 512 | } | 513 | } |
| 513 | 514 | ||
| 515 | static void check_mm(struct mm_struct *mm) | ||
| 516 | { | ||
| 517 | int i; | ||
| 518 | |||
| 519 | for (i = 0; i < NR_MM_COUNTERS; i++) { | ||
| 520 | long x = atomic_long_read(&mm->rss_stat.count[i]); | ||
| 521 | |||
| 522 | if (unlikely(x)) | ||
| 523 | printk(KERN_ALERT "BUG: Bad rss-counter state " | ||
| 524 | "mm:%p idx:%d val:%ld\n", mm, i, x); | ||
| 525 | } | ||
| 526 | |||
| 527 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
| 528 | VM_BUG_ON(mm->pmd_huge_pte); | ||
| 529 | #endif | ||
| 530 | } | ||
| 531 | |||
| 514 | /* | 532 | /* |
| 515 | * Allocate and initialize an mm_struct. | 533 | * Allocate and initialize an mm_struct. |
| 516 | */ | 534 | */ |
| @@ -538,9 +556,7 @@ void __mmdrop(struct mm_struct *mm) | |||
| 538 | mm_free_pgd(mm); | 556 | mm_free_pgd(mm); |
| 539 | destroy_context(mm); | 557 | destroy_context(mm); |
| 540 | mmu_notifier_mm_destroy(mm); | 558 | mmu_notifier_mm_destroy(mm); |
| 541 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 559 | check_mm(mm); |
| 542 | VM_BUG_ON(mm->pmd_huge_pte); | ||
| 543 | #endif | ||
| 544 | free_mm(mm); | 560 | free_mm(mm); |
| 545 | } | 561 | } |
| 546 | EXPORT_SYMBOL_GPL(__mmdrop); | 562 | EXPORT_SYMBOL_GPL(__mmdrop); |
| @@ -1035,6 +1051,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
| 1035 | sig->oom_score_adj = current->signal->oom_score_adj; | 1051 | sig->oom_score_adj = current->signal->oom_score_adj; |
| 1036 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | 1052 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; |
| 1037 | 1053 | ||
| 1054 | sig->has_child_subreaper = current->signal->has_child_subreaper || | ||
| 1055 | current->signal->is_child_subreaper; | ||
| 1056 | |||
| 1038 | mutex_init(&sig->cred_guard_mutex); | 1057 | mutex_init(&sig->cred_guard_mutex); |
| 1039 | 1058 | ||
| 1040 | return 0; | 1059 | return 0; |
| @@ -1222,6 +1241,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1222 | #ifdef CONFIG_CPUSETS | 1241 | #ifdef CONFIG_CPUSETS |
| 1223 | p->cpuset_mem_spread_rotor = NUMA_NO_NODE; | 1242 | p->cpuset_mem_spread_rotor = NUMA_NO_NODE; |
| 1224 | p->cpuset_slab_spread_rotor = NUMA_NO_NODE; | 1243 | p->cpuset_slab_spread_rotor = NUMA_NO_NODE; |
| 1244 | seqcount_init(&p->mems_allowed_seq); | ||
| 1225 | #endif | 1245 | #endif |
| 1226 | #ifdef CONFIG_TRACE_IRQFLAGS | 1246 | #ifdef CONFIG_TRACE_IRQFLAGS |
| 1227 | p->irq_events = 0; | 1247 | p->irq_events = 0; |
diff --git a/kernel/freezer.c b/kernel/freezer.c index 9815b8d1eed5..11f82a4d4eae 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c | |||
| @@ -99,9 +99,9 @@ static void fake_signal_wake_up(struct task_struct *p) | |||
| 99 | * freeze_task - send a freeze request to given task | 99 | * freeze_task - send a freeze request to given task |
| 100 | * @p: task to send the request to | 100 | * @p: task to send the request to |
| 101 | * | 101 | * |
| 102 | * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE | 102 | * If @p is freezing, the freeze request is sent either by sending a fake |
| 103 | * flag and either sending a fake signal to it or waking it up, depending | 103 | * signal (if it's not a kernel thread) or waking it up (if it's a kernel |
| 104 | * on whether it has %PF_FREEZER_NOSIG set. | 104 | * thread). |
| 105 | * | 105 | * |
| 106 | * RETURNS: | 106 | * RETURNS: |
| 107 | * %false, if @p is not freezing or already frozen; %true, otherwise | 107 | * %false, if @p is not freezing or already frozen; %true, otherwise |
diff --git a/kernel/futex.c b/kernel/futex.c index 72efa1e4359a..e2b0fb9a0b3b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -59,6 +59,7 @@ | |||
| 59 | #include <linux/magic.h> | 59 | #include <linux/magic.h> |
| 60 | #include <linux/pid.h> | 60 | #include <linux/pid.h> |
| 61 | #include <linux/nsproxy.h> | 61 | #include <linux/nsproxy.h> |
| 62 | #include <linux/ptrace.h> | ||
| 62 | 63 | ||
| 63 | #include <asm/futex.h> | 64 | #include <asm/futex.h> |
| 64 | 65 | ||
| @@ -2443,40 +2444,31 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, | |||
| 2443 | { | 2444 | { |
| 2444 | struct robust_list_head __user *head; | 2445 | struct robust_list_head __user *head; |
| 2445 | unsigned long ret; | 2446 | unsigned long ret; |
| 2446 | const struct cred *cred = current_cred(), *pcred; | 2447 | struct task_struct *p; |
| 2447 | 2448 | ||
| 2448 | if (!futex_cmpxchg_enabled) | 2449 | if (!futex_cmpxchg_enabled) |
| 2449 | return -ENOSYS; | 2450 | return -ENOSYS; |
| 2450 | 2451 | ||
| 2452 | WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); | ||
| 2453 | |||
| 2454 | rcu_read_lock(); | ||
| 2455 | |||
| 2456 | ret = -ESRCH; | ||
| 2451 | if (!pid) | 2457 | if (!pid) |
| 2452 | head = current->robust_list; | 2458 | p = current; |
| 2453 | else { | 2459 | else { |
| 2454 | struct task_struct *p; | ||
| 2455 | |||
| 2456 | ret = -ESRCH; | ||
| 2457 | rcu_read_lock(); | ||
| 2458 | p = find_task_by_vpid(pid); | 2460 | p = find_task_by_vpid(pid); |
| 2459 | if (!p) | 2461 | if (!p) |
| 2460 | goto err_unlock; | 2462 | goto err_unlock; |
| 2461 | ret = -EPERM; | ||
| 2462 | pcred = __task_cred(p); | ||
| 2463 | /* If victim is in different user_ns, then uids are not | ||
| 2464 | comparable, so we must have CAP_SYS_PTRACE */ | ||
| 2465 | if (cred->user->user_ns != pcred->user->user_ns) { | ||
| 2466 | if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) | ||
| 2467 | goto err_unlock; | ||
| 2468 | goto ok; | ||
| 2469 | } | ||
| 2470 | /* If victim is in same user_ns, then uids are comparable */ | ||
| 2471 | if (cred->euid != pcred->euid && | ||
| 2472 | cred->euid != pcred->uid && | ||
| 2473 | !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) | ||
| 2474 | goto err_unlock; | ||
| 2475 | ok: | ||
| 2476 | head = p->robust_list; | ||
| 2477 | rcu_read_unlock(); | ||
| 2478 | } | 2463 | } |
| 2479 | 2464 | ||
| 2465 | ret = -EPERM; | ||
| 2466 | if (!ptrace_may_access(p, PTRACE_MODE_READ)) | ||
| 2467 | goto err_unlock; | ||
| 2468 | |||
| 2469 | head = p->robust_list; | ||
| 2470 | rcu_read_unlock(); | ||
| 2471 | |||
| 2480 | if (put_user(sizeof(*head), len_ptr)) | 2472 | if (put_user(sizeof(*head), len_ptr)) |
| 2481 | return -EFAULT; | 2473 | return -EFAULT; |
| 2482 | return put_user(head, head_ptr); | 2474 | return put_user(head, head_ptr); |
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 5f9e689dc8f0..83e368b005fc 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/compat.h> | 10 | #include <linux/compat.h> |
| 11 | #include <linux/nsproxy.h> | 11 | #include <linux/nsproxy.h> |
| 12 | #include <linux/futex.h> | 12 | #include <linux/futex.h> |
| 13 | #include <linux/ptrace.h> | ||
| 13 | 14 | ||
| 14 | #include <asm/uaccess.h> | 15 | #include <asm/uaccess.h> |
| 15 | 16 | ||
| @@ -136,40 +137,31 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, | |||
| 136 | { | 137 | { |
| 137 | struct compat_robust_list_head __user *head; | 138 | struct compat_robust_list_head __user *head; |
| 138 | unsigned long ret; | 139 | unsigned long ret; |
| 139 | const struct cred *cred = current_cred(), *pcred; | 140 | struct task_struct *p; |
| 140 | 141 | ||
| 141 | if (!futex_cmpxchg_enabled) | 142 | if (!futex_cmpxchg_enabled) |
| 142 | return -ENOSYS; | 143 | return -ENOSYS; |
| 143 | 144 | ||
| 145 | WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); | ||
| 146 | |||
| 147 | rcu_read_lock(); | ||
| 148 | |||
| 149 | ret = -ESRCH; | ||
| 144 | if (!pid) | 150 | if (!pid) |
| 145 | head = current->compat_robust_list; | 151 | p = current; |
| 146 | else { | 152 | else { |
| 147 | struct task_struct *p; | ||
| 148 | |||
| 149 | ret = -ESRCH; | ||
| 150 | rcu_read_lock(); | ||
| 151 | p = find_task_by_vpid(pid); | 153 | p = find_task_by_vpid(pid); |
| 152 | if (!p) | 154 | if (!p) |
| 153 | goto err_unlock; | 155 | goto err_unlock; |
| 154 | ret = -EPERM; | ||
| 155 | pcred = __task_cred(p); | ||
| 156 | /* If victim is in different user_ns, then uids are not | ||
| 157 | comparable, so we must have CAP_SYS_PTRACE */ | ||
| 158 | if (cred->user->user_ns != pcred->user->user_ns) { | ||
| 159 | if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) | ||
| 160 | goto err_unlock; | ||
| 161 | goto ok; | ||
| 162 | } | ||
| 163 | /* If victim is in same user_ns, then uids are comparable */ | ||
| 164 | if (cred->euid != pcred->euid && | ||
| 165 | cred->euid != pcred->uid && | ||
| 166 | !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) | ||
| 167 | goto err_unlock; | ||
| 168 | ok: | ||
| 169 | head = p->compat_robust_list; | ||
| 170 | rcu_read_unlock(); | ||
| 171 | } | 156 | } |
| 172 | 157 | ||
| 158 | ret = -EPERM; | ||
| 159 | if (!ptrace_may_access(p, PTRACE_MODE_READ)) | ||
| 160 | goto err_unlock; | ||
| 161 | |||
| 162 | head = p->compat_robust_list; | ||
| 163 | rcu_read_unlock(); | ||
| 164 | |||
| 173 | if (put_user(sizeof(*head), len_ptr)) | 165 | if (put_user(sizeof(*head), len_ptr)) |
| 174 | return -EFAULT; | 166 | return -EFAULT; |
| 175 | return put_user(ptr_to_compat(head), head_ptr); | 167 | return put_user(ptr_to_compat(head), head_ptr); |
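Both get_robust_list() hunks above replace the open-coded euid/user_ns comparisons with a single ptrace_may_access(p, PTRACE_MODE_READ) test done under rcu_read_lock(). A minimal sketch of that pattern, assuming the 3.3-era task APIs shown in this diff; find_readable_task() is a hypothetical helper, not part of the patch, and it folds the -ESRCH/-EPERM distinction into a single NULL result:

#include <linux/errno.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

/* Hypothetical helper: resolve a pid and gate access the same way the
 * reworked syscalls do, taking a reference so the result can be used
 * after dropping the RCU read-side section. */
static struct task_struct *find_readable_task(pid_t pid)
{
	struct task_struct *p;

	rcu_read_lock();
	p = pid ? find_task_by_vpid(pid) : current;
	if (p && ptrace_may_access(p, PTRACE_MODE_READ))
		get_task_struct(p);
	else
		p = NULL;
	rcu_read_unlock();

	return p;	/* caller drops the reference with put_task_struct() */
}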
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5a38bf4de641..d1a758bc972a 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig | |||
| @@ -13,7 +13,7 @@ config GENERIC_HARDIRQS | |||
| 13 | # Options selectable by the architecture code | 13 | # Options selectable by the architecture code |
| 14 | 14 | ||
| 15 | # Make sparse irq Kconfig switch below available | 15 | # Make sparse irq Kconfig switch below available |
| 16 | config HAVE_SPARSE_IRQ | 16 | config MAY_HAVE_SPARSE_IRQ |
| 17 | bool | 17 | bool |
| 18 | 18 | ||
| 19 | # Enable the generic irq autoprobe mechanism | 19 | # Enable the generic irq autoprobe mechanism |
| @@ -56,13 +56,22 @@ config GENERIC_IRQ_CHIP | |||
| 56 | config IRQ_DOMAIN | 56 | config IRQ_DOMAIN |
| 57 | bool | 57 | bool |
| 58 | 58 | ||
| 59 | config IRQ_DOMAIN_DEBUG | ||
| 60 | bool "Expose hardware/virtual IRQ mapping via debugfs" | ||
| 61 | depends on IRQ_DOMAIN && DEBUG_FS | ||
| 62 | help | ||
| 63 | This option will show the mapping relationship between hardware irq | ||
| 64 | numbers and Linux irq numbers. The mapping is exposed via debugfs | ||
| 65 | in the file "irq_domain_mapping". | ||
| 66 | |||
| 67 | If you don't know what this means you don't need it. | ||
| 68 | |||
| 59 | # Support forced irq threading | 69 | # Support forced irq threading |
| 60 | config IRQ_FORCED_THREADING | 70 | config IRQ_FORCED_THREADING |
| 61 | bool | 71 | bool |
| 62 | 72 | ||
| 63 | config SPARSE_IRQ | 73 | config SPARSE_IRQ |
| 64 | bool "Support sparse irq numbering" | 74 | bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ |
| 65 | depends on HAVE_SPARSE_IRQ | ||
| 66 | ---help--- | 75 | ---help--- |
| 67 | 76 | ||
| 68 | Sparse irq numbering is useful for distro kernels that want | 77 | Sparse irq numbering is useful for distro kernels that want |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 6ff84e6a954c..bdb180325551 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
| @@ -54,14 +54,18 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action) | |||
| 54 | static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action) | 54 | static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action) |
| 55 | { | 55 | { |
| 56 | /* | 56 | /* |
| 57 | * Wake up the handler thread for this action. In case the | 57 | * In case the thread crashed and was killed we just pretend that |
| 58 | * thread crashed and was killed we just pretend that we | 58 | * we handled the interrupt. The hardirq handler has disabled the |
| 59 | * handled the interrupt. The hardirq handler has disabled the | 59 | * device interrupt, so no irq storm is lurking. |
| 60 | * device interrupt, so no irq storm is lurking. If the | 60 | */ |
| 61 | if (action->thread->flags & PF_EXITING) | ||
| 62 | return; | ||
| 63 | |||
| 64 | /* | ||
| 65 | * Wake up the handler thread for this action. If the | ||
| 61 | * RUNTHREAD bit is already set, nothing to do. | 66 | * RUNTHREAD bit is already set, nothing to do. |
| 62 | */ | 67 | */ |
| 63 | if ((action->thread->flags & PF_EXITING) || | 68 | if (test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags)) |
| 64 | test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags)) | ||
| 65 | return; | 69 | return; |
| 66 | 70 | ||
| 67 | /* | 71 | /* |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 1f9e26526b69..0e0ba5f840b2 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
| @@ -1,189 +1,780 @@ | |||
| 1 | #include <linux/debugfs.h> | ||
| 2 | #include <linux/hardirq.h> | ||
| 3 | #include <linux/interrupt.h> | ||
| 1 | #include <linux/irq.h> | 4 | #include <linux/irq.h> |
| 5 | #include <linux/irqdesc.h> | ||
| 2 | #include <linux/irqdomain.h> | 6 | #include <linux/irqdomain.h> |
| 3 | #include <linux/module.h> | 7 | #include <linux/module.h> |
| 4 | #include <linux/mutex.h> | 8 | #include <linux/mutex.h> |
| 5 | #include <linux/of.h> | 9 | #include <linux/of.h> |
| 6 | #include <linux/of_address.h> | 10 | #include <linux/of_address.h> |
| 11 | #include <linux/seq_file.h> | ||
| 7 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 13 | #include <linux/smp.h> | ||
| 14 | #include <linux/fs.h> | ||
| 15 | |||
| 16 | #define IRQ_DOMAIN_MAP_LEGACY 0 /* driver allocated fixed range of irqs. | ||
| 17 | * ie. legacy 8259, gets irqs 1..15 */ | ||
| 18 | #define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */ | ||
| 19 | #define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */ | ||
| 20 | #define IRQ_DOMAIN_MAP_TREE 3 /* radix tree */ | ||
| 8 | 21 | ||
| 9 | static LIST_HEAD(irq_domain_list); | 22 | static LIST_HEAD(irq_domain_list); |
| 10 | static DEFINE_MUTEX(irq_domain_mutex); | 23 | static DEFINE_MUTEX(irq_domain_mutex); |
| 11 | 24 | ||
| 25 | static DEFINE_MUTEX(revmap_trees_mutex); | ||
| 26 | static struct irq_domain *irq_default_domain; | ||
| 27 | |||
| 12 | /** | 28 | /** |
| 13 | * irq_domain_add() - Register an irq_domain | 29 | * irq_domain_alloc() - Allocate a new irq_domain data structure |
| 14 | * @domain: ptr to initialized irq_domain structure | 30 | * @of_node: optional device-tree node of the interrupt controller |
| 31 | * @revmap_type: type of reverse mapping to use | ||
| 32 | * @ops: map/unmap domain callbacks | ||
| 33 | * @host_data: Controller private data pointer | ||
| 15 | * | 34 | * |
| 16 | * Registers an irq_domain structure. The irq_domain must at a minimum be | 35 | * Allocates and initializes an irq_domain structure. Caller is expected to |
| 17 | * initialized with an ops structure pointer, and either a ->to_irq hook or | 36 | * register allocated irq_domain with irq_domain_register(). Returns pointer |
| 18 | * a valid irq_base value. Everything else is optional. | 37 | * to IRQ domain, or NULL on failure. |
| 19 | */ | 38 | */ |
| 20 | void irq_domain_add(struct irq_domain *domain) | 39 | static struct irq_domain *irq_domain_alloc(struct device_node *of_node, |
| 40 | unsigned int revmap_type, | ||
| 41 | const struct irq_domain_ops *ops, | ||
| 42 | void *host_data) | ||
| 21 | { | 43 | { |
| 22 | struct irq_data *d; | 44 | struct irq_domain *domain; |
| 23 | int hwirq, irq; | ||
| 24 | 45 | ||
| 25 | /* | 46 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); |
| 26 | * This assumes that the irq_domain owner has already allocated | 47 | if (WARN_ON(!domain)) |
| 27 | * the irq_descs. This block will be removed when support for dynamic | 48 | return NULL; |
| 28 | * allocation of irq_descs is added to irq_domain. | 49 | |
| 29 | */ | 50 | /* Fill structure */ |
| 30 | irq_domain_for_each_irq(domain, hwirq, irq) { | 51 | domain->revmap_type = revmap_type; |
| 31 | d = irq_get_irq_data(irq); | 52 | domain->ops = ops; |
| 32 | if (!d) { | 53 | domain->host_data = host_data; |
| 33 | WARN(1, "error: assigning domain to non existant irq_desc"); | 54 | domain->of_node = of_node_get(of_node); |
| 34 | return; | 55 | |
| 35 | } | 56 | return domain; |
| 36 | if (d->domain) { | 57 | } |
| 37 | /* things are broken; just report, don't clean up */ | 58 | |
| 38 | WARN(1, "error: irq_desc already assigned to a domain"); | 59 | static void irq_domain_add(struct irq_domain *domain) |
| 39 | return; | 60 | { |
| 61 | mutex_lock(&irq_domain_mutex); | ||
| 62 | list_add(&domain->link, &irq_domain_list); | ||
| 63 | mutex_unlock(&irq_domain_mutex); | ||
| 64 | pr_debug("irq: Allocated domain of type %d @0x%p\n", | ||
| 65 | domain->revmap_type, domain); | ||
| 66 | } | ||
| 67 | |||
| 68 | static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, | ||
| 69 | irq_hw_number_t hwirq) | ||
| 70 | { | ||
| 71 | irq_hw_number_t first_hwirq = domain->revmap_data.legacy.first_hwirq; | ||
| 72 | int size = domain->revmap_data.legacy.size; | ||
| 73 | |||
| 74 | if (WARN_ON(hwirq < first_hwirq || hwirq >= first_hwirq + size)) | ||
| 75 | return 0; | ||
| 76 | return hwirq - first_hwirq + domain->revmap_data.legacy.first_irq; | ||
| 77 | } | ||
| 78 | |||
| 79 | /** | ||
| 80 | * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. | ||
| 81 | * @of_node: pointer to interrupt controller's device tree node. | ||
| 82 | * @size: total number of irqs in legacy mapping | ||
| 83 | * @first_irq: first number of irq block assigned to the domain | ||
| 84 | * @first_hwirq: first hwirq number to use for the translation. Should normally | ||
| 85 | * be '0', but a positive integer can be used if the effective | ||
| 86 | * hwirqs numbering does not begin at zero. | ||
| 87 | * @ops: map/unmap domain callbacks | ||
| 88 | * @host_data: Controller private data pointer | ||
| 89 | * | ||
| 90 | * Note: the map() callback will be called before this function returns | ||
| 91 | * for all legacy interrupts except 0 (which is always the invalid irq for | ||
| 92 | * a legacy controller). | ||
| 93 | */ | ||
| 94 | struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, | ||
| 95 | unsigned int size, | ||
| 96 | unsigned int first_irq, | ||
| 97 | irq_hw_number_t first_hwirq, | ||
| 98 | const struct irq_domain_ops *ops, | ||
| 99 | void *host_data) | ||
| 100 | { | ||
| 101 | struct irq_domain *domain; | ||
| 102 | unsigned int i; | ||
| 103 | |||
| 104 | domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LEGACY, ops, host_data); | ||
| 105 | if (!domain) | ||
| 106 | return NULL; | ||
| 107 | |||
| 108 | domain->revmap_data.legacy.first_irq = first_irq; | ||
| 109 | domain->revmap_data.legacy.first_hwirq = first_hwirq; | ||
| 110 | domain->revmap_data.legacy.size = size; | ||
| 111 | |||
| 112 | mutex_lock(&irq_domain_mutex); | ||
| 113 | /* Verify that all the irqs are available */ | ||
| 114 | for (i = 0; i < size; i++) { | ||
| 115 | int irq = first_irq + i; | ||
| 116 | struct irq_data *irq_data = irq_get_irq_data(irq); | ||
| 117 | |||
| 118 | if (WARN_ON(!irq_data || irq_data->domain)) { | ||
| 119 | mutex_unlock(&irq_domain_mutex); | ||
| 120 | of_node_put(domain->of_node); | ||
| 121 | kfree(domain); | ||
| 122 | return NULL; | ||
| 40 | } | 123 | } |
| 41 | d->domain = domain; | ||
| 42 | d->hwirq = hwirq; | ||
| 43 | } | 124 | } |
| 44 | 125 | ||
| 45 | mutex_lock(&irq_domain_mutex); | 126 | /* Claim all of the irqs before registering a legacy domain */ |
| 46 | list_add(&domain->list, &irq_domain_list); | 127 | for (i = 0; i < size; i++) { |
| 128 | struct irq_data *irq_data = irq_get_irq_data(first_irq + i); | ||
| 129 | irq_data->hwirq = first_hwirq + i; | ||
| 130 | irq_data->domain = domain; | ||
| 131 | } | ||
| 47 | mutex_unlock(&irq_domain_mutex); | 132 | mutex_unlock(&irq_domain_mutex); |
| 133 | |||
| 134 | for (i = 0; i < size; i++) { | ||
| 135 | int irq = first_irq + i; | ||
| 136 | int hwirq = first_hwirq + i; | ||
| 137 | |||
| 138 | /* IRQ0 gets ignored */ | ||
| 139 | if (!irq) | ||
| 140 | continue; | ||
| 141 | |||
| 142 | /* Legacy flags are left to default at this point, | ||
| 143 | * one can then use irq_create_mapping() to | ||
| 144 | * explicitly change them | ||
| 145 | */ | ||
| 146 | ops->map(domain, irq, hwirq); | ||
| 147 | |||
| 148 | /* Clear norequest flags */ | ||
| 149 | irq_clear_status_flags(irq, IRQ_NOREQUEST); | ||
| 150 | } | ||
| 151 | |||
| 152 | irq_domain_add(domain); | ||
| 153 | return domain; | ||
| 154 | } | ||
| 155 | |||
| 156 | /** | ||
| 157 | * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. | ||
| 158 | * @of_node: pointer to interrupt controller's device tree node. | ||
| 159 | * @ops: map/unmap domain callbacks | ||
| 160 | * @host_data: Controller private data pointer | ||
| 161 | */ | ||
| 162 | struct irq_domain *irq_domain_add_linear(struct device_node *of_node, | ||
| 163 | unsigned int size, | ||
| 164 | const struct irq_domain_ops *ops, | ||
| 165 | void *host_data) | ||
| 166 | { | ||
| 167 | struct irq_domain *domain; | ||
| 168 | unsigned int *revmap; | ||
| 169 | |||
| 170 | revmap = kzalloc(sizeof(*revmap) * size, GFP_KERNEL); | ||
| 171 | if (WARN_ON(!revmap)) | ||
| 172 | return NULL; | ||
| 173 | |||
| 174 | domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LINEAR, ops, host_data); | ||
| 175 | if (!domain) { | ||
| 176 | kfree(revmap); | ||
| 177 | return NULL; | ||
| 178 | } | ||
| 179 | domain->revmap_data.linear.size = size; | ||
| 180 | domain->revmap_data.linear.revmap = revmap; | ||
| 181 | irq_domain_add(domain); | ||
| 182 | return domain; | ||
| 183 | } | ||
| 184 | |||
| 185 | struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, | ||
| 186 | unsigned int max_irq, | ||
| 187 | const struct irq_domain_ops *ops, | ||
| 188 | void *host_data) | ||
| 189 | { | ||
| 190 | struct irq_domain *domain = irq_domain_alloc(of_node, | ||
| 191 | IRQ_DOMAIN_MAP_NOMAP, ops, host_data); | ||
| 192 | if (domain) { | ||
| 193 | domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0; | ||
| 194 | irq_domain_add(domain); | ||
| 195 | } | ||
| 196 | return domain; | ||
| 197 | } | ||
| 198 | |||
| 199 | /** | ||
| 200 | * irq_domain_add_tree() - Allocate and register a radix tree revmap irq_domain. | ||
| 201 | * @of_node: pointer to interrupt controller's device tree node. | ||
| 202 | * @ops: map/unmap domain callbacks | ||
| 203 | * | ||
| 204 | * Note: The radix tree will be allocated later during boot automatically | ||
| 205 | * (the reverse mapping will use the slow path until that happens). | ||
| 206 | */ | ||
| 207 | struct irq_domain *irq_domain_add_tree(struct device_node *of_node, | ||
| 208 | const struct irq_domain_ops *ops, | ||
| 209 | void *host_data) | ||
| 210 | { | ||
| 211 | struct irq_domain *domain = irq_domain_alloc(of_node, | ||
| 212 | IRQ_DOMAIN_MAP_TREE, ops, host_data); | ||
| 213 | if (domain) { | ||
| 214 | INIT_RADIX_TREE(&domain->revmap_data.tree, GFP_KERNEL); | ||
| 215 | irq_domain_add(domain); | ||
| 216 | } | ||
| 217 | return domain; | ||
| 48 | } | 218 | } |
| 49 | 219 | ||
| 50 | /** | 220 | /** |
| 51 | * irq_domain_del() - Unregister an irq_domain | 221 | * irq_find_host() - Locates a domain for a given device node |
| 52 | * @domain: ptr to registered irq_domain. | 222 | * @node: device-tree node of the interrupt controller |
| 53 | */ | 223 | */ |
| 54 | void irq_domain_del(struct irq_domain *domain) | 224 | struct irq_domain *irq_find_host(struct device_node *node) |
| 55 | { | 225 | { |
| 56 | struct irq_data *d; | 226 | struct irq_domain *h, *found = NULL; |
| 57 | int hwirq, irq; | 227 | int rc; |
| 58 | 228 | ||
| 229 | /* We might want to match the legacy controller last since | ||
| 230 | * it might potentially be set to match all interrupts in | ||
| 231 | * the absence of a device node. This isn't a problem so far | ||
| 232 | * yet though... | ||
| 233 | */ | ||
| 59 | mutex_lock(&irq_domain_mutex); | 234 | mutex_lock(&irq_domain_mutex); |
| 60 | list_del(&domain->list); | 235 | list_for_each_entry(h, &irq_domain_list, link) { |
| 236 | if (h->ops->match) | ||
| 237 | rc = h->ops->match(h, node); | ||
| 238 | else | ||
| 239 | rc = (h->of_node != NULL) && (h->of_node == node); | ||
| 240 | |||
| 241 | if (rc) { | ||
| 242 | found = h; | ||
| 243 | break; | ||
| 244 | } | ||
| 245 | } | ||
| 61 | mutex_unlock(&irq_domain_mutex); | 246 | mutex_unlock(&irq_domain_mutex); |
| 247 | return found; | ||
| 248 | } | ||
| 249 | EXPORT_SYMBOL_GPL(irq_find_host); | ||
| 250 | |||
| 251 | /** | ||
| 252 | * irq_set_default_host() - Set a "default" irq domain | ||
| 253 | * @domain: default domain pointer | ||
| 254 | * | ||
| 255 | * For convenience, it's possible to set a "default" domain that will be used | ||
| 256 | * whenever NULL is passed to irq_create_mapping(). It makes life easier for | ||
| 257 | * platforms that want to manipulate a few hard coded interrupt numbers that | ||
| 258 | * aren't properly represented in the device-tree. | ||
| 259 | */ | ||
| 260 | void irq_set_default_host(struct irq_domain *domain) | ||
| 261 | { | ||
| 262 | pr_debug("irq: Default domain set to @0x%p\n", domain); | ||
| 263 | |||
| 264 | irq_default_domain = domain; | ||
| 265 | } | ||
| 266 | |||
| 267 | static int irq_setup_virq(struct irq_domain *domain, unsigned int virq, | ||
| 268 | irq_hw_number_t hwirq) | ||
| 269 | { | ||
| 270 | struct irq_data *irq_data = irq_get_irq_data(virq); | ||
| 62 | 271 | ||
| 63 | /* Clear the irq_domain assignments */ | 272 | irq_data->hwirq = hwirq; |
| 64 | irq_domain_for_each_irq(domain, hwirq, irq) { | 273 | irq_data->domain = domain; |
| 65 | d = irq_get_irq_data(irq); | 274 | if (domain->ops->map(domain, virq, hwirq)) { |
| 66 | d->domain = NULL; | 275 | pr_debug("irq: -> mapping failed, freeing\n"); |
| 276 | irq_data->domain = NULL; | ||
| 277 | irq_data->hwirq = 0; | ||
| 278 | return -1; | ||
| 67 | } | 279 | } |
| 280 | |||
| 281 | irq_clear_status_flags(virq, IRQ_NOREQUEST); | ||
| 282 | |||
| 283 | return 0; | ||
| 68 | } | 284 | } |
| 69 | 285 | ||
| 70 | #if defined(CONFIG_OF_IRQ) | ||
| 71 | /** | 286 | /** |
| 72 | * irq_create_of_mapping() - Map a linux irq number from a DT interrupt spec | 287 | * irq_create_direct_mapping() - Allocate an irq for direct mapping |
| 288 | * @domain: domain to allocate the irq for or NULL for default domain | ||
| 73 | * | 289 | * |
| 74 | * Used by the device tree interrupt mapping code to translate a device tree | 290 | * This routine is used for irq controllers which can choose the hardware |
| 75 | * interrupt specifier to a valid linux irq number. Returns either a valid | 291 | * interrupt numbers they generate. In such a case it's simplest to use |
| 76 | * linux IRQ number or 0. | 292 | * the linux irq as the hardware interrupt number. |
| 293 | */ | ||
| 294 | unsigned int irq_create_direct_mapping(struct irq_domain *domain) | ||
| 295 | { | ||
| 296 | unsigned int virq; | ||
| 297 | |||
| 298 | if (domain == NULL) | ||
| 299 | domain = irq_default_domain; | ||
| 300 | |||
| 301 | BUG_ON(domain == NULL); | ||
| 302 | WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP); | ||
| 303 | |||
| 304 | virq = irq_alloc_desc_from(1, 0); | ||
| 305 | if (!virq) { | ||
| 306 | pr_debug("irq: create_direct virq allocation failed\n"); | ||
| 307 | return 0; | ||
| 308 | } | ||
| 309 | if (virq >= domain->revmap_data.nomap.max_irq) { | ||
| 310 | pr_err("ERROR: no free irqs available below %i maximum\n", | ||
| 311 | domain->revmap_data.nomap.max_irq); | ||
| 312 | irq_free_desc(virq); | ||
| 313 | return 0; | ||
| 314 | } | ||
| 315 | pr_debug("irq: create_direct obtained virq %d\n", virq); | ||
| 316 | |||
| 317 | if (irq_setup_virq(domain, virq, virq)) { | ||
| 318 | irq_free_desc(virq); | ||
| 319 | return 0; | ||
| 320 | } | ||
| 321 | |||
| 322 | return virq; | ||
| 323 | } | ||
| 324 | |||
| 325 | /** | ||
| 326 | * irq_create_mapping() - Map a hardware interrupt into linux irq space | ||
| 327 | * @domain: domain owning this hardware interrupt or NULL for default domain | ||
| 328 | * @hwirq: hardware irq number in that domain space | ||
| 77 | * | 329 | * |
| 78 | * When the caller no longer need the irq number returned by this function it | 330 | * Only one mapping per hardware interrupt is permitted. Returns a linux |
| 79 | * should arrange to call irq_dispose_mapping(). | 331 | * irq number. |
| 332 | * If the sense/trigger is to be specified, set_irq_type() should be called | ||
| 333 | * on the number returned from that call. | ||
| 80 | */ | 334 | */ |
| 335 | unsigned int irq_create_mapping(struct irq_domain *domain, | ||
| 336 | irq_hw_number_t hwirq) | ||
| 337 | { | ||
| 338 | unsigned int hint; | ||
| 339 | int virq; | ||
| 340 | |||
| 341 | pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); | ||
| 342 | |||
| 343 | /* Look for default domain if necessary */ | ||
| 344 | if (domain == NULL) | ||
| 345 | domain = irq_default_domain; | ||
| 346 | if (domain == NULL) { | ||
| 347 | printk(KERN_WARNING "irq_create_mapping called for" | ||
| 348 | " NULL domain, hwirq=%lx\n", hwirq); | ||
| 349 | WARN_ON(1); | ||
| 350 | return 0; | ||
| 351 | } | ||
| 352 | pr_debug("irq: -> using domain @%p\n", domain); | ||
| 353 | |||
| 354 | /* Check if mapping already exists */ | ||
| 355 | virq = irq_find_mapping(domain, hwirq); | ||
| 356 | if (virq) { | ||
| 357 | pr_debug("irq: -> existing mapping on virq %d\n", virq); | ||
| 358 | return virq; | ||
| 359 | } | ||
| 360 | |||
| 361 | /* Get a virtual interrupt number */ | ||
| 362 | if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) | ||
| 363 | return irq_domain_legacy_revmap(domain, hwirq); | ||
| 364 | |||
| 365 | /* Allocate a virtual interrupt number */ | ||
| 366 | hint = hwirq % nr_irqs; | ||
| 367 | if (hint == 0) | ||
| 368 | hint++; | ||
| 369 | virq = irq_alloc_desc_from(hint, 0); | ||
| 370 | if (virq <= 0) | ||
| 371 | virq = irq_alloc_desc_from(1, 0); | ||
| 372 | if (virq <= 0) { | ||
| 373 | pr_debug("irq: -> virq allocation failed\n"); | ||
| 374 | return 0; | ||
| 375 | } | ||
| 376 | |||
| 377 | if (irq_setup_virq(domain, virq, hwirq)) { | ||
| 378 | if (domain->revmap_type != IRQ_DOMAIN_MAP_LEGACY) | ||
| 379 | irq_free_desc(virq); | ||
| 380 | return 0; | ||
| 381 | } | ||
| 382 | |||
| 383 | pr_debug("irq: irq %lu on domain %s mapped to virtual irq %u\n", | ||
| 384 | hwirq, domain->of_node ? domain->of_node->full_name : "null", virq); | ||
| 385 | |||
| 386 | return virq; | ||
| 387 | } | ||
| 388 | EXPORT_SYMBOL_GPL(irq_create_mapping); | ||
| 389 | |||
| 81 | unsigned int irq_create_of_mapping(struct device_node *controller, | 390 | unsigned int irq_create_of_mapping(struct device_node *controller, |
| 82 | const u32 *intspec, unsigned int intsize) | 391 | const u32 *intspec, unsigned int intsize) |
| 83 | { | 392 | { |
| 84 | struct irq_domain *domain; | 393 | struct irq_domain *domain; |
| 85 | unsigned long hwirq; | 394 | irq_hw_number_t hwirq; |
| 86 | unsigned int irq, type; | 395 | unsigned int type = IRQ_TYPE_NONE; |
| 87 | int rc = -EINVAL; | 396 | unsigned int virq; |
| 88 | 397 | ||
| 89 | /* Find a domain which can translate the irq spec */ | 398 | domain = controller ? irq_find_host(controller) : irq_default_domain; |
| 90 | mutex_lock(&irq_domain_mutex); | 399 | if (!domain) { |
| 91 | list_for_each_entry(domain, &irq_domain_list, list) { | 400 | #ifdef CONFIG_MIPS |
| 92 | if (!domain->ops->dt_translate) | 401 | /* |
| 93 | continue; | 402 | * Workaround to avoid breaking interrupt controller drivers |
| 94 | rc = domain->ops->dt_translate(domain, controller, | 403 | * that don't yet register an irq_domain. This is temporary |
| 95 | intspec, intsize, &hwirq, &type); | 404 | * code. ~~~gcl, Feb 24, 2012 |
| 96 | if (rc == 0) | 405 | * |
| 97 | break; | 406 | * Scheduled for removal in Linux v3.6. That should be enough |
| 407 | * time. | ||
| 408 | */ | ||
| 409 | if (intsize > 0) | ||
| 410 | return intspec[0]; | ||
| 411 | #endif | ||
| 412 | printk(KERN_WARNING "irq: no irq domain found for %s !\n", | ||
| 413 | controller->full_name); | ||
| 414 | return 0; | ||
| 98 | } | 415 | } |
| 99 | mutex_unlock(&irq_domain_mutex); | ||
| 100 | 416 | ||
| 101 | if (rc != 0) | 417 | /* If domain has no translation, then we assume interrupt line */ |
| 102 | return 0; | 418 | if (domain->ops->xlate == NULL) |
| 419 | hwirq = intspec[0]; | ||
| 420 | else { | ||
| 421 | if (domain->ops->xlate(domain, controller, intspec, intsize, | ||
| 422 | &hwirq, &type)) | ||
| 423 | return 0; | ||
| 424 | } | ||
| 425 | |||
| 426 | /* Create mapping */ | ||
| 427 | virq = irq_create_mapping(domain, hwirq); | ||
| 428 | if (!virq) | ||
| 429 | return virq; | ||
| 103 | 430 | ||
| 104 | irq = irq_domain_to_irq(domain, hwirq); | 431 | /* Set type if specified and different than the current one */ |
| 105 | if (type != IRQ_TYPE_NONE) | 432 | if (type != IRQ_TYPE_NONE && |
| 106 | irq_set_irq_type(irq, type); | 433 | type != (irqd_get_trigger_type(irq_get_irq_data(virq)))) |
| 107 | pr_debug("%s: mapped hwirq=%i to irq=%i, flags=%x\n", | 434 | irq_set_irq_type(virq, type); |
| 108 | controller->full_name, (int)hwirq, irq, type); | 435 | return virq; |
| 109 | return irq; | ||
| 110 | } | 436 | } |
| 111 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); | 437 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); |
| 112 | 438 | ||
| 113 | /** | 439 | /** |
| 114 | * irq_dispose_mapping() - Discard a mapping created by irq_create_of_mapping() | 440 | * irq_dispose_mapping() - Unmap an interrupt |
| 115 | * @irq: linux irq number to be discarded | 441 | * @virq: linux irq number of the interrupt to unmap |
| 442 | */ | ||
| 443 | void irq_dispose_mapping(unsigned int virq) | ||
| 444 | { | ||
| 445 | struct irq_data *irq_data = irq_get_irq_data(virq); | ||
| 446 | struct irq_domain *domain; | ||
| 447 | irq_hw_number_t hwirq; | ||
| 448 | |||
| 449 | if (!virq || !irq_data) | ||
| 450 | return; | ||
| 451 | |||
| 452 | domain = irq_data->domain; | ||
| 453 | if (WARN_ON(domain == NULL)) | ||
| 454 | return; | ||
| 455 | |||
| 456 | /* Never unmap legacy interrupts */ | ||
| 457 | if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) | ||
| 458 | return; | ||
| 459 | |||
| 460 | irq_set_status_flags(virq, IRQ_NOREQUEST); | ||
| 461 | |||
| 462 | /* remove chip and handler */ | ||
| 463 | irq_set_chip_and_handler(virq, NULL, NULL); | ||
| 464 | |||
| 465 | /* Make sure it's completed */ | ||
| 466 | synchronize_irq(virq); | ||
| 467 | |||
| 468 | /* Tell the PIC about it */ | ||
| 469 | if (domain->ops->unmap) | ||
| 470 | domain->ops->unmap(domain, virq); | ||
| 471 | smp_mb(); | ||
| 472 | |||
| 473 | /* Clear reverse map */ | ||
| 474 | hwirq = irq_data->hwirq; | ||
| 475 | switch(domain->revmap_type) { | ||
| 476 | case IRQ_DOMAIN_MAP_LINEAR: | ||
| 477 | if (hwirq < domain->revmap_data.linear.size) | ||
| 478 | domain->revmap_data.linear.revmap[hwirq] = 0; | ||
| 479 | break; | ||
| 480 | case IRQ_DOMAIN_MAP_TREE: | ||
| 481 | mutex_lock(&revmap_trees_mutex); | ||
| 482 | radix_tree_delete(&domain->revmap_data.tree, hwirq); | ||
| 483 | mutex_unlock(&revmap_trees_mutex); | ||
| 484 | break; | ||
| 485 | } | ||
| 486 | |||
| 487 | irq_free_desc(virq); | ||
| 488 | } | ||
| 489 | EXPORT_SYMBOL_GPL(irq_dispose_mapping); | ||
| 490 | |||
| 491 | /** | ||
| 492 | * irq_find_mapping() - Find a linux irq from an hw irq number. | ||
| 493 | * @domain: domain owning this hardware interrupt | ||
| 494 | * @hwirq: hardware irq number in that domain space | ||
| 495 | * | ||
| 496 | * This is a slow path, for use by generic code. It's expected that an | ||
| 497 | * irq controller implementation directly calls the appropriate low level | ||
| 498 | * mapping function. | ||
| 499 | */ | ||
| 500 | unsigned int irq_find_mapping(struct irq_domain *domain, | ||
| 501 | irq_hw_number_t hwirq) | ||
| 502 | { | ||
| 503 | unsigned int i; | ||
| 504 | unsigned int hint = hwirq % nr_irqs; | ||
| 505 | |||
| 506 | /* Look for default domain if necessary */ | ||
| 507 | if (domain == NULL) | ||
| 508 | domain = irq_default_domain; | ||
| 509 | if (domain == NULL) | ||
| 510 | return 0; | ||
| 511 | |||
| 512 | /* legacy -> bail early */ | ||
| 513 | if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) | ||
| 514 | return irq_domain_legacy_revmap(domain, hwirq); | ||
| 515 | |||
| 516 | /* Slow path does a linear search of the map */ | ||
| 517 | if (hint == 0) | ||
| 518 | hint = 1; | ||
| 519 | i = hint; | ||
| 520 | do { | ||
| 521 | struct irq_data *data = irq_get_irq_data(i); | ||
| 522 | if (data && (data->domain == domain) && (data->hwirq == hwirq)) | ||
| 523 | return i; | ||
| 524 | i++; | ||
| 525 | if (i >= nr_irqs) | ||
| 526 | i = 1; | ||
| 527 | } while(i != hint); | ||
| 528 | return 0; | ||
| 529 | } | ||
| 530 | EXPORT_SYMBOL_GPL(irq_find_mapping); | ||
| 531 | |||
| 532 | /** | ||
| 533 | * irq_radix_revmap_lookup() - Find a linux irq from a hw irq number. | ||
| 534 | * @domain: domain owning this hardware interrupt | ||
| 535 | * @hwirq: hardware irq number in that domain space | ||
| 116 | * | 536 | * |
| 117 | * Calling this function indicates the caller no longer needs a reference to | 537 | * This is a fast path, for use by irq controller code that uses radix tree |
| 118 | * the linux irq number returned by a prior call to irq_create_of_mapping(). | 538 | * revmaps |
| 119 | */ | 539 | */ |
| 120 | void irq_dispose_mapping(unsigned int irq) | 540 | unsigned int irq_radix_revmap_lookup(struct irq_domain *domain, |
| 541 | irq_hw_number_t hwirq) | ||
| 121 | { | 542 | { |
| 543 | struct irq_data *irq_data; | ||
| 544 | |||
| 545 | if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_TREE)) | ||
| 546 | return irq_find_mapping(domain, hwirq); | ||
| 547 | |||
| 548 | /* | ||
| 549 | * Freeing an irq can delete nodes along the path to | ||
| 550 | * do the lookup via call_rcu. | ||
| 551 | */ | ||
| 552 | rcu_read_lock(); | ||
| 553 | irq_data = radix_tree_lookup(&domain->revmap_data.tree, hwirq); | ||
| 554 | rcu_read_unlock(); | ||
| 555 | |||
| 122 | /* | 556 | /* |
| 123 | * nothing yet; will be filled when support for dynamic allocation of | 557 | * If found in radix tree, then fine. |
| 124 | * irq_descs is added to irq_domain | 558 | * Else fallback to linear lookup - this should not happen in practice |
| 559 | * as it means that we failed to insert the node in the radix tree. | ||
| 125 | */ | 560 | */ |
| 561 | return irq_data ? irq_data->irq : irq_find_mapping(domain, hwirq); | ||
| 126 | } | 562 | } |
| 127 | EXPORT_SYMBOL_GPL(irq_dispose_mapping); | ||
| 128 | 563 | ||
| 129 | int irq_domain_simple_dt_translate(struct irq_domain *d, | 564 | /** |
| 130 | struct device_node *controller, | 565 | * irq_radix_revmap_insert() - Insert a hw irq to linux irq number mapping. |
| 131 | const u32 *intspec, unsigned int intsize, | 566 | * @domain: domain owning this hardware interrupt |
| 132 | unsigned long *out_hwirq, unsigned int *out_type) | 567 | * @virq: linux irq number |
| 568 | * @hwirq: hardware irq number in that domain space | ||
| 569 | * | ||
| 570 | * This is for use by irq controllers that use a radix tree reverse | ||
| 571 | * mapping for fast lookup. | ||
| 572 | */ | ||
| 573 | void irq_radix_revmap_insert(struct irq_domain *domain, unsigned int virq, | ||
| 574 | irq_hw_number_t hwirq) | ||
| 133 | { | 575 | { |
| 134 | if (d->of_node != controller) | 576 | struct irq_data *irq_data = irq_get_irq_data(virq); |
| 135 | return -EINVAL; | 577 | |
| 136 | if (intsize < 1) | 578 | if (WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_TREE)) |
| 137 | return -EINVAL; | 579 | return; |
| 138 | if (d->nr_irq && ((intspec[0] < d->hwirq_base) || | 580 | |
| 139 | (intspec[0] >= d->hwirq_base + d->nr_irq))) | 581 | if (virq) { |
| 140 | return -EINVAL; | 582 | mutex_lock(&revmap_trees_mutex); |
| 583 | radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data); | ||
| 584 | mutex_unlock(&revmap_trees_mutex); | ||
| 585 | } | ||
| 586 | } | ||
| 587 | |||
| 588 | /** | ||
| 589 | * irq_linear_revmap() - Find a linux irq from a hw irq number. | ||
| 590 | * @domain: domain owning this hardware interrupt | ||
| 591 | * @hwirq: hardware irq number in that domain space | ||
| 592 | * | ||
| 593 | * This is a fast path, for use by irq controller code that uses linear | ||
| 594 | * revmaps. It falls back to the slow path if the revmap doesn't exist | ||
| 595 | * yet and will create the revmap entry with appropriate locking | ||
| 596 | */ | ||
| 597 | unsigned int irq_linear_revmap(struct irq_domain *domain, | ||
| 598 | irq_hw_number_t hwirq) | ||
| 599 | { | ||
| 600 | unsigned int *revmap; | ||
| 601 | |||
| 602 | if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR)) | ||
| 603 | return irq_find_mapping(domain, hwirq); | ||
| 604 | |||
| 605 | /* Check revmap bounds */ | ||
| 606 | if (unlikely(hwirq >= domain->revmap_data.linear.size)) | ||
| 607 | return irq_find_mapping(domain, hwirq); | ||
| 608 | |||
| 609 | /* Check if revmap was allocated */ | ||
| 610 | revmap = domain->revmap_data.linear.revmap; | ||
| 611 | if (unlikely(revmap == NULL)) | ||
| 612 | return irq_find_mapping(domain, hwirq); | ||
| 613 | |||
| 614 | /* Fill up revmap with slow path if no mapping found */ | ||
| 615 | if (unlikely(!revmap[hwirq])) | ||
| 616 | revmap[hwirq] = irq_find_mapping(domain, hwirq); | ||
| 617 | |||
| 618 | return revmap[hwirq]; | ||
| 619 | } | ||
| 620 | |||
| 621 | #ifdef CONFIG_IRQ_DOMAIN_DEBUG | ||
| 622 | static int virq_debug_show(struct seq_file *m, void *private) | ||
| 623 | { | ||
| 624 | unsigned long flags; | ||
| 625 | struct irq_desc *desc; | ||
| 626 | const char *p; | ||
| 627 | static const char none[] = "none"; | ||
| 628 | void *data; | ||
| 629 | int i; | ||
| 630 | |||
| 631 | seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq", | ||
| 632 | "chip name", (int)(2 * sizeof(void *) + 2), "chip data", | ||
| 633 | "domain name"); | ||
| 141 | 634 | ||
| 635 | for (i = 1; i < nr_irqs; i++) { | ||
| 636 | desc = irq_to_desc(i); | ||
| 637 | if (!desc) | ||
| 638 | continue; | ||
| 639 | |||
| 640 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
| 641 | |||
| 642 | if (desc->action && desc->action->handler) { | ||
| 643 | struct irq_chip *chip; | ||
| 644 | |||
| 645 | seq_printf(m, "%5d ", i); | ||
| 646 | seq_printf(m, "0x%05lx ", desc->irq_data.hwirq); | ||
| 647 | |||
| 648 | chip = irq_desc_get_chip(desc); | ||
| 649 | if (chip && chip->name) | ||
| 650 | p = chip->name; | ||
| 651 | else | ||
| 652 | p = none; | ||
| 653 | seq_printf(m, "%-15s ", p); | ||
| 654 | |||
| 655 | data = irq_desc_get_chip_data(desc); | ||
| 656 | seq_printf(m, data ? "0x%p " : " %p ", data); | ||
| 657 | |||
| 658 | if (desc->irq_data.domain && desc->irq_data.domain->of_node) | ||
| 659 | p = desc->irq_data.domain->of_node->full_name; | ||
| 660 | else | ||
| 661 | p = none; | ||
| 662 | seq_printf(m, "%s\n", p); | ||
| 663 | } | ||
| 664 | |||
| 665 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 666 | } | ||
| 667 | |||
| 668 | return 0; | ||
| 669 | } | ||
| 670 | |||
| 671 | static int virq_debug_open(struct inode *inode, struct file *file) | ||
| 672 | { | ||
| 673 | return single_open(file, virq_debug_show, inode->i_private); | ||
| 674 | } | ||
| 675 | |||
| 676 | static const struct file_operations virq_debug_fops = { | ||
| 677 | .open = virq_debug_open, | ||
| 678 | .read = seq_read, | ||
| 679 | .llseek = seq_lseek, | ||
| 680 | .release = single_release, | ||
| 681 | }; | ||
| 682 | |||
| 683 | static int __init irq_debugfs_init(void) | ||
| 684 | { | ||
| 685 | if (debugfs_create_file("irq_domain_mapping", S_IRUGO, NULL, | ||
| 686 | NULL, &virq_debug_fops) == NULL) | ||
| 687 | return -ENOMEM; | ||
| 688 | |||
| 689 | return 0; | ||
| 690 | } | ||
| 691 | __initcall(irq_debugfs_init); | ||
| 692 | #endif /* CONFIG_IRQ_DOMAIN_DEBUG */ | ||
| 693 | |||
| 694 | int irq_domain_simple_map(struct irq_domain *d, unsigned int irq, | ||
| 695 | irq_hw_number_t hwirq) | ||
| 696 | { | ||
| 697 | return 0; | ||
| 698 | } | ||
| 699 | |||
| 700 | /** | ||
| 701 | * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings | ||
| 702 | * | ||
| 703 | * Device Tree IRQ specifier translation function which works with one cell | ||
| 704 | * bindings where the cell value maps directly to the hwirq number. | ||
| 705 | */ | ||
| 706 | int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, | ||
| 707 | const u32 *intspec, unsigned int intsize, | ||
| 708 | unsigned long *out_hwirq, unsigned int *out_type) | ||
| 709 | { | ||
| 710 | if (WARN_ON(intsize < 1)) | ||
| 711 | return -EINVAL; | ||
| 142 | *out_hwirq = intspec[0]; | 712 | *out_hwirq = intspec[0]; |
| 143 | *out_type = IRQ_TYPE_NONE; | 713 | *out_type = IRQ_TYPE_NONE; |
| 144 | if (intsize > 1) | ||
| 145 | *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; | ||
| 146 | return 0; | 714 | return 0; |
| 147 | } | 715 | } |
| 716 | EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell); | ||
| 148 | 717 | ||
| 149 | /** | 718 | /** |
| 150 | * irq_domain_create_simple() - Set up a 'simple' translation range | 719 | * irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings |
| 720 | * | ||
| 721 | * Device Tree IRQ specifier translation function which works with two cell | ||
| 722 | * bindings where the cell values map directly to the hwirq number | ||
| 723 | * and linux irq flags. | ||
| 151 | */ | 724 | */ |
| 152 | void irq_domain_add_simple(struct device_node *controller, int irq_base) | 725 | int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, |
| 726 | const u32 *intspec, unsigned int intsize, | ||
| 727 | irq_hw_number_t *out_hwirq, unsigned int *out_type) | ||
| 153 | { | 728 | { |
| 154 | struct irq_domain *domain; | 729 | if (WARN_ON(intsize < 2)) |
| 155 | 730 | return -EINVAL; | |
| 156 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | 731 | *out_hwirq = intspec[0]; |
| 157 | if (!domain) { | 732 | *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; |
| 158 | WARN_ON(1); | 733 | return 0; |
| 159 | return; | 734 | } |
| 160 | } | 735 | EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell); |
| 161 | 736 | ||
| 162 | domain->irq_base = irq_base; | 737 | /** |
| 163 | domain->of_node = of_node_get(controller); | 738 | * irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings |
| 164 | domain->ops = &irq_domain_simple_ops; | 739 | * |
| 165 | irq_domain_add(domain); | 740 | * Device Tree IRQ specifier translation function which works with either one |
| 741 | * or two cell bindings where the cell values map directly to the hwirq number | ||
| 742 | * and linux irq flags. | ||
| 743 | * | ||
| 744 | * Note: don't use this function unless your interrupt controller explicitly | ||
| 745 | * supports both one and two cell bindings. For the majority of controllers | ||
| 746 | * the _onecell() or _twocell() variants above should be used. | ||
| 747 | */ | ||
| 748 | int irq_domain_xlate_onetwocell(struct irq_domain *d, | ||
| 749 | struct device_node *ctrlr, | ||
| 750 | const u32 *intspec, unsigned int intsize, | ||
| 751 | unsigned long *out_hwirq, unsigned int *out_type) | ||
| 752 | { | ||
| 753 | if (WARN_ON(intsize < 1)) | ||
| 754 | return -EINVAL; | ||
| 755 | *out_hwirq = intspec[0]; | ||
| 756 | *out_type = (intsize > 1) ? intspec[1] : IRQ_TYPE_NONE; | ||
| 757 | return 0; | ||
| 166 | } | 758 | } |
| 167 | EXPORT_SYMBOL_GPL(irq_domain_add_simple); | 759 | EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell); |
| 168 | 760 | ||
| 761 | const struct irq_domain_ops irq_domain_simple_ops = { | ||
| 762 | .map = irq_domain_simple_map, | ||
| 763 | .xlate = irq_domain_xlate_onetwocell, | ||
| 764 | }; | ||
| 765 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | ||
| 766 | |||
| 767 | #ifdef CONFIG_OF_IRQ | ||
| 169 | void irq_domain_generate_simple(const struct of_device_id *match, | 768 | void irq_domain_generate_simple(const struct of_device_id *match, |
| 170 | u64 phys_base, unsigned int irq_start) | 769 | u64 phys_base, unsigned int irq_start) |
| 171 | { | 770 | { |
| 172 | struct device_node *node; | 771 | struct device_node *node; |
| 173 | pr_info("looking for phys_base=%llx, irq_start=%i\n", | 772 | pr_debug("looking for phys_base=%llx, irq_start=%i\n", |
| 174 | (unsigned long long) phys_base, (int) irq_start); | 773 | (unsigned long long) phys_base, (int) irq_start); |
| 175 | node = of_find_matching_node_by_address(NULL, match, phys_base); | 774 | node = of_find_matching_node_by_address(NULL, match, phys_base); |
| 176 | if (node) | 775 | if (node) |
| 177 | irq_domain_add_simple(node, irq_start); | 776 | irq_domain_add_legacy(node, 32, irq_start, 0, |
| 178 | else | 777 | &irq_domain_simple_ops, NULL); |
| 179 | pr_info("no node found\n"); | ||
| 180 | } | 778 | } |
| 181 | EXPORT_SYMBOL_GPL(irq_domain_generate_simple); | 779 | EXPORT_SYMBOL_GPL(irq_domain_generate_simple); |
| 182 | #endif /* CONFIG_OF_IRQ */ | 780 | #endif |
| 183 | |||
| 184 | struct irq_domain_ops irq_domain_simple_ops = { | ||
| 185 | #ifdef CONFIG_OF_IRQ | ||
| 186 | .dt_translate = irq_domain_simple_dt_translate, | ||
| 187 | #endif /* CONFIG_OF_IRQ */ | ||
| 188 | }; | ||
| 189 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | ||
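The rewritten irqdomain.c above turns irq_domain into a set of constructors (legacy, linear, nomap, tree) plus generic xlate helpers. As a rough driver-side sketch of the intended usage, assuming a simple level-triggered controller with 32 interrupt lines and a two-cell device-tree binding; every foo_* identifier is a placeholder, not part of the patch:

#include <linux/errno.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of.h>

static void foo_irq_mask(struct irq_data *d)
{
	/* write to the controller's mask register here */
}

static void foo_irq_unmask(struct irq_data *d)
{
	/* write to the controller's unmask register here */
}

static struct irq_chip foo_irq_chip = {
	.name		= "foo",
	.irq_mask	= foo_irq_mask,
	.irq_unmask	= foo_irq_unmask,
};

/* Called by the core for every hwirq that gets mapped into this domain. */
static int foo_irq_map(struct irq_domain *d, unsigned int virq,
		       irq_hw_number_t hwirq)
{
	irq_set_chip_and_handler(virq, &foo_irq_chip, handle_level_irq);
	irq_set_chip_data(virq, d->host_data);
	return 0;
}

static const struct irq_domain_ops foo_irq_ops = {
	.map	= foo_irq_map,
	.xlate	= irq_domain_xlate_twocell,	/* #interrupt-cells = <2> */
};

static int foo_init(struct device_node *np, void __iomem *regs)
{
	/* 32 hwirqs; Linux irq numbers are allocated on demand and the
	 * hwirq->virq lookup goes through the linear revmap array. */
	struct irq_domain *domain =
		irq_domain_add_linear(np, 32, &foo_irq_ops, regs);

	return domain ? 0 : -ENOMEM;
}

Controllers with a fixed, pre-allocated Linux irq range would call irq_domain_add_legacy() instead, and very sparse hwirq spaces would use irq_domain_add_tree() together with irq_radix_revmap_insert()/irq_radix_revmap_lookup().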
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index b0ccd1ac2d6a..89a3ea82569b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -282,7 +282,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) | |||
| 282 | { | 282 | { |
| 283 | struct irq_chip *chip = irq_desc_get_chip(desc); | 283 | struct irq_chip *chip = irq_desc_get_chip(desc); |
| 284 | struct cpumask *set = irq_default_affinity; | 284 | struct cpumask *set = irq_default_affinity; |
| 285 | int ret; | 285 | int ret, node = desc->irq_data.node; |
| 286 | 286 | ||
| 287 | /* Excludes PER_CPU and NO_BALANCE interrupts */ | 287 | /* Excludes PER_CPU and NO_BALANCE interrupts */ |
| 288 | if (!irq_can_set_affinity(irq)) | 288 | if (!irq_can_set_affinity(irq)) |
| @@ -301,6 +301,13 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) | |||
| 301 | } | 301 | } |
| 302 | 302 | ||
| 303 | cpumask_and(mask, cpu_online_mask, set); | 303 | cpumask_and(mask, cpu_online_mask, set); |
| 304 | if (node != NUMA_NO_NODE) { | ||
| 305 | const struct cpumask *nodemask = cpumask_of_node(node); | ||
| 306 | |||
| 307 | /* make sure at least one of the cpus in nodemask is online */ | ||
| 308 | if (cpumask_intersects(mask, nodemask)) | ||
| 309 | cpumask_and(mask, mask, nodemask); | ||
| 310 | } | ||
| 304 | ret = chip->irq_set_affinity(&desc->irq_data, mask, false); | 311 | ret = chip->irq_set_affinity(&desc->irq_data, mask, false); |
| 305 | switch (ret) { | 312 | switch (ret) { |
| 306 | case IRQ_SET_MASK_OK: | 313 | case IRQ_SET_MASK_OK: |
| @@ -645,7 +652,7 @@ static int irq_wait_for_interrupt(struct irqaction *action) | |||
| 645 | * is marked MASKED. | 652 | * is marked MASKED. |
| 646 | */ | 653 | */ |
| 647 | static void irq_finalize_oneshot(struct irq_desc *desc, | 654 | static void irq_finalize_oneshot(struct irq_desc *desc, |
| 648 | struct irqaction *action, bool force) | 655 | struct irqaction *action) |
| 649 | { | 656 | { |
| 650 | if (!(desc->istate & IRQS_ONESHOT)) | 657 | if (!(desc->istate & IRQS_ONESHOT)) |
| 651 | return; | 658 | return; |
| @@ -679,7 +686,7 @@ again: | |||
| 679 | * we would clear the threads_oneshot bit of this thread which | 686 | * we would clear the threads_oneshot bit of this thread which |
| 680 | * was just set. | 687 | * was just set. |
| 681 | */ | 688 | */ |
| 682 | if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) | 689 | if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) |
| 683 | goto out_unlock; | 690 | goto out_unlock; |
| 684 | 691 | ||
| 685 | desc->threads_oneshot &= ~action->thread_mask; | 692 | desc->threads_oneshot &= ~action->thread_mask; |
| @@ -739,7 +746,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) | |||
| 739 | 746 | ||
| 740 | local_bh_disable(); | 747 | local_bh_disable(); |
| 741 | ret = action->thread_fn(action->irq, action->dev_id); | 748 | ret = action->thread_fn(action->irq, action->dev_id); |
| 742 | irq_finalize_oneshot(desc, action, false); | 749 | irq_finalize_oneshot(desc, action); |
| 743 | local_bh_enable(); | 750 | local_bh_enable(); |
| 744 | return ret; | 751 | return ret; |
| 745 | } | 752 | } |
| @@ -755,7 +762,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc, | |||
| 755 | irqreturn_t ret; | 762 | irqreturn_t ret; |
| 756 | 763 | ||
| 757 | ret = action->thread_fn(action->irq, action->dev_id); | 764 | ret = action->thread_fn(action->irq, action->dev_id); |
| 758 | irq_finalize_oneshot(desc, action, false); | 765 | irq_finalize_oneshot(desc, action); |
| 759 | return ret; | 766 | return ret; |
| 760 | } | 767 | } |
| 761 | 768 | ||
| @@ -844,7 +851,7 @@ void exit_irq_thread(void) | |||
| 844 | wake_threads_waitq(desc); | 851 | wake_threads_waitq(desc); |
| 845 | 852 | ||
| 846 | /* Prevent a stale desc->threads_oneshot */ | 853 | /* Prevent a stale desc->threads_oneshot */ |
| 847 | irq_finalize_oneshot(desc, action, true); | 854 | irq_finalize_oneshot(desc, action); |
| 848 | } | 855 | } |
| 849 | 856 | ||
| 850 | static void irq_setup_forced_threading(struct irqaction *new) | 857 | static void irq_setup_forced_threading(struct irqaction *new) |
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 47420908fba0..c3c89751b327 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
| @@ -43,12 +43,16 @@ void irq_move_masked_irq(struct irq_data *idata) | |||
| 43 | * masking the irqs. | 43 | * masking the irqs. |
| 44 | */ | 44 | */ |
| 45 | if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) | 45 | if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) |
| 46 | < nr_cpu_ids)) | 46 | < nr_cpu_ids)) { |
| 47 | if (!chip->irq_set_affinity(&desc->irq_data, | 47 | int ret = chip->irq_set_affinity(&desc->irq_data, |
| 48 | desc->pending_mask, false)) { | 48 | desc->pending_mask, false); |
| 49 | switch (ret) { | ||
| 50 | case IRQ_SET_MASK_OK: | ||
| 49 | cpumask_copy(desc->irq_data.affinity, desc->pending_mask); | 51 | cpumask_copy(desc->irq_data.affinity, desc->pending_mask); |
| 52 | case IRQ_SET_MASK_OK_NOCOPY: | ||
| 50 | irq_set_thread_affinity(desc); | 53 | irq_set_thread_affinity(desc); |
| 51 | } | 54 | } |
| 55 | } | ||
| 52 | 56 | ||
| 53 | cpumask_clear(desc->pending_mask); | 57 | cpumask_clear(desc->pending_mask); |
| 54 | } | 58 | } |
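The migration.c hunk above makes the pending-affinity path respect both IRQ_SET_MASK_OK and IRQ_SET_MASK_OK_NOCOPY return codes. A hedged sketch of the NOCOPY convention, where the chip narrows irq_data.affinity itself so the core must not copy the caller's mask over it; the bar_* names are hypothetical and not part of this patch:

#include <linux/cpumask.h>
#include <linux/irq.h>

/* Placeholder for programming the controller's routing registers. */
static void bar_hw_route(irq_hw_number_t hwirq, unsigned int cpu)
{
}

static int bar_irq_set_affinity(struct irq_data *data,
				const struct cpumask *dest, bool force)
{
	unsigned int cpu = cpumask_first(dest);

	bar_hw_route(data->hwirq, cpu);

	/* The effective affinity is a single CPU, so update the mask
	 * here and tell the core not to copy 'dest' verbatim. */
	cpumask_copy(data->affinity, cpumask_of(cpu));
	return IRQ_SET_MASK_OK_NOCOPY;
}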
diff --git a/kernel/irq_work.c b/kernel/irq_work.c index c3c46c72046e..1588e3b2871b 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c | |||
| @@ -5,11 +5,13 @@ | |||
| 5 | * context. The enqueueing is NMI-safe. | 5 | * context. The enqueueing is NMI-safe. |
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #include <linux/bug.h> | ||
| 8 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
| 9 | #include <linux/export.h> | 10 | #include <linux/export.h> |
| 10 | #include <linux/irq_work.h> | 11 | #include <linux/irq_work.h> |
| 11 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
| 12 | #include <linux/hardirq.h> | 13 | #include <linux/hardirq.h> |
| 14 | #include <linux/irqflags.h> | ||
| 13 | #include <asm/processor.h> | 15 | #include <asm/processor.h> |
| 14 | 16 | ||
| 15 | /* | 17 | /* |
diff --git a/kernel/itimer.c b/kernel/itimer.c index 22000c3db0dd..8d262b467573 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c | |||
| @@ -284,8 +284,12 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, | |||
| 284 | if (value) { | 284 | if (value) { |
| 285 | if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) | 285 | if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) |
| 286 | return -EFAULT; | 286 | return -EFAULT; |
| 287 | } else | 287 | } else { |
| 288 | memset((char *) &set_buffer, 0, sizeof(set_buffer)); | 288 | memset(&set_buffer, 0, sizeof(set_buffer)); |
| 289 | printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer." | ||
| 290 | " Misfeature support will be removed\n", | ||
| 291 | current->comm); | ||
| 292 | } | ||
| 289 | 293 | ||
| 290 | error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL); | 294 | error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL); |
| 291 | if (error || !ovalue) | 295 | if (error || !ovalue) |
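The itimer.c hunk only warns about a NULL new_value; callers relying on that misfeature are expected to pass a zeroed itimerval instead. A small userspace sketch of the supported way to disarm a timer:

#include <string.h>
#include <sys/time.h>

/* Disarm ITIMER_REAL the supported way: a zeroed it_value, not a NULL
 * new_value pointer. */
static int disarm_real_timer(void)
{
	struct itimerval it;

	memset(&it, 0, sizeof(it));
	return setitimer(ITIMER_REAL, &it, NULL);
}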
diff --git a/kernel/kexec.c b/kernel/kexec.c index 7b0886786701..4e2e472f6aeb 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
| @@ -37,7 +37,6 @@ | |||
| 37 | #include <asm/page.h> | 37 | #include <asm/page.h> |
| 38 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
| 39 | #include <asm/io.h> | 39 | #include <asm/io.h> |
| 40 | #include <asm/system.h> | ||
| 41 | #include <asm/sections.h> | 40 | #include <asm/sections.h> |
| 42 | 41 | ||
| 43 | /* Per cpu memory for storing cpu states in case of system crash. */ | 42 | /* Per cpu memory for storing cpu states in case of system crash. */ |
| @@ -1359,6 +1358,10 @@ static int __init parse_crashkernel_simple(char *cmdline, | |||
| 1359 | 1358 | ||
| 1360 | if (*cur == '@') | 1359 | if (*cur == '@') |
| 1361 | *crash_base = memparse(cur+1, &cur); | 1360 | *crash_base = memparse(cur+1, &cur); |
| 1361 | else if (*cur != ' ' && *cur != '\0') { | ||
| 1362 | pr_warning("crashkernel: unrecognized char\n"); | ||
| 1363 | return -EINVAL; | ||
| 1364 | } | ||
| 1362 | 1365 | ||
| 1363 | return 0; | 1366 | return 0; |
| 1364 | } | 1367 | } |
| @@ -1462,7 +1465,9 @@ static int __init crash_save_vmcoreinfo_init(void) | |||
| 1462 | 1465 | ||
| 1463 | VMCOREINFO_SYMBOL(init_uts_ns); | 1466 | VMCOREINFO_SYMBOL(init_uts_ns); |
| 1464 | VMCOREINFO_SYMBOL(node_online_map); | 1467 | VMCOREINFO_SYMBOL(node_online_map); |
| 1468 | #ifdef CONFIG_MMU | ||
| 1465 | VMCOREINFO_SYMBOL(swapper_pg_dir); | 1469 | VMCOREINFO_SYMBOL(swapper_pg_dir); |
| 1470 | #endif | ||
| 1466 | VMCOREINFO_SYMBOL(_stext); | 1471 | VMCOREINFO_SYMBOL(_stext); |
| 1467 | VMCOREINFO_SYMBOL(vmlist); | 1472 | VMCOREINFO_SYMBOL(vmlist); |
| 1468 | 1473 | ||
| @@ -1546,13 +1551,13 @@ int kernel_kexec(void) | |||
| 1546 | if (error) | 1551 | if (error) |
| 1547 | goto Resume_console; | 1552 | goto Resume_console; |
| 1548 | /* At this point, dpm_suspend_start() has been called, | 1553 | /* At this point, dpm_suspend_start() has been called, |
| 1549 | * but *not* dpm_suspend_noirq(). We *must* call | 1554 | * but *not* dpm_suspend_end(). We *must* call |
| 1550 | * dpm_suspend_noirq() now. Otherwise, drivers for | 1555 | * dpm_suspend_end() now. Otherwise, drivers for |
| 1551 | * some devices (e.g. interrupt controllers) become | 1556 | * some devices (e.g. interrupt controllers) become |
| 1552 | * desynchronized with the actual state of the | 1557 | * desynchronized with the actual state of the |
| 1553 | * hardware at resume time, and evil weirdness ensues. | 1558 | * hardware at resume time, and evil weirdness ensues. |
| 1554 | */ | 1559 | */ |
| 1555 | error = dpm_suspend_noirq(PMSG_FREEZE); | 1560 | error = dpm_suspend_end(PMSG_FREEZE); |
| 1556 | if (error) | 1561 | if (error) |
| 1557 | goto Resume_devices; | 1562 | goto Resume_devices; |
| 1558 | error = disable_nonboot_cpus(); | 1563 | error = disable_nonboot_cpus(); |
| @@ -1579,7 +1584,7 @@ int kernel_kexec(void) | |||
| 1579 | local_irq_enable(); | 1584 | local_irq_enable(); |
| 1580 | Enable_cpus: | 1585 | Enable_cpus: |
| 1581 | enable_nonboot_cpus(); | 1586 | enable_nonboot_cpus(); |
| 1582 | dpm_resume_noirq(PMSG_RESTORE); | 1587 | dpm_resume_start(PMSG_RESTORE); |
| 1583 | Resume_devices: | 1588 | Resume_devices: |
| 1584 | dpm_resume_end(PMSG_RESTORE); | 1589 | dpm_resume_end(PMSG_RESTORE); |
| 1585 | Resume_console: | 1590 | Resume_console: |
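The parse_crashkernel_simple() hunk above tightens "crashkernel=size[@offset]" parsing: any character other than '@', ' ' or '\0' after the size now fails with -EINVAL instead of being silently ignored. A rough userspace sketch of the same validation, assuming plain strtoull() in place of the kernel's memparse() (so K/M/G size suffixes are not understood here); the helper name is made up:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Hypothetical analogue of the stricter parsing: reject any character
 * other than '@', ' ' or '\0' following the size value.
 */
static int parse_crashkernel_arg(const char *arg,
				 unsigned long long *size,
				 unsigned long long *base)
{
	char *cur;

	errno = 0;
	*size = strtoull(arg, &cur, 0);
	if (errno || cur == arg)
		return -EINVAL;

	if (*cur == '@')
		*base = strtoull(cur + 1, &cur, 0);
	else if (*cur != ' ' && *cur != '\0')
		return -EINVAL;		/* unrecognized trailing character */

	return 0;
}

int main(void)
{
	unsigned long long size = 0, base = 0;
	const char *good = "0x4000000@0x1000000";
	const char *bad = "0x4000000;junk";

	printf("%-22s -> %d\n", good, parse_crashkernel_arg(good, &size, &base));
	printf("%-22s -> %d\n", bad, parse_crashkernel_arg(bad, &size, &base));
	return 0;
}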
diff --git a/kernel/kmod.c b/kernel/kmod.c index a0a88543934e..05698a7415fe 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
| @@ -60,6 +60,43 @@ static DECLARE_RWSEM(umhelper_sem); | |||
| 60 | */ | 60 | */ |
| 61 | char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; | 61 | char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; |
| 62 | 62 | ||
| 63 | static void free_modprobe_argv(struct subprocess_info *info) | ||
| 64 | { | ||
| 65 | kfree(info->argv[3]); /* check call_modprobe() */ | ||
| 66 | kfree(info->argv); | ||
| 67 | } | ||
| 68 | |||
| 69 | static int call_modprobe(char *module_name, int wait) | ||
| 70 | { | ||
| 71 | static char *envp[] = { | ||
| 72 | "HOME=/", | ||
| 73 | "TERM=linux", | ||
| 74 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | ||
| 75 | NULL | ||
| 76 | }; | ||
| 77 | |||
| 78 | char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL); | ||
| 79 | if (!argv) | ||
| 80 | goto out; | ||
| 81 | |||
| 82 | module_name = kstrdup(module_name, GFP_KERNEL); | ||
| 83 | if (!module_name) | ||
| 84 | goto free_argv; | ||
| 85 | |||
| 86 | argv[0] = modprobe_path; | ||
| 87 | argv[1] = "-q"; | ||
| 88 | argv[2] = "--"; | ||
| 89 | argv[3] = module_name; /* check free_modprobe_argv() */ | ||
| 90 | argv[4] = NULL; | ||
| 91 | |||
| 92 | return call_usermodehelper_fns(modprobe_path, argv, envp, | ||
| 93 | wait | UMH_KILLABLE, NULL, free_modprobe_argv, NULL); | ||
| 94 | free_argv: | ||
| 95 | kfree(argv); | ||
| 96 | out: | ||
| 97 | return -ENOMEM; | ||
| 98 | } | ||
| 99 | |||
| 63 | /** | 100 | /** |
| 64 | * __request_module - try to load a kernel module | 101 | * __request_module - try to load a kernel module |
| 65 | * @wait: wait (or not) for the operation to complete | 102 | * @wait: wait (or not) for the operation to complete |
| @@ -81,11 +118,6 @@ int __request_module(bool wait, const char *fmt, ...) | |||
| 81 | char module_name[MODULE_NAME_LEN]; | 118 | char module_name[MODULE_NAME_LEN]; |
| 82 | unsigned int max_modprobes; | 119 | unsigned int max_modprobes; |
| 83 | int ret; | 120 | int ret; |
| 84 | char *argv[] = { modprobe_path, "-q", "--", module_name, NULL }; | ||
| 85 | static char *envp[] = { "HOME=/", | ||
| 86 | "TERM=linux", | ||
| 87 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | ||
| 88 | NULL }; | ||
| 89 | static atomic_t kmod_concurrent = ATOMIC_INIT(0); | 121 | static atomic_t kmod_concurrent = ATOMIC_INIT(0); |
| 90 | #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ | 122 | #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ |
| 91 | static int kmod_loop_msg; | 123 | static int kmod_loop_msg; |
| @@ -128,9 +160,7 @@ int __request_module(bool wait, const char *fmt, ...) | |||
| 128 | 160 | ||
| 129 | trace_module_request(module_name, wait, _RET_IP_); | 161 | trace_module_request(module_name, wait, _RET_IP_); |
| 130 | 162 | ||
| 131 | ret = call_usermodehelper_fns(modprobe_path, argv, envp, | 163 | ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC); |
| 132 | wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC, | ||
| 133 | NULL, NULL, NULL); | ||
| 134 | 164 | ||
| 135 | atomic_dec(&kmod_concurrent); | 165 | atomic_dec(&kmod_concurrent); |
| 136 | return ret; | 166 | return ret; |
| @@ -188,7 +218,7 @@ static int ____call_usermodehelper(void *data) | |||
| 188 | /* Exec failed? */ | 218 | /* Exec failed? */ |
| 189 | fail: | 219 | fail: |
| 190 | sub_info->retval = retval; | 220 | sub_info->retval = retval; |
| 191 | do_exit(0); | 221 | return 0; |
| 192 | } | 222 | } |
| 193 | 223 | ||
| 194 | void call_usermodehelper_freeinfo(struct subprocess_info *info) | 224 | void call_usermodehelper_freeinfo(struct subprocess_info *info) |
| @@ -199,6 +229,19 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info) | |||
| 199 | } | 229 | } |
| 200 | EXPORT_SYMBOL(call_usermodehelper_freeinfo); | 230 | EXPORT_SYMBOL(call_usermodehelper_freeinfo); |
| 201 | 231 | ||
| 232 | static void umh_complete(struct subprocess_info *sub_info) | ||
| 233 | { | ||
| 234 | struct completion *comp = xchg(&sub_info->complete, NULL); | ||
| 235 | /* | ||
| 236 | * See call_usermodehelper_exec(). If xchg() returns NULL | ||
| 237 | * we own sub_info, the UMH_KILLABLE caller has gone away. | ||
| 238 | */ | ||
| 239 | if (comp) | ||
| 240 | complete(comp); | ||
| 241 | else | ||
| 242 | call_usermodehelper_freeinfo(sub_info); | ||
| 243 | } | ||
| 244 | |||
| 202 | /* Keventd can't block, but this (a child) can. */ | 245 | /* Keventd can't block, but this (a child) can. */ |
| 203 | static int wait_for_helper(void *data) | 246 | static int wait_for_helper(void *data) |
| 204 | { | 247 | { |
| @@ -235,7 +278,7 @@ static int wait_for_helper(void *data) | |||
| 235 | sub_info->retval = ret; | 278 | sub_info->retval = ret; |
| 236 | } | 279 | } |
| 237 | 280 | ||
| 238 | complete(sub_info->complete); | 281 | umh_complete(sub_info); |
| 239 | return 0; | 282 | return 0; |
| 240 | } | 283 | } |
| 241 | 284 | ||
| @@ -244,7 +287,7 @@ static void __call_usermodehelper(struct work_struct *work) | |||
| 244 | { | 287 | { |
| 245 | struct subprocess_info *sub_info = | 288 | struct subprocess_info *sub_info = |
| 246 | container_of(work, struct subprocess_info, work); | 289 | container_of(work, struct subprocess_info, work); |
| 247 | enum umh_wait wait = sub_info->wait; | 290 | int wait = sub_info->wait & ~UMH_KILLABLE; |
| 248 | pid_t pid; | 291 | pid_t pid; |
| 249 | 292 | ||
| 250 | /* CLONE_VFORK: wait until the usermode helper has execve'd | 293 | /* CLONE_VFORK: wait until the usermode helper has execve'd |
| @@ -269,7 +312,7 @@ static void __call_usermodehelper(struct work_struct *work) | |||
| 269 | case UMH_WAIT_EXEC: | 312 | case UMH_WAIT_EXEC: |
| 270 | if (pid < 0) | 313 | if (pid < 0) |
| 271 | sub_info->retval = pid; | 314 | sub_info->retval = pid; |
| 272 | complete(sub_info->complete); | 315 | umh_complete(sub_info); |
| 273 | } | 316 | } |
| 274 | } | 317 | } |
| 275 | 318 | ||
| @@ -279,7 +322,7 @@ static void __call_usermodehelper(struct work_struct *work) | |||
| 279 | * land has been frozen during a system-wide hibernation or suspend operation). | 322 | * land has been frozen during a system-wide hibernation or suspend operation). |
| 280 | * Should always be manipulated under umhelper_sem acquired for write. | 323 | * Should always be manipulated under umhelper_sem acquired for write. |
| 281 | */ | 324 | */ |
| 282 | static int usermodehelper_disabled = 1; | 325 | static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED; |
| 283 | 326 | ||
| 284 | /* Number of helpers running */ | 327 | /* Number of helpers running */ |
| 285 | static atomic_t running_helpers = ATOMIC_INIT(0); | 328 | static atomic_t running_helpers = ATOMIC_INIT(0); |
| @@ -291,32 +334,110 @@ static atomic_t running_helpers = ATOMIC_INIT(0); | |||
| 291 | static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); | 334 | static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); |
| 292 | 335 | ||
| 293 | /* | 336 | /* |
| 337 | * Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled | ||
| 338 | * to become 'false'. | ||
| 339 | */ | ||
| 340 | static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq); | ||
| 341 | |||
| 342 | /* | ||
| 294 | * Time to wait for running_helpers to become zero before the setting of | 343 | * Time to wait for running_helpers to become zero before the setting of |
| 295 | * usermodehelper_disabled in usermodehelper_disable() fails | 344 | * usermodehelper_disabled in usermodehelper_disable() fails |
| 296 | */ | 345 | */ |
| 297 | #define RUNNING_HELPERS_TIMEOUT (5 * HZ) | 346 | #define RUNNING_HELPERS_TIMEOUT (5 * HZ) |
| 298 | 347 | ||
| 299 | void read_lock_usermodehelper(void) | 348 | int usermodehelper_read_trylock(void) |
| 349 | { | ||
| 350 | DEFINE_WAIT(wait); | ||
| 351 | int ret = 0; | ||
| 352 | |||
| 353 | down_read(&umhelper_sem); | ||
| 354 | for (;;) { | ||
| 355 | prepare_to_wait(&usermodehelper_disabled_waitq, &wait, | ||
| 356 | TASK_INTERRUPTIBLE); | ||
| 357 | if (!usermodehelper_disabled) | ||
| 358 | break; | ||
| 359 | |||
| 360 | if (usermodehelper_disabled == UMH_DISABLED) | ||
| 361 | ret = -EAGAIN; | ||
| 362 | |||
| 363 | up_read(&umhelper_sem); | ||
| 364 | |||
| 365 | if (ret) | ||
| 366 | break; | ||
| 367 | |||
| 368 | schedule(); | ||
| 369 | try_to_freeze(); | ||
| 370 | |||
| 371 | down_read(&umhelper_sem); | ||
| 372 | } | ||
| 373 | finish_wait(&usermodehelper_disabled_waitq, &wait); | ||
| 374 | return ret; | ||
| 375 | } | ||
| 376 | EXPORT_SYMBOL_GPL(usermodehelper_read_trylock); | ||
| 377 | |||
| 378 | long usermodehelper_read_lock_wait(long timeout) | ||
| 300 | { | 379 | { |
| 380 | DEFINE_WAIT(wait); | ||
| 381 | |||
| 382 | if (timeout < 0) | ||
| 383 | return -EINVAL; | ||
| 384 | |||
| 301 | down_read(&umhelper_sem); | 385 | down_read(&umhelper_sem); |
| 386 | for (;;) { | ||
| 387 | prepare_to_wait(&usermodehelper_disabled_waitq, &wait, | ||
| 388 | TASK_UNINTERRUPTIBLE); | ||
| 389 | if (!usermodehelper_disabled) | ||
| 390 | break; | ||
| 391 | |||
| 392 | up_read(&umhelper_sem); | ||
| 393 | |||
| 394 | timeout = schedule_timeout(timeout); | ||
| 395 | if (!timeout) | ||
| 396 | break; | ||
| 397 | |||
| 398 | down_read(&umhelper_sem); | ||
| 399 | } | ||
| 400 | finish_wait(&usermodehelper_disabled_waitq, &wait); | ||
| 401 | return timeout; | ||
| 302 | } | 402 | } |
| 303 | EXPORT_SYMBOL_GPL(read_lock_usermodehelper); | 403 | EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait); |
| 304 | 404 | ||
| 305 | void read_unlock_usermodehelper(void) | 405 | void usermodehelper_read_unlock(void) |
| 306 | { | 406 | { |
| 307 | up_read(&umhelper_sem); | 407 | up_read(&umhelper_sem); |
| 308 | } | 408 | } |
| 309 | EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); | 409 | EXPORT_SYMBOL_GPL(usermodehelper_read_unlock); |
| 310 | 410 | ||
| 311 | /** | 411 | /** |
| 312 | * usermodehelper_disable - prevent new helpers from being started | 412 | * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled. |
| 413 | * depth: New value to assign to usermodehelper_disabled. | ||
| 414 | * | ||
| 415 | * Change the value of usermodehelper_disabled (under umhelper_sem locked for | ||
| 416 | * writing) and wakeup tasks waiting for it to change. | ||
| 313 | */ | 417 | */ |
| 314 | int usermodehelper_disable(void) | 418 | void __usermodehelper_set_disable_depth(enum umh_disable_depth depth) |
| 419 | { | ||
| 420 | down_write(&umhelper_sem); | ||
| 421 | usermodehelper_disabled = depth; | ||
| 422 | wake_up(&usermodehelper_disabled_waitq); | ||
| 423 | up_write(&umhelper_sem); | ||
| 424 | } | ||
| 425 | |||
| 426 | /** | ||
| 427 | * __usermodehelper_disable - Prevent new helpers from being started. | ||
| 428 | * @depth: New value to assign to usermodehelper_disabled. | ||
| 429 | * | ||
| 430 | * Set usermodehelper_disabled to @depth and wait for running helpers to exit. | ||
| 431 | */ | ||
| 432 | int __usermodehelper_disable(enum umh_disable_depth depth) | ||
| 315 | { | 433 | { |
| 316 | long retval; | 434 | long retval; |
| 317 | 435 | ||
| 436 | if (!depth) | ||
| 437 | return -EINVAL; | ||
| 438 | |||
| 318 | down_write(&umhelper_sem); | 439 | down_write(&umhelper_sem); |
| 319 | usermodehelper_disabled = 1; | 440 | usermodehelper_disabled = depth; |
| 320 | up_write(&umhelper_sem); | 441 | up_write(&umhelper_sem); |
| 321 | 442 | ||
| 322 | /* | 443 | /* |
| @@ -331,31 +452,10 @@ int usermodehelper_disable(void) | |||
| 331 | if (retval) | 452 | if (retval) |
| 332 | return 0; | 453 | return 0; |
| 333 | 454 | ||
| 334 | down_write(&umhelper_sem); | 455 | __usermodehelper_set_disable_depth(UMH_ENABLED); |
| 335 | usermodehelper_disabled = 0; | ||
| 336 | up_write(&umhelper_sem); | ||
| 337 | return -EAGAIN; | 456 | return -EAGAIN; |
| 338 | } | 457 | } |
| 339 | 458 | ||
| 340 | /** | ||
| 341 | * usermodehelper_enable - allow new helpers to be started again | ||
| 342 | */ | ||
| 343 | void usermodehelper_enable(void) | ||
| 344 | { | ||
| 345 | down_write(&umhelper_sem); | ||
| 346 | usermodehelper_disabled = 0; | ||
| 347 | up_write(&umhelper_sem); | ||
| 348 | } | ||
| 349 | |||
| 350 | /** | ||
| 351 | * usermodehelper_is_disabled - check if new helpers are allowed to be started | ||
| 352 | */ | ||
| 353 | bool usermodehelper_is_disabled(void) | ||
| 354 | { | ||
| 355 | return usermodehelper_disabled; | ||
| 356 | } | ||
| 357 | EXPORT_SYMBOL_GPL(usermodehelper_is_disabled); | ||
| 358 | |||
| 359 | static void helper_lock(void) | 459 | static void helper_lock(void) |
| 360 | { | 460 | { |
| 361 | atomic_inc(&running_helpers); | 461 | atomic_inc(&running_helpers); |
| @@ -435,8 +535,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns); | |||
| 435 | * asynchronously if wait is not set, and runs as a child of keventd. | 535 | * asynchronously if wait is not set, and runs as a child of keventd. |
| 436 | * (ie. it runs with full root capabilities). | 536 | * (ie. it runs with full root capabilities). |
| 437 | */ | 537 | */ |
| 438 | int call_usermodehelper_exec(struct subprocess_info *sub_info, | 538 | int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) |
| 439 | enum umh_wait wait) | ||
| 440 | { | 539 | { |
| 441 | DECLARE_COMPLETION_ONSTACK(done); | 540 | DECLARE_COMPLETION_ONSTACK(done); |
| 442 | int retval = 0; | 541 | int retval = 0; |
| @@ -456,9 +555,21 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, | |||
| 456 | queue_work(khelper_wq, &sub_info->work); | 555 | queue_work(khelper_wq, &sub_info->work); |
| 457 | if (wait == UMH_NO_WAIT) /* task has freed sub_info */ | 556 | if (wait == UMH_NO_WAIT) /* task has freed sub_info */ |
| 458 | goto unlock; | 557 | goto unlock; |
| 558 | |||
| 559 | if (wait & UMH_KILLABLE) { | ||
| 560 | retval = wait_for_completion_killable(&done); | ||
| 561 | if (!retval) | ||
| 562 | goto wait_done; | ||
| 563 | |||
| 564 | /* umh_complete() will see NULL and free sub_info */ | ||
| 565 | if (xchg(&sub_info->complete, NULL)) | ||
| 566 | goto unlock; | ||
| 567 | /* fallthrough, umh_complete() was already called */ | ||
| 568 | } | ||
| 569 | |||
| 459 | wait_for_completion(&done); | 570 | wait_for_completion(&done); |
| 571 | wait_done: | ||
| 460 | retval = sub_info->retval; | 572 | retval = sub_info->retval; |
| 461 | |||
| 462 | out: | 573 | out: |
| 463 | call_usermodehelper_freeinfo(sub_info); | 574 | call_usermodehelper_freeinfo(sub_info); |
| 464 | unlock: | 575 | unlock: |
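The UMH_KILLABLE handling above hinges on one atomic xchg() of sub_info->complete: whichever side — the killed waiter in call_usermodehelper_exec() or umh_complete() — swaps the pointer to NULL first and sees the old non-NULL value keeps responsibility for sub_info, while the other side backs off, so the structure is freed exactly once. Below is a userspace analogue of that hand-off using C11 atomics; struct work, helper_done() and waiter_abort() are invented names, and a plain int flag stands in for the kernel completion:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct work {
	_Atomic(int *) complete;	/* non-NULL while a waiter is interested */
};

/* Helper side: signal completion, or take over freeing if the waiter left. */
static void helper_done(struct work *w)
{
	int *comp = atomic_exchange(&w->complete, NULL);

	if (comp)
		*comp = 1;	/* waiter still there: "complete()" it */
	else
		free(w);	/* waiter gave up first: we now own w */
}

/* Waiter side: bail out early (think "killed") and try to reclaim w. */
static void waiter_abort(struct work *w)
{
	if (atomic_exchange(&w->complete, NULL))
		free(w);	/* helper has not reported yet: we own w */
	/* otherwise the helper saw NULL and frees (or freed) w itself */
}

int main(void)
{
	int done = 0;
	struct work *w = malloc(sizeof(*w));

	if (!w)
		return 1;
	atomic_store(&w->complete, &done);

	/* Sequential demo: the helper reports first ... */
	helper_done(w);
	printf("completed: %d\n", done);

	/* ... so a later abort sees NULL, backs off, and we still own w. */
	waiter_abort(w);
	free(w);
	return 0;
}

The demo runs the two sides one after the other; in the kernel they race from different contexts, and the single xchg() is what keeps the outcome well defined.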
diff --git a/kernel/module.c b/kernel/module.c index 2c932760fd33..78ac6ec1e425 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -105,6 +105,7 @@ struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ | |||
| 105 | 105 | ||
| 106 | /* Block module loading/unloading? */ | 106 | /* Block module loading/unloading? */ |
| 107 | int modules_disabled = 0; | 107 | int modules_disabled = 0; |
| 108 | core_param(nomodule, modules_disabled, bint, 0); | ||
| 108 | 109 | ||
| 109 | /* Waiting for a module to finish initializing? */ | 110 | /* Waiting for a module to finish initializing? */ |
| 110 | static DECLARE_WAIT_QUEUE_HEAD(module_wq); | 111 | static DECLARE_WAIT_QUEUE_HEAD(module_wq); |
| @@ -903,6 +904,36 @@ static ssize_t show_refcnt(struct module_attribute *mattr, | |||
| 903 | static struct module_attribute modinfo_refcnt = | 904 | static struct module_attribute modinfo_refcnt = |
| 904 | __ATTR(refcnt, 0444, show_refcnt, NULL); | 905 | __ATTR(refcnt, 0444, show_refcnt, NULL); |
| 905 | 906 | ||
| 907 | void __module_get(struct module *module) | ||
| 908 | { | ||
| 909 | if (module) { | ||
| 910 | preempt_disable(); | ||
| 911 | __this_cpu_inc(module->refptr->incs); | ||
| 912 | trace_module_get(module, _RET_IP_); | ||
| 913 | preempt_enable(); | ||
| 914 | } | ||
| 915 | } | ||
| 916 | EXPORT_SYMBOL(__module_get); | ||
| 917 | |||
| 918 | bool try_module_get(struct module *module) | ||
| 919 | { | ||
| 920 | bool ret = true; | ||
| 921 | |||
| 922 | if (module) { | ||
| 923 | preempt_disable(); | ||
| 924 | |||
| 925 | if (likely(module_is_live(module))) { | ||
| 926 | __this_cpu_inc(module->refptr->incs); | ||
| 927 | trace_module_get(module, _RET_IP_); | ||
| 928 | } else | ||
| 929 | ret = false; | ||
| 930 | |||
| 931 | preempt_enable(); | ||
| 932 | } | ||
| 933 | return ret; | ||
| 934 | } | ||
| 935 | EXPORT_SYMBOL(try_module_get); | ||
| 936 | |||
| 906 | void module_put(struct module *module) | 937 | void module_put(struct module *module) |
| 907 | { | 938 | { |
| 908 | if (module) { | 939 | if (module) { |
| @@ -2380,8 +2411,7 @@ static int copy_and_check(struct load_info *info, | |||
| 2380 | return -ENOEXEC; | 2411 | return -ENOEXEC; |
| 2381 | 2412 | ||
| 2382 | /* Suck in entire file: we'll want most of it. */ | 2413 | /* Suck in entire file: we'll want most of it. */ |
| 2383 | /* vmalloc barfs on "unusual" numbers. Check here */ | 2414 | if ((hdr = vmalloc(len)) == NULL) |
| 2384 | if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) | ||
| 2385 | return -ENOMEM; | 2415 | return -ENOMEM; |
| 2386 | 2416 | ||
| 2387 | if (copy_from_user(hdr, umod, len) != 0) { | 2417 | if (copy_from_user(hdr, umod, len) != 0) { |
| @@ -2922,7 +2952,8 @@ static struct module *load_module(void __user *umod, | |||
| 2922 | mutex_unlock(&module_mutex); | 2952 | mutex_unlock(&module_mutex); |
| 2923 | 2953 | ||
| 2924 | /* Module is ready to execute: parsing args may do that. */ | 2954 | /* Module is ready to execute: parsing args may do that. */ |
| 2925 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); | 2955 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, |
| 2956 | -32768, 32767, NULL); | ||
| 2926 | if (err < 0) | 2957 | if (err < 0) |
| 2927 | goto unlink; | 2958 | goto unlink; |
| 2928 | 2959 | ||
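try_module_get() and __module_get(), added as out-of-line exported functions in the module.c hunk above, follow the usual "conditional get" shape: take a reference only while the object is still live, otherwise report failure so the caller can back off. A userspace sketch of that interface with C11 atomics — the names are invented, and unlike the kernel code (which tests module_is_live() and bumps a per-CPU counter under preempt_disable()) this simple check-then-increment is not race-free; it only illustrates the calling convention:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct object {
	atomic_bool live;	/* cleared once teardown starts */
	atomic_int refs;	/* outstanding references */
};

/* Unconditional get: only legal while the object is known to be live. */
static void object_get(struct object *obj)
{
	atomic_fetch_add(&obj->refs, 1);
}

/*
 * Conditional get: fail once teardown has started. NOTE: this
 * check-then-increment has a window between load and add; it is a
 * sketch of the interface, not of the kernel's synchronization.
 */
static bool try_object_get(struct object *obj)
{
	if (!atomic_load(&obj->live))
		return false;
	atomic_fetch_add(&obj->refs, 1);
	return true;
}

static void object_put(struct object *obj)
{
	atomic_fetch_sub(&obj->refs, 1);
}

int main(void)
{
	struct object obj = { .live = true, .refs = 0 };

	object_get(&obj);
	if (try_object_get(&obj))
		printf("got ref, refs=%d\n", atomic_load(&obj.refs));
	object_put(&obj);
	object_put(&obj);

	atomic_store(&obj.live, false);	/* teardown begins */
	printf("try after teardown: %s\n",
	       try_object_get(&obj) ? "succeeded" : "failed");
	return 0;
}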
diff --git a/kernel/padata.c b/kernel/padata.c index b45259931512..89fe3d1b9efb 100644 --- a/kernel/padata.c +++ b/kernel/padata.c | |||
| @@ -1,6 +1,8 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * padata.c - generic interface to process data streams in parallel | 2 | * padata.c - generic interface to process data streams in parallel |
| 3 | * | 3 | * |
| 4 | * See Documentation/padata.txt for an api documentation. | ||
| 5 | * | ||
| 4 | * Copyright (C) 2008, 2009 secunet Security Networks AG | 6 | * Copyright (C) 2008, 2009 secunet Security Networks AG |
| 5 | * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com> | 7 | * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com> |
| 6 | * | 8 | * |
| @@ -29,7 +31,6 @@ | |||
| 29 | #include <linux/sysfs.h> | 31 | #include <linux/sysfs.h> |
| 30 | #include <linux/rcupdate.h> | 32 | #include <linux/rcupdate.h> |
| 31 | 33 | ||
| 32 | #define MAX_SEQ_NR (INT_MAX - NR_CPUS) | ||
| 33 | #define MAX_OBJ_NUM 1000 | 34 | #define MAX_OBJ_NUM 1000 |
| 34 | 35 | ||
| 35 | static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) | 36 | static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) |
| @@ -43,18 +44,19 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) | |||
| 43 | return target_cpu; | 44 | return target_cpu; |
| 44 | } | 45 | } |
| 45 | 46 | ||
| 46 | static int padata_cpu_hash(struct padata_priv *padata) | 47 | static int padata_cpu_hash(struct parallel_data *pd) |
| 47 | { | 48 | { |
| 48 | int cpu_index; | 49 | int cpu_index; |
| 49 | struct parallel_data *pd; | ||
| 50 | |||
| 51 | pd = padata->pd; | ||
| 52 | 50 | ||
| 53 | /* | 51 | /* |
| 54 | * Hash the sequence numbers to the cpus by taking | 52 | * Hash the sequence numbers to the cpus by taking |
| 55 | * seq_nr mod. number of cpus in use. | 53 | * seq_nr mod. number of cpus in use. |
| 56 | */ | 54 | */ |
| 57 | cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask.pcpu); | 55 | |
| 56 | spin_lock(&pd->seq_lock); | ||
| 57 | cpu_index = pd->seq_nr % cpumask_weight(pd->cpumask.pcpu); | ||
| 58 | pd->seq_nr++; | ||
| 59 | spin_unlock(&pd->seq_lock); | ||
| 58 | 60 | ||
| 59 | return padata_index_to_cpu(pd, cpu_index); | 61 | return padata_index_to_cpu(pd, cpu_index); |
| 60 | } | 62 | } |
| @@ -132,12 +134,7 @@ int padata_do_parallel(struct padata_instance *pinst, | |||
| 132 | padata->pd = pd; | 134 | padata->pd = pd; |
| 133 | padata->cb_cpu = cb_cpu; | 135 | padata->cb_cpu = cb_cpu; |
| 134 | 136 | ||
| 135 | if (unlikely(atomic_read(&pd->seq_nr) == pd->max_seq_nr)) | 137 | target_cpu = padata_cpu_hash(pd); |
| 136 | atomic_set(&pd->seq_nr, -1); | ||
| 137 | |||
| 138 | padata->seq_nr = atomic_inc_return(&pd->seq_nr); | ||
| 139 | |||
| 140 | target_cpu = padata_cpu_hash(padata); | ||
| 141 | queue = per_cpu_ptr(pd->pqueue, target_cpu); | 138 | queue = per_cpu_ptr(pd->pqueue, target_cpu); |
| 142 | 139 | ||
| 143 | spin_lock(&queue->parallel.lock); | 140 | spin_lock(&queue->parallel.lock); |
| @@ -173,7 +170,7 @@ EXPORT_SYMBOL(padata_do_parallel); | |||
| 173 | static struct padata_priv *padata_get_next(struct parallel_data *pd) | 170 | static struct padata_priv *padata_get_next(struct parallel_data *pd) |
| 174 | { | 171 | { |
| 175 | int cpu, num_cpus; | 172 | int cpu, num_cpus; |
| 176 | int next_nr, next_index; | 173 | unsigned int next_nr, next_index; |
| 177 | struct padata_parallel_queue *queue, *next_queue; | 174 | struct padata_parallel_queue *queue, *next_queue; |
| 178 | struct padata_priv *padata; | 175 | struct padata_priv *padata; |
| 179 | struct padata_list *reorder; | 176 | struct padata_list *reorder; |
| @@ -189,14 +186,6 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) | |||
| 189 | cpu = padata_index_to_cpu(pd, next_index); | 186 | cpu = padata_index_to_cpu(pd, next_index); |
| 190 | next_queue = per_cpu_ptr(pd->pqueue, cpu); | 187 | next_queue = per_cpu_ptr(pd->pqueue, cpu); |
| 191 | 188 | ||
| 192 | if (unlikely(next_nr > pd->max_seq_nr)) { | ||
| 193 | next_nr = next_nr - pd->max_seq_nr - 1; | ||
| 194 | next_index = next_nr % num_cpus; | ||
| 195 | cpu = padata_index_to_cpu(pd, next_index); | ||
| 196 | next_queue = per_cpu_ptr(pd->pqueue, cpu); | ||
| 197 | pd->processed = 0; | ||
| 198 | } | ||
| 199 | |||
| 200 | padata = NULL; | 189 | padata = NULL; |
| 201 | 190 | ||
| 202 | reorder = &next_queue->reorder; | 191 | reorder = &next_queue->reorder; |
| @@ -205,8 +194,6 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) | |||
| 205 | padata = list_entry(reorder->list.next, | 194 | padata = list_entry(reorder->list.next, |
| 206 | struct padata_priv, list); | 195 | struct padata_priv, list); |
| 207 | 196 | ||
| 208 | BUG_ON(next_nr != padata->seq_nr); | ||
| 209 | |||
| 210 | spin_lock(&reorder->lock); | 197 | spin_lock(&reorder->lock); |
| 211 | list_del_init(&padata->list); | 198 | list_del_init(&padata->list); |
| 212 | atomic_dec(&pd->reorder_objects); | 199 | atomic_dec(&pd->reorder_objects); |
| @@ -230,6 +217,7 @@ out: | |||
| 230 | 217 | ||
| 231 | static void padata_reorder(struct parallel_data *pd) | 218 | static void padata_reorder(struct parallel_data *pd) |
| 232 | { | 219 | { |
| 220 | int cb_cpu; | ||
| 233 | struct padata_priv *padata; | 221 | struct padata_priv *padata; |
| 234 | struct padata_serial_queue *squeue; | 222 | struct padata_serial_queue *squeue; |
| 235 | struct padata_instance *pinst = pd->pinst; | 223 | struct padata_instance *pinst = pd->pinst; |
| @@ -270,13 +258,14 @@ static void padata_reorder(struct parallel_data *pd) | |||
| 270 | return; | 258 | return; |
| 271 | } | 259 | } |
| 272 | 260 | ||
| 273 | squeue = per_cpu_ptr(pd->squeue, padata->cb_cpu); | 261 | cb_cpu = padata->cb_cpu; |
| 262 | squeue = per_cpu_ptr(pd->squeue, cb_cpu); | ||
| 274 | 263 | ||
| 275 | spin_lock(&squeue->serial.lock); | 264 | spin_lock(&squeue->serial.lock); |
| 276 | list_add_tail(&padata->list, &squeue->serial.list); | 265 | list_add_tail(&padata->list, &squeue->serial.list); |
| 277 | spin_unlock(&squeue->serial.lock); | 266 | spin_unlock(&squeue->serial.lock); |
| 278 | 267 | ||
| 279 | queue_work_on(padata->cb_cpu, pinst->wq, &squeue->work); | 268 | queue_work_on(cb_cpu, pinst->wq, &squeue->work); |
| 280 | } | 269 | } |
| 281 | 270 | ||
| 282 | spin_unlock_bh(&pd->lock); | 271 | spin_unlock_bh(&pd->lock); |
| @@ -367,13 +356,13 @@ static int padata_setup_cpumasks(struct parallel_data *pd, | |||
| 367 | if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) | 356 | if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) |
| 368 | return -ENOMEM; | 357 | return -ENOMEM; |
| 369 | 358 | ||
| 370 | cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask); | 359 | cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); |
| 371 | if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { | 360 | if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { |
| 372 | free_cpumask_var(pd->cpumask.cbcpu); | 361 | free_cpumask_var(pd->cpumask.cbcpu); |
| 373 | return -ENOMEM; | 362 | return -ENOMEM; |
| 374 | } | 363 | } |
| 375 | 364 | ||
| 376 | cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask); | 365 | cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask); |
| 377 | return 0; | 366 | return 0; |
| 378 | } | 367 | } |
| 379 | 368 | ||
| @@ -400,7 +389,7 @@ static void padata_init_squeues(struct parallel_data *pd) | |||
| 400 | /* Initialize all percpu queues used by parallel workers */ | 389 | /* Initialize all percpu queues used by parallel workers */ |
| 401 | static void padata_init_pqueues(struct parallel_data *pd) | 390 | static void padata_init_pqueues(struct parallel_data *pd) |
| 402 | { | 391 | { |
| 403 | int cpu_index, num_cpus, cpu; | 392 | int cpu_index, cpu; |
| 404 | struct padata_parallel_queue *pqueue; | 393 | struct padata_parallel_queue *pqueue; |
| 405 | 394 | ||
| 406 | cpu_index = 0; | 395 | cpu_index = 0; |
| @@ -415,9 +404,6 @@ static void padata_init_pqueues(struct parallel_data *pd) | |||
| 415 | INIT_WORK(&pqueue->work, padata_parallel_worker); | 404 | INIT_WORK(&pqueue->work, padata_parallel_worker); |
| 416 | atomic_set(&pqueue->num_obj, 0); | 405 | atomic_set(&pqueue->num_obj, 0); |
| 417 | } | 406 | } |
| 418 | |||
| 419 | num_cpus = cpumask_weight(pd->cpumask.pcpu); | ||
| 420 | pd->max_seq_nr = num_cpus ? (MAX_SEQ_NR / num_cpus) * num_cpus - 1 : 0; | ||
| 421 | } | 407 | } |
| 422 | 408 | ||
| 423 | /* Allocate and initialize the internal cpumask dependend resources. */ | 409 | /* Allocate and initialize the internal cpumask dependend resources. */ |
| @@ -444,7 +430,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, | |||
| 444 | padata_init_pqueues(pd); | 430 | padata_init_pqueues(pd); |
| 445 | padata_init_squeues(pd); | 431 | padata_init_squeues(pd); |
| 446 | setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); | 432 | setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); |
| 447 | atomic_set(&pd->seq_nr, -1); | 433 | pd->seq_nr = 0; |
| 448 | atomic_set(&pd->reorder_objects, 0); | 434 | atomic_set(&pd->reorder_objects, 0); |
| 449 | atomic_set(&pd->refcnt, 0); | 435 | atomic_set(&pd->refcnt, 0); |
| 450 | pd->pinst = pinst; | 436 | pd->pinst = pinst; |
| @@ -580,7 +566,7 @@ EXPORT_SYMBOL(padata_unregister_cpumask_notifier); | |||
| 580 | static bool padata_validate_cpumask(struct padata_instance *pinst, | 566 | static bool padata_validate_cpumask(struct padata_instance *pinst, |
| 581 | const struct cpumask *cpumask) | 567 | const struct cpumask *cpumask) |
| 582 | { | 568 | { |
| 583 | if (!cpumask_intersects(cpumask, cpu_active_mask)) { | 569 | if (!cpumask_intersects(cpumask, cpu_online_mask)) { |
| 584 | pinst->flags |= PADATA_INVALID; | 570 | pinst->flags |= PADATA_INVALID; |
| 585 | return false; | 571 | return false; |
| 586 | } | 572 | } |
| @@ -694,7 +680,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu) | |||
| 694 | { | 680 | { |
| 695 | struct parallel_data *pd; | 681 | struct parallel_data *pd; |
| 696 | 682 | ||
| 697 | if (cpumask_test_cpu(cpu, cpu_active_mask)) { | 683 | if (cpumask_test_cpu(cpu, cpu_online_mask)) { |
| 698 | pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, | 684 | pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, |
| 699 | pinst->cpumask.cbcpu); | 685 | pinst->cpumask.cbcpu); |
| 700 | if (!pd) | 686 | if (!pd) |
| @@ -762,6 +748,9 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) | |||
| 762 | return -ENOMEM; | 748 | return -ENOMEM; |
| 763 | 749 | ||
| 764 | padata_replace(pinst, pd); | 750 | padata_replace(pinst, pd); |
| 751 | |||
| 752 | cpumask_clear_cpu(cpu, pd->cpumask.cbcpu); | ||
| 753 | cpumask_clear_cpu(cpu, pd->cpumask.pcpu); | ||
| 765 | } | 754 | } |
| 766 | 755 | ||
| 767 | return 0; | 756 | return 0; |
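The padata hunks above drop the atomic sequence number and its MAX_SEQ_NR wrap-around handling: padata_cpu_hash() now increments pd->seq_nr under pd->seq_lock and picks the target CPU as seq_nr modulo the weight of the parallel cpumask, relying on well-defined unsigned wrap-around. A small userspace sketch of that round-robin selection, with a pthread mutex standing in for the spinlock and invented names:

#include <pthread.h>
#include <stdio.h>

struct parallel_state {
	pthread_mutex_t seq_lock;	/* stands in for pd->seq_lock */
	unsigned int seq_nr;		/* unsigned, so wrap-around is harmless */
	unsigned int num_cpus;		/* weight of the parallel cpumask */
};

/* Pick the worker for the next job: seq_nr modulo the CPUs in use. */
static unsigned int next_cpu_index(struct parallel_state *ps)
{
	unsigned int index;

	pthread_mutex_lock(&ps->seq_lock);
	index = ps->seq_nr % ps->num_cpus;
	ps->seq_nr++;
	pthread_mutex_unlock(&ps->seq_lock);

	return index;
}

int main(void)
{
	struct parallel_state ps = {
		.seq_lock = PTHREAD_MUTEX_INITIALIZER,
		.seq_nr = 0,
		.num_cpus = 4,
	};

	for (int i = 0; i < 10; i++)
		printf("job %2d -> cpu index %u\n", i, next_cpu_index(&ps));
	return 0;
}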
diff --git a/kernel/panic.c b/kernel/panic.c index 80aed44e345a..8ed89a175d79 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -97,7 +97,7 @@ void panic(const char *fmt, ...) | |||
| 97 | /* | 97 | /* |
| 98 | * Avoid nested stack-dumping if a panic occurs during oops processing | 98 | * Avoid nested stack-dumping if a panic occurs during oops processing |
| 99 | */ | 99 | */ |
| 100 | if (!oops_in_progress) | 100 | if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) |
| 101 | dump_stack(); | 101 | dump_stack(); |
| 102 | #endif | 102 | #endif |
| 103 | 103 | ||
diff --git a/kernel/params.c b/kernel/params.c index 4bc965d8a1fe..f37d82631347 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
| @@ -15,7 +15,6 @@ | |||
| 15 | along with this program; if not, write to the Free Software | 15 | along with this program; if not, write to the Free Software |
| 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 17 | */ | 17 | */ |
| 18 | #include <linux/module.h> | ||
| 19 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
| 20 | #include <linux/string.h> | 19 | #include <linux/string.h> |
| 21 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
| @@ -88,6 +87,8 @@ static int parse_one(char *param, | |||
| 88 | char *val, | 87 | char *val, |
| 89 | const struct kernel_param *params, | 88 | const struct kernel_param *params, |
| 90 | unsigned num_params, | 89 | unsigned num_params, |
| 90 | s16 min_level, | ||
| 91 | s16 max_level, | ||
| 91 | int (*handle_unknown)(char *param, char *val)) | 92 | int (*handle_unknown)(char *param, char *val)) |
| 92 | { | 93 | { |
| 93 | unsigned int i; | 94 | unsigned int i; |
| @@ -96,6 +97,9 @@ static int parse_one(char *param, | |||
| 96 | /* Find parameter */ | 97 | /* Find parameter */ |
| 97 | for (i = 0; i < num_params; i++) { | 98 | for (i = 0; i < num_params; i++) { |
| 98 | if (parameq(param, params[i].name)) { | 99 | if (parameq(param, params[i].name)) { |
| 100 | if (params[i].level < min_level | ||
| 101 | || params[i].level > max_level) | ||
| 102 | return 0; | ||
| 99 | /* No one handled NULL, so do it here. */ | 103 | /* No one handled NULL, so do it here. */ |
| 100 | if (!val && params[i].ops->set != param_set_bool | 104 | if (!val && params[i].ops->set != param_set_bool |
| 101 | && params[i].ops->set != param_set_bint) | 105 | && params[i].ops->set != param_set_bint) |
| @@ -175,6 +179,8 @@ int parse_args(const char *name, | |||
| 175 | char *args, | 179 | char *args, |
| 176 | const struct kernel_param *params, | 180 | const struct kernel_param *params, |
| 177 | unsigned num, | 181 | unsigned num, |
| 182 | s16 min_level, | ||
| 183 | s16 max_level, | ||
| 178 | int (*unknown)(char *param, char *val)) | 184 | int (*unknown)(char *param, char *val)) |
| 179 | { | 185 | { |
| 180 | char *param, *val; | 186 | char *param, *val; |
| @@ -190,7 +196,8 @@ int parse_args(const char *name, | |||
| 190 | 196 | ||
| 191 | args = next_arg(args, ¶m, &val); | 197 | args = next_arg(args, ¶m, &val); |
| 192 | irq_was_disabled = irqs_disabled(); | 198 | irq_was_disabled = irqs_disabled(); |
| 193 | ret = parse_one(param, val, params, num, unknown); | 199 | ret = parse_one(param, val, params, num, |
| 200 | min_level, max_level, unknown); | ||
| 194 | if (irq_was_disabled && !irqs_disabled()) { | 201 | if (irq_was_disabled && !irqs_disabled()) { |
| 195 | printk(KERN_WARNING "parse_args(): option '%s' enabled " | 202 | printk(KERN_WARNING "parse_args(): option '%s' enabled " |
| 196 | "irq's!\n", param); | 203 | "irq's!\n", param); |
| @@ -298,35 +305,18 @@ EXPORT_SYMBOL(param_ops_charp); | |||
| 298 | /* Actually could be a bool or an int, for historical reasons. */ | 305 | /* Actually could be a bool or an int, for historical reasons. */ |
| 299 | int param_set_bool(const char *val, const struct kernel_param *kp) | 306 | int param_set_bool(const char *val, const struct kernel_param *kp) |
| 300 | { | 307 | { |
| 301 | bool v; | ||
| 302 | int ret; | ||
| 303 | |||
| 304 | /* No equals means "set"... */ | 308 | /* No equals means "set"... */ |
| 305 | if (!val) val = "1"; | 309 | if (!val) val = "1"; |
| 306 | 310 | ||
| 307 | /* One of =[yYnN01] */ | 311 | /* One of =[yYnN01] */ |
| 308 | ret = strtobool(val, &v); | 312 | return strtobool(val, kp->arg); |
| 309 | if (ret) | ||
| 310 | return ret; | ||
| 311 | |||
| 312 | if (kp->flags & KPARAM_ISBOOL) | ||
| 313 | *(bool *)kp->arg = v; | ||
| 314 | else | ||
| 315 | *(int *)kp->arg = v; | ||
| 316 | return 0; | ||
| 317 | } | 313 | } |
| 318 | EXPORT_SYMBOL(param_set_bool); | 314 | EXPORT_SYMBOL(param_set_bool); |
| 319 | 315 | ||
| 320 | int param_get_bool(char *buffer, const struct kernel_param *kp) | 316 | int param_get_bool(char *buffer, const struct kernel_param *kp) |
| 321 | { | 317 | { |
| 322 | bool val; | ||
| 323 | if (kp->flags & KPARAM_ISBOOL) | ||
| 324 | val = *(bool *)kp->arg; | ||
| 325 | else | ||
| 326 | val = *(int *)kp->arg; | ||
| 327 | |||
| 328 | /* Y and N chosen as being relatively non-coder friendly */ | 318 | /* Y and N chosen as being relatively non-coder friendly */ |
| 329 | return sprintf(buffer, "%c", val ? 'Y' : 'N'); | 319 | return sprintf(buffer, "%c", *(bool *)kp->arg ? 'Y' : 'N'); |
| 330 | } | 320 | } |
| 331 | EXPORT_SYMBOL(param_get_bool); | 321 | EXPORT_SYMBOL(param_get_bool); |
| 332 | 322 | ||
| @@ -344,7 +334,6 @@ int param_set_invbool(const char *val, const struct kernel_param *kp) | |||
| 344 | struct kernel_param dummy; | 334 | struct kernel_param dummy; |
| 345 | 335 | ||
| 346 | dummy.arg = &boolval; | 336 | dummy.arg = &boolval; |
| 347 | dummy.flags = KPARAM_ISBOOL; | ||
| 348 | ret = param_set_bool(val, &dummy); | 337 | ret = param_set_bool(val, &dummy); |
| 349 | if (ret == 0) | 338 | if (ret == 0) |
| 350 | *(bool *)kp->arg = !boolval; | 339 | *(bool *)kp->arg = !boolval; |
| @@ -373,7 +362,6 @@ int param_set_bint(const char *val, const struct kernel_param *kp) | |||
| 373 | /* Match bool exactly, by re-using it. */ | 362 | /* Match bool exactly, by re-using it. */ |
| 374 | boolkp = *kp; | 363 | boolkp = *kp; |
| 375 | boolkp.arg = &v; | 364 | boolkp.arg = &v; |
| 376 | boolkp.flags |= KPARAM_ISBOOL; | ||
| 377 | 365 | ||
| 378 | ret = param_set_bool(val, &boolkp); | 366 | ret = param_set_bool(val, &boolkp); |
| 379 | if (ret == 0) | 367 | if (ret == 0) |
| @@ -394,7 +382,7 @@ static int param_array(const char *name, | |||
| 394 | unsigned int min, unsigned int max, | 382 | unsigned int min, unsigned int max, |
| 395 | void *elem, int elemsize, | 383 | void *elem, int elemsize, |
| 396 | int (*set)(const char *, const struct kernel_param *kp), | 384 | int (*set)(const char *, const struct kernel_param *kp), |
| 397 | u16 flags, | 385 | s16 level, |
| 398 | unsigned int *num) | 386 | unsigned int *num) |
| 399 | { | 387 | { |
| 400 | int ret; | 388 | int ret; |
| @@ -404,7 +392,7 @@ static int param_array(const char *name, | |||
| 404 | /* Get the name right for errors. */ | 392 | /* Get the name right for errors. */ |
| 405 | kp.name = name; | 393 | kp.name = name; |
| 406 | kp.arg = elem; | 394 | kp.arg = elem; |
| 407 | kp.flags = flags; | 395 | kp.level = level; |
| 408 | 396 | ||
| 409 | *num = 0; | 397 | *num = 0; |
| 410 | /* We expect a comma-separated list of values. */ | 398 | /* We expect a comma-separated list of values. */ |
| @@ -445,7 +433,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp) | |||
| 445 | unsigned int temp_num; | 433 | unsigned int temp_num; |
| 446 | 434 | ||
| 447 | return param_array(kp->name, val, 1, arr->max, arr->elem, | 435 | return param_array(kp->name, val, 1, arr->max, arr->elem, |
| 448 | arr->elemsize, arr->ops->set, kp->flags, | 436 | arr->elemsize, arr->ops->set, kp->level, |
| 449 | arr->num ?: &temp_num); | 437 | arr->num ?: &temp_num); |
| 450 | } | 438 | } |
| 451 | 439 | ||
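parse_args()/parse_one() now carry a [min_level, max_level] window and silently skip any known parameter whose level falls outside it, so the same table can be walked in separate passes (the module.c hunk above parses module parameters with the full -32768..32767 range). A userspace sketch of that filtering; the table, the level values and the pass boundaries are illustrative, not the kernel's actual split:

#include <stdio.h>
#include <string.h>

struct param {
	const char *name;
	short level;			/* which pass may act on it */
	int (*set)(const char *val);
};

static int set_debug(const char *val) { printf("debug=%s\n", val); return 0; }
static int set_quiet(const char *val) { printf("quiet=%s\n", val); return 0; }

/*
 * Match "name=value" against the table, but only act on entries whose
 * level lies inside [min_level, max_level]; known parameters outside
 * the window are skipped without error, mirroring the new check.
 */
static int parse_one_arg(const char *name, const char *val,
			 const struct param *params, unsigned int n,
			 short min_level, short max_level)
{
	for (unsigned int i = 0; i < n; i++) {
		if (strcmp(name, params[i].name))
			continue;
		if (params[i].level < min_level || params[i].level > max_level)
			return 0;	/* wrong pass: silently skip */
		return params[i].set(val);
	}
	return -1;			/* unknown parameter */
}

int main(void)
{
	const struct param table[] = {
		{ "debug", -1, set_debug },	/* "early" parameter */
		{ "quiet",  0, set_quiet },	/* ordinary parameter */
	};

	/* First pass: act only on negative levels (an illustrative split). */
	parse_one_arg("debug", "1", table, 2, -32768, -1);
	parse_one_arg("quiet", "1", table, 2, -32768, -1);	/* skipped */

	/* Second pass: everything else. */
	parse_one_arg("quiet", "1", table, 2, 0, 32767);
	return 0;
}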
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a8968396046d..57bc1fd35b3c 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/acct.h> | 15 | #include <linux/acct.h> |
| 16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
| 17 | #include <linux/proc_fs.h> | 17 | #include <linux/proc_fs.h> |
| 18 | #include <linux/reboot.h> | ||
| 18 | 19 | ||
| 19 | #define BITS_PER_PAGE (PAGE_SIZE*8) | 20 | #define BITS_PER_PAGE (PAGE_SIZE*8) |
| 20 | 21 | ||
| @@ -168,13 +169,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
| 168 | while (nr > 0) { | 169 | while (nr > 0) { |
| 169 | rcu_read_lock(); | 170 | rcu_read_lock(); |
| 170 | 171 | ||
| 171 | /* | ||
| 172 | * Any nested-container's init processes won't ignore the | ||
| 173 | * SEND_SIG_NOINFO signal, see send_signal()->si_fromuser(). | ||
| 174 | */ | ||
| 175 | task = pid_task(find_vpid(nr), PIDTYPE_PID); | 172 | task = pid_task(find_vpid(nr), PIDTYPE_PID); |
| 176 | if (task) | 173 | if (task && !__fatal_signal_pending(task)) |
| 177 | send_sig_info(SIGKILL, SEND_SIG_NOINFO, task); | 174 | send_sig_info(SIGKILL, SEND_SIG_FORCED, task); |
| 178 | 175 | ||
| 179 | rcu_read_unlock(); | 176 | rcu_read_unlock(); |
| 180 | 177 | ||
| @@ -187,6 +184,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
| 187 | rc = sys_wait4(-1, NULL, __WALL, NULL); | 184 | rc = sys_wait4(-1, NULL, __WALL, NULL); |
| 188 | } while (rc != -ECHILD); | 185 | } while (rc != -ECHILD); |
| 189 | 186 | ||
| 187 | if (pid_ns->reboot) | ||
| 188 | current->signal->group_exit_code = pid_ns->reboot; | ||
| 189 | |||
| 190 | acct_exit_ns(pid_ns); | 190 | acct_exit_ns(pid_ns); |
| 191 | return; | 191 | return; |
| 192 | } | 192 | } |
| @@ -221,6 +221,35 @@ static struct ctl_table pid_ns_ctl_table[] = { | |||
| 221 | 221 | ||
| 222 | static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; | 222 | static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; |
| 223 | 223 | ||
| 224 | int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) | ||
| 225 | { | ||
| 226 | if (pid_ns == &init_pid_ns) | ||
| 227 | return 0; | ||
| 228 | |||
| 229 | switch (cmd) { | ||
| 230 | case LINUX_REBOOT_CMD_RESTART2: | ||
| 231 | case LINUX_REBOOT_CMD_RESTART: | ||
| 232 | pid_ns->reboot = SIGHUP; | ||
| 233 | break; | ||
| 234 | |||
| 235 | case LINUX_REBOOT_CMD_POWER_OFF: | ||
| 236 | case LINUX_REBOOT_CMD_HALT: | ||
| 237 | pid_ns->reboot = SIGINT; | ||
| 238 | break; | ||
| 239 | default: | ||
| 240 | return -EINVAL; | ||
| 241 | } | ||
| 242 | |||
| 243 | read_lock(&tasklist_lock); | ||
| 244 | force_sig(SIGKILL, pid_ns->child_reaper); | ||
| 245 | read_unlock(&tasklist_lock); | ||
| 246 | |||
| 247 | do_exit(0); | ||
| 248 | |||
| 249 | /* Not reached */ | ||
| 250 | return 0; | ||
| 251 | } | ||
| 252 | |||
| 224 | static __init int pid_namespaces_init(void) | 253 | static __init int pid_namespaces_init(void) |
| 225 | { | 254 | { |
| 226 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | 255 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); |
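reboot_pid_ns() above turns a container-initiated reboot(2) into an exit code for the namespace's init: RESTART/RESTART2 map to SIGHUP, HALT/POWER_OFF to SIGINT, any other command is rejected with -EINVAL, and the child reaper is then killed. A trivial userspace sketch of just the command-to-signal mapping, using the UAPI reboot constants; the function name is made up:

#include <linux/reboot.h>
#include <signal.h>
#include <stdio.h>

/* Signal a container's exiting init reports for each reboot(2) command. */
static int reboot_cmd_to_signal(unsigned int cmd)
{
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
	case LINUX_REBOOT_CMD_RESTART2:
		return SIGHUP;
	case LINUX_REBOOT_CMD_HALT:
	case LINUX_REBOOT_CMD_POWER_OFF:
		return SIGINT;
	default:
		return -1;	/* the kernel returns -EINVAL here */
	}
}

int main(void)
{
	printf("RESTART   -> %d\n", reboot_cmd_to_signal(LINUX_REBOOT_CMD_RESTART));
	printf("POWER_OFF -> %d\n", reboot_cmd_to_signal(LINUX_REBOOT_CMD_POWER_OFF));
	printf("CAD_ON    -> %d\n", reboot_cmd_to_signal(LINUX_REBOOT_CMD_CAD_ON));
	return 0;
}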
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 07e0e28ffba7..66d808ec5252 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
| @@ -1,7 +1,8 @@ | |||
| 1 | 1 | ||
| 2 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | 2 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG |
| 3 | 3 | ||
| 4 | obj-$(CONFIG_PM) += main.o qos.o | 4 | obj-y += qos.o |
| 5 | obj-$(CONFIG_PM) += main.o | ||
| 5 | obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o | 6 | obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o |
| 6 | obj-$(CONFIG_FREEZER) += process.o | 7 | obj-$(CONFIG_FREEZER) += process.o |
| 7 | obj-$(CONFIG_SUSPEND) += suspend.o | 8 | obj-$(CONFIG_SUSPEND) += suspend.o |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 6d6d28870335..e09dfbfeecee 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
| @@ -16,7 +16,6 @@ | |||
| 16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
| 17 | #include <linux/device.h> | 17 | #include <linux/device.h> |
| 18 | #include <linux/async.h> | 18 | #include <linux/async.h> |
| 19 | #include <linux/kmod.h> | ||
| 20 | #include <linux/delay.h> | 19 | #include <linux/delay.h> |
| 21 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
| 22 | #include <linux/mount.h> | 21 | #include <linux/mount.h> |
| @@ -245,8 +244,8 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop, | |||
| 245 | * create_image - Create a hibernation image. | 244 | * create_image - Create a hibernation image. |
| 246 | * @platform_mode: Whether or not to use the platform driver. | 245 | * @platform_mode: Whether or not to use the platform driver. |
| 247 | * | 246 | * |
| 248 | * Execute device drivers' .freeze_noirq() callbacks, create a hibernation image | 247 | * Execute device drivers' "late" and "noirq" freeze callbacks, create a |
| 249 | * and execute the drivers' .thaw_noirq() callbacks. | 248 | * hibernation image and run the drivers' "noirq" and "early" thaw callbacks. |
| 250 | * | 249 | * |
| 251 | * Control reappears in this routine after the subsequent restore. | 250 | * Control reappears in this routine after the subsequent restore. |
| 252 | */ | 251 | */ |
| @@ -254,7 +253,7 @@ static int create_image(int platform_mode) | |||
| 254 | { | 253 | { |
| 255 | int error; | 254 | int error; |
| 256 | 255 | ||
| 257 | error = dpm_suspend_noirq(PMSG_FREEZE); | 256 | error = dpm_suspend_end(PMSG_FREEZE); |
| 258 | if (error) { | 257 | if (error) { |
| 259 | printk(KERN_ERR "PM: Some devices failed to power down, " | 258 | printk(KERN_ERR "PM: Some devices failed to power down, " |
| 260 | "aborting hibernation\n"); | 259 | "aborting hibernation\n"); |
| @@ -306,7 +305,7 @@ static int create_image(int platform_mode) | |||
| 306 | Platform_finish: | 305 | Platform_finish: |
| 307 | platform_finish(platform_mode); | 306 | platform_finish(platform_mode); |
| 308 | 307 | ||
| 309 | dpm_resume_noirq(in_suspend ? | 308 | dpm_resume_start(in_suspend ? |
| 310 | (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); | 309 | (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); |
| 311 | 310 | ||
| 312 | return error; | 311 | return error; |
| @@ -343,13 +342,13 @@ int hibernation_snapshot(int platform_mode) | |||
| 343 | * successful freezer test. | 342 | * successful freezer test. |
| 344 | */ | 343 | */ |
| 345 | freezer_test_done = true; | 344 | freezer_test_done = true; |
| 346 | goto Cleanup; | 345 | goto Thaw; |
| 347 | } | 346 | } |
| 348 | 347 | ||
| 349 | error = dpm_prepare(PMSG_FREEZE); | 348 | error = dpm_prepare(PMSG_FREEZE); |
| 350 | if (error) { | 349 | if (error) { |
| 351 | dpm_complete(PMSG_RECOVER); | 350 | dpm_complete(PMSG_RECOVER); |
| 352 | goto Cleanup; | 351 | goto Thaw; |
| 353 | } | 352 | } |
| 354 | 353 | ||
| 355 | suspend_console(); | 354 | suspend_console(); |
| @@ -385,6 +384,8 @@ int hibernation_snapshot(int platform_mode) | |||
| 385 | platform_end(platform_mode); | 384 | platform_end(platform_mode); |
| 386 | return error; | 385 | return error; |
| 387 | 386 | ||
| 387 | Thaw: | ||
| 388 | thaw_kernel_threads(); | ||
| 388 | Cleanup: | 389 | Cleanup: |
| 389 | swsusp_free(); | 390 | swsusp_free(); |
| 390 | goto Close; | 391 | goto Close; |
| @@ -394,16 +395,16 @@ int hibernation_snapshot(int platform_mode) | |||
| 394 | * resume_target_kernel - Restore system state from a hibernation image. | 395 | * resume_target_kernel - Restore system state from a hibernation image. |
| 395 | * @platform_mode: Whether or not to use the platform driver. | 396 | * @platform_mode: Whether or not to use the platform driver. |
| 396 | * | 397 | * |
| 397 | * Execute device drivers' .freeze_noirq() callbacks, restore the contents of | 398 | * Execute device drivers' "noirq" and "late" freeze callbacks, restore the |
| 398 | * highmem that have not been restored yet from the image and run the low-level | 399 | * contents of highmem that have not been restored yet from the image and run |
| 399 | * code that will restore the remaining contents of memory and switch to the | 400 | * the low-level code that will restore the remaining contents of memory and |
| 400 | * just restored target kernel. | 401 | * switch to the just restored target kernel. |
| 401 | */ | 402 | */ |
| 402 | static int resume_target_kernel(bool platform_mode) | 403 | static int resume_target_kernel(bool platform_mode) |
| 403 | { | 404 | { |
| 404 | int error; | 405 | int error; |
| 405 | 406 | ||
| 406 | error = dpm_suspend_noirq(PMSG_QUIESCE); | 407 | error = dpm_suspend_end(PMSG_QUIESCE); |
| 407 | if (error) { | 408 | if (error) { |
| 408 | printk(KERN_ERR "PM: Some devices failed to power down, " | 409 | printk(KERN_ERR "PM: Some devices failed to power down, " |
| 409 | "aborting resume\n"); | 410 | "aborting resume\n"); |
| @@ -460,7 +461,7 @@ static int resume_target_kernel(bool platform_mode) | |||
| 460 | Cleanup: | 461 | Cleanup: |
| 461 | platform_restore_cleanup(platform_mode); | 462 | platform_restore_cleanup(platform_mode); |
| 462 | 463 | ||
| 463 | dpm_resume_noirq(PMSG_RECOVER); | 464 | dpm_resume_start(PMSG_RECOVER); |
| 464 | 465 | ||
| 465 | return error; | 466 | return error; |
| 466 | } | 467 | } |
| @@ -518,7 +519,7 @@ int hibernation_platform_enter(void) | |||
| 518 | goto Resume_devices; | 519 | goto Resume_devices; |
| 519 | } | 520 | } |
| 520 | 521 | ||
| 521 | error = dpm_suspend_noirq(PMSG_HIBERNATE); | 522 | error = dpm_suspend_end(PMSG_HIBERNATE); |
| 522 | if (error) | 523 | if (error) |
| 523 | goto Resume_devices; | 524 | goto Resume_devices; |
| 524 | 525 | ||
| @@ -549,7 +550,7 @@ int hibernation_platform_enter(void) | |||
| 549 | Platform_finish: | 550 | Platform_finish: |
| 550 | hibernation_ops->finish(); | 551 | hibernation_ops->finish(); |
| 551 | 552 | ||
| 552 | dpm_resume_noirq(PMSG_RESTORE); | 553 | dpm_resume_start(PMSG_RESTORE); |
| 553 | 554 | ||
| 554 | Resume_devices: | 555 | Resume_devices: |
| 555 | entering_platform_hibernation = false; | 556 | entering_platform_hibernation = false; |
| @@ -609,10 +610,6 @@ int hibernate(void) | |||
| 609 | if (error) | 610 | if (error) |
| 610 | goto Exit; | 611 | goto Exit; |
| 611 | 612 | ||
| 612 | error = usermodehelper_disable(); | ||
| 613 | if (error) | ||
| 614 | goto Exit; | ||
| 615 | |||
| 616 | /* Allocate memory management structures */ | 613 | /* Allocate memory management structures */ |
| 617 | error = create_basic_memory_bitmaps(); | 614 | error = create_basic_memory_bitmaps(); |
| 618 | if (error) | 615 | if (error) |
| @@ -624,15 +621,11 @@ int hibernate(void) | |||
| 624 | 621 | ||
| 625 | error = freeze_processes(); | 622 | error = freeze_processes(); |
| 626 | if (error) | 623 | if (error) |
| 627 | goto Finish; | 624 | goto Free_bitmaps; |
| 628 | 625 | ||
| 629 | error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); | 626 | error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); |
| 630 | if (error) | 627 | if (error || freezer_test_done) |
| 631 | goto Thaw; | ||
| 632 | if (freezer_test_done) { | ||
| 633 | freezer_test_done = false; | ||
| 634 | goto Thaw; | 628 | goto Thaw; |
| 635 | } | ||
| 636 | 629 | ||
| 637 | if (in_suspend) { | 630 | if (in_suspend) { |
| 638 | unsigned int flags = 0; | 631 | unsigned int flags = 0; |
| @@ -657,9 +650,12 @@ int hibernate(void) | |||
| 657 | 650 | ||
| 658 | Thaw: | 651 | Thaw: |
| 659 | thaw_processes(); | 652 | thaw_processes(); |
| 660 | Finish: | 653 | |
| 654 | /* Don't bother checking whether freezer_test_done is true */ | ||
| 655 | freezer_test_done = false; | ||
| 656 | |||
| 657 | Free_bitmaps: | ||
| 661 | free_basic_memory_bitmaps(); | 658 | free_basic_memory_bitmaps(); |
| 662 | usermodehelper_enable(); | ||
| 663 | Exit: | 659 | Exit: |
| 664 | pm_notifier_call_chain(PM_POST_HIBERNATION); | 660 | pm_notifier_call_chain(PM_POST_HIBERNATION); |
| 665 | pm_restore_console(); | 661 | pm_restore_console(); |
| @@ -774,15 +770,9 @@ static int software_resume(void) | |||
| 774 | if (error) | 770 | if (error) |
| 775 | goto close_finish; | 771 | goto close_finish; |
| 776 | 772 | ||
| 777 | error = usermodehelper_disable(); | ||
| 778 | if (error) | ||
| 779 | goto close_finish; | ||
| 780 | |||
| 781 | error = create_basic_memory_bitmaps(); | 773 | error = create_basic_memory_bitmaps(); |
| 782 | if (error) { | 774 | if (error) |
| 783 | usermodehelper_enable(); | ||
| 784 | goto close_finish; | 775 | goto close_finish; |
| 785 | } | ||
| 786 | 776 | ||
| 787 | pr_debug("PM: Preparing processes for restore.\n"); | 777 | pr_debug("PM: Preparing processes for restore.\n"); |
| 788 | error = freeze_processes(); | 778 | error = freeze_processes(); |
| @@ -803,7 +793,6 @@ static int software_resume(void) | |||
| 803 | thaw_processes(); | 793 | thaw_processes(); |
| 804 | Done: | 794 | Done: |
| 805 | free_basic_memory_bitmaps(); | 795 | free_basic_memory_bitmaps(); |
| 806 | usermodehelper_enable(); | ||
| 807 | Finish: | 796 | Finish: |
| 808 | pm_notifier_call_chain(PM_POST_RESTORE); | 797 | pm_notifier_call_chain(PM_POST_RESTORE); |
| 809 | pm_restore_console(); | 798 | pm_restore_console(); |
diff --git a/kernel/power/main.c b/kernel/power/main.c index 9824b41e5a18..1c12581f1c62 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
| @@ -165,16 +165,20 @@ static int suspend_stats_show(struct seq_file *s, void *unused) | |||
| 165 | last_errno %= REC_FAILED_NUM; | 165 | last_errno %= REC_FAILED_NUM; |
| 166 | last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; | 166 | last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; |
| 167 | last_step %= REC_FAILED_NUM; | 167 | last_step %= REC_FAILED_NUM; |
| 168 | seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n" | 168 | seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n" |
| 169 | "%s: %d\n%s: %d\n%s: %d\n%s: %d\n", | 169 | "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n", |
| 170 | "success", suspend_stats.success, | 170 | "success", suspend_stats.success, |
| 171 | "fail", suspend_stats.fail, | 171 | "fail", suspend_stats.fail, |
| 172 | "failed_freeze", suspend_stats.failed_freeze, | 172 | "failed_freeze", suspend_stats.failed_freeze, |
| 173 | "failed_prepare", suspend_stats.failed_prepare, | 173 | "failed_prepare", suspend_stats.failed_prepare, |
| 174 | "failed_suspend", suspend_stats.failed_suspend, | 174 | "failed_suspend", suspend_stats.failed_suspend, |
| 175 | "failed_suspend_late", | ||
| 176 | suspend_stats.failed_suspend_late, | ||
| 175 | "failed_suspend_noirq", | 177 | "failed_suspend_noirq", |
| 176 | suspend_stats.failed_suspend_noirq, | 178 | suspend_stats.failed_suspend_noirq, |
| 177 | "failed_resume", suspend_stats.failed_resume, | 179 | "failed_resume", suspend_stats.failed_resume, |
| 180 | "failed_resume_early", | ||
| 181 | suspend_stats.failed_resume_early, | ||
| 178 | "failed_resume_noirq", | 182 | "failed_resume_noirq", |
| 179 | suspend_stats.failed_resume_noirq); | 183 | suspend_stats.failed_resume_noirq); |
| 180 | seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", | 184 | seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", |
| @@ -287,16 +291,10 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 287 | 291 | ||
| 288 | #ifdef CONFIG_SUSPEND | 292 | #ifdef CONFIG_SUSPEND |
| 289 | for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { | 293 | for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { |
| 290 | if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) | 294 | if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { |
| 295 | error = pm_suspend(state); | ||
| 291 | break; | 296 | break; |
| 292 | } | 297 | } |
| 293 | if (state < PM_SUSPEND_MAX && *s) { | ||
| 294 | error = enter_state(state); | ||
| 295 | if (error) { | ||
| 296 | suspend_stats.fail++; | ||
| 297 | dpm_save_failed_errno(error); | ||
| 298 | } else | ||
| 299 | suspend_stats.success++; | ||
| 300 | } | 298 | } |
| 301 | #endif | 299 | #endif |
| 302 | 300 | ||
diff --git a/kernel/power/power.h b/kernel/power/power.h index 21724eee5206..98f3622d7407 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
| @@ -177,13 +177,11 @@ extern const char *const pm_states[]; | |||
| 177 | 177 | ||
| 178 | extern bool valid_state(suspend_state_t state); | 178 | extern bool valid_state(suspend_state_t state); |
| 179 | extern int suspend_devices_and_enter(suspend_state_t state); | 179 | extern int suspend_devices_and_enter(suspend_state_t state); |
| 180 | extern int enter_state(suspend_state_t state); | ||
| 181 | #else /* !CONFIG_SUSPEND */ | 180 | #else /* !CONFIG_SUSPEND */ |
| 182 | static inline int suspend_devices_and_enter(suspend_state_t state) | 181 | static inline int suspend_devices_and_enter(suspend_state_t state) |
| 183 | { | 182 | { |
| 184 | return -ENOSYS; | 183 | return -ENOSYS; |
| 185 | } | 184 | } |
| 186 | static inline int enter_state(suspend_state_t state) { return -ENOSYS; } | ||
| 187 | static inline bool valid_state(suspend_state_t state) { return false; } | 185 | static inline bool valid_state(suspend_state_t state) { return false; } |
| 188 | #endif /* !CONFIG_SUSPEND */ | 186 | #endif /* !CONFIG_SUSPEND */ |
| 189 | 187 | ||
| @@ -234,16 +232,14 @@ static inline int suspend_freeze_processes(void) | |||
| 234 | int error; | 232 | int error; |
| 235 | 233 | ||
| 236 | error = freeze_processes(); | 234 | error = freeze_processes(); |
| 237 | |||
| 238 | /* | 235 | /* |
| 239 | * freeze_processes() automatically thaws every task if freezing | 236 | * freeze_processes() automatically thaws every task if freezing |
| 240 | * fails. So we need not do anything extra upon error. | 237 | * fails. So we need not do anything extra upon error. |
| 241 | */ | 238 | */ |
| 242 | if (error) | 239 | if (error) |
| 243 | goto Finish; | 240 | return error; |
| 244 | 241 | ||
| 245 | error = freeze_kernel_threads(); | 242 | error = freeze_kernel_threads(); |
| 246 | |||
| 247 | /* | 243 | /* |
| 248 | * freeze_kernel_threads() thaws only kernel threads upon freezing | 244 | * freeze_kernel_threads() thaws only kernel threads upon freezing |
| 249 | * failure. So we have to thaw the userspace tasks ourselves. | 245 | * failure. So we have to thaw the userspace tasks ourselves. |
| @@ -251,7 +247,6 @@ static inline int suspend_freeze_processes(void) | |||
| 251 | if (error) | 247 | if (error) |
| 252 | thaw_processes(); | 248 | thaw_processes(); |
| 253 | 249 | ||
| 254 | Finish: | ||
| 255 | return error; | 250 | return error; |
| 256 | } | 251 | } |
| 257 | 252 | ||
diff --git a/kernel/power/process.c b/kernel/power/process.c index 7e426459e60a..19db29f67558 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/freezer.h> | 16 | #include <linux/freezer.h> |
| 17 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
| 18 | #include <linux/workqueue.h> | 18 | #include <linux/workqueue.h> |
| 19 | #include <linux/kmod.h> | ||
| 19 | 20 | ||
| 20 | /* | 21 | /* |
| 21 | * Timeout for stopping processes | 22 | * Timeout for stopping processes |
| @@ -53,11 +54,9 @@ static int try_to_freeze_tasks(bool user_only) | |||
| 53 | * It is "frozen enough". If the task does wake | 54 | * It is "frozen enough". If the task does wake |
| 54 | * up, it will immediately call try_to_freeze. | 55 | * up, it will immediately call try_to_freeze. |
| 55 | * | 56 | * |
| 56 | * Because freeze_task() goes through p's | 57 | * Because freeze_task() goes through p's scheduler lock, it's |
| 57 | * scheduler lock after setting TIF_FREEZE, it's | 58 | * guaranteed that TASK_STOPPED/TRACED -> TASK_RUNNING |
| 58 | * guaranteed that either we see TASK_RUNNING or | 59 | * transition can't race with task state testing here. |
| 59 | * try_to_stop() after schedule() in ptrace/signal | ||
| 60 | * stop sees TIF_FREEZE. | ||
| 61 | */ | 60 | */ |
| 62 | if (!task_is_stopped_or_traced(p) && | 61 | if (!task_is_stopped_or_traced(p) && |
| 63 | !freezer_should_skip(p)) | 62 | !freezer_should_skip(p)) |
| @@ -98,13 +97,15 @@ static int try_to_freeze_tasks(bool user_only) | |||
| 98 | elapsed_csecs / 100, elapsed_csecs % 100, | 97 | elapsed_csecs / 100, elapsed_csecs % 100, |
| 99 | todo - wq_busy, wq_busy); | 98 | todo - wq_busy, wq_busy); |
| 100 | 99 | ||
| 101 | read_lock(&tasklist_lock); | 100 | if (!wakeup) { |
| 102 | do_each_thread(g, p) { | 101 | read_lock(&tasklist_lock); |
| 103 | if (!wakeup && !freezer_should_skip(p) && | 102 | do_each_thread(g, p) { |
| 104 | p != current && freezing(p) && !frozen(p)) | 103 | if (p != current && !freezer_should_skip(p) |
| 105 | sched_show_task(p); | 104 | && freezing(p) && !frozen(p)) |
| 106 | } while_each_thread(g, p); | 105 | sched_show_task(p); |
| 107 | read_unlock(&tasklist_lock); | 106 | } while_each_thread(g, p); |
| 107 | read_unlock(&tasklist_lock); | ||
| 108 | } | ||
| 108 | } else { | 109 | } else { |
| 109 | printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100, | 110 | printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100, |
| 110 | elapsed_csecs % 100); | 111 | elapsed_csecs % 100); |
| @@ -122,6 +123,10 @@ int freeze_processes(void) | |||
| 122 | { | 123 | { |
| 123 | int error; | 124 | int error; |
| 124 | 125 | ||
| 126 | error = __usermodehelper_disable(UMH_FREEZING); | ||
| 127 | if (error) | ||
| 128 | return error; | ||
| 129 | |||
| 125 | if (!pm_freezing) | 130 | if (!pm_freezing) |
| 126 | atomic_inc(&system_freezing_cnt); | 131 | atomic_inc(&system_freezing_cnt); |
| 127 | 132 | ||
| @@ -130,6 +135,7 @@ int freeze_processes(void) | |||
| 130 | error = try_to_freeze_tasks(true); | 135 | error = try_to_freeze_tasks(true); |
| 131 | if (!error) { | 136 | if (!error) { |
| 132 | printk("done."); | 137 | printk("done."); |
| 138 | __usermodehelper_set_disable_depth(UMH_DISABLED); | ||
| 133 | oom_killer_disable(); | 139 | oom_killer_disable(); |
| 134 | } | 140 | } |
| 135 | printk("\n"); | 141 | printk("\n"); |
| @@ -187,6 +193,8 @@ void thaw_processes(void) | |||
| 187 | } while_each_thread(g, p); | 193 | } while_each_thread(g, p); |
| 188 | read_unlock(&tasklist_lock); | 194 | read_unlock(&tasklist_lock); |
| 189 | 195 | ||
| 196 | usermodehelper_enable(); | ||
| 197 | |||
| 190 | schedule(); | 198 | schedule(); |
| 191 | printk("done.\n"); | 199 | printk("done.\n"); |
| 192 | } | 200 | } |
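freeze_processes() now disables usermode helpers on its own (UMH_FREEZING while freezing, UMH_DISABLED once everything is frozen), and thaw_processes() re-enables them. A caller-side sketch under that assumption; the wrapper below is hypothetical and not from the patch:

#include <linux/freezer.h>

/* Hypothetical helper: quiesce user space, run some work, thaw again.
 * Callers no longer pair the freeze with usermodehelper_disable()/enable();
 * the freeze/thaw functions handle that internally after this patch. */
static int demo_quiesce_userspace(void (*work)(void))
{
	int error = freeze_processes();

	if (error)
		return error;

	work();			/* user tasks are frozen here */
	thaw_processes();	/* also re-enables usermode helpers */
	return 0;
}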
diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 995e3bd3417b..6a031e684026 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c | |||
| @@ -230,6 +230,21 @@ int pm_qos_request_active(struct pm_qos_request *req) | |||
| 230 | EXPORT_SYMBOL_GPL(pm_qos_request_active); | 230 | EXPORT_SYMBOL_GPL(pm_qos_request_active); |
| 231 | 231 | ||
| 232 | /** | 232 | /** |
| 233 | * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout | ||
| 234 | * @work: work struct for the delayed work (timeout) | ||
| 235 | * | ||
| 236 | * This cancels the timeout request by falling back to the default at timeout. | ||
| 237 | */ | ||
| 238 | static void pm_qos_work_fn(struct work_struct *work) | ||
| 239 | { | ||
| 240 | struct pm_qos_request *req = container_of(to_delayed_work(work), | ||
| 241 | struct pm_qos_request, | ||
| 242 | work); | ||
| 243 | |||
| 244 | pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE); | ||
| 245 | } | ||
| 246 | |||
| 247 | /** | ||
| 233 | * pm_qos_add_request - inserts new qos request into the list | 248 | * pm_qos_add_request - inserts new qos request into the list |
| 234 | * @req: pointer to a preallocated handle | 249 | * @req: pointer to a preallocated handle |
| 235 | * @pm_qos_class: identifies which list of qos request to use | 250 | * @pm_qos_class: identifies which list of qos request to use |
| @@ -253,6 +268,7 @@ void pm_qos_add_request(struct pm_qos_request *req, | |||
| 253 | return; | 268 | return; |
| 254 | } | 269 | } |
| 255 | req->pm_qos_class = pm_qos_class; | 270 | req->pm_qos_class = pm_qos_class; |
| 271 | INIT_DELAYED_WORK(&req->work, pm_qos_work_fn); | ||
| 256 | pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, | 272 | pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, |
| 257 | &req->node, PM_QOS_ADD_REQ, value); | 273 | &req->node, PM_QOS_ADD_REQ, value); |
| 258 | } | 274 | } |
| @@ -279,6 +295,9 @@ void pm_qos_update_request(struct pm_qos_request *req, | |||
| 279 | return; | 295 | return; |
| 280 | } | 296 | } |
| 281 | 297 | ||
| 298 | if (delayed_work_pending(&req->work)) | ||
| 299 | cancel_delayed_work_sync(&req->work); | ||
| 300 | |||
| 282 | if (new_value != req->node.prio) | 301 | if (new_value != req->node.prio) |
| 283 | pm_qos_update_target( | 302 | pm_qos_update_target( |
| 284 | pm_qos_array[req->pm_qos_class]->constraints, | 303 | pm_qos_array[req->pm_qos_class]->constraints, |
| @@ -287,6 +306,34 @@ void pm_qos_update_request(struct pm_qos_request *req, | |||
| 287 | EXPORT_SYMBOL_GPL(pm_qos_update_request); | 306 | EXPORT_SYMBOL_GPL(pm_qos_update_request); |
| 288 | 307 | ||
| 289 | /** | 308 | /** |
| 309 | * pm_qos_update_request_timeout - modifies an existing qos request temporarily. | ||
| 310 | * @req : handle to list element holding a pm_qos request to use | ||
| 311 | * @new_value: defines the temporary qos request | ||
| 312 | * @timeout_us: the effective duration of this qos request in usecs. | ||
| 313 | * | ||
| 314 | * After timeout_us, this qos request is cancelled automatically. | ||
| 315 | */ | ||
| 316 | void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value, | ||
| 317 | unsigned long timeout_us) | ||
| 318 | { | ||
| 319 | if (!req) | ||
| 320 | return; | ||
| 321 | if (WARN(!pm_qos_request_active(req), | ||
| 322 | "%s called for unknown object.", __func__)) | ||
| 323 | return; | ||
| 324 | |||
| 325 | if (delayed_work_pending(&req->work)) | ||
| 326 | cancel_delayed_work_sync(&req->work); | ||
| 327 | |||
| 328 | if (new_value != req->node.prio) | ||
| 329 | pm_qos_update_target( | ||
| 330 | pm_qos_array[req->pm_qos_class]->constraints, | ||
| 331 | &req->node, PM_QOS_UPDATE_REQ, new_value); | ||
| 332 | |||
| 333 | schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us)); | ||
| 334 | } | ||
| 335 | |||
| 336 | /** | ||
| 290 | * pm_qos_remove_request - modifies an existing qos request | 337 | * pm_qos_remove_request - modifies an existing qos request |
| 291 | * @req: handle to request list element | 338 | * @req: handle to request list element |
| 292 | * | 339 | * |
| @@ -305,6 +352,9 @@ void pm_qos_remove_request(struct pm_qos_request *req) | |||
| 305 | return; | 352 | return; |
| 306 | } | 353 | } |
| 307 | 354 | ||
| 355 | if (delayed_work_pending(&req->work)) | ||
| 356 | cancel_delayed_work_sync(&req->work); | ||
| 357 | |||
| 308 | pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, | 358 | pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, |
| 309 | &req->node, PM_QOS_REMOVE_REQ, | 359 | &req->node, PM_QOS_REMOVE_REQ, |
| 310 | PM_QOS_DEFAULT_VALUE); | 360 | PM_QOS_DEFAULT_VALUE); |
| @@ -469,21 +519,18 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | |||
| 469 | static int __init pm_qos_power_init(void) | 519 | static int __init pm_qos_power_init(void) |
| 470 | { | 520 | { |
| 471 | int ret = 0; | 521 | int ret = 0; |
| 522 | int i; | ||
| 472 | 523 | ||
| 473 | ret = register_pm_qos_misc(&cpu_dma_pm_qos); | 524 | BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES); |
| 474 | if (ret < 0) { | 525 | |
| 475 | printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n"); | 526 | for (i = 1; i < PM_QOS_NUM_CLASSES; i++) { |
| 476 | return ret; | 527 | ret = register_pm_qos_misc(pm_qos_array[i]); |
| 477 | } | 528 | if (ret < 0) { |
| 478 | ret = register_pm_qos_misc(&network_lat_pm_qos); | 529 | printk(KERN_ERR "pm_qos_param: %s setup failed\n", |
| 479 | if (ret < 0) { | 530 | pm_qos_array[i]->name); |
| 480 | printk(KERN_ERR "pm_qos_param: network_latency setup failed\n"); | 531 | return ret; |
| 481 | return ret; | 532 | } |
| 482 | } | 533 | } |
| 483 | ret = register_pm_qos_misc(&network_throughput_pm_qos); | ||
| 484 | if (ret < 0) | ||
| 485 | printk(KERN_ERR | ||
| 486 | "pm_qos_param: network_throughput setup failed\n"); | ||
| 487 | 534 | ||
| 488 | return ret; | 535 | return ret; |
| 489 | } | 536 | } |
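The new pm_qos_update_request_timeout() arms a delayed work item that drops the request back to PM_QOS_DEFAULT_VALUE once the timeout expires. A minimal driver-style sketch of how it might be used; the module and the 100 us / 2000 us numbers are made up for illustration:

#include <linux/module.h>
#include <linux/pm_qos.h>

static struct pm_qos_request demo_req;

static int __init demo_init(void)
{
	pm_qos_add_request(&demo_req, PM_QOS_CPU_DMA_LATENCY,
			   PM_QOS_DEFAULT_VALUE);
	/*
	 * Bound CPU DMA latency to 100 us around a burst of I/O; after
	 * 2000 us the delayed work restores PM_QOS_DEFAULT_VALUE without
	 * any further action from this driver.
	 */
	pm_qos_update_request_timeout(&demo_req, 100, 2000);
	return 0;
}

static void __exit demo_exit(void)
{
	pm_qos_remove_request(&demo_req);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");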
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 6a768e537001..0de28576807d 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
| @@ -711,9 +711,10 @@ static void mark_nosave_pages(struct memory_bitmap *bm) | |||
| 711 | list_for_each_entry(region, &nosave_regions, list) { | 711 | list_for_each_entry(region, &nosave_regions, list) { |
| 712 | unsigned long pfn; | 712 | unsigned long pfn; |
| 713 | 713 | ||
| 714 | pr_debug("PM: Marking nosave pages: %016lx - %016lx\n", | 714 | pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n", |
| 715 | region->start_pfn << PAGE_SHIFT, | 715 | (unsigned long long) region->start_pfn << PAGE_SHIFT, |
| 716 | region->end_pfn << PAGE_SHIFT); | 716 | ((unsigned long long) region->end_pfn << PAGE_SHIFT) |
| 717 | - 1); | ||
| 717 | 718 | ||
| 718 | for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) | 719 | for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) |
| 719 | if (pfn_valid(pfn)) { | 720 | if (pfn_valid(pfn)) { |
| @@ -1000,20 +1001,20 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
| 1000 | s_page = pfn_to_page(src_pfn); | 1001 | s_page = pfn_to_page(src_pfn); |
| 1001 | d_page = pfn_to_page(dst_pfn); | 1002 | d_page = pfn_to_page(dst_pfn); |
| 1002 | if (PageHighMem(s_page)) { | 1003 | if (PageHighMem(s_page)) { |
| 1003 | src = kmap_atomic(s_page, KM_USER0); | 1004 | src = kmap_atomic(s_page); |
| 1004 | dst = kmap_atomic(d_page, KM_USER1); | 1005 | dst = kmap_atomic(d_page); |
| 1005 | do_copy_page(dst, src); | 1006 | do_copy_page(dst, src); |
| 1006 | kunmap_atomic(dst, KM_USER1); | 1007 | kunmap_atomic(dst); |
| 1007 | kunmap_atomic(src, KM_USER0); | 1008 | kunmap_atomic(src); |
| 1008 | } else { | 1009 | } else { |
| 1009 | if (PageHighMem(d_page)) { | 1010 | if (PageHighMem(d_page)) { |
| 1010 | /* Page pointed to by src may contain some kernel | 1011 | /* Page pointed to by src may contain some kernel |
| 1011 | * data modified by kmap_atomic() | 1012 | * data modified by kmap_atomic() |
| 1012 | */ | 1013 | */ |
| 1013 | safe_copy_page(buffer, s_page); | 1014 | safe_copy_page(buffer, s_page); |
| 1014 | dst = kmap_atomic(d_page, KM_USER0); | 1015 | dst = kmap_atomic(d_page); |
| 1015 | copy_page(dst, buffer); | 1016 | copy_page(dst, buffer); |
| 1016 | kunmap_atomic(dst, KM_USER0); | 1017 | kunmap_atomic(dst); |
| 1017 | } else { | 1018 | } else { |
| 1018 | safe_copy_page(page_address(d_page), s_page); | 1019 | safe_copy_page(page_address(d_page), s_page); |
| 1019 | } | 1020 | } |
| @@ -1728,9 +1729,9 @@ int snapshot_read_next(struct snapshot_handle *handle) | |||
| 1728 | */ | 1729 | */ |
| 1729 | void *kaddr; | 1730 | void *kaddr; |
| 1730 | 1731 | ||
| 1731 | kaddr = kmap_atomic(page, KM_USER0); | 1732 | kaddr = kmap_atomic(page); |
| 1732 | copy_page(buffer, kaddr); | 1733 | copy_page(buffer, kaddr); |
| 1733 | kunmap_atomic(kaddr, KM_USER0); | 1734 | kunmap_atomic(kaddr); |
| 1734 | handle->buffer = buffer; | 1735 | handle->buffer = buffer; |
| 1735 | } else { | 1736 | } else { |
| 1736 | handle->buffer = page_address(page); | 1737 | handle->buffer = page_address(page); |
| @@ -2014,9 +2015,9 @@ static void copy_last_highmem_page(void) | |||
| 2014 | if (last_highmem_page) { | 2015 | if (last_highmem_page) { |
| 2015 | void *dst; | 2016 | void *dst; |
| 2016 | 2017 | ||
| 2017 | dst = kmap_atomic(last_highmem_page, KM_USER0); | 2018 | dst = kmap_atomic(last_highmem_page); |
| 2018 | copy_page(dst, buffer); | 2019 | copy_page(dst, buffer); |
| 2019 | kunmap_atomic(dst, KM_USER0); | 2020 | kunmap_atomic(dst); |
| 2020 | last_highmem_page = NULL; | 2021 | last_highmem_page = NULL; |
| 2021 | } | 2022 | } |
| 2022 | } | 2023 | } |
| @@ -2309,13 +2310,13 @@ swap_two_pages_data(struct page *p1, struct page *p2, void *buf) | |||
| 2309 | { | 2310 | { |
| 2310 | void *kaddr1, *kaddr2; | 2311 | void *kaddr1, *kaddr2; |
| 2311 | 2312 | ||
| 2312 | kaddr1 = kmap_atomic(p1, KM_USER0); | 2313 | kaddr1 = kmap_atomic(p1); |
| 2313 | kaddr2 = kmap_atomic(p2, KM_USER1); | 2314 | kaddr2 = kmap_atomic(p2); |
| 2314 | copy_page(buf, kaddr1); | 2315 | copy_page(buf, kaddr1); |
| 2315 | copy_page(kaddr1, kaddr2); | 2316 | copy_page(kaddr1, kaddr2); |
| 2316 | copy_page(kaddr2, buf); | 2317 | copy_page(kaddr2, buf); |
| 2317 | kunmap_atomic(kaddr2, KM_USER1); | 2318 | kunmap_atomic(kaddr2); |
| 2318 | kunmap_atomic(kaddr1, KM_USER0); | 2319 | kunmap_atomic(kaddr1); |
| 2319 | } | 2320 | } |
| 2320 | 2321 | ||
| 2321 | /** | 2322 | /** |
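The snapshot.c hunks above are a mechanical conversion to the single-argument kmap_atomic()/kunmap_atomic() API, which no longer takes KM_USERx slot arguments. A standalone sketch of the resulting idiom (the helper name is made up):

#include <linux/highmem.h>
#include <linux/mm.h>

/* Copy one (possibly highmem) page to another using the stack-based
 * atomic kmap API: map, copy, then unmap in reverse order. */
static void demo_copy_highmem_page(struct page *dst_page, struct page *src_page)
{
	void *src = kmap_atomic(src_page);
	void *dst = kmap_atomic(dst_page);

	copy_page(dst, src);

	kunmap_atomic(dst);
	kunmap_atomic(src);
}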
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4fd51beed879..396d262b8fd0 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <linux/delay.h> | 12 | #include <linux/delay.h> |
| 13 | #include <linux/errno.h> | 13 | #include <linux/errno.h> |
| 14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
| 15 | #include <linux/kmod.h> | ||
| 16 | #include <linux/console.h> | 15 | #include <linux/console.h> |
| 17 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
| 18 | #include <linux/syscalls.h> | 17 | #include <linux/syscalls.h> |
| @@ -37,8 +36,8 @@ const char *const pm_states[PM_SUSPEND_MAX] = { | |||
| 37 | static const struct platform_suspend_ops *suspend_ops; | 36 | static const struct platform_suspend_ops *suspend_ops; |
| 38 | 37 | ||
| 39 | /** | 38 | /** |
| 40 | * suspend_set_ops - Set the global suspend method table. | 39 | * suspend_set_ops - Set the global suspend method table. |
| 41 | * @ops: Pointer to ops structure. | 40 | * @ops: Suspend operations to use. |
| 42 | */ | 41 | */ |
| 43 | void suspend_set_ops(const struct platform_suspend_ops *ops) | 42 | void suspend_set_ops(const struct platform_suspend_ops *ops) |
| 44 | { | 43 | { |
| @@ -58,11 +57,11 @@ bool valid_state(suspend_state_t state) | |||
| 58 | } | 57 | } |
| 59 | 58 | ||
| 60 | /** | 59 | /** |
| 61 | * suspend_valid_only_mem - generic memory-only valid callback | 60 | * suspend_valid_only_mem - Generic memory-only valid callback. |
| 62 | * | 61 | * |
| 63 | * Platform drivers that implement mem suspend only and only need | 62 | * Platform drivers that implement mem suspend only and only need to check for |
| 64 | * to check for that in their .valid callback can use this instead | 63 | * that in their .valid() callback can use this instead of rolling their own |
| 65 | * of rolling their own .valid callback. | 64 | * .valid() callback. |
| 66 | */ | 65 | */ |
| 67 | int suspend_valid_only_mem(suspend_state_t state) | 66 | int suspend_valid_only_mem(suspend_state_t state) |
| 68 | { | 67 | { |
| @@ -83,10 +82,11 @@ static int suspend_test(int level) | |||
| 83 | } | 82 | } |
| 84 | 83 | ||
| 85 | /** | 84 | /** |
| 86 | * suspend_prepare - Do prep work before entering low-power state. | 85 | * suspend_prepare - Prepare for entering system sleep state. |
| 87 | * | 86 | * |
| 88 | * This is common code that is called for each state that we're entering. | 87 | * Common code run for every system sleep state that can be entered (except for |
| 89 | * Run suspend notifiers, allocate a console and stop all processes. | 88 | * hibernation). Run suspend notifiers, allocate the "suspend" console and |
| 89 | * freeze processes. | ||
| 90 | */ | 90 | */ |
| 91 | static int suspend_prepare(void) | 91 | static int suspend_prepare(void) |
| 92 | { | 92 | { |
| @@ -101,17 +101,12 @@ static int suspend_prepare(void) | |||
| 101 | if (error) | 101 | if (error) |
| 102 | goto Finish; | 102 | goto Finish; |
| 103 | 103 | ||
| 104 | error = usermodehelper_disable(); | ||
| 105 | if (error) | ||
| 106 | goto Finish; | ||
| 107 | |||
| 108 | error = suspend_freeze_processes(); | 104 | error = suspend_freeze_processes(); |
| 109 | if (!error) | 105 | if (!error) |
| 110 | return 0; | 106 | return 0; |
| 111 | 107 | ||
| 112 | suspend_stats.failed_freeze++; | 108 | suspend_stats.failed_freeze++; |
| 113 | dpm_save_failed_step(SUSPEND_FREEZE); | 109 | dpm_save_failed_step(SUSPEND_FREEZE); |
| 114 | usermodehelper_enable(); | ||
| 115 | Finish: | 110 | Finish: |
| 116 | pm_notifier_call_chain(PM_POST_SUSPEND); | 111 | pm_notifier_call_chain(PM_POST_SUSPEND); |
| 117 | pm_restore_console(); | 112 | pm_restore_console(); |
| @@ -131,9 +126,9 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) | |||
| 131 | } | 126 | } |
| 132 | 127 | ||
| 133 | /** | 128 | /** |
| 134 | * suspend_enter - enter the desired system sleep state. | 129 | * suspend_enter - Make the system enter the given sleep state. |
| 135 | * @state: State to enter | 130 | * @state: System sleep state to enter. |
| 136 | * @wakeup: Returns information that suspend should not be entered again. | 131 | * @wakeup: Returns information that the sleep state should not be re-entered. |
| 137 | * | 132 | * |
| 138 | * This function should be called after devices have been suspended. | 133 | * This function should be called after devices have been suspended. |
| 139 | */ | 134 | */ |
| @@ -147,7 +142,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |||
| 147 | goto Platform_finish; | 142 | goto Platform_finish; |
| 148 | } | 143 | } |
| 149 | 144 | ||
| 150 | error = dpm_suspend_noirq(PMSG_SUSPEND); | 145 | error = dpm_suspend_end(PMSG_SUSPEND); |
| 151 | if (error) { | 146 | if (error) { |
| 152 | printk(KERN_ERR "PM: Some devices failed to power down\n"); | 147 | printk(KERN_ERR "PM: Some devices failed to power down\n"); |
| 153 | goto Platform_finish; | 148 | goto Platform_finish; |
| @@ -189,7 +184,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |||
| 189 | if (suspend_ops->wake) | 184 | if (suspend_ops->wake) |
| 190 | suspend_ops->wake(); | 185 | suspend_ops->wake(); |
| 191 | 186 | ||
| 192 | dpm_resume_noirq(PMSG_RESUME); | 187 | dpm_resume_start(PMSG_RESUME); |
| 193 | 188 | ||
| 194 | Platform_finish: | 189 | Platform_finish: |
| 195 | if (suspend_ops->finish) | 190 | if (suspend_ops->finish) |
| @@ -199,9 +194,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |||
| 199 | } | 194 | } |
| 200 | 195 | ||
| 201 | /** | 196 | /** |
| 202 | * suspend_devices_and_enter - suspend devices and enter the desired system | 197 | * suspend_devices_and_enter - Suspend devices and enter system sleep state. |
| 203 | * sleep state. | 198 | * @state: System sleep state to enter. |
| 204 | * @state: state to enter | ||
| 205 | */ | 199 | */ |
| 206 | int suspend_devices_and_enter(suspend_state_t state) | 200 | int suspend_devices_and_enter(suspend_state_t state) |
| 207 | { | 201 | { |
| @@ -251,30 +245,27 @@ int suspend_devices_and_enter(suspend_state_t state) | |||
| 251 | } | 245 | } |
| 252 | 246 | ||
| 253 | /** | 247 | /** |
| 254 | * suspend_finish - Do final work before exiting suspend sequence. | 248 | * suspend_finish - Clean up before finishing the suspend sequence. |
| 255 | * | 249 | * |
| 256 | * Call platform code to clean up, restart processes, and free the | 250 | * Call platform code to clean up, restart processes, and free the console that |
| 257 | * console that we've allocated. This is not called for suspend-to-disk. | 251 | * we've allocated. This routine is not called for hibernation. |
| 258 | */ | 252 | */ |
| 259 | static void suspend_finish(void) | 253 | static void suspend_finish(void) |
| 260 | { | 254 | { |
| 261 | suspend_thaw_processes(); | 255 | suspend_thaw_processes(); |
| 262 | usermodehelper_enable(); | ||
| 263 | pm_notifier_call_chain(PM_POST_SUSPEND); | 256 | pm_notifier_call_chain(PM_POST_SUSPEND); |
| 264 | pm_restore_console(); | 257 | pm_restore_console(); |
| 265 | } | 258 | } |
| 266 | 259 | ||
| 267 | /** | 260 | /** |
| 268 | * enter_state - Do common work of entering low-power state. | 261 | * enter_state - Do common work needed to enter system sleep state. |
| 269 | * @state: pm_state structure for state we're entering. | 262 | * @state: System sleep state to enter. |
| 270 | * | 263 | * |
| 271 | * Make sure we're the only ones trying to enter a sleep state. Fail | 264 | * Make sure that no one else is trying to put the system into a sleep state. |
| 272 | * if someone has beat us to it, since we don't want anything weird to | 265 | * Fail if that's not the case. Otherwise, prepare for system suspend, make the |
| 273 | * happen when we wake up. | 266 | * system enter the given sleep state and clean up after wakeup. |
| 274 | * Then, do the setup for suspend, enter the state, and cleaup (after | ||
| 275 | * we've woken up). | ||
| 276 | */ | 267 | */ |
| 277 | int enter_state(suspend_state_t state) | 268 | static int enter_state(suspend_state_t state) |
| 278 | { | 269 | { |
| 279 | int error; | 270 | int error; |
| 280 | 271 | ||
| @@ -310,24 +301,26 @@ int enter_state(suspend_state_t state) | |||
| 310 | } | 301 | } |
| 311 | 302 | ||
| 312 | /** | 303 | /** |
| 313 | * pm_suspend - Externally visible function for suspending system. | 304 | * pm_suspend - Externally visible function for suspending the system. |
| 314 | * @state: Enumerated value of state to enter. | 305 | * @state: System sleep state to enter. |
| 315 | * | 306 | * |
| 316 | * Determine whether or not value is within range, get state | 307 | * Check if the value of @state represents one of the supported states, |
| 317 | * structure, and enter (above). | 308 | * execute enter_state() and update system suspend statistics. |
| 318 | */ | 309 | */ |
| 319 | int pm_suspend(suspend_state_t state) | 310 | int pm_suspend(suspend_state_t state) |
| 320 | { | 311 | { |
| 321 | int ret; | 312 | int error; |
| 322 | if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) { | 313 | |
| 323 | ret = enter_state(state); | 314 | if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) |
| 324 | if (ret) { | 315 | return -EINVAL; |
| 325 | suspend_stats.fail++; | 316 | |
| 326 | dpm_save_failed_errno(ret); | 317 | error = enter_state(state); |
| 327 | } else | 318 | if (error) { |
| 328 | suspend_stats.success++; | 319 | suspend_stats.fail++; |
| 329 | return ret; | 320 | dpm_save_failed_errno(error); |
| 321 | } else { | ||
| 322 | suspend_stats.success++; | ||
| 330 | } | 323 | } |
| 331 | return -EINVAL; | 324 | return error; |
| 332 | } | 325 | } |
| 333 | EXPORT_SYMBOL(pm_suspend); | 326 | EXPORT_SYMBOL(pm_suspend); |
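With enter_state() made static and the range check plus suspend_stats accounting folded into pm_suspend(), in-kernel users reduce to a single call. A hedged caller sketch; the wrapper name is hypothetical:

#include <linux/printk.h>
#include <linux/suspend.h>

/* Request a given sleep state; out-of-range states now fail with -EINVAL
 * inside pm_suspend(), and the success/fail counters are updated there too. */
static int demo_try_suspend(suspend_state_t state)
{
	int error = pm_suspend(state);

	if (error)
		pr_info("demo: pm_suspend(%d) returned %d\n", state, error);
	return error;
}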
diff --git a/kernel/power/user.c b/kernel/power/user.c index 3e100075b13c..91b0fd021a95 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <linux/suspend.h> | 12 | #include <linux/suspend.h> |
| 13 | #include <linux/syscalls.h> | 13 | #include <linux/syscalls.h> |
| 14 | #include <linux/reboot.h> | 14 | #include <linux/reboot.h> |
| 15 | #include <linux/kmod.h> | ||
| 16 | #include <linux/string.h> | 15 | #include <linux/string.h> |
| 17 | #include <linux/device.h> | 16 | #include <linux/device.h> |
| 18 | #include <linux/miscdevice.h> | 17 | #include <linux/miscdevice.h> |
| @@ -222,14 +221,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 222 | sys_sync(); | 221 | sys_sync(); |
| 223 | printk("done.\n"); | 222 | printk("done.\n"); |
| 224 | 223 | ||
| 225 | error = usermodehelper_disable(); | ||
| 226 | if (error) | ||
| 227 | break; | ||
| 228 | |||
| 229 | error = freeze_processes(); | 224 | error = freeze_processes(); |
| 230 | if (error) | 225 | if (!error) |
| 231 | usermodehelper_enable(); | ||
| 232 | else | ||
| 233 | data->frozen = 1; | 226 | data->frozen = 1; |
| 234 | break; | 227 | break; |
| 235 | 228 | ||
| @@ -238,7 +231,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 238 | break; | 231 | break; |
| 239 | pm_restore_gfp_mask(); | 232 | pm_restore_gfp_mask(); |
| 240 | thaw_processes(); | 233 | thaw_processes(); |
| 241 | usermodehelper_enable(); | ||
| 242 | data->frozen = 0; | 234 | data->frozen = 0; |
| 243 | break; | 235 | break; |
| 244 | 236 | ||
| @@ -249,16 +241,10 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 249 | } | 241 | } |
| 250 | pm_restore_gfp_mask(); | 242 | pm_restore_gfp_mask(); |
| 251 | error = hibernation_snapshot(data->platform_support); | 243 | error = hibernation_snapshot(data->platform_support); |
| 252 | if (error) { | 244 | if (!error) { |
| 253 | thaw_kernel_threads(); | ||
| 254 | } else { | ||
| 255 | error = put_user(in_suspend, (int __user *)arg); | 245 | error = put_user(in_suspend, (int __user *)arg); |
| 256 | if (!error && !freezer_test_done) | 246 | data->ready = !freezer_test_done && !error; |
| 257 | data->ready = 1; | 247 | freezer_test_done = false; |
| 258 | if (freezer_test_done) { | ||
| 259 | freezer_test_done = false; | ||
| 260 | thaw_kernel_threads(); | ||
| 261 | } | ||
| 262 | } | 248 | } |
| 263 | break; | 249 | break; |
| 264 | 250 | ||
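On the uswsusp side, SNAPSHOT_FREEZE no longer needs a separate usermode-helper step in the kernel, so the ioctl either leaves the system frozen or fails cleanly. A userspace sketch of that call, illustrative only:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/suspend_ioctls.h>

/* Freeze user space through /dev/snapshot, as a uswsusp-style tool would. */
int snapshot_freeze(void)
{
	int fd, ret;

	fd = open("/dev/snapshot", O_RDONLY);
	if (fd < 0) {
		perror("open /dev/snapshot");
		return -1;
	}
	ret = ioctl(fd, SNAPSHOT_FREEZE);
	if (ret)
		perror("SNAPSHOT_FREEZE");
	close(fd);
	return ret;
}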
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 00ab2ca5ed11..ee8d49b9c309 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -231,26 +231,22 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
| 231 | } | 231 | } |
| 232 | 232 | ||
| 233 | static int ptrace_attach(struct task_struct *task, long request, | 233 | static int ptrace_attach(struct task_struct *task, long request, |
| 234 | unsigned long addr, | ||
| 234 | unsigned long flags) | 235 | unsigned long flags) |
| 235 | { | 236 | { |
| 236 | bool seize = (request == PTRACE_SEIZE); | 237 | bool seize = (request == PTRACE_SEIZE); |
| 237 | int retval; | 238 | int retval; |
| 238 | 239 | ||
| 239 | /* | ||
| 240 | * SEIZE will enable new ptrace behaviors which will be implemented | ||
| 241 | * gradually. SEIZE_DEVEL is used to prevent applications | ||
| 242 | * expecting full SEIZE behaviors trapping on kernel commits which | ||
| 243 | * are still in the process of implementing them. | ||
| 244 | * | ||
| 245 | * Only test programs for new ptrace behaviors being implemented | ||
| 246 | * should set SEIZE_DEVEL. If unset, SEIZE will fail with -EIO. | ||
| 247 | * | ||
| 248 | * Once SEIZE behaviors are completely implemented, this flag and | ||
| 249 | * the following test will be removed. | ||
| 250 | */ | ||
| 251 | retval = -EIO; | 240 | retval = -EIO; |
| 252 | if (seize && !(flags & PTRACE_SEIZE_DEVEL)) | 241 | if (seize) { |
| 253 | goto out; | 242 | if (addr != 0) |
| 243 | goto out; | ||
| 244 | if (flags & ~(unsigned long)PTRACE_O_MASK) | ||
| 245 | goto out; | ||
| 246 | flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); | ||
| 247 | } else { | ||
| 248 | flags = PT_PTRACED; | ||
| 249 | } | ||
| 254 | 250 | ||
| 255 | audit_ptrace(task); | 251 | audit_ptrace(task); |
| 256 | 252 | ||
| @@ -262,7 +258,7 @@ static int ptrace_attach(struct task_struct *task, long request, | |||
| 262 | 258 | ||
| 263 | /* | 259 | /* |
| 264 | * Protect exec's credential calculations against our interference; | 260 | * Protect exec's credential calculations against our interference; |
| 265 | * interference; SUID, SGID and LSM creds get determined differently | 261 | * SUID, SGID and LSM creds get determined differently |
| 266 | * under ptrace. | 262 | * under ptrace. |
| 267 | */ | 263 | */ |
| 268 | retval = -ERESTARTNOINTR; | 264 | retval = -ERESTARTNOINTR; |
| @@ -282,11 +278,11 @@ static int ptrace_attach(struct task_struct *task, long request, | |||
| 282 | if (task->ptrace) | 278 | if (task->ptrace) |
| 283 | goto unlock_tasklist; | 279 | goto unlock_tasklist; |
| 284 | 280 | ||
| 285 | task->ptrace = PT_PTRACED; | ||
| 286 | if (seize) | 281 | if (seize) |
| 287 | task->ptrace |= PT_SEIZED; | 282 | flags |= PT_SEIZED; |
| 288 | if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) | 283 | if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) |
| 289 | task->ptrace |= PT_PTRACE_CAP; | 284 | flags |= PT_PTRACE_CAP; |
| 285 | task->ptrace = flags; | ||
| 290 | 286 | ||
| 291 | __ptrace_link(task, current); | 287 | __ptrace_link(task, current); |
| 292 | 288 | ||
| @@ -528,30 +524,18 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds | |||
| 528 | 524 | ||
| 529 | static int ptrace_setoptions(struct task_struct *child, unsigned long data) | 525 | static int ptrace_setoptions(struct task_struct *child, unsigned long data) |
| 530 | { | 526 | { |
| 531 | child->ptrace &= ~PT_TRACE_MASK; | 527 | unsigned flags; |
| 532 | 528 | ||
| 533 | if (data & PTRACE_O_TRACESYSGOOD) | 529 | if (data & ~(unsigned long)PTRACE_O_MASK) |
| 534 | child->ptrace |= PT_TRACESYSGOOD; | 530 | return -EINVAL; |
| 535 | |||
| 536 | if (data & PTRACE_O_TRACEFORK) | ||
| 537 | child->ptrace |= PT_TRACE_FORK; | ||
| 538 | |||
| 539 | if (data & PTRACE_O_TRACEVFORK) | ||
| 540 | child->ptrace |= PT_TRACE_VFORK; | ||
| 541 | |||
| 542 | if (data & PTRACE_O_TRACECLONE) | ||
| 543 | child->ptrace |= PT_TRACE_CLONE; | ||
| 544 | |||
| 545 | if (data & PTRACE_O_TRACEEXEC) | ||
| 546 | child->ptrace |= PT_TRACE_EXEC; | ||
| 547 | |||
| 548 | if (data & PTRACE_O_TRACEVFORKDONE) | ||
| 549 | child->ptrace |= PT_TRACE_VFORK_DONE; | ||
| 550 | 531 | ||
| 551 | if (data & PTRACE_O_TRACEEXIT) | 532 | /* Avoid intermediate state when all opts are cleared */ |
| 552 | child->ptrace |= PT_TRACE_EXIT; | 533 | flags = child->ptrace; |
| 534 | flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT); | ||
| 535 | flags |= (data << PT_OPT_FLAG_SHIFT); | ||
| 536 | child->ptrace = flags; | ||
| 553 | 537 | ||
| 554 | return (data & ~PTRACE_O_MASK) ? -EINVAL : 0; | 538 | return 0; |
| 555 | } | 539 | } |
| 556 | 540 | ||
| 557 | static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info) | 541 | static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info) |
| @@ -891,7 +875,7 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, | |||
| 891 | } | 875 | } |
| 892 | 876 | ||
| 893 | if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { | 877 | if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { |
| 894 | ret = ptrace_attach(child, request, data); | 878 | ret = ptrace_attach(child, request, addr, data); |
| 895 | /* | 879 | /* |
| 896 | * Some architectures need to do book-keeping after | 880 | * Some architectures need to do book-keeping after |
| 897 | * a ptrace attach. | 881 | * a ptrace attach. |
| @@ -1034,7 +1018,7 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | |||
| 1034 | } | 1018 | } |
| 1035 | 1019 | ||
| 1036 | if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { | 1020 | if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { |
| 1037 | ret = ptrace_attach(child, request, data); | 1021 | ret = ptrace_attach(child, request, addr, data); |
| 1038 | /* | 1022 | /* |
| 1039 | * Some architectures need to do book-keeping after | 1023 | * Some architectures need to do book-keeping after |
| 1040 | * a ptrace attach. | 1024 | * a ptrace attach. |
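After this hunk, PTRACE_SEIZE no longer requires PTRACE_SEIZE_DEVEL: addr must be zero and the data argument carries PTRACE_O_* options directly. A userspace sketch; the fallback define matches the PTRACE_SEIZE value from <linux/ptrace.h> and is only there for older libc headers:

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>

#ifndef PTRACE_SEIZE
#define PTRACE_SEIZE 0x4206
#endif

/* Attach to a running task without stopping it; addr (3rd argument) must be
 * 0 and the options go straight into data (4th argument). */
int seize_task(pid_t pid)
{
	if (ptrace(PTRACE_SEIZE, pid, 0, PTRACE_O_TRACEEXIT) == -1) {
		perror("PTRACE_SEIZE");
		return -1;
	}
	return 0;
}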
diff --git a/kernel/resource.c b/kernel/resource.c index 7640b3a947d0..7e8ea66a8c01 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
| @@ -749,6 +749,7 @@ int adjust_resource(struct resource *res, resource_size_t start, resource_size_t | |||
| 749 | write_unlock(&resource_lock); | 749 | write_unlock(&resource_lock); |
| 750 | return result; | 750 | return result; |
| 751 | } | 751 | } |
| 752 | EXPORT_SYMBOL(adjust_resource); | ||
| 752 | 753 | ||
| 753 | static void __init __reserve_region_with_split(struct resource *root, | 754 | static void __init __reserve_region_with_split(struct resource *root, |
| 754 | resource_size_t start, resource_size_t end, | 755 | resource_size_t start, resource_size_t end, |
| @@ -792,8 +793,6 @@ void __init reserve_region_with_split(struct resource *root, | |||
| 792 | write_unlock(&resource_lock); | 793 | write_unlock(&resource_lock); |
| 793 | } | 794 | } |
| 794 | 795 | ||
| 795 | EXPORT_SYMBOL(adjust_resource); | ||
| 796 | |||
| 797 | /** | 796 | /** |
| 798 | * resource_alignment - calculate resource's alignment | 797 | * resource_alignment - calculate resource's alignment |
| 799 | * @res: resource pointer | 798 | * @res: resource pointer |
diff --git a/kernel/rwsem.c b/kernel/rwsem.c index b152f74f02de..6850f53e02d8 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include <linux/export.h> | 10 | #include <linux/export.h> |
| 11 | #include <linux/rwsem.h> | 11 | #include <linux/rwsem.h> |
| 12 | 12 | ||
| 13 | #include <asm/system.h> | ||
| 14 | #include <linux/atomic.h> | 13 | #include <linux/atomic.h> |
| 15 | 14 | ||
| 16 | /* | 15 | /* |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a35cb8dbd8c4..4603b9d8f30a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -71,7 +71,9 @@ | |||
| 71 | #include <linux/ftrace.h> | 71 | #include <linux/ftrace.h> |
| 72 | #include <linux/slab.h> | 72 | #include <linux/slab.h> |
| 73 | #include <linux/init_task.h> | 73 | #include <linux/init_task.h> |
| 74 | #include <linux/binfmts.h> | ||
| 74 | 75 | ||
| 76 | #include <asm/switch_to.h> | ||
| 75 | #include <asm/tlb.h> | 77 | #include <asm/tlb.h> |
| 76 | #include <asm/irq_regs.h> | 78 | #include <asm/irq_regs.h> |
| 77 | #include <asm/mutex.h> | 79 | #include <asm/mutex.h> |
| @@ -1263,29 +1265,59 @@ EXPORT_SYMBOL_GPL(kick_process); | |||
| 1263 | */ | 1265 | */ |
| 1264 | static int select_fallback_rq(int cpu, struct task_struct *p) | 1266 | static int select_fallback_rq(int cpu, struct task_struct *p) |
| 1265 | { | 1267 | { |
| 1266 | int dest_cpu; | ||
| 1267 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); | 1268 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); |
| 1269 | enum { cpuset, possible, fail } state = cpuset; | ||
| 1270 | int dest_cpu; | ||
| 1268 | 1271 | ||
| 1269 | /* Look for allowed, online CPU in same node. */ | 1272 | /* Look for allowed, online CPU in same node. */ |
| 1270 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) | 1273 | for_each_cpu(dest_cpu, nodemask) { |
| 1274 | if (!cpu_online(dest_cpu)) | ||
| 1275 | continue; | ||
| 1276 | if (!cpu_active(dest_cpu)) | ||
| 1277 | continue; | ||
| 1271 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | 1278 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) |
| 1272 | return dest_cpu; | 1279 | return dest_cpu; |
| 1280 | } | ||
| 1281 | |||
| 1282 | for (;;) { | ||
| 1283 | /* Any allowed, online CPU? */ | ||
| 1284 | for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { | ||
| 1285 | if (!cpu_online(dest_cpu)) | ||
| 1286 | continue; | ||
| 1287 | if (!cpu_active(dest_cpu)) | ||
| 1288 | continue; | ||
| 1289 | goto out; | ||
| 1290 | } | ||
| 1273 | 1291 | ||
| 1274 | /* Any allowed, online CPU? */ | 1292 | switch (state) { |
| 1275 | dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); | 1293 | case cpuset: |
| 1276 | if (dest_cpu < nr_cpu_ids) | 1294 | /* No more Mr. Nice Guy. */ |
| 1277 | return dest_cpu; | 1295 | cpuset_cpus_allowed_fallback(p); |
| 1296 | state = possible; | ||
| 1297 | break; | ||
| 1278 | 1298 | ||
| 1279 | /* No more Mr. Nice Guy. */ | 1299 | case possible: |
| 1280 | dest_cpu = cpuset_cpus_allowed_fallback(p); | 1300 | do_set_cpus_allowed(p, cpu_possible_mask); |
| 1281 | /* | 1301 | state = fail; |
| 1282 | * Don't tell them about moving exiting tasks or | 1302 | break; |
| 1283 | * kernel threads (both mm NULL), since they never | 1303 | |
| 1284 | * leave kernel. | 1304 | case fail: |
| 1285 | */ | 1305 | BUG(); |
| 1286 | if (p->mm && printk_ratelimit()) { | 1306 | break; |
| 1287 | printk_sched("process %d (%s) no longer affine to cpu%d\n", | 1307 | } |
| 1288 | task_pid_nr(p), p->comm, cpu); | 1308 | } |
| 1309 | |||
| 1310 | out: | ||
| 1311 | if (state != cpuset) { | ||
| 1312 | /* | ||
| 1313 | * Don't tell them about moving exiting tasks or | ||
| 1314 | * kernel threads (both mm NULL), since they never | ||
| 1315 | * leave kernel. | ||
| 1316 | */ | ||
| 1317 | if (p->mm && printk_ratelimit()) { | ||
| 1318 | printk_sched("process %d (%s) no longer affine to cpu%d\n", | ||
| 1319 | task_pid_nr(p), p->comm, cpu); | ||
| 1320 | } | ||
| 1289 | } | 1321 | } |
| 1290 | 1322 | ||
| 1291 | return dest_cpu; | 1323 | return dest_cpu; |
| @@ -1932,6 +1964,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 1932 | local_irq_enable(); | 1964 | local_irq_enable(); |
| 1933 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 1965 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ |
| 1934 | finish_lock_switch(rq, prev); | 1966 | finish_lock_switch(rq, prev); |
| 1967 | finish_arch_post_lock_switch(); | ||
| 1935 | 1968 | ||
| 1936 | fire_sched_in_preempt_notifiers(current); | 1969 | fire_sched_in_preempt_notifiers(current); |
| 1937 | if (mm) | 1970 | if (mm) |
| @@ -3069,8 +3102,6 @@ EXPORT_SYMBOL(sub_preempt_count); | |||
| 3069 | */ | 3102 | */ |
| 3070 | static noinline void __schedule_bug(struct task_struct *prev) | 3103 | static noinline void __schedule_bug(struct task_struct *prev) |
| 3071 | { | 3104 | { |
| 3072 | struct pt_regs *regs = get_irq_regs(); | ||
| 3073 | |||
| 3074 | if (oops_in_progress) | 3105 | if (oops_in_progress) |
| 3075 | return; | 3106 | return; |
| 3076 | 3107 | ||
| @@ -3081,11 +3112,7 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
| 3081 | print_modules(); | 3112 | print_modules(); |
| 3082 | if (irqs_disabled()) | 3113 | if (irqs_disabled()) |
| 3083 | print_irqtrace_events(prev); | 3114 | print_irqtrace_events(prev); |
| 3084 | 3115 | dump_stack(); | |
| 3085 | if (regs) | ||
| 3086 | show_regs(regs); | ||
| 3087 | else | ||
| 3088 | dump_stack(); | ||
| 3089 | } | 3116 | } |
| 3090 | 3117 | ||
| 3091 | /* | 3118 | /* |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 94340c7544a9..0d97ebdc58f0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -416,8 +416,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) | |||
| 416 | 416 | ||
| 417 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 417 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
| 418 | 418 | ||
| 419 | static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | 419 | static __always_inline |
| 420 | unsigned long delta_exec); | 420 | void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec); |
| 421 | 421 | ||
| 422 | /************************************************************** | 422 | /************************************************************** |
| 423 | * Scheduling class tree data structure manipulation methods: | 423 | * Scheduling class tree data structure manipulation methods: |
| @@ -1162,7 +1162,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 1162 | __clear_buddies_skip(se); | 1162 | __clear_buddies_skip(se); |
| 1163 | } | 1163 | } |
| 1164 | 1164 | ||
| 1165 | static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); | 1165 | static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); |
| 1166 | 1166 | ||
| 1167 | static void | 1167 | static void |
| 1168 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | 1168 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) |
| @@ -1546,8 +1546,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | |||
| 1546 | resched_task(rq_of(cfs_rq)->curr); | 1546 | resched_task(rq_of(cfs_rq)->curr); |
| 1547 | } | 1547 | } |
| 1548 | 1548 | ||
| 1549 | static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | 1549 | static __always_inline |
| 1550 | unsigned long delta_exec) | 1550 | void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) |
| 1551 | { | 1551 | { |
| 1552 | if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled) | 1552 | if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled) |
| 1553 | return; | 1553 | return; |
| @@ -2073,11 +2073,11 @@ void unthrottle_offline_cfs_rqs(struct rq *rq) | |||
| 2073 | } | 2073 | } |
| 2074 | 2074 | ||
| 2075 | #else /* CONFIG_CFS_BANDWIDTH */ | 2075 | #else /* CONFIG_CFS_BANDWIDTH */ |
| 2076 | static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | 2076 | static __always_inline |
| 2077 | unsigned long delta_exec) {} | 2077 | void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {} |
| 2078 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} | 2078 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} |
| 2079 | static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} | 2079 | static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} |
| 2080 | static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} | 2080 | static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} |
| 2081 | 2081 | ||
| 2082 | static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) | 2082 | static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) |
| 2083 | { | 2083 | { |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index b60dad720173..44af55e6d5d0 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
| @@ -1428,7 +1428,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) | |||
| 1428 | next_idx: | 1428 | next_idx: |
| 1429 | if (idx >= MAX_RT_PRIO) | 1429 | if (idx >= MAX_RT_PRIO) |
| 1430 | continue; | 1430 | continue; |
| 1431 | if (next && next->prio < idx) | 1431 | if (next && next->prio <= idx) |
| 1432 | continue; | 1432 | continue; |
| 1433 | list_for_each_entry(rt_se, array->queue + idx, run_list) { | 1433 | list_for_each_entry(rt_se, array->queue + idx, run_list) { |
| 1434 | struct task_struct *p; | 1434 | struct task_struct *p; |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 42b1f304b044..fb3acba4d52e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
| @@ -681,6 +681,9 @@ static inline int task_running(struct rq *rq, struct task_struct *p) | |||
| 681 | #ifndef finish_arch_switch | 681 | #ifndef finish_arch_switch |
| 682 | # define finish_arch_switch(prev) do { } while (0) | 682 | # define finish_arch_switch(prev) do { } while (0) |
| 683 | #endif | 683 | #endif |
| 684 | #ifndef finish_arch_post_lock_switch | ||
| 685 | # define finish_arch_post_lock_switch() do { } while (0) | ||
| 686 | #endif | ||
| 684 | 687 | ||
| 685 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | 688 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW |
| 686 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | 689 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) |
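The new finish_arch_post_lock_switch() hook runs in finish_task_switch() after finish_lock_switch() has dropped the runqueue lock (see the core.c hunk above). An architecture that needs to complete part of the switch with the lock released can provide it from its asm headers; the following is a hypothetical sketch, not code from the patch:

/* In a hypothetical arch header pulled in before kernel/sched/sched.h: */
#define finish_arch_post_lock_switch finish_arch_post_lock_switch
static inline void finish_arch_post_lock_switch(void)
{
	/*
	 * Complete deferred, architecture-specific switch work here, e.g.
	 * switching the MMU context now that the runqueue lock no longer
	 * constrains us. Left empty in this sketch.
	 */
}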
diff --git a/kernel/signal.c b/kernel/signal.c index e76001ccf5cd..17afcaf582d0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
| 37 | #include <asm/unistd.h> | 37 | #include <asm/unistd.h> |
| 38 | #include <asm/siginfo.h> | 38 | #include <asm/siginfo.h> |
| 39 | #include <asm/cacheflush.h> | ||
| 39 | #include "audit.h" /* audit_signal_info() */ | 40 | #include "audit.h" /* audit_signal_info() */ |
| 40 | 41 | ||
| 41 | /* | 42 | /* |
| @@ -58,21 +59,20 @@ static int sig_handler_ignored(void __user *handler, int sig) | |||
| 58 | (handler == SIG_DFL && sig_kernel_ignore(sig)); | 59 | (handler == SIG_DFL && sig_kernel_ignore(sig)); |
| 59 | } | 60 | } |
| 60 | 61 | ||
| 61 | static int sig_task_ignored(struct task_struct *t, int sig, | 62 | static int sig_task_ignored(struct task_struct *t, int sig, bool force) |
| 62 | int from_ancestor_ns) | ||
| 63 | { | 63 | { |
| 64 | void __user *handler; | 64 | void __user *handler; |
| 65 | 65 | ||
| 66 | handler = sig_handler(t, sig); | 66 | handler = sig_handler(t, sig); |
| 67 | 67 | ||
| 68 | if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && | 68 | if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && |
| 69 | handler == SIG_DFL && !from_ancestor_ns) | 69 | handler == SIG_DFL && !force) |
| 70 | return 1; | 70 | return 1; |
| 71 | 71 | ||
| 72 | return sig_handler_ignored(handler, sig); | 72 | return sig_handler_ignored(handler, sig); |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns) | 75 | static int sig_ignored(struct task_struct *t, int sig, bool force) |
| 76 | { | 76 | { |
| 77 | /* | 77 | /* |
| 78 | * Blocked signals are never ignored, since the | 78 | * Blocked signals are never ignored, since the |
| @@ -82,7 +82,7 @@ static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns) | |||
| 82 | if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) | 82 | if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) |
| 83 | return 0; | 83 | return 0; |
| 84 | 84 | ||
| 85 | if (!sig_task_ignored(t, sig, from_ancestor_ns)) | 85 | if (!sig_task_ignored(t, sig, force)) |
| 86 | return 0; | 86 | return 0; |
| 87 | 87 | ||
| 88 | /* | 88 | /* |
| @@ -855,7 +855,7 @@ static void ptrace_trap_notify(struct task_struct *t) | |||
| 855 | * Returns true if the signal should be actually delivered, otherwise | 855 | * Returns true if the signal should be actually delivered, otherwise |
| 856 | * it should be dropped. | 856 | * it should be dropped. |
| 857 | */ | 857 | */ |
| 858 | static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) | 858 | static int prepare_signal(int sig, struct task_struct *p, bool force) |
| 859 | { | 859 | { |
| 860 | struct signal_struct *signal = p->signal; | 860 | struct signal_struct *signal = p->signal; |
| 861 | struct task_struct *t; | 861 | struct task_struct *t; |
| @@ -915,7 +915,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) | |||
| 915 | } | 915 | } |
| 916 | } | 916 | } |
| 917 | 917 | ||
| 918 | return !sig_ignored(p, sig, from_ancestor_ns); | 918 | return !sig_ignored(p, sig, force); |
| 919 | } | 919 | } |
| 920 | 920 | ||
| 921 | /* | 921 | /* |
| @@ -1059,7 +1059,8 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
| 1059 | assert_spin_locked(&t->sighand->siglock); | 1059 | assert_spin_locked(&t->sighand->siglock); |
| 1060 | 1060 | ||
| 1061 | result = TRACE_SIGNAL_IGNORED; | 1061 | result = TRACE_SIGNAL_IGNORED; |
| 1062 | if (!prepare_signal(sig, t, from_ancestor_ns)) | 1062 | if (!prepare_signal(sig, t, |
| 1063 | from_ancestor_ns || (info == SEND_SIG_FORCED))) | ||
| 1063 | goto ret; | 1064 | goto ret; |
| 1064 | 1065 | ||
| 1065 | pending = group ? &t->signal->shared_pending : &t->pending; | 1066 | pending = group ? &t->signal->shared_pending : &t->pending; |
| @@ -1601,7 +1602,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
| 1601 | 1602 | ||
| 1602 | ret = 1; /* the signal is ignored */ | 1603 | ret = 1; /* the signal is ignored */ |
| 1603 | result = TRACE_SIGNAL_IGNORED; | 1604 | result = TRACE_SIGNAL_IGNORED; |
| 1604 | if (!prepare_signal(sig, t, 0)) | 1605 | if (!prepare_signal(sig, t, false)) |
| 1605 | goto out; | 1606 | goto out; |
| 1606 | 1607 | ||
| 1607 | ret = 0; | 1608 | ret = 0; |

diff --git a/kernel/smp.c b/kernel/smp.c index db197d60489b..2f8b10ecf759 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
| @@ -701,3 +701,93 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait) | |||
| 701 | return ret; | 701 | return ret; |
| 702 | } | 702 | } |
| 703 | EXPORT_SYMBOL(on_each_cpu); | 703 | EXPORT_SYMBOL(on_each_cpu); |
| 704 | |||
| 705 | /** | ||
| 706 | * on_each_cpu_mask(): Run a function on processors specified by | ||
| 707 | * cpumask, which may include the local processor. | ||
| 708 | * @mask: The set of cpus to run on (only runs on online subset). | ||
| 709 | * @func: The function to run. This must be fast and non-blocking. | ||
| 710 | * @info: An arbitrary pointer to pass to the function. | ||
| 711 | * @wait: If true, wait (atomically) until function has completed | ||
| 712 | * on other CPUs. | ||
| 713 | * | ||
| 714 | * If @wait is true, then returns once @func has returned. | ||
| 715 | * | ||
| 716 | * You must not call this function with disabled interrupts or | ||
| 717 | * from a hardware interrupt handler or from a bottom half handler. | ||
| 718 | */ | ||
| 719 | void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, | ||
| 720 | void *info, bool wait) | ||
| 721 | { | ||
| 722 | int cpu = get_cpu(); | ||
| 723 | |||
| 724 | smp_call_function_many(mask, func, info, wait); | ||
| 725 | if (cpumask_test_cpu(cpu, mask)) { | ||
| 726 | local_irq_disable(); | ||
| 727 | func(info); | ||
| 728 | local_irq_enable(); | ||
| 729 | } | ||
| 730 | put_cpu(); | ||
| 731 | } | ||
| 732 | EXPORT_SYMBOL(on_each_cpu_mask); | ||
| 733 | |||
| 734 | /* | ||
| 735 | * on_each_cpu_cond(): Call a function on each processor for which | ||
| 736 | * the supplied function cond_func returns true, optionally waiting | ||
| 737 | * for all the required CPUs to finish. This may include the local | ||
| 738 | * processor. | ||
| 739 | * @cond_func: A callback function that is passed a cpu id and | ||
| 740 | * the info parameter. The function is called | ||
| 741 | * with preemption disabled. The function should | ||
| 742 | * return a boolean value indicating whether to IPI | ||
| 743 | * the specified CPU. | ||
| 744 | * @func: The function to run on all applicable CPUs. | ||
| 745 | * This must be fast and non-blocking. | ||
| 746 | * @info: An arbitrary pointer to pass to both functions. | ||
| 747 | * @wait: If true, wait (atomically) until function has | ||
| 748 | * completed on other CPUs. | ||
| 749 | * @gfp_flags: GFP flags to use when allocating the cpumask | ||
| 750 | * used internally by the function. | ||
| 751 | * | ||
| 752 | * The function might sleep if the GFP flags indicate a non | ||
| 753 | * atomic allocation is allowed. | ||
| 754 | * | ||
| 755 | * Preemption is disabled to protect against CPUs going offline but not online. | ||
| 756 | * CPUs going online during the call will not be seen or sent an IPI. | ||
| 757 | * | ||
| 758 | * You must not call this function with disabled interrupts or | ||
| 759 | * from a hardware interrupt handler or from a bottom half handler. | ||
| 760 | */ | ||
| 761 | void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), | ||
| 762 | smp_call_func_t func, void *info, bool wait, | ||
| 763 | gfp_t gfp_flags) | ||
| 764 | { | ||
| 765 | cpumask_var_t cpus; | ||
| 766 | int cpu, ret; | ||
| 767 | |||
| 768 | might_sleep_if(gfp_flags & __GFP_WAIT); | ||
| 769 | |||
| 770 | if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) { | ||
| 771 | preempt_disable(); | ||
| 772 | for_each_online_cpu(cpu) | ||
| 773 | if (cond_func(cpu, info)) | ||
| 774 | cpumask_set_cpu(cpu, cpus); | ||
| 775 | on_each_cpu_mask(cpus, func, info, wait); | ||
| 776 | preempt_enable(); | ||
| 777 | free_cpumask_var(cpus); | ||
| 778 | } else { | ||
| 779 | /* | ||
| 780 | * No free cpumask, bother. No matter, we'll | ||
| 781 | * just have to IPI them one by one. | ||
| 782 | */ | ||
| 783 | preempt_disable(); | ||
| 784 | for_each_online_cpu(cpu) | ||
| 785 | if (cond_func(cpu, info)) { | ||
| 786 | ret = smp_call_function_single(cpu, func, | ||
| 787 | info, wait); | ||
| 788 | WARN_ON_ONCE(!ret); | ||
| 789 | } | ||
| 790 | preempt_enable(); | ||
| 791 | } | ||
| 792 | } | ||
| 793 | EXPORT_SYMBOL(on_each_cpu_cond); | ||
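The two new helpers make "run this on a subset of CPUs, possibly including the local one" a one-liner for callers. A sketch of a hypothetical user that flushes a per-CPU counter only on CPUs that actually report pending work, waiting for completion:

#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(unsigned long, demo_pending);

/* Runs on each selected CPU with interrupts disabled (remotely via IPI,
 * locally inside on_each_cpu_mask()). */
static void demo_flush(void *info)
{
	__this_cpu_write(demo_pending, 0);
}

/* Called per online CPU with preemption disabled by on_each_cpu_cond(). */
static bool demo_has_pending(int cpu, void *info)
{
	return per_cpu(demo_pending, cpu) != 0;
}

static void demo_flush_all(void)
{
	/* Only CPUs whose predicate returns true receive the IPI. */
	on_each_cpu_cond(demo_has_pending, demo_flush, NULL, true, GFP_KERNEL);
}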
diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 84c7d96918bf..5cdd8065a3ce 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c | |||
| @@ -163,7 +163,7 @@ void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock) | |||
| 163 | EXPORT_SYMBOL(_raw_spin_lock_bh); | 163 | EXPORT_SYMBOL(_raw_spin_lock_bh); |
| 164 | #endif | 164 | #endif |
| 165 | 165 | ||
| 166 | #ifndef CONFIG_INLINE_SPIN_UNLOCK | 166 | #ifdef CONFIG_UNINLINE_SPIN_UNLOCK |
| 167 | void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) | 167 | void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) |
| 168 | { | 168 | { |
| 169 | __raw_spin_unlock(lock); | 169 | __raw_spin_unlock(lock); |
diff --git a/kernel/sys.c b/kernel/sys.c index 888d227fd195..e7006eb6c1e4 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -444,6 +444,15 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
| 444 | magic2 != LINUX_REBOOT_MAGIC2C)) | 444 | magic2 != LINUX_REBOOT_MAGIC2C)) |
| 445 | return -EINVAL; | 445 | return -EINVAL; |
| 446 | 446 | ||
| 447 | /* | ||
| 448 | * If pid namespaces are enabled and the current task is in a child | ||
| 449 | * pid_namespace, the command is handled by reboot_pid_ns() which will | ||
| 450 | * call do_exit(). | ||
| 451 | */ | ||
| 452 | ret = reboot_pid_ns(task_active_pid_ns(current), cmd); | ||
| 453 | if (ret) | ||
| 454 | return ret; | ||
| 455 | |||
| 447 | /* Instead of trying to make the power_off code look like | 456 | /* Instead of trying to make the power_off code look like |
| 448 | * halt when pm_power_off is not set do it the easy way. | 457 | * halt when pm_power_off is not set do it the easy way. |
| 449 | */ | 458 | */ |
| @@ -1962,6 +1971,14 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 1962 | case PR_SET_MM: | 1971 | case PR_SET_MM: |
| 1963 | error = prctl_set_mm(arg2, arg3, arg4, arg5); | 1972 | error = prctl_set_mm(arg2, arg3, arg4, arg5); |
| 1964 | break; | 1973 | break; |
| 1974 | case PR_SET_CHILD_SUBREAPER: | ||
| 1975 | me->signal->is_child_subreaper = !!arg2; | ||
| 1976 | error = 0; | ||
| 1977 | break; | ||
| 1978 | case PR_GET_CHILD_SUBREAPER: | ||
| 1979 | error = put_user(me->signal->is_child_subreaper, | ||
| 1980 | (int __user *) arg2); | ||
| 1981 | break; | ||
| 1965 | default: | 1982 | default: |
| 1966 | error = -EINVAL; | 1983 | error = -EINVAL; |
| 1967 | break; | 1984 | break; |
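PR_SET_CHILD_SUBREAPER lets a service-manager-like process collect orphaned descendants instead of having them reparented to init. A userspace sketch; the numeric values match the <linux/prctl.h> additions in this series, and the fallback defines are only for older headers:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_CHILD_SUBREAPER
#define PR_SET_CHILD_SUBREAPER 36
#endif
#ifndef PR_GET_CHILD_SUBREAPER
#define PR_GET_CHILD_SUBREAPER 37
#endif

/* Mark the calling process as a child subreaper and read the flag back;
 * PR_GET_CHILD_SUBREAPER stores the current value through arg2. */
int become_child_subreaper(void)
{
	int flag = 0;

	if (prctl(PR_SET_CHILD_SUBREAPER, 1L, 0L, 0L, 0L) != 0) {
		perror("PR_SET_CHILD_SUBREAPER");
		return -1;
	}
	if (prctl(PR_GET_CHILD_SUBREAPER, (unsigned long)&flag, 0L, 0L, 0L) != 0) {
		perror("PR_GET_CHILD_SUBREAPER");
		return -1;
	}
	return flag;	/* 1 on success */
}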
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f487f257e05e..4ab11879aeb4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include <linux/swap.h> | 23 | #include <linux/swap.h> |
| 24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
| 25 | #include <linux/sysctl.h> | 25 | #include <linux/sysctl.h> |
| 26 | #include <linux/bitmap.h> | ||
| 26 | #include <linux/signal.h> | 27 | #include <linux/signal.h> |
| 27 | #include <linux/printk.h> | 28 | #include <linux/printk.h> |
| 28 | #include <linux/proc_fs.h> | 29 | #include <linux/proc_fs.h> |
| @@ -58,6 +59,7 @@ | |||
| 58 | #include <linux/oom.h> | 59 | #include <linux/oom.h> |
| 59 | #include <linux/kmod.h> | 60 | #include <linux/kmod.h> |
| 60 | #include <linux/capability.h> | 61 | #include <linux/capability.h> |
| 62 | #include <linux/binfmts.h> | ||
| 61 | 63 | ||
| 62 | #include <asm/uaccess.h> | 64 | #include <asm/uaccess.h> |
| 63 | #include <asm/processor.h> | 65 | #include <asm/processor.h> |
| @@ -67,6 +69,9 @@ | |||
| 67 | #include <asm/stacktrace.h> | 69 | #include <asm/stacktrace.h> |
| 68 | #include <asm/io.h> | 70 | #include <asm/io.h> |
| 69 | #endif | 71 | #endif |
| 72 | #ifdef CONFIG_SPARC | ||
| 73 | #include <asm/setup.h> | ||
| 74 | #endif | ||
| 70 | #ifdef CONFIG_BSD_PROCESS_ACCT | 75 | #ifdef CONFIG_BSD_PROCESS_ACCT |
| 71 | #include <linux/acct.h> | 76 | #include <linux/acct.h> |
| 72 | #endif | 77 | #endif |
| @@ -141,7 +146,6 @@ static const int cap_last_cap = CAP_LAST_CAP; | |||
| 141 | #include <linux/inotify.h> | 146 | #include <linux/inotify.h> |
| 142 | #endif | 147 | #endif |
| 143 | #ifdef CONFIG_SPARC | 148 | #ifdef CONFIG_SPARC |
| 144 | #include <asm/system.h> | ||
| 145 | #endif | 149 | #endif |
| 146 | 150 | ||
| 147 | #ifdef CONFIG_SPARC64 | 151 | #ifdef CONFIG_SPARC64 |
| @@ -166,7 +170,7 @@ static int proc_taint(struct ctl_table *table, int write, | |||
| 166 | #endif | 170 | #endif |
| 167 | 171 | ||
| 168 | #ifdef CONFIG_PRINTK | 172 | #ifdef CONFIG_PRINTK |
| 169 | static int proc_dmesg_restrict(struct ctl_table *table, int write, | 173 | static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, |
| 170 | void __user *buffer, size_t *lenp, loff_t *ppos); | 174 | void __user *buffer, size_t *lenp, loff_t *ppos); |
| 171 | #endif | 175 | #endif |
| 172 | 176 | ||
| @@ -192,20 +196,6 @@ static int sysrq_sysctl_handler(ctl_table *table, int write, | |||
| 192 | 196 | ||
| 193 | #endif | 197 | #endif |
| 194 | 198 | ||
| 195 | static struct ctl_table root_table[]; | ||
| 196 | static struct ctl_table_root sysctl_table_root; | ||
| 197 | static struct ctl_table_header root_table_header = { | ||
| 198 | {{.count = 1, | ||
| 199 | .ctl_table = root_table, | ||
| 200 | .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, | ||
| 201 | .root = &sysctl_table_root, | ||
| 202 | .set = &sysctl_table_root.default_set, | ||
| 203 | }; | ||
| 204 | static struct ctl_table_root sysctl_table_root = { | ||
| 205 | .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), | ||
| 206 | .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), | ||
| 207 | }; | ||
| 208 | |||
| 209 | static struct ctl_table kern_table[]; | 199 | static struct ctl_table kern_table[]; |
| 210 | static struct ctl_table vm_table[]; | 200 | static struct ctl_table vm_table[]; |
| 211 | static struct ctl_table fs_table[]; | 201 | static struct ctl_table fs_table[]; |
| @@ -222,7 +212,7 @@ int sysctl_legacy_va_layout; | |||
| 222 | 212 | ||
| 223 | /* The default sysctl tables: */ | 213 | /* The default sysctl tables: */ |
| 224 | 214 | ||
| 225 | static struct ctl_table root_table[] = { | 215 | static struct ctl_table sysctl_base_table[] = { |
| 226 | { | 216 | { |
| 227 | .procname = "kernel", | 217 | .procname = "kernel", |
| 228 | .mode = 0555, | 218 | .mode = 0555, |
| @@ -713,7 +703,7 @@ static struct ctl_table kern_table[] = { | |||
| 713 | .data = &dmesg_restrict, | 703 | .data = &dmesg_restrict, |
| 714 | .maxlen = sizeof(int), | 704 | .maxlen = sizeof(int), |
| 715 | .mode = 0644, | 705 | .mode = 0644, |
| 716 | .proc_handler = proc_dointvec_minmax, | 706 | .proc_handler = proc_dointvec_minmax_sysadmin, |
| 717 | .extra1 = &zero, | 707 | .extra1 = &zero, |
| 718 | .extra2 = &one, | 708 | .extra2 = &one, |
| 719 | }, | 709 | }, |
| @@ -722,7 +712,7 @@ static struct ctl_table kern_table[] = { | |||
| 722 | .data = &kptr_restrict, | 712 | .data = &kptr_restrict, |
| 723 | .maxlen = sizeof(int), | 713 | .maxlen = sizeof(int), |
| 724 | .mode = 0644, | 714 | .mode = 0644, |
| 725 | .proc_handler = proc_dmesg_restrict, | 715 | .proc_handler = proc_dointvec_minmax_sysadmin, |
| 726 | .extra1 = &zero, | 716 | .extra1 = &zero, |
| 727 | .extra2 = &two, | 717 | .extra2 = &two, |
| 728 | }, | 718 | }, |
| @@ -1559,490 +1549,12 @@ static struct ctl_table dev_table[] = { | |||
| 1559 | { } | 1549 | { } |
| 1560 | }; | 1550 | }; |
| 1561 | 1551 | ||
| 1562 | static DEFINE_SPINLOCK(sysctl_lock); | 1552 | int __init sysctl_init(void) |
| 1563 | |||
| 1564 | /* called under sysctl_lock */ | ||
| 1565 | static int use_table(struct ctl_table_header *p) | ||
| 1566 | { | ||
| 1567 | if (unlikely(p->unregistering)) | ||
| 1568 | return 0; | ||
| 1569 | p->used++; | ||
| 1570 | return 1; | ||
| 1571 | } | ||
| 1572 | |||
| 1573 | /* called under sysctl_lock */ | ||
| 1574 | static void unuse_table(struct ctl_table_header *p) | ||
| 1575 | { | ||
| 1576 | if (!--p->used) | ||
| 1577 | if (unlikely(p->unregistering)) | ||
| 1578 | complete(p->unregistering); | ||
| 1579 | } | ||
| 1580 | |||
| 1581 | /* called under sysctl_lock, will reacquire if has to wait */ | ||
| 1582 | static void start_unregistering(struct ctl_table_header *p) | ||
| 1583 | { | ||
| 1584 | /* | ||
| 1585 | * if p->used is 0, nobody will ever touch that entry again; | ||
| 1586 | * we'll eliminate all paths to it before dropping sysctl_lock | ||
| 1587 | */ | ||
| 1588 | if (unlikely(p->used)) { | ||
| 1589 | struct completion wait; | ||
| 1590 | init_completion(&wait); | ||
| 1591 | p->unregistering = &wait; | ||
| 1592 | spin_unlock(&sysctl_lock); | ||
| 1593 | wait_for_completion(&wait); | ||
| 1594 | spin_lock(&sysctl_lock); | ||
| 1595 | } else { | ||
| 1596 | /* anything non-NULL; we'll never dereference it */ | ||
| 1597 | p->unregistering = ERR_PTR(-EINVAL); | ||
| 1598 | } | ||
| 1599 | /* | ||
| 1600 | * do not remove from the list until nobody holds it; walking the | ||
| 1601 | * list in do_sysctl() relies on that. | ||
| 1602 | */ | ||
| 1603 | list_del_init(&p->ctl_entry); | ||
| 1604 | } | ||
| 1605 | |||
| 1606 | void sysctl_head_get(struct ctl_table_header *head) | ||
| 1607 | { | ||
| 1608 | spin_lock(&sysctl_lock); | ||
| 1609 | head->count++; | ||
| 1610 | spin_unlock(&sysctl_lock); | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | void sysctl_head_put(struct ctl_table_header *head) | ||
| 1614 | { | ||
| 1615 | spin_lock(&sysctl_lock); | ||
| 1616 | if (!--head->count) | ||
| 1617 | kfree_rcu(head, rcu); | ||
| 1618 | spin_unlock(&sysctl_lock); | ||
| 1619 | } | ||
| 1620 | |||
| 1621 | struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) | ||
| 1622 | { | ||
| 1623 | if (!head) | ||
| 1624 | BUG(); | ||
| 1625 | spin_lock(&sysctl_lock); | ||
| 1626 | if (!use_table(head)) | ||
| 1627 | head = ERR_PTR(-ENOENT); | ||
| 1628 | spin_unlock(&sysctl_lock); | ||
| 1629 | return head; | ||
| 1630 | } | ||
| 1631 | |||
| 1632 | void sysctl_head_finish(struct ctl_table_header *head) | ||
| 1633 | { | ||
| 1634 | if (!head) | ||
| 1635 | return; | ||
| 1636 | spin_lock(&sysctl_lock); | ||
| 1637 | unuse_table(head); | ||
| 1638 | spin_unlock(&sysctl_lock); | ||
| 1639 | } | ||
| 1640 | |||
| 1641 | static struct ctl_table_set * | ||
| 1642 | lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) | ||
| 1643 | { | ||
| 1644 | struct ctl_table_set *set = &root->default_set; | ||
| 1645 | if (root->lookup) | ||
| 1646 | set = root->lookup(root, namespaces); | ||
| 1647 | return set; | ||
| 1648 | } | ||
| 1649 | |||
| 1650 | static struct list_head * | ||
| 1651 | lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) | ||
| 1652 | { | ||
| 1653 | struct ctl_table_set *set = lookup_header_set(root, namespaces); | ||
| 1654 | return &set->list; | ||
| 1655 | } | ||
| 1656 | |||
| 1657 | struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, | ||
| 1658 | struct ctl_table_header *prev) | ||
| 1659 | { | 1553 | { |
| 1660 | struct ctl_table_root *root; | 1554 | register_sysctl_table(sysctl_base_table); |
| 1661 | struct list_head *header_list; | ||
| 1662 | struct ctl_table_header *head; | ||
| 1663 | struct list_head *tmp; | ||
| 1664 | |||
| 1665 | spin_lock(&sysctl_lock); | ||
| 1666 | if (prev) { | ||
| 1667 | head = prev; | ||
| 1668 | tmp = &prev->ctl_entry; | ||
| 1669 | unuse_table(prev); | ||
| 1670 | goto next; | ||
| 1671 | } | ||
| 1672 | tmp = &root_table_header.ctl_entry; | ||
| 1673 | for (;;) { | ||
| 1674 | head = list_entry(tmp, struct ctl_table_header, ctl_entry); | ||
| 1675 | |||
| 1676 | if (!use_table(head)) | ||
| 1677 | goto next; | ||
| 1678 | spin_unlock(&sysctl_lock); | ||
| 1679 | return head; | ||
| 1680 | next: | ||
| 1681 | root = head->root; | ||
| 1682 | tmp = tmp->next; | ||
| 1683 | header_list = lookup_header_list(root, namespaces); | ||
| 1684 | if (tmp != header_list) | ||
| 1685 | continue; | ||
| 1686 | |||
| 1687 | do { | ||
| 1688 | root = list_entry(root->root_list.next, | ||
| 1689 | struct ctl_table_root, root_list); | ||
| 1690 | if (root == &sysctl_table_root) | ||
| 1691 | goto out; | ||
| 1692 | header_list = lookup_header_list(root, namespaces); | ||
| 1693 | } while (list_empty(header_list)); | ||
| 1694 | tmp = header_list->next; | ||
| 1695 | } | ||
| 1696 | out: | ||
| 1697 | spin_unlock(&sysctl_lock); | ||
| 1698 | return NULL; | ||
| 1699 | } | ||
| 1700 | |||
| 1701 | struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) | ||
| 1702 | { | ||
| 1703 | return __sysctl_head_next(current->nsproxy, prev); | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | void register_sysctl_root(struct ctl_table_root *root) | ||
| 1707 | { | ||
| 1708 | spin_lock(&sysctl_lock); | ||
| 1709 | list_add_tail(&root->root_list, &sysctl_table_root.root_list); | ||
| 1710 | spin_unlock(&sysctl_lock); | ||
| 1711 | } | ||
| 1712 | |||
| 1713 | /* | ||
| 1714 | * sysctl_perm does NOT grant the superuser all rights automatically, because | ||
| 1715 | * some sysctl variables are readonly even to root. | ||
| 1716 | */ | ||
| 1717 | |||
| 1718 | static int test_perm(int mode, int op) | ||
| 1719 | { | ||
| 1720 | if (!current_euid()) | ||
| 1721 | mode >>= 6; | ||
| 1722 | else if (in_egroup_p(0)) | ||
| 1723 | mode >>= 3; | ||
| 1724 | if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) | ||
| 1725 | return 0; | ||
| 1726 | return -EACCES; | ||
| 1727 | } | ||
| 1728 | |||
| 1729 | int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) | ||
| 1730 | { | ||
| 1731 | int mode; | ||
| 1732 | |||
| 1733 | if (root->permissions) | ||
| 1734 | mode = root->permissions(root, current->nsproxy, table); | ||
| 1735 | else | ||
| 1736 | mode = table->mode; | ||
| 1737 | |||
| 1738 | return test_perm(mode, op); | ||
| 1739 | } | ||
| 1740 | |||
| 1741 | static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) | ||
| 1742 | { | ||
| 1743 | for (; table->procname; table++) { | ||
| 1744 | table->parent = parent; | ||
| 1745 | if (table->child) | ||
| 1746 | sysctl_set_parent(table, table->child); | ||
| 1747 | } | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | static __init int sysctl_init(void) | ||
| 1751 | { | ||
| 1752 | sysctl_set_parent(NULL, root_table); | ||
| 1753 | #ifdef CONFIG_SYSCTL_SYSCALL_CHECK | ||
| 1754 | sysctl_check_table(current->nsproxy, root_table); | ||
| 1755 | #endif | ||
| 1756 | return 0; | 1555 | return 0; |
| 1757 | } | 1556 | } |
| 1758 | 1557 | ||
| 1759 | core_initcall(sysctl_init); | ||
| 1760 | |||
| 1761 | static struct ctl_table *is_branch_in(struct ctl_table *branch, | ||
| 1762 | struct ctl_table *table) | ||
| 1763 | { | ||
| 1764 | struct ctl_table *p; | ||
| 1765 | const char *s = branch->procname; | ||
| 1766 | |||
| 1767 | /* branch should have named subdirectory as its first element */ | ||
| 1768 | if (!s || !branch->child) | ||
| 1769 | return NULL; | ||
| 1770 | |||
| 1771 | /* ... and nothing else */ | ||
| 1772 | if (branch[1].procname) | ||
| 1773 | return NULL; | ||
| 1774 | |||
| 1775 | /* table should contain subdirectory with the same name */ | ||
| 1776 | for (p = table; p->procname; p++) { | ||
| 1777 | if (!p->child) | ||
| 1778 | continue; | ||
| 1779 | if (p->procname && strcmp(p->procname, s) == 0) | ||
| 1780 | return p; | ||
| 1781 | } | ||
| 1782 | return NULL; | ||
| 1783 | } | ||
| 1784 | |||
| 1785 | /* see if attaching q to p would be an improvement */ | ||
| 1786 | static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) | ||
| 1787 | { | ||
| 1788 | struct ctl_table *to = p->ctl_table, *by = q->ctl_table; | ||
| 1789 | struct ctl_table *next; | ||
| 1790 | int is_better = 0; | ||
| 1791 | int not_in_parent = !p->attached_by; | ||
| 1792 | |||
| 1793 | while ((next = is_branch_in(by, to)) != NULL) { | ||
| 1794 | if (by == q->attached_by) | ||
| 1795 | is_better = 1; | ||
| 1796 | if (to == p->attached_by) | ||
| 1797 | not_in_parent = 1; | ||
| 1798 | by = by->child; | ||
| 1799 | to = next->child; | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | if (is_better && not_in_parent) { | ||
| 1803 | q->attached_by = by; | ||
| 1804 | q->attached_to = to; | ||
| 1805 | q->parent = p; | ||
| 1806 | } | ||
| 1807 | } | ||
| 1808 | |||
| 1809 | /** | ||
| 1810 | * __register_sysctl_paths - register a sysctl hierarchy | ||
| 1811 | * @root: List of sysctl headers to register on | ||
| 1812 | * @namespaces: Data to compute which lists of sysctl entries are visible | ||
| 1813 | * @path: The path to the directory the sysctl table is in. | ||
| 1814 | * @table: the top-level table structure | ||
| 1815 | * | ||
| 1816 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
| 1817 | * array. A completely 0 filled entry terminates the table. | ||
| 1818 | * | ||
| 1819 | * The members of the &struct ctl_table structure are used as follows: | ||
| 1820 | * | ||
| 1821 | * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not | ||
| 1822 | * enter a sysctl file | ||
| 1823 | * | ||
| 1824 | * data - a pointer to data for use by proc_handler | ||
| 1825 | * | ||
| 1826 | * maxlen - the maximum size in bytes of the data | ||
| 1827 | * | ||
| 1828 | * mode - the file permissions for the /proc/sys file, and for sysctl(2) | ||
| 1829 | * | ||
| 1830 | * child - a pointer to the child sysctl table if this entry is a directory, or | ||
| 1831 | * %NULL. | ||
| 1832 | * | ||
| 1833 | * proc_handler - the text handler routine (described below) | ||
| 1834 | * | ||
| 1835 | * de - for internal use by the sysctl routines | ||
| 1836 | * | ||
| 1837 | * extra1, extra2 - extra pointers usable by the proc handler routines | ||
| 1838 | * | ||
| 1839 | * Leaf nodes in the sysctl tree will be represented by a single file | ||
| 1840 | * under /proc; non-leaf nodes will be represented by directories. | ||
| 1841 | * | ||
| 1842 | * sysctl(2) can automatically manage read and write requests through | ||
| 1843 | * the sysctl table. The data and maxlen fields of the ctl_table | ||
| 1844 | * struct enable minimal validation of the values being written to be | ||
| 1845 | * performed, and the mode field allows minimal authentication. | ||
| 1846 | * | ||
| 1847 | * There must be a proc_handler routine for any terminal nodes | ||
| 1848 | * mirrored under /proc/sys (non-terminals are handled by a built-in | ||
| 1849 | * directory handler). Several default handlers are available to | ||
| 1850 | * cover common cases - | ||
| 1851 | * | ||
| 1852 | * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), | ||
| 1853 | * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), | ||
| 1854 | * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() | ||
| 1855 | * | ||
| 1856 | * It is the handler's job to read the input buffer from user memory | ||
| 1857 | * and process it. The handler should return 0 on success. | ||
| 1858 | * | ||
| 1859 | * This routine returns %NULL on a failure to register, and a pointer | ||
| 1860 | * to the table header on success. | ||
| 1861 | */ | ||
| 1862 | struct ctl_table_header *__register_sysctl_paths( | ||
| 1863 | struct ctl_table_root *root, | ||
| 1864 | struct nsproxy *namespaces, | ||
| 1865 | const struct ctl_path *path, struct ctl_table *table) | ||
| 1866 | { | ||
| 1867 | struct ctl_table_header *header; | ||
| 1868 | struct ctl_table *new, **prevp; | ||
| 1869 | unsigned int n, npath; | ||
| 1870 | struct ctl_table_set *set; | ||
| 1871 | |||
| 1872 | /* Count the path components */ | ||
| 1873 | for (npath = 0; path[npath].procname; ++npath) | ||
| 1874 | ; | ||
| 1875 | |||
| 1876 | /* | ||
| 1877 | * For each path component, allocate a 2-element ctl_table array. | ||
| 1878 | * The first array element will be filled with the sysctl entry | ||
| 1879 | * for this, the second will be the sentinel (procname == 0). | ||
| 1880 | * | ||
| 1881 | * We allocate everything in one go so that we don't have to | ||
| 1882 | * worry about freeing additional memory in unregister_sysctl_table. | ||
| 1883 | */ | ||
| 1884 | header = kzalloc(sizeof(struct ctl_table_header) + | ||
| 1885 | (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); | ||
| 1886 | if (!header) | ||
| 1887 | return NULL; | ||
| 1888 | |||
| 1889 | new = (struct ctl_table *) (header + 1); | ||
| 1890 | |||
| 1891 | /* Now connect the dots */ | ||
| 1892 | prevp = &header->ctl_table; | ||
| 1893 | for (n = 0; n < npath; ++n, ++path) { | ||
| 1894 | /* Copy the procname */ | ||
| 1895 | new->procname = path->procname; | ||
| 1896 | new->mode = 0555; | ||
| 1897 | |||
| 1898 | *prevp = new; | ||
| 1899 | prevp = &new->child; | ||
| 1900 | |||
| 1901 | new += 2; | ||
| 1902 | } | ||
| 1903 | *prevp = table; | ||
| 1904 | header->ctl_table_arg = table; | ||
| 1905 | |||
| 1906 | INIT_LIST_HEAD(&header->ctl_entry); | ||
| 1907 | header->used = 0; | ||
| 1908 | header->unregistering = NULL; | ||
| 1909 | header->root = root; | ||
| 1910 | sysctl_set_parent(NULL, header->ctl_table); | ||
| 1911 | header->count = 1; | ||
| 1912 | #ifdef CONFIG_SYSCTL_SYSCALL_CHECK | ||
| 1913 | if (sysctl_check_table(namespaces, header->ctl_table)) { | ||
| 1914 | kfree(header); | ||
| 1915 | return NULL; | ||
| 1916 | } | ||
| 1917 | #endif | ||
| 1918 | spin_lock(&sysctl_lock); | ||
| 1919 | header->set = lookup_header_set(root, namespaces); | ||
| 1920 | header->attached_by = header->ctl_table; | ||
| 1921 | header->attached_to = root_table; | ||
| 1922 | header->parent = &root_table_header; | ||
| 1923 | for (set = header->set; set; set = set->parent) { | ||
| 1924 | struct ctl_table_header *p; | ||
| 1925 | list_for_each_entry(p, &set->list, ctl_entry) { | ||
| 1926 | if (p->unregistering) | ||
| 1927 | continue; | ||
| 1928 | try_attach(p, header); | ||
| 1929 | } | ||
| 1930 | } | ||
| 1931 | header->parent->count++; | ||
| 1932 | list_add_tail(&header->ctl_entry, &header->set->list); | ||
| 1933 | spin_unlock(&sysctl_lock); | ||
| 1934 | |||
| 1935 | return header; | ||
| 1936 | } | ||
| 1937 | |||
| 1938 | /** | ||
| 1939 | * register_sysctl_table_path - register a sysctl table hierarchy | ||
| 1940 | * @path: The path to the directory the sysctl table is in. | ||
| 1941 | * @table: the top-level table structure | ||
| 1942 | * | ||
| 1943 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
| 1944 | * array. A completely 0 filled entry terminates the table. | ||
| 1945 | * | ||
| 1946 | * See __register_sysctl_paths for more details. | ||
| 1947 | */ | ||
| 1948 | struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, | ||
| 1949 | struct ctl_table *table) | ||
| 1950 | { | ||
| 1951 | return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, | ||
| 1952 | path, table); | ||
| 1953 | } | ||
| 1954 | |||
| 1955 | /** | ||
| 1956 | * register_sysctl_table - register a sysctl table hierarchy | ||
| 1957 | * @table: the top-level table structure | ||
| 1958 | * | ||
| 1959 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
| 1960 | * array. A completely 0 filled entry terminates the table. | ||
| 1961 | * | ||
| 1962 | * See register_sysctl_paths for more details. | ||
| 1963 | */ | ||
| 1964 | struct ctl_table_header *register_sysctl_table(struct ctl_table *table) | ||
| 1965 | { | ||
| 1966 | static const struct ctl_path null_path[] = { {} }; | ||
| 1967 | |||
| 1968 | return register_sysctl_paths(null_path, table); | ||
| 1969 | } | ||
| 1970 | |||
| 1971 | /** | ||
| 1972 | * unregister_sysctl_table - unregister a sysctl table hierarchy | ||
| 1973 | * @header: the header returned from register_sysctl_table | ||
| 1974 | * | ||
| 1975 | * Unregisters the sysctl table and all children. proc entries may not | ||
| 1976 | * actually be removed until they are no longer used by anyone. | ||
| 1977 | */ | ||
| 1978 | void unregister_sysctl_table(struct ctl_table_header * header) | ||
| 1979 | { | ||
| 1980 | might_sleep(); | ||
| 1981 | |||
| 1982 | if (header == NULL) | ||
| 1983 | return; | ||
| 1984 | |||
| 1985 | spin_lock(&sysctl_lock); | ||
| 1986 | start_unregistering(header); | ||
| 1987 | if (!--header->parent->count) { | ||
| 1988 | WARN_ON(1); | ||
| 1989 | kfree_rcu(header->parent, rcu); | ||
| 1990 | } | ||
| 1991 | if (!--header->count) | ||
| 1992 | kfree_rcu(header, rcu); | ||
| 1993 | spin_unlock(&sysctl_lock); | ||
| 1994 | } | ||
| 1995 | |||
| 1996 | int sysctl_is_seen(struct ctl_table_header *p) | ||
| 1997 | { | ||
| 1998 | struct ctl_table_set *set = p->set; | ||
| 1999 | int res; | ||
| 2000 | spin_lock(&sysctl_lock); | ||
| 2001 | if (p->unregistering) | ||
| 2002 | res = 0; | ||
| 2003 | else if (!set->is_seen) | ||
| 2004 | res = 1; | ||
| 2005 | else | ||
| 2006 | res = set->is_seen(set); | ||
| 2007 | spin_unlock(&sysctl_lock); | ||
| 2008 | return res; | ||
| 2009 | } | ||
| 2010 | |||
| 2011 | void setup_sysctl_set(struct ctl_table_set *p, | ||
| 2012 | struct ctl_table_set *parent, | ||
| 2013 | int (*is_seen)(struct ctl_table_set *)) | ||
| 2014 | { | ||
| 2015 | INIT_LIST_HEAD(&p->list); | ||
| 2016 | p->parent = parent ? parent : &sysctl_table_root.default_set; | ||
| 2017 | p->is_seen = is_seen; | ||
| 2018 | } | ||
| 2019 | |||
| 2020 | #else /* !CONFIG_SYSCTL */ | ||
| 2021 | struct ctl_table_header *register_sysctl_table(struct ctl_table * table) | ||
| 2022 | { | ||
| 2023 | return NULL; | ||
| 2024 | } | ||
| 2025 | |||
| 2026 | struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, | ||
| 2027 | struct ctl_table *table) | ||
| 2028 | { | ||
| 2029 | return NULL; | ||
| 2030 | } | ||
| 2031 | |||
| 2032 | void unregister_sysctl_table(struct ctl_table_header * table) | ||
| 2033 | { | ||
| 2034 | } | ||
| 2035 | |||
| 2036 | void setup_sysctl_set(struct ctl_table_set *p, | ||
| 2037 | struct ctl_table_set *parent, | ||
| 2038 | int (*is_seen)(struct ctl_table_set *)) | ||
| 2039 | { | ||
| 2040 | } | ||
| 2041 | |||
| 2042 | void sysctl_head_put(struct ctl_table_header *head) | ||
| 2043 | { | ||
| 2044 | } | ||
| 2045 | |||
| 2046 | #endif /* CONFIG_SYSCTL */ | 1558 | #endif /* CONFIG_SYSCTL */ |
| 2047 | 1559 | ||
| 2048 | /* | 1560 | /* |
| @@ -2431,7 +1943,7 @@ static int proc_taint(struct ctl_table *table, int write, | |||
| 2431 | } | 1943 | } |
| 2432 | 1944 | ||
| 2433 | #ifdef CONFIG_PRINTK | 1945 | #ifdef CONFIG_PRINTK |
| 2434 | static int proc_dmesg_restrict(struct ctl_table *table, int write, | 1946 | static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, |
| 2435 | void __user *buffer, size_t *lenp, loff_t *ppos) | 1947 | void __user *buffer, size_t *lenp, loff_t *ppos) |
| 2436 | { | 1948 | { |
| 2437 | if (write && !capable(CAP_SYS_ADMIN)) | 1949 | if (write && !capable(CAP_SYS_ADMIN)) |
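The hunk shows only the first line of the renamed handler; a hedged reconstruction of the whole function, assuming the body is unchanged apart from the name (both dmesg_restrict and kptr_restrict now share it):

    static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
    {
        /* only CAP_SYS_ADMIN may change these restriction knobs */
        if (write && !capable(CAP_SYS_ADMIN))
            return -EPERM;

        return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
    }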
| @@ -2884,9 +2396,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, | |||
| 2884 | } | 2396 | } |
| 2885 | } | 2397 | } |
| 2886 | 2398 | ||
| 2887 | while (val_a <= val_b) | 2399 | bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1); |
| 2888 | set_bit(val_a++, tmp_bitmap); | ||
| 2889 | |||
| 2890 | first = 0; | 2400 | first = 0; |
| 2891 | proc_skip_char(&kbuf, &left, '\n'); | 2401 | proc_skip_char(&kbuf, &left, '\n'); |
| 2892 | } | 2402 | } |
| @@ -2929,8 +2439,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, | |||
| 2929 | if (*ppos) | 2439 | if (*ppos) |
| 2930 | bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); | 2440 | bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); |
| 2931 | else | 2441 | else |
| 2932 | memcpy(bitmap, tmp_bitmap, | 2442 | bitmap_copy(bitmap, tmp_bitmap, bitmap_len); |
| 2933 | BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long)); | ||
| 2934 | } | 2443 | } |
| 2935 | kfree(tmp_bitmap); | 2444 | kfree(tmp_bitmap); |
| 2936 | *lenp -= left; | 2445 | *lenp -= left; |
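The two hunks above swap open-coded loops for helpers from the newly included <linux/bitmap.h>; a small hedged illustration of the equivalences (names invented):

    #include <linux/bitmap.h>
    #include <linux/bitops.h>

    static int example_bitmap_helpers(void)
    {
        DECLARE_BITMAP(tmp, 64);
        DECLARE_BITMAP(dst, 64);

        bitmap_zero(tmp, 64);

        /* like the removed "while (val_a <= val_b) set_bit(val_a++, ...)" */
        bitmap_set(tmp, 3, 5);          /* sets bits 3..7 inclusive */

        /* like the removed memcpy of BITS_TO_LONGS(len) longs */
        bitmap_copy(dst, tmp, 64);

        return bitmap_weight(dst, 64);  /* 5 */
    }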
| @@ -3008,6 +2517,3 @@ EXPORT_SYMBOL(proc_dointvec_ms_jiffies); | |||
| 3008 | EXPORT_SYMBOL(proc_dostring); | 2517 | EXPORT_SYMBOL(proc_dostring); |
| 3009 | EXPORT_SYMBOL(proc_doulongvec_minmax); | 2518 | EXPORT_SYMBOL(proc_doulongvec_minmax); |
| 3010 | EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); | 2519 | EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); |
| 3011 | EXPORT_SYMBOL(register_sysctl_table); | ||
| 3012 | EXPORT_SYMBOL(register_sysctl_paths); | ||
| 3013 | EXPORT_SYMBOL(unregister_sysctl_table); | ||
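The removed kernel-doc above describes how a ctl_table hierarchy is filled in and registered; the registration entry points presumably live elsewhere after this series, but the calling convention it documents still reads like the following hedged sketch (all names invented):

    #include <linux/sysctl.h>
    #include <linux/init.h>

    static int example_value;

    static struct ctl_table example_table[] = {
        {
            .procname     = "example_value",  /* /proc/sys/kernel/example_value */
            .data         = &example_value,
            .maxlen       = sizeof(int),
            .mode         = 0644,
            .proc_handler = proc_dointvec,
        },
        { }     /* completely zero-filled entry terminates the table */
    };

    static const struct ctl_path example_path[] = {
        { .procname = "kernel" },
        { }
    };

    static struct ctl_table_header *example_header;

    static int __init example_sysctl_init(void)
    {
        example_header = register_sysctl_paths(example_path, example_table);
        return example_header ? 0 : -ENOMEM;
    }
    core_initcall(example_sysctl_init);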
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c deleted file mode 100644 index 362da653813d..000000000000 --- a/kernel/sysctl_check.c +++ /dev/null | |||
| @@ -1,160 +0,0 @@ | |||
| 1 | #include <linux/stat.h> | ||
| 2 | #include <linux/sysctl.h> | ||
| 3 | #include "../fs/xfs/xfs_sysctl.h" | ||
| 4 | #include <linux/sunrpc/debug.h> | ||
| 5 | #include <linux/string.h> | ||
| 6 | #include <net/ip_vs.h> | ||
| 7 | |||
| 8 | |||
| 9 | static int sysctl_depth(struct ctl_table *table) | ||
| 10 | { | ||
| 11 | struct ctl_table *tmp; | ||
| 12 | int depth; | ||
| 13 | |||
| 14 | depth = 0; | ||
| 15 | for (tmp = table; tmp->parent; tmp = tmp->parent) | ||
| 16 | depth++; | ||
| 17 | |||
| 18 | return depth; | ||
| 19 | } | ||
| 20 | |||
| 21 | static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) | ||
| 22 | { | ||
| 23 | int i; | ||
| 24 | |||
| 25 | for (i = 0; table && i < n; i++) | ||
| 26 | table = table->parent; | ||
| 27 | |||
| 28 | return table; | ||
| 29 | } | ||
| 30 | |||
| 31 | |||
| 32 | static void sysctl_print_path(struct ctl_table *table) | ||
| 33 | { | ||
| 34 | struct ctl_table *tmp; | ||
| 35 | int depth, i; | ||
| 36 | depth = sysctl_depth(table); | ||
| 37 | if (table->procname) { | ||
| 38 | for (i = depth; i >= 0; i--) { | ||
| 39 | tmp = sysctl_parent(table, i); | ||
| 40 | printk("/%s", tmp->procname?tmp->procname:""); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | printk(" "); | ||
| 44 | } | ||
| 45 | |||
| 46 | static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, | ||
| 47 | struct ctl_table *table) | ||
| 48 | { | ||
| 49 | struct ctl_table_header *head; | ||
| 50 | struct ctl_table *ref, *test; | ||
| 51 | int depth, cur_depth; | ||
| 52 | |||
| 53 | depth = sysctl_depth(table); | ||
| 54 | |||
| 55 | for (head = __sysctl_head_next(namespaces, NULL); head; | ||
| 56 | head = __sysctl_head_next(namespaces, head)) { | ||
| 57 | cur_depth = depth; | ||
| 58 | ref = head->ctl_table; | ||
| 59 | repeat: | ||
| 60 | test = sysctl_parent(table, cur_depth); | ||
| 61 | for (; ref->procname; ref++) { | ||
| 62 | int match = 0; | ||
| 63 | if (cur_depth && !ref->child) | ||
| 64 | continue; | ||
| 65 | |||
| 66 | if (test->procname && ref->procname && | ||
| 67 | (strcmp(test->procname, ref->procname) == 0)) | ||
| 68 | match++; | ||
| 69 | |||
| 70 | if (match) { | ||
| 71 | if (cur_depth != 0) { | ||
| 72 | cur_depth--; | ||
| 73 | ref = ref->child; | ||
| 74 | goto repeat; | ||
| 75 | } | ||
| 76 | goto out; | ||
| 77 | } | ||
| 78 | } | ||
| 79 | } | ||
| 80 | ref = NULL; | ||
| 81 | out: | ||
| 82 | sysctl_head_finish(head); | ||
| 83 | return ref; | ||
| 84 | } | ||
| 85 | |||
| 86 | static void set_fail(const char **fail, struct ctl_table *table, const char *str) | ||
| 87 | { | ||
| 88 | if (*fail) { | ||
| 89 | printk(KERN_ERR "sysctl table check failed: "); | ||
| 90 | sysctl_print_path(table); | ||
| 91 | printk(" %s\n", *fail); | ||
| 92 | dump_stack(); | ||
| 93 | } | ||
| 94 | *fail = str; | ||
| 95 | } | ||
| 96 | |||
| 97 | static void sysctl_check_leaf(struct nsproxy *namespaces, | ||
| 98 | struct ctl_table *table, const char **fail) | ||
| 99 | { | ||
| 100 | struct ctl_table *ref; | ||
| 101 | |||
| 102 | ref = sysctl_check_lookup(namespaces, table); | ||
| 103 | if (ref && (ref != table)) | ||
| 104 | set_fail(fail, table, "Sysctl already exists"); | ||
| 105 | } | ||
| 106 | |||
| 107 | int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | ||
| 108 | { | ||
| 109 | int error = 0; | ||
| 110 | for (; table->procname; table++) { | ||
| 111 | const char *fail = NULL; | ||
| 112 | |||
| 113 | if (table->parent) { | ||
| 114 | if (!table->parent->procname) | ||
| 115 | set_fail(&fail, table, "Parent without procname"); | ||
| 116 | } | ||
| 117 | if (table->child) { | ||
| 118 | if (table->data) | ||
| 119 | set_fail(&fail, table, "Directory with data?"); | ||
| 120 | if (table->maxlen) | ||
| 121 | set_fail(&fail, table, "Directory with maxlen?"); | ||
| 122 | if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) | ||
| 123 | set_fail(&fail, table, "Writable sysctl directory"); | ||
| 124 | if (table->proc_handler) | ||
| 125 | set_fail(&fail, table, "Directory with proc_handler"); | ||
| 126 | if (table->extra1) | ||
| 127 | set_fail(&fail, table, "Directory with extra1"); | ||
| 128 | if (table->extra2) | ||
| 129 | set_fail(&fail, table, "Directory with extra2"); | ||
| 130 | } else { | ||
| 131 | if ((table->proc_handler == proc_dostring) || | ||
| 132 | (table->proc_handler == proc_dointvec) || | ||
| 133 | (table->proc_handler == proc_dointvec_minmax) || | ||
| 134 | (table->proc_handler == proc_dointvec_jiffies) || | ||
| 135 | (table->proc_handler == proc_dointvec_userhz_jiffies) || | ||
| 136 | (table->proc_handler == proc_dointvec_ms_jiffies) || | ||
| 137 | (table->proc_handler == proc_doulongvec_minmax) || | ||
| 138 | (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { | ||
| 139 | if (!table->data) | ||
| 140 | set_fail(&fail, table, "No data"); | ||
| 141 | if (!table->maxlen) | ||
| 142 | set_fail(&fail, table, "No maxlen"); | ||
| 143 | } | ||
| 144 | #ifdef CONFIG_PROC_SYSCTL | ||
| 145 | if (!table->proc_handler) | ||
| 146 | set_fail(&fail, table, "No proc_handler"); | ||
| 147 | #endif | ||
| 148 | sysctl_check_leaf(namespaces, table, &fail); | ||
| 149 | } | ||
| 150 | if (table->mode > 0777) | ||
| 151 | set_fail(&fail, table, "bogus .mode"); | ||
| 152 | if (fail) { | ||
| 153 | set_fail(&fail, table, NULL); | ||
| 154 | error = -EINVAL; | ||
| 155 | } | ||
| 156 | if (table->child) | ||
| 157 | error |= sysctl_check_table(namespaces, table->child); | ||
| 158 | } | ||
| 159 | return error; | ||
| 160 | } | ||
diff --git a/kernel/time.c b/kernel/time.c index 73e416db0a1e..ba744cf80696 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
| @@ -163,7 +163,6 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz) | |||
| 163 | return error; | 163 | return error; |
| 164 | 164 | ||
| 165 | if (tz) { | 165 | if (tz) { |
| 166 | /* SMP safe, global irq locking makes it work. */ | ||
| 167 | sys_tz = *tz; | 166 | sys_tz = *tz; |
| 168 | update_vsyscall_tz(); | 167 | update_vsyscall_tz(); |
| 169 | if (firsttime) { | 168 | if (firsttime) { |
| @@ -173,12 +172,7 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz) | |||
| 173 | } | 172 | } |
| 174 | } | 173 | } |
| 175 | if (tv) | 174 | if (tv) |
| 176 | { | ||
| 177 | /* SMP safe, again the code in arch/foo/time.c should | ||
| 178 | * globally block out interrupts when it runs. | ||
| 179 | */ | ||
| 180 | return do_settimeofday(tv); | 175 | return do_settimeofday(tv); |
| 181 | } | ||
| 182 | return 0; | 176 | return 0; |
| 183 | } | 177 | } |
| 184 | 178 | ||
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 2cf9cc7aa103..a20dc8a3c949 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig | |||
| @@ -1,6 +1,10 @@ | |||
| 1 | # | 1 | # |
| 2 | # Timer subsystem related configuration options | 2 | # Timer subsystem related configuration options |
| 3 | # | 3 | # |
| 4 | |||
| 5 | # Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is | ||
| 6 | # only related to the tick functionality. Oneshot clockevent devices | ||
| 7 | # are supported independent of this. | ||
| 4 | config TICK_ONESHOT | 8 | config TICK_ONESHOT |
| 5 | bool | 9 | bool |
| 6 | 10 | ||
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 8a46f5d64504..8a538c55fc7b 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c | |||
| @@ -96,6 +96,11 @@ static int alarmtimer_rtc_add_device(struct device *dev, | |||
| 96 | return 0; | 96 | return 0; |
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | static inline void alarmtimer_rtc_timer_init(void) | ||
| 100 | { | ||
| 101 | rtc_timer_init(&rtctimer, NULL, NULL); | ||
| 102 | } | ||
| 103 | |||
| 99 | static struct class_interface alarmtimer_rtc_interface = { | 104 | static struct class_interface alarmtimer_rtc_interface = { |
| 100 | .add_dev = &alarmtimer_rtc_add_device, | 105 | .add_dev = &alarmtimer_rtc_add_device, |
| 101 | }; | 106 | }; |
| @@ -117,6 +122,7 @@ static inline struct rtc_device *alarmtimer_get_rtcdev(void) | |||
| 117 | #define rtcdev (NULL) | 122 | #define rtcdev (NULL) |
| 118 | static inline int alarmtimer_rtc_interface_setup(void) { return 0; } | 123 | static inline int alarmtimer_rtc_interface_setup(void) { return 0; } |
| 119 | static inline void alarmtimer_rtc_interface_remove(void) { } | 124 | static inline void alarmtimer_rtc_interface_remove(void) { } |
| 125 | static inline void alarmtimer_rtc_timer_init(void) { } | ||
| 120 | #endif | 126 | #endif |
| 121 | 127 | ||
| 122 | /** | 128 | /** |
| @@ -783,6 +789,8 @@ static int __init alarmtimer_init(void) | |||
| 783 | .nsleep = alarm_timer_nsleep, | 789 | .nsleep = alarm_timer_nsleep, |
| 784 | }; | 790 | }; |
| 785 | 791 | ||
| 792 | alarmtimer_rtc_timer_init(); | ||
| 793 | |||
| 786 | posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); | 794 | posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); |
| 787 | posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); | 795 | posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); |
| 788 | 796 | ||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index a45ca167ab24..c9583382141a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -500,7 +500,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) | |||
| 500 | { | 500 | { |
| 501 | u64 ret; | 501 | u64 ret; |
| 502 | /* | 502 | /* |
| 503 | * We won't try to correct for more then 11% adjustments (110,000 ppm), | 503 | * We won't try to correct for more than 11% adjustments (110,000 ppm), |
| 504 | */ | 504 | */ |
| 505 | ret = (u64)cs->mult * 11; | 505 | ret = (u64)cs->mult * 11; |
| 506 | do_div(ret,100); | 506 | do_div(ret,100); |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 6e039b144daf..f03fd83b170b 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -34,8 +34,6 @@ unsigned long tick_nsec; | |||
| 34 | static u64 tick_length; | 34 | static u64 tick_length; |
| 35 | static u64 tick_length_base; | 35 | static u64 tick_length_base; |
| 36 | 36 | ||
| 37 | static struct hrtimer leap_timer; | ||
| 38 | |||
| 39 | #define MAX_TICKADJ 500LL /* usecs */ | 37 | #define MAX_TICKADJ 500LL /* usecs */ |
| 40 | #define MAX_TICKADJ_SCALED \ | 38 | #define MAX_TICKADJ_SCALED \ |
| 41 | (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) | 39 | (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) |
| @@ -381,70 +379,63 @@ u64 ntp_tick_length(void) | |||
| 381 | 379 | ||
| 382 | 380 | ||
| 383 | /* | 381 | /* |
| 384 | * Leap second processing. If in leap-insert state at the end of the | 382 | * this routine handles the overflow of the microsecond field |
| 385 | * day, the system clock is set back one second; if in leap-delete | 383 | * |
| 386 | * state, the system clock is set ahead one second. | 384 | * The tricky bits of code to handle the accurate clock support |
| 385 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
| 386 | * They were originally developed for SUN and DEC kernels. | ||
| 387 | * All the kudos should go to Dave for this stuff. | ||
| 388 | * | ||
| 389 | * Also handles leap second processing, and returns leap offset | ||
| 387 | */ | 390 | */ |
| 388 | static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) | 391 | int second_overflow(unsigned long secs) |
| 389 | { | 392 | { |
| 390 | enum hrtimer_restart res = HRTIMER_NORESTART; | 393 | s64 delta; |
| 391 | unsigned long flags; | ||
| 392 | int leap = 0; | 394 | int leap = 0; |
| 395 | unsigned long flags; | ||
| 393 | 396 | ||
| 394 | spin_lock_irqsave(&ntp_lock, flags); | 397 | spin_lock_irqsave(&ntp_lock, flags); |
| 398 | |||
| 399 | /* | ||
| 400 | * Leap second processing. If in leap-insert state at the end of the | ||
| 401 | * day, the system clock is set back one second; if in leap-delete | ||
| 402 | * state, the system clock is set ahead one second. | ||
| 403 | */ | ||
| 395 | switch (time_state) { | 404 | switch (time_state) { |
| 396 | case TIME_OK: | 405 | case TIME_OK: |
| 406 | if (time_status & STA_INS) | ||
| 407 | time_state = TIME_INS; | ||
| 408 | else if (time_status & STA_DEL) | ||
| 409 | time_state = TIME_DEL; | ||
| 397 | break; | 410 | break; |
| 398 | case TIME_INS: | 411 | case TIME_INS: |
| 399 | leap = -1; | 412 | if (secs % 86400 == 0) { |
| 400 | time_state = TIME_OOP; | 413 | leap = -1; |
| 401 | printk(KERN_NOTICE | 414 | time_state = TIME_OOP; |
| 402 | "Clock: inserting leap second 23:59:60 UTC\n"); | 415 | printk(KERN_NOTICE |
| 403 | hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); | 416 | "Clock: inserting leap second 23:59:60 UTC\n"); |
| 404 | res = HRTIMER_RESTART; | 417 | } |
| 405 | break; | 418 | break; |
| 406 | case TIME_DEL: | 419 | case TIME_DEL: |
| 407 | leap = 1; | 420 | if ((secs + 1) % 86400 == 0) { |
| 408 | time_tai--; | 421 | leap = 1; |
| 409 | time_state = TIME_WAIT; | 422 | time_tai--; |
| 410 | printk(KERN_NOTICE | 423 | time_state = TIME_WAIT; |
| 411 | "Clock: deleting leap second 23:59:59 UTC\n"); | 424 | printk(KERN_NOTICE |
| 425 | "Clock: deleting leap second 23:59:59 UTC\n"); | ||
| 426 | } | ||
| 412 | break; | 427 | break; |
| 413 | case TIME_OOP: | 428 | case TIME_OOP: |
| 414 | time_tai++; | 429 | time_tai++; |
| 415 | time_state = TIME_WAIT; | 430 | time_state = TIME_WAIT; |
| 416 | /* fall through */ | 431 | break; |
| 432 | |||
| 417 | case TIME_WAIT: | 433 | case TIME_WAIT: |
| 418 | if (!(time_status & (STA_INS | STA_DEL))) | 434 | if (!(time_status & (STA_INS | STA_DEL))) |
| 419 | time_state = TIME_OK; | 435 | time_state = TIME_OK; |
| 420 | break; | 436 | break; |
| 421 | } | 437 | } |
| 422 | spin_unlock_irqrestore(&ntp_lock, flags); | ||
| 423 | 438 | ||
| 424 | /* | ||
| 425 | * We have to call this outside of the ntp_lock to keep | ||
| 426 | * the proper locking hierarchy | ||
| 427 | */ | ||
| 428 | if (leap) | ||
| 429 | timekeeping_leap_insert(leap); | ||
| 430 | |||
| 431 | return res; | ||
| 432 | } | ||
| 433 | |||
| 434 | /* | ||
| 435 | * this routine handles the overflow of the microsecond field | ||
| 436 | * | ||
| 437 | * The tricky bits of code to handle the accurate clock support | ||
| 438 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
| 439 | * They were originally developed for SUN and DEC kernels. | ||
| 440 | * All the kudos should go to Dave for this stuff. | ||
| 441 | */ | ||
| 442 | void second_overflow(void) | ||
| 443 | { | ||
| 444 | s64 delta; | ||
| 445 | unsigned long flags; | ||
| 446 | |||
| 447 | spin_lock_irqsave(&ntp_lock, flags); | ||
| 448 | 439 | ||
| 449 | /* Bump the maxerror field */ | 440 | /* Bump the maxerror field */ |
| 450 | time_maxerror += MAXFREQ / NSEC_PER_USEC; | 441 | time_maxerror += MAXFREQ / NSEC_PER_USEC; |
| @@ -481,15 +472,17 @@ void second_overflow(void) | |||
| 481 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) | 472 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) |
| 482 | << NTP_SCALE_SHIFT; | 473 | << NTP_SCALE_SHIFT; |
| 483 | time_adjust = 0; | 474 | time_adjust = 0; |
| 475 | |||
| 476 | |||
| 477 | |||
| 484 | out: | 478 | out: |
| 485 | spin_unlock_irqrestore(&ntp_lock, flags); | 479 | spin_unlock_irqrestore(&ntp_lock, flags); |
| 480 | |||
| 481 | return leap; | ||
| 486 | } | 482 | } |
| 487 | 483 | ||
| 488 | #ifdef CONFIG_GENERIC_CMOS_UPDATE | 484 | #ifdef CONFIG_GENERIC_CMOS_UPDATE |
| 489 | 485 | ||
| 490 | /* Disable the cmos update - used by virtualization and embedded */ | ||
| 491 | int no_sync_cmos_clock __read_mostly; | ||
| 492 | |||
| 493 | static void sync_cmos_clock(struct work_struct *work); | 486 | static void sync_cmos_clock(struct work_struct *work); |
| 494 | 487 | ||
| 495 | static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); | 488 | static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); |
| @@ -536,35 +529,13 @@ static void sync_cmos_clock(struct work_struct *work) | |||
| 536 | 529 | ||
| 537 | static void notify_cmos_timer(void) | 530 | static void notify_cmos_timer(void) |
| 538 | { | 531 | { |
| 539 | if (!no_sync_cmos_clock) | 532 | schedule_delayed_work(&sync_cmos_work, 0); |
| 540 | schedule_delayed_work(&sync_cmos_work, 0); | ||
| 541 | } | 533 | } |
| 542 | 534 | ||
| 543 | #else | 535 | #else |
| 544 | static inline void notify_cmos_timer(void) { } | 536 | static inline void notify_cmos_timer(void) { } |
| 545 | #endif | 537 | #endif |
| 546 | 538 | ||
| 547 | /* | ||
| 548 | * Start the leap seconds timer: | ||
| 549 | */ | ||
| 550 | static inline void ntp_start_leap_timer(struct timespec *ts) | ||
| 551 | { | ||
| 552 | long now = ts->tv_sec; | ||
| 553 | |||
| 554 | if (time_status & STA_INS) { | ||
| 555 | time_state = TIME_INS; | ||
| 556 | now += 86400 - now % 86400; | ||
| 557 | hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); | ||
| 558 | |||
| 559 | return; | ||
| 560 | } | ||
| 561 | |||
| 562 | if (time_status & STA_DEL) { | ||
| 563 | time_state = TIME_DEL; | ||
| 564 | now += 86400 - (now + 1) % 86400; | ||
| 565 | hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); | ||
| 566 | } | ||
| 567 | } | ||
| 568 | 539 | ||
| 569 | /* | 540 | /* |
| 570 | * Propagate a new txc->status value into the NTP state: | 541 | * Propagate a new txc->status value into the NTP state: |
| @@ -589,22 +560,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) | |||
| 589 | time_status &= STA_RONLY; | 560 | time_status &= STA_RONLY; |
| 590 | time_status |= txc->status & ~STA_RONLY; | 561 | time_status |= txc->status & ~STA_RONLY; |
| 591 | 562 | ||
| 592 | switch (time_state) { | ||
| 593 | case TIME_OK: | ||
| 594 | ntp_start_leap_timer(ts); | ||
| 595 | break; | ||
| 596 | case TIME_INS: | ||
| 597 | case TIME_DEL: | ||
| 598 | time_state = TIME_OK; | ||
| 599 | ntp_start_leap_timer(ts); | ||
| 600 | case TIME_WAIT: | ||
| 601 | if (!(time_status & (STA_INS | STA_DEL))) | ||
| 602 | time_state = TIME_OK; | ||
| 603 | break; | ||
| 604 | case TIME_OOP: | ||
| 605 | hrtimer_restart(&leap_timer); | ||
| 606 | break; | ||
| 607 | } | ||
| 608 | } | 563 | } |
| 609 | /* | 564 | /* |
| 610 | * Called with the xtime lock held, so we can access and modify | 565 | * Called with the xtime lock held, so we can access and modify |
| @@ -686,9 +641,6 @@ int do_adjtimex(struct timex *txc) | |||
| 686 | (txc->tick < 900000/USER_HZ || | 641 | (txc->tick < 900000/USER_HZ || |
| 687 | txc->tick > 1100000/USER_HZ)) | 642 | txc->tick > 1100000/USER_HZ)) |
| 688 | return -EINVAL; | 643 | return -EINVAL; |
| 689 | |||
| 690 | if (txc->modes & ADJ_STATUS && time_state != TIME_OK) | ||
| 691 | hrtimer_cancel(&leap_timer); | ||
| 692 | } | 644 | } |
| 693 | 645 | ||
| 694 | if (txc->modes & ADJ_SETOFFSET) { | 646 | if (txc->modes & ADJ_SETOFFSET) { |
| @@ -1010,6 +962,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); | |||
| 1010 | void __init ntp_init(void) | 962 | void __init ntp_init(void) |
| 1011 | { | 963 | { |
| 1012 | ntp_clear(); | 964 | ntp_clear(); |
| 1013 | hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | ||
| 1014 | leap_timer.function = ntp_leap_second; | ||
| 1015 | } | 965 | } |
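The leap-second hrtimer is gone: second_overflow() now takes the current wall-clock second, uses secs % 86400 to detect the UTC day rollover, and hands back a -1/0/+1 offset for the caller to fold in. A hedged sketch of the new calling convention, mirroring the timekeeping.c hunks further down:

    #include <linux/time.h>
    #include <linux/timex.h>    /* assumed home of int second_overflow(unsigned long) */

    /* assumed minimal caller; the real accumulation loop lives in timekeeping.c */
    static void example_accumulate_one_second(struct timespec *xtime)
    {
        int leap;

        xtime->tv_sec++;
        leap = second_overflow(xtime->tv_sec);  /* -1, 0 or +1 */
        xtime->tv_sec += leap;
    }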
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index e883f57a3cd3..bf57abdc7bd0 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
| @@ -575,10 +575,12 @@ void tick_broadcast_switch_to_oneshot(void) | |||
| 575 | unsigned long flags; | 575 | unsigned long flags; |
| 576 | 576 | ||
| 577 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 577 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
| 578 | |||
| 579 | tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; | ||
| 580 | |||
| 578 | if (cpumask_empty(tick_get_broadcast_mask())) | 581 | if (cpumask_empty(tick_get_broadcast_mask())) |
| 579 | goto end; | 582 | goto end; |
| 580 | 583 | ||
| 581 | tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; | ||
| 582 | bc = tick_broadcast_device.evtdev; | 584 | bc = tick_broadcast_device.evtdev; |
| 583 | if (bc) | 585 | if (bc) |
| 584 | tick_broadcast_setup_oneshot(bc); | 586 | tick_broadcast_setup_oneshot(bc); |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3526038f2836..6a3a5b9ff561 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -534,9 +534,9 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | |||
| 534 | hrtimer_get_expires(&ts->sched_timer), 0)) | 534 | hrtimer_get_expires(&ts->sched_timer), 0)) |
| 535 | break; | 535 | break; |
| 536 | } | 536 | } |
| 537 | /* Update jiffies and reread time */ | 537 | /* Reread time and update jiffies */ |
| 538 | tick_do_update_jiffies64(now); | ||
| 539 | now = ktime_get(); | 538 | now = ktime_get(); |
| 539 | tick_do_update_jiffies64(now); | ||
| 540 | } | 540 | } |
| 541 | } | 541 | } |
| 542 | 542 | ||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 403c2a092830..d66b21308f7c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp) | |||
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | 186 | ||
| 187 | void timekeeping_leap_insert(int leapsecond) | ||
| 188 | { | ||
| 189 | unsigned long flags; | ||
| 190 | |||
| 191 | write_seqlock_irqsave(&timekeeper.lock, flags); | ||
| 192 | timekeeper.xtime.tv_sec += leapsecond; | ||
| 193 | timekeeper.wall_to_monotonic.tv_sec -= leapsecond; | ||
| 194 | timekeeping_update(false); | ||
| 195 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | ||
| 196 | |||
| 197 | } | ||
| 198 | |||
| 199 | /** | 187 | /** |
| 200 | * timekeeping_forward_now - update clock to the current time | 188 | * timekeeping_forward_now - update clock to the current time |
| 201 | * | 189 | * |
| @@ -448,9 +436,12 @@ EXPORT_SYMBOL(timekeeping_inject_offset); | |||
| 448 | static int change_clocksource(void *data) | 436 | static int change_clocksource(void *data) |
| 449 | { | 437 | { |
| 450 | struct clocksource *new, *old; | 438 | struct clocksource *new, *old; |
| 439 | unsigned long flags; | ||
| 451 | 440 | ||
| 452 | new = (struct clocksource *) data; | 441 | new = (struct clocksource *) data; |
| 453 | 442 | ||
| 443 | write_seqlock_irqsave(&timekeeper.lock, flags); | ||
| 444 | |||
| 454 | timekeeping_forward_now(); | 445 | timekeeping_forward_now(); |
| 455 | if (!new->enable || new->enable(new) == 0) { | 446 | if (!new->enable || new->enable(new) == 0) { |
| 456 | old = timekeeper.clock; | 447 | old = timekeeper.clock; |
| @@ -458,6 +449,10 @@ static int change_clocksource(void *data) | |||
| 458 | if (old->disable) | 449 | if (old->disable) |
| 459 | old->disable(old); | 450 | old->disable(old); |
| 460 | } | 451 | } |
| 452 | timekeeping_update(true); | ||
| 453 | |||
| 454 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | ||
| 455 | |||
| 461 | return 0; | 456 | return 0; |
| 462 | } | 457 | } |
| 463 | 458 | ||
| @@ -827,7 +822,7 @@ static void timekeeping_adjust(s64 offset) | |||
| 827 | int adj; | 822 | int adj; |
| 828 | 823 | ||
| 829 | /* | 824 | /* |
| 830 | * The point of this is to check if the error is greater then half | 825 | * The point of this is to check if the error is greater than half |
| 831 | * an interval. | 826 | * an interval. |
| 832 | * | 827 | * |
| 833 | * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. | 828 | * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. |
| @@ -835,7 +830,7 @@ static void timekeeping_adjust(s64 offset) | |||
| 835 | * Note we subtract one in the shift, so that error is really error*2. | 830 | * Note we subtract one in the shift, so that error is really error*2. |
| 836 | * This "saves" dividing(shifting) interval twice, but keeps the | 831 | * This "saves" dividing(shifting) interval twice, but keeps the |
| 837 | * (error > interval) comparison as still measuring if error is | 832 | * (error > interval) comparison as still measuring if error is |
| 838 | * larger then half an interval. | 833 | * larger than half an interval. |
| 839 | * | 834 | * |
| 840 | * Note: It does not "save" on aggravation when reading the code. | 835 | * Note: It does not "save" on aggravation when reading the code. |
| 841 | */ | 836 | */ |
| @@ -843,7 +838,7 @@ static void timekeeping_adjust(s64 offset) | |||
| 843 | if (error > interval) { | 838 | if (error > interval) { |
| 844 | /* | 839 | /* |
| 845 | * We now divide error by 4(via shift), which checks if | 840 | * We now divide error by 4(via shift), which checks if |
| 846 | * the error is greater then twice the interval. | 841 | * the error is greater than twice the interval. |
| 847 | * If it is greater, we need a bigadjust, if its smaller, | 842 | * If it is greater, we need a bigadjust, if its smaller, |
| 848 | * we can adjust by 1. | 843 | * we can adjust by 1. |
| 849 | */ | 844 | */ |
| @@ -874,13 +869,15 @@ static void timekeeping_adjust(s64 offset) | |||
| 874 | } else /* No adjustment needed */ | 869 | } else /* No adjustment needed */ |
| 875 | return; | 870 | return; |
| 876 | 871 | ||
| 877 | WARN_ONCE(timekeeper.clock->maxadj && | 872 | if (unlikely(timekeeper.clock->maxadj && |
| 878 | (timekeeper.mult + adj > timekeeper.clock->mult + | 873 | (timekeeper.mult + adj > |
| 879 | timekeeper.clock->maxadj), | 874 | timekeeper.clock->mult + timekeeper.clock->maxadj))) { |
| 880 | "Adjusting %s more then 11%% (%ld vs %ld)\n", | 875 | printk_once(KERN_WARNING |
| 876 | "Adjusting %s more than 11%% (%ld vs %ld)\n", | ||
| 881 | timekeeper.clock->name, (long)timekeeper.mult + adj, | 877 | timekeeper.clock->name, (long)timekeeper.mult + adj, |
| 882 | (long)timekeeper.clock->mult + | 878 | (long)timekeeper.clock->mult + |
| 883 | timekeeper.clock->maxadj); | 879 | timekeeper.clock->maxadj); |
| 880 | } | ||
| 884 | /* | 881 | /* |
| 885 | * So the following can be confusing. | 882 | * So the following can be confusing. |
| 886 | * | 883 | * |
| @@ -952,7 +949,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) | |||
| 952 | u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; | 949 | u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; |
| 953 | u64 raw_nsecs; | 950 | u64 raw_nsecs; |
| 954 | 951 | ||
| 955 | /* If the offset is smaller then a shifted interval, do nothing */ | 952 | /* If the offset is smaller than a shifted interval, do nothing */ |
| 956 | if (offset < timekeeper.cycle_interval<<shift) | 953 | if (offset < timekeeper.cycle_interval<<shift) |
| 957 | return offset; | 954 | return offset; |
| 958 | 955 | ||
| @@ -962,9 +959,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) | |||
| 962 | 959 | ||
| 963 | timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; | 960 | timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; |
| 964 | while (timekeeper.xtime_nsec >= nsecps) { | 961 | while (timekeeper.xtime_nsec >= nsecps) { |
| 962 | int leap; | ||
| 965 | timekeeper.xtime_nsec -= nsecps; | 963 | timekeeper.xtime_nsec -= nsecps; |
| 966 | timekeeper.xtime.tv_sec++; | 964 | timekeeper.xtime.tv_sec++; |
| 967 | second_overflow(); | 965 | leap = second_overflow(timekeeper.xtime.tv_sec); |
| 966 | timekeeper.xtime.tv_sec += leap; | ||
| 968 | } | 967 | } |
| 969 | 968 | ||
| 970 | /* Accumulate raw time */ | 969 | /* Accumulate raw time */ |
| @@ -1018,13 +1017,13 @@ static void update_wall_time(void) | |||
| 1018 | * With NO_HZ we may have to accumulate many cycle_intervals | 1017 | * With NO_HZ we may have to accumulate many cycle_intervals |
| 1019 | * (think "ticks") worth of time at once. To do this efficiently, | 1018 | * (think "ticks") worth of time at once. To do this efficiently, |
| 1020 | * we calculate the largest doubling multiple of cycle_intervals | 1019 | * we calculate the largest doubling multiple of cycle_intervals |
| 1021 | * that is smaller then the offset. We then accumulate that | 1020 | * that is smaller than the offset. We then accumulate that |
| 1022 | * chunk in one go, and then try to consume the next smaller | 1021 | * chunk in one go, and then try to consume the next smaller |
| 1023 | * doubled multiple. | 1022 | * doubled multiple. |
| 1024 | */ | 1023 | */ |
| 1025 | shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); | 1024 | shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); |
| 1026 | shift = max(0, shift); | 1025 | shift = max(0, shift); |
| 1027 | /* Bound shift to one less then what overflows tick_length */ | 1026 | /* Bound shift to one less than what overflows tick_length */ |
| 1028 | maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; | 1027 | maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; |
| 1029 | shift = min(shift, maxshift); | 1028 | shift = min(shift, maxshift); |
| 1030 | while (offset >= timekeeper.cycle_interval) { | 1029 | while (offset >= timekeeper.cycle_interval) { |
| @@ -1072,12 +1071,14 @@ static void update_wall_time(void) | |||
| 1072 | 1071 | ||
| 1073 | /* | 1072 | /* |
| 1074 | * Finally, make sure that after the rounding | 1073 | * Finally, make sure that after the rounding |
| 1075 | * xtime.tv_nsec isn't larger then NSEC_PER_SEC | 1074 | * xtime.tv_nsec isn't larger than NSEC_PER_SEC |
| 1076 | */ | 1075 | */ |
| 1077 | if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { | 1076 | if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { |
| 1077 | int leap; | ||
| 1078 | timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; | 1078 | timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; |
| 1079 | timekeeper.xtime.tv_sec++; | 1079 | timekeeper.xtime.tv_sec++; |
| 1080 | second_overflow(); | 1080 | leap = second_overflow(timekeeper.xtime.tv_sec); |
| 1081 | timekeeper.xtime.tv_sec += leap; | ||
| 1081 | } | 1082 | } |
| 1082 | 1083 | ||
| 1083 | timekeeping_update(false); | 1084 | timekeeping_update(false); |
| @@ -1260,6 +1261,8 @@ ktime_t ktime_get_monotonic_offset(void) | |||
| 1260 | 1261 | ||
| 1261 | return timespec_to_ktime(wtom); | 1262 | return timespec_to_ktime(wtom); |
| 1262 | } | 1263 | } |
| 1264 | EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); | ||
| 1265 | |||
| 1263 | 1266 | ||
| 1264 | /** | 1267 | /** |
| 1265 | * xtime_update() - advances the timekeeping infrastructure | 1268 | * xtime_update() - advances the timekeeping infrastructure |
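One interface change in this file is the new EXPORT_SYMBOL_GPL on ktime_get_monotonic_offset(), which lets GPL modules read the wall-to-monotonic offset directly. A hedged module snippet, not part of the patch (the declaration is assumed to come from <linux/hrtimer.h>):

    #include <linux/module.h>
    #include <linux/ktime.h>
    #include <linux/hrtimer.h>  /* ktime_get_monotonic_offset() declaration assumed here */

    static int __init offset_demo_init(void)
    {
        ktime_t wtom = ktime_get_monotonic_offset();

        pr_info("wall_to_monotonic offset: %lld ns\n",
                (long long)ktime_to_ns(wtom));
        return 0;
    }

    static void __exit offset_demo_exit(void)
    {
    }

    module_init(offset_demo_init);
    module_exit(offset_demo_exit);
    MODULE_LICENSE("GPL");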
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index cd3134510f3d..a1d2849f2473 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
| @@ -141,7 +141,7 @@ if FTRACE | |||
| 141 | config FUNCTION_TRACER | 141 | config FUNCTION_TRACER |
| 142 | bool "Kernel Function Tracer" | 142 | bool "Kernel Function Tracer" |
| 143 | depends on HAVE_FUNCTION_TRACER | 143 | depends on HAVE_FUNCTION_TRACER |
| 144 | select FRAME_POINTER if !ARM_UNWIND && !S390 && !MICROBLAZE | 144 | select FRAME_POINTER if !ARM_UNWIND && !PPC && !S390 && !MICROBLAZE |
| 145 | select KALLSYMS | 145 | select KALLSYMS |
| 146 | select GENERIC_TRACER | 146 | select GENERIC_TRACER |
| 147 | select CONTEXT_SWITCH_TRACER | 147 | select CONTEXT_SWITCH_TRACER |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index cdea7b56b0c9..c0bd0308741c 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
| @@ -311,13 +311,6 @@ int blk_trace_remove(struct request_queue *q) | |||
| 311 | } | 311 | } |
| 312 | EXPORT_SYMBOL_GPL(blk_trace_remove); | 312 | EXPORT_SYMBOL_GPL(blk_trace_remove); |
| 313 | 313 | ||
| 314 | static int blk_dropped_open(struct inode *inode, struct file *filp) | ||
| 315 | { | ||
| 316 | filp->private_data = inode->i_private; | ||
| 317 | |||
| 318 | return 0; | ||
| 319 | } | ||
| 320 | |||
| 321 | static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, | 314 | static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, |
| 322 | size_t count, loff_t *ppos) | 315 | size_t count, loff_t *ppos) |
| 323 | { | 316 | { |
| @@ -331,18 +324,11 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, | |||
| 331 | 324 | ||
| 332 | static const struct file_operations blk_dropped_fops = { | 325 | static const struct file_operations blk_dropped_fops = { |
| 333 | .owner = THIS_MODULE, | 326 | .owner = THIS_MODULE, |
| 334 | .open = blk_dropped_open, | 327 | .open = simple_open, |
| 335 | .read = blk_dropped_read, | 328 | .read = blk_dropped_read, |
| 336 | .llseek = default_llseek, | 329 | .llseek = default_llseek, |
| 337 | }; | 330 | }; |
| 338 | 331 | ||
| 339 | static int blk_msg_open(struct inode *inode, struct file *filp) | ||
| 340 | { | ||
| 341 | filp->private_data = inode->i_private; | ||
| 342 | |||
| 343 | return 0; | ||
| 344 | } | ||
| 345 | |||
| 346 | static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, | 332 | static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, |
| 347 | size_t count, loff_t *ppos) | 333 | size_t count, loff_t *ppos) |
| 348 | { | 334 | { |
| @@ -371,7 +357,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, | |||
| 371 | 357 | ||
| 372 | static const struct file_operations blk_msg_fops = { | 358 | static const struct file_operations blk_msg_fops = { |
| 373 | .owner = THIS_MODULE, | 359 | .owner = THIS_MODULE, |
| 374 | .open = blk_msg_open, | 360 | .open = simple_open, |
| 375 | .write = blk_msg_write, | 361 | .write = blk_msg_write, |
| 376 | .llseek = noop_llseek, | 362 | .llseek = noop_llseek, |
| 377 | }; | 363 | }; |
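The two removed open handlers did nothing beyond copying inode->i_private into filp->private_data, so the patch points .open at the generic simple_open() helper from fs/libfs.c instead. For reference, the helper behaves roughly like this (a sketch, not a verbatim copy of the library code):

    /* Roughly what fs/libfs.c:simple_open() does for these debugfs files. */
    int simple_open(struct inode *inode, struct file *file)
    {
    	if (inode->i_private)
    		file->private_data = inode->i_private;
    	return 0;
    }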
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 867bd1dd2dd0..0fa92f677c92 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -249,7 +249,8 @@ static void update_ftrace_function(void) | |||
| 249 | #else | 249 | #else |
| 250 | __ftrace_trace_function = func; | 250 | __ftrace_trace_function = func; |
| 251 | #endif | 251 | #endif |
| 252 | ftrace_trace_function = ftrace_test_stop_func; | 252 | ftrace_trace_function = |
| 253 | (func == ftrace_stub) ? func : ftrace_test_stop_func; | ||
| 253 | #endif | 254 | #endif |
| 254 | } | 255 | } |
| 255 | 256 | ||
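The update_ftrace_function() change stops routing calls through ftrace_test_stop_func() when the resolved function is ftrace_stub, since there is nothing to trace in that case. As a rough illustration of why that matters, the stop-test wrapper of this era adds a branch and an indirect call on every traced function entry (sketch, assuming the pre-ops two-argument calling convention):

    /* Illustrative sketch of the wrapper that the stub case now bypasses. */
    static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
    {
    	if (!function_trace_stop)
    		__ftrace_trace_function(ip, parent_ip);
    }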
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f5b7b5c1195b..cf8d11e91efd 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -154,33 +154,10 @@ enum { | |||
| 154 | 154 | ||
| 155 | static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; | 155 | static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; |
| 156 | 156 | ||
| 157 | #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) | 157 | /* Used for individual buffers (after the counter) */ |
| 158 | 158 | #define RB_BUFFER_OFF (1 << 20) | |
| 159 | /** | ||
| 160 | * tracing_on - enable all tracing buffers | ||
| 161 | * | ||
| 162 | * This function enables all tracing buffers that may have been | ||
| 163 | * disabled with tracing_off. | ||
| 164 | */ | ||
| 165 | void tracing_on(void) | ||
| 166 | { | ||
| 167 | set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); | ||
| 168 | } | ||
| 169 | EXPORT_SYMBOL_GPL(tracing_on); | ||
| 170 | 159 | ||
| 171 | /** | 160 | #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) |
| 172 | * tracing_off - turn off all tracing buffers | ||
| 173 | * | ||
| 174 | * This function stops all tracing buffers from recording data. | ||
| 175 | * It does not disable any overhead the tracers themselves may | ||
| 176 | * be causing. This function simply causes all recording to | ||
| 177 | * the ring buffers to fail. | ||
| 178 | */ | ||
| 179 | void tracing_off(void) | ||
| 180 | { | ||
| 181 | clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); | ||
| 182 | } | ||
| 183 | EXPORT_SYMBOL_GPL(tracing_off); | ||
| 184 | 161 | ||
| 185 | /** | 162 | /** |
| 186 | * tracing_off_permanent - permanently disable ring buffers | 163 | * tracing_off_permanent - permanently disable ring buffers |
| @@ -193,15 +170,6 @@ void tracing_off_permanent(void) | |||
| 193 | set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); | 170 | set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); |
| 194 | } | 171 | } |
| 195 | 172 | ||
| 196 | /** | ||
| 197 | * tracing_is_on - show state of ring buffers enabled | ||
| 198 | */ | ||
| 199 | int tracing_is_on(void) | ||
| 200 | { | ||
| 201 | return ring_buffer_flags == RB_BUFFERS_ON; | ||
| 202 | } | ||
| 203 | EXPORT_SYMBOL_GPL(tracing_is_on); | ||
| 204 | |||
| 205 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) | 173 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
| 206 | #define RB_ALIGNMENT 4U | 174 | #define RB_ALIGNMENT 4U |
| 207 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 175 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
| @@ -2619,6 +2587,63 @@ void ring_buffer_record_enable(struct ring_buffer *buffer) | |||
| 2619 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable); | 2587 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable); |
| 2620 | 2588 | ||
| 2621 | /** | 2589 | /** |
| 2590 | * ring_buffer_record_off - stop all writes into the buffer | ||
| 2591 | * @buffer: The ring buffer to stop writes to. | ||
| 2592 | * | ||
| 2593 | * This prevents all writes to the buffer. Any attempt to write | ||
| 2594 | * to the buffer after this will fail and return NULL. | ||
| 2595 | * | ||
| 2596 | * This is different than ring_buffer_record_disable() as | ||
| 2597 | * it works like an on/off switch, whereas the disable() version | ||
| 2598 | * must be paired with an enable(). | ||
| 2599 | */ | ||
| 2600 | void ring_buffer_record_off(struct ring_buffer *buffer) | ||
| 2601 | { | ||
| 2602 | unsigned int rd; | ||
| 2603 | unsigned int new_rd; | ||
| 2604 | |||
| 2605 | do { | ||
| 2606 | rd = atomic_read(&buffer->record_disabled); | ||
| 2607 | new_rd = rd | RB_BUFFER_OFF; | ||
| 2608 | } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); | ||
| 2609 | } | ||
| 2610 | EXPORT_SYMBOL_GPL(ring_buffer_record_off); | ||
| 2611 | |||
| 2612 | /** | ||
| 2613 | * ring_buffer_record_on - restart writes into the buffer | ||
| 2614 | * @buffer: The ring buffer to start writes to. | ||
| 2615 | * | ||
| 2616 | * This enables all writes to the buffer that was disabled by | ||
| 2617 | * ring_buffer_record_off(). | ||
| 2618 | * | ||
| 2619 | * This is different than ring_buffer_record_enable() as | ||
| 2620 | * it works like an on/off switch, whereas the enable() version | ||
| 2621 | * must be paired with a disable(). | ||
| 2622 | */ | ||
| 2623 | void ring_buffer_record_on(struct ring_buffer *buffer) | ||
| 2624 | { | ||
| 2625 | unsigned int rd; | ||
| 2626 | unsigned int new_rd; | ||
| 2627 | |||
| 2628 | do { | ||
| 2629 | rd = atomic_read(&buffer->record_disabled); | ||
| 2630 | new_rd = rd & ~RB_BUFFER_OFF; | ||
| 2631 | } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); | ||
| 2632 | } | ||
| 2633 | EXPORT_SYMBOL_GPL(ring_buffer_record_on); | ||
| 2634 | |||
| 2635 | /** | ||
| 2636 | * ring_buffer_record_is_on - return true if the ring buffer can write | ||
| 2637 | * @buffer: The ring buffer to see if write is enabled | ||
| 2638 | * | ||
| 2639 | * Returns true if the ring buffer is in a state where it accepts writes. | ||
| 2640 | */ | ||
| 2641 | int ring_buffer_record_is_on(struct ring_buffer *buffer) | ||
| 2642 | { | ||
| 2643 | return !atomic_read(&buffer->record_disabled); | ||
| 2644 | } | ||
| 2645 | |||
| 2646 | /** | ||
| 2622 | * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer | 2647 | * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer |
| 2623 | * @buffer: The ring buffer to stop writes to. | 2648 | * @buffer: The ring buffer to stop writes to. |
| 2624 | * @cpu: The CPU buffer to stop | 2649 | * @cpu: The CPU buffer to stop |
| @@ -4039,68 +4064,6 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
| 4039 | } | 4064 | } |
| 4040 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); | 4065 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); |
| 4041 | 4066 | ||
| 4042 | #ifdef CONFIG_TRACING | ||
| 4043 | static ssize_t | ||
| 4044 | rb_simple_read(struct file *filp, char __user *ubuf, | ||
| 4045 | size_t cnt, loff_t *ppos) | ||
| 4046 | { | ||
| 4047 | unsigned long *p = filp->private_data; | ||
| 4048 | char buf[64]; | ||
| 4049 | int r; | ||
| 4050 | |||
| 4051 | if (test_bit(RB_BUFFERS_DISABLED_BIT, p)) | ||
| 4052 | r = sprintf(buf, "permanently disabled\n"); | ||
| 4053 | else | ||
| 4054 | r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p)); | ||
| 4055 | |||
| 4056 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
| 4057 | } | ||
| 4058 | |||
| 4059 | static ssize_t | ||
| 4060 | rb_simple_write(struct file *filp, const char __user *ubuf, | ||
| 4061 | size_t cnt, loff_t *ppos) | ||
| 4062 | { | ||
| 4063 | unsigned long *p = filp->private_data; | ||
| 4064 | unsigned long val; | ||
| 4065 | int ret; | ||
| 4066 | |||
| 4067 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | ||
| 4068 | if (ret) | ||
| 4069 | return ret; | ||
| 4070 | |||
| 4071 | if (val) | ||
| 4072 | set_bit(RB_BUFFERS_ON_BIT, p); | ||
| 4073 | else | ||
| 4074 | clear_bit(RB_BUFFERS_ON_BIT, p); | ||
| 4075 | |||
| 4076 | (*ppos)++; | ||
| 4077 | |||
| 4078 | return cnt; | ||
| 4079 | } | ||
| 4080 | |||
| 4081 | static const struct file_operations rb_simple_fops = { | ||
| 4082 | .open = tracing_open_generic, | ||
| 4083 | .read = rb_simple_read, | ||
| 4084 | .write = rb_simple_write, | ||
| 4085 | .llseek = default_llseek, | ||
| 4086 | }; | ||
| 4087 | |||
| 4088 | |||
| 4089 | static __init int rb_init_debugfs(void) | ||
| 4090 | { | ||
| 4091 | struct dentry *d_tracer; | ||
| 4092 | |||
| 4093 | d_tracer = tracing_init_dentry(); | ||
| 4094 | |||
| 4095 | trace_create_file("tracing_on", 0644, d_tracer, | ||
| 4096 | &ring_buffer_flags, &rb_simple_fops); | ||
| 4097 | |||
| 4098 | return 0; | ||
| 4099 | } | ||
| 4100 | |||
| 4101 | fs_initcall(rb_init_debugfs); | ||
| 4102 | #endif | ||
| 4103 | |||
| 4104 | #ifdef CONFIG_HOTPLUG_CPU | 4067 | #ifdef CONFIG_HOTPLUG_CPU |
| 4105 | static int rb_cpu_notify(struct notifier_block *self, | 4068 | static int rb_cpu_notify(struct notifier_block *self, |
| 4106 | unsigned long action, void *hcpu) | 4069 | unsigned long action, void *hcpu) |
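ring_buffer_record_off()/on() above set and clear the RB_BUFFER_OFF bit with an atomic_cmpxchg() retry loop, so the on/off switch can flip concurrently with the counting record_disable()/enable() pairs without either update being lost. The same set-a-flag-above-the-counter pattern, written with C11 atomics so it can be compiled and tested in user space (names here are illustrative, not kernel API):

    #include <stdatomic.h>

    #define BUF_OFF (1u << 20)	/* flag bit kept above the nesting counter */

    /* Set the off flag without disturbing a concurrent counter update. */
    static void record_off(atomic_uint *record_disabled)
    {
    	unsigned int rd = atomic_load(record_disabled);

    	/* retry until our read-modify-write lands without interference;
    	 * on failure the helper reloads rd with the current value */
    	while (!atomic_compare_exchange_weak(record_disabled, &rd, rd | BUF_OFF))
    		;
    }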
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 10d5503f0d04..ed7b5d1e12f4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <linux/ctype.h> | 36 | #include <linux/ctype.h> |
| 37 | #include <linux/init.h> | 37 | #include <linux/init.h> |
| 38 | #include <linux/poll.h> | 38 | #include <linux/poll.h> |
| 39 | #include <linux/nmi.h> | ||
| 39 | #include <linux/fs.h> | 40 | #include <linux/fs.h> |
| 40 | 41 | ||
| 41 | #include "trace.h" | 42 | #include "trace.h" |
| @@ -352,6 +353,59 @@ static void wakeup_work_handler(struct work_struct *work) | |||
| 352 | static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); | 353 | static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); |
| 353 | 354 | ||
| 354 | /** | 355 | /** |
| 356 | * tracing_on - enable tracing buffers | ||
| 357 | * | ||
| 358 | * This function enables tracing buffers that may have been | ||
| 359 | * disabled with tracing_off. | ||
| 360 | */ | ||
| 361 | void tracing_on(void) | ||
| 362 | { | ||
| 363 | if (global_trace.buffer) | ||
| 364 | ring_buffer_record_on(global_trace.buffer); | ||
| 365 | /* | ||
| 366 | * This flag is only looked at when buffers haven't been | ||
| 367 | * allocated yet. We don't really care about the race | ||
| 368 | * between setting this flag and actually turning | ||
| 369 | * on the buffer. | ||
| 370 | */ | ||
| 371 | global_trace.buffer_disabled = 0; | ||
| 372 | } | ||
| 373 | EXPORT_SYMBOL_GPL(tracing_on); | ||
| 374 | |||
| 375 | /** | ||
| 376 | * tracing_off - turn off tracing buffers | ||
| 377 | * | ||
| 378 | * This function stops the tracing buffers from recording data. | ||
| 379 | * It does not disable any overhead the tracers themselves may | ||
| 380 | * be causing. This function simply causes all recording to | ||
| 381 | * the ring buffers to fail. | ||
| 382 | */ | ||
| 383 | void tracing_off(void) | ||
| 384 | { | ||
| 385 | if (global_trace.buffer) | ||
| 386 | ring_buffer_record_off(global_trace.buffer); | ||
| 387 | /* | ||
| 388 | * This flag is only looked at when buffers haven't been | ||
| 389 | * allocated yet. We don't really care about the race | ||
| 390 | * between setting this flag and actually turning | ||
| 391 | * off the buffer. | ||
| 392 | */ | ||
| 393 | global_trace.buffer_disabled = 1; | ||
| 394 | } | ||
| 395 | EXPORT_SYMBOL_GPL(tracing_off); | ||
| 396 | |||
| 397 | /** | ||
| 398 | * tracing_is_on - show state of ring buffers enabled | ||
| 399 | */ | ||
| 400 | int tracing_is_on(void) | ||
| 401 | { | ||
| 402 | if (global_trace.buffer) | ||
| 403 | return ring_buffer_record_is_on(global_trace.buffer); | ||
| 404 | return !global_trace.buffer_disabled; | ||
| 405 | } | ||
| 406 | EXPORT_SYMBOL_GPL(tracing_is_on); | ||
| 407 | |||
| 408 | /** | ||
| 355 | * trace_wake_up - wake up tasks waiting for trace input | 409 | * trace_wake_up - wake up tasks waiting for trace input |
| 356 | * | 410 | * |
| 357 | * Schedules a delayed work to wake up any task that is blocked on the | 411 | * Schedules a delayed work to wake up any task that is blocked on the |
| @@ -1644,6 +1698,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, | |||
| 1644 | int cpu_file = iter->cpu_file; | 1698 | int cpu_file = iter->cpu_file; |
| 1645 | u64 next_ts = 0, ts; | 1699 | u64 next_ts = 0, ts; |
| 1646 | int next_cpu = -1; | 1700 | int next_cpu = -1; |
| 1701 | int next_size = 0; | ||
| 1647 | int cpu; | 1702 | int cpu; |
| 1648 | 1703 | ||
| 1649 | /* | 1704 | /* |
| @@ -1675,9 +1730,12 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, | |||
| 1675 | next_cpu = cpu; | 1730 | next_cpu = cpu; |
| 1676 | next_ts = ts; | 1731 | next_ts = ts; |
| 1677 | next_lost = lost_events; | 1732 | next_lost = lost_events; |
| 1733 | next_size = iter->ent_size; | ||
| 1678 | } | 1734 | } |
| 1679 | } | 1735 | } |
| 1680 | 1736 | ||
| 1737 | iter->ent_size = next_size; | ||
| 1738 | |||
| 1681 | if (ent_cpu) | 1739 | if (ent_cpu) |
| 1682 | *ent_cpu = next_cpu; | 1740 | *ent_cpu = next_cpu; |
| 1683 | 1741 | ||
| @@ -4567,6 +4625,55 @@ static __init void create_trace_options_dir(void) | |||
| 4567 | create_trace_option_core_file(trace_options[i], i); | 4625 | create_trace_option_core_file(trace_options[i], i); |
| 4568 | } | 4626 | } |
| 4569 | 4627 | ||
| 4628 | static ssize_t | ||
| 4629 | rb_simple_read(struct file *filp, char __user *ubuf, | ||
| 4630 | size_t cnt, loff_t *ppos) | ||
| 4631 | { | ||
| 4632 | struct ring_buffer *buffer = filp->private_data; | ||
| 4633 | char buf[64]; | ||
| 4634 | int r; | ||
| 4635 | |||
| 4636 | if (buffer) | ||
| 4637 | r = ring_buffer_record_is_on(buffer); | ||
| 4638 | else | ||
| 4639 | r = 0; | ||
| 4640 | |||
| 4641 | r = sprintf(buf, "%d\n", r); | ||
| 4642 | |||
| 4643 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
| 4644 | } | ||
| 4645 | |||
| 4646 | static ssize_t | ||
| 4647 | rb_simple_write(struct file *filp, const char __user *ubuf, | ||
| 4648 | size_t cnt, loff_t *ppos) | ||
| 4649 | { | ||
| 4650 | struct ring_buffer *buffer = filp->private_data; | ||
| 4651 | unsigned long val; | ||
| 4652 | int ret; | ||
| 4653 | |||
| 4654 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | ||
| 4655 | if (ret) | ||
| 4656 | return ret; | ||
| 4657 | |||
| 4658 | if (buffer) { | ||
| 4659 | if (val) | ||
| 4660 | ring_buffer_record_on(buffer); | ||
| 4661 | else | ||
| 4662 | ring_buffer_record_off(buffer); | ||
| 4663 | } | ||
| 4664 | |||
| 4665 | (*ppos)++; | ||
| 4666 | |||
| 4667 | return cnt; | ||
| 4668 | } | ||
| 4669 | |||
| 4670 | static const struct file_operations rb_simple_fops = { | ||
| 4671 | .open = tracing_open_generic, | ||
| 4672 | .read = rb_simple_read, | ||
| 4673 | .write = rb_simple_write, | ||
| 4674 | .llseek = default_llseek, | ||
| 4675 | }; | ||
| 4676 | |||
| 4570 | static __init int tracer_init_debugfs(void) | 4677 | static __init int tracer_init_debugfs(void) |
| 4571 | { | 4678 | { |
| 4572 | struct dentry *d_tracer; | 4679 | struct dentry *d_tracer; |
| @@ -4626,6 +4733,9 @@ static __init int tracer_init_debugfs(void) | |||
| 4626 | trace_create_file("trace_clock", 0644, d_tracer, NULL, | 4733 | trace_create_file("trace_clock", 0644, d_tracer, NULL, |
| 4627 | &trace_clock_fops); | 4734 | &trace_clock_fops); |
| 4628 | 4735 | ||
| 4736 | trace_create_file("tracing_on", 0644, d_tracer, | ||
| 4737 | global_trace.buffer, &rb_simple_fops); | ||
| 4738 | |||
| 4629 | #ifdef CONFIG_DYNAMIC_FTRACE | 4739 | #ifdef CONFIG_DYNAMIC_FTRACE |
| 4630 | trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, | 4740 | trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, |
| 4631 | &ftrace_update_tot_cnt, &tracing_dyn_info_fops); | 4741 | &ftrace_update_tot_cnt, &tracing_dyn_info_fops); |
| @@ -4798,6 +4908,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) | |||
| 4798 | if (ret != TRACE_TYPE_NO_CONSUME) | 4908 | if (ret != TRACE_TYPE_NO_CONSUME) |
| 4799 | trace_consume(&iter); | 4909 | trace_consume(&iter); |
| 4800 | } | 4910 | } |
| 4911 | touch_nmi_watchdog(); | ||
| 4801 | 4912 | ||
| 4802 | trace_printk_seq(&iter.seq); | 4913 | trace_printk_seq(&iter.seq); |
| 4803 | } | 4914 | } |
| @@ -4863,6 +4974,8 @@ __init static int tracer_alloc_buffers(void) | |||
| 4863 | goto out_free_cpumask; | 4974 | goto out_free_cpumask; |
| 4864 | } | 4975 | } |
| 4865 | global_trace.entries = ring_buffer_size(global_trace.buffer); | 4976 | global_trace.entries = ring_buffer_size(global_trace.buffer); |
| 4977 | if (global_trace.buffer_disabled) | ||
| 4978 | tracing_off(); | ||
| 4866 | 4979 | ||
| 4867 | 4980 | ||
| 4868 | #ifdef CONFIG_TRACER_MAX_TRACE | 4981 | #ifdef CONFIG_TRACER_MAX_TRACE |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54faec790bc1..95059f091a24 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -154,6 +154,7 @@ struct trace_array { | |||
| 154 | struct ring_buffer *buffer; | 154 | struct ring_buffer *buffer; |
| 155 | unsigned long entries; | 155 | unsigned long entries; |
| 156 | int cpu; | 156 | int cpu; |
| 157 | int buffer_disabled; | ||
| 157 | cycle_t time_start; | 158 | cycle_t time_start; |
| 158 | struct task_struct *waiter; | 159 | struct task_struct *waiter; |
| 159 | struct trace_array_cpu *data[NR_CPUS]; | 160 | struct trace_array_cpu *data[NR_CPUS]; |
| @@ -835,13 +836,11 @@ extern const char *__stop___trace_bprintk_fmt[]; | |||
| 835 | filter) | 836 | filter) |
| 836 | #include "trace_entries.h" | 837 | #include "trace_entries.h" |
| 837 | 838 | ||
| 838 | #ifdef CONFIG_PERF_EVENTS | ||
| 839 | #ifdef CONFIG_FUNCTION_TRACER | 839 | #ifdef CONFIG_FUNCTION_TRACER |
| 840 | int perf_ftrace_event_register(struct ftrace_event_call *call, | 840 | int perf_ftrace_event_register(struct ftrace_event_call *call, |
| 841 | enum trace_reg type, void *data); | 841 | enum trace_reg type, void *data); |
| 842 | #else | 842 | #else |
| 843 | #define perf_ftrace_event_register NULL | 843 | #define perf_ftrace_event_register NULL |
| 844 | #endif /* CONFIG_FUNCTION_TRACER */ | 844 | #endif /* CONFIG_FUNCTION_TRACER */ |
| 845 | #endif /* CONFIG_PERF_EVENTS */ | ||
| 846 | 845 | ||
| 847 | #endif /* _LINUX_KERNEL_TRACE_H */ | 846 | #endif /* _LINUX_KERNEL_TRACE_H */ |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index d91eb0541b3a..4108e1250ca2 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
| @@ -166,6 +166,12 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, | |||
| 166 | 166 | ||
| 167 | #define FTRACE_STACK_ENTRIES 8 | 167 | #define FTRACE_STACK_ENTRIES 8 |
| 168 | 168 | ||
| 169 | #ifndef CONFIG_64BIT | ||
| 170 | # define IP_FMT "%08lx" | ||
| 171 | #else | ||
| 172 | # define IP_FMT "%016lx" | ||
| 173 | #endif | ||
| 174 | |||
| 169 | FTRACE_ENTRY(kernel_stack, stack_entry, | 175 | FTRACE_ENTRY(kernel_stack, stack_entry, |
| 170 | 176 | ||
| 171 | TRACE_STACK, | 177 | TRACE_STACK, |
| @@ -175,8 +181,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry, | |||
| 175 | __dynamic_array(unsigned long, caller ) | 181 | __dynamic_array(unsigned long, caller ) |
| 176 | ), | 182 | ), |
| 177 | 183 | ||
| 178 | F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | 184 | F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" |
| 179 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", | 185 | "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" |
| 186 | "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n", | ||
| 180 | __entry->caller[0], __entry->caller[1], __entry->caller[2], | 187 | __entry->caller[0], __entry->caller[1], __entry->caller[2], |
| 181 | __entry->caller[3], __entry->caller[4], __entry->caller[5], | 188 | __entry->caller[3], __entry->caller[4], __entry->caller[5], |
| 182 | __entry->caller[6], __entry->caller[7]), | 189 | __entry->caller[6], __entry->caller[7]), |
| @@ -193,8 +200,9 @@ FTRACE_ENTRY(user_stack, userstack_entry, | |||
| 193 | __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) | 200 | __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) |
| 194 | ), | 201 | ), |
| 195 | 202 | ||
| 196 | F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | 203 | F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" |
| 197 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", | 204 | "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" |
| 205 | "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n", | ||
| 198 | __entry->caller[0], __entry->caller[1], __entry->caller[2], | 206 | __entry->caller[0], __entry->caller[1], __entry->caller[2], |
| 199 | __entry->caller[3], __entry->caller[4], __entry->caller[5], | 207 | __entry->caller[3], __entry->caller[4], __entry->caller[5], |
| 200 | __entry->caller[6], __entry->caller[7]), | 208 | __entry->caller[6], __entry->caller[7]), |
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 7b46c9bd22ae..3dd15e8bc856 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
| @@ -162,7 +162,7 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ | |||
| 162 | #define __dynamic_array(type, item) | 162 | #define __dynamic_array(type, item) |
| 163 | 163 | ||
| 164 | #undef F_printk | 164 | #undef F_printk |
| 165 | #define F_printk(fmt, args...) #fmt ", " __stringify(args) | 165 | #define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args) |
| 166 | 166 | ||
| 167 | #undef FTRACE_ENTRY_REG | 167 | #undef FTRACE_ENTRY_REG |
| 168 | #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ | 168 | #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ |
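The F_printk() change matters because the plain # operator stringizes its argument without macro expansion, so once trace_entries.h builds formats out of IP_FMT the old #fmt would have emitted the literal text IP_FMT into the format string. The kernel's two-level __stringify() expands first and then stringizes. A small user-space illustration of the difference (the helper definitions mirror <linux/stringify.h>):

    #include <stdio.h>

    #define __stringify_1(x...)	#x
    #define __stringify(x...)	__stringify_1(x)	/* expand, then stringize */
    #define direct_str(x)		#x			/* stringize as-is */

    #define IP_FMT "%016lx"

    int main(void)
    {
    	puts(direct_str(IP_FMT " ip"));		/* prints: IP_FMT " ip"   (macro name survives, unusable) */
    	puts(__stringify(IP_FMT " ip"));	/* prints: "%016lx" " ip" (expanded, concatenable format) */
    	return 0;
    }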
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index c5a01873567d..859fae6b1825 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
| @@ -264,7 +264,7 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len) | |||
| 264 | return ret; | 264 | return ret; |
| 265 | } | 265 | } |
| 266 | 266 | ||
| 267 | int trace_seq_path(struct trace_seq *s, struct path *path) | 267 | int trace_seq_path(struct trace_seq *s, const struct path *path) |
| 268 | { | 268 | { |
| 269 | unsigned char *p; | 269 | unsigned char *p; |
| 270 | 270 | ||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 14bc092fb12c..df30ee08bdd4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -9,6 +9,8 @@ | |||
| 9 | * to those contributors as well. | 9 | * to those contributors as well. |
| 10 | */ | 10 | */ |
| 11 | 11 | ||
| 12 | #define pr_fmt(fmt) "NMI watchdog: " fmt | ||
| 13 | |||
| 12 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
| 13 | #include <linux/cpu.h> | 15 | #include <linux/cpu.h> |
| 14 | #include <linux/nmi.h> | 16 | #include <linux/nmi.h> |
| @@ -319,11 +321,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
| 319 | */ | 321 | */ |
| 320 | static int watchdog(void *unused) | 322 | static int watchdog(void *unused) |
| 321 | { | 323 | { |
| 322 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 324 | struct sched_param param = { .sched_priority = 0 }; |
| 323 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); | 325 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); |
| 324 | 326 | ||
| 325 | sched_setscheduler(current, SCHED_FIFO, ¶m); | ||
| 326 | |||
| 327 | /* initialize timestamp */ | 327 | /* initialize timestamp */ |
| 328 | __touch_watchdog(); | 328 | __touch_watchdog(); |
| 329 | 329 | ||
| @@ -349,8 +349,11 @@ static int watchdog(void *unused) | |||
| 349 | 349 | ||
| 350 | set_current_state(TASK_INTERRUPTIBLE); | 350 | set_current_state(TASK_INTERRUPTIBLE); |
| 351 | } | 351 | } |
| 352 | /* | ||
| 353 | * Drop the policy/priority elevation during thread exit to avoid a | ||
| 354 | * scheduling latency spike. | ||
| 355 | */ | ||
| 352 | __set_current_state(TASK_RUNNING); | 356 | __set_current_state(TASK_RUNNING); |
| 353 | param.sched_priority = 0; | ||
| 354 | sched_setscheduler(current, SCHED_NORMAL, ¶m); | 357 | sched_setscheduler(current, SCHED_NORMAL, ¶m); |
| 355 | return 0; | 358 | return 0; |
| 356 | } | 359 | } |
| @@ -376,18 +379,20 @@ static int watchdog_nmi_enable(int cpu) | |||
| 376 | /* Try to register using hardware perf events */ | 379 | /* Try to register using hardware perf events */ |
| 377 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); | 380 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); |
| 378 | if (!IS_ERR(event)) { | 381 | if (!IS_ERR(event)) { |
| 379 | printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); | 382 | pr_info("enabled, takes one hw-pmu counter.\n"); |
| 380 | goto out_save; | 383 | goto out_save; |
| 381 | } | 384 | } |
| 382 | 385 | ||
| 383 | 386 | ||
| 384 | /* vary the KERN level based on the returned errno */ | 387 | /* vary the KERN level based on the returned errno */ |
| 385 | if (PTR_ERR(event) == -EOPNOTSUPP) | 388 | if (PTR_ERR(event) == -EOPNOTSUPP) |
| 386 | printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu); | 389 | pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu); |
| 387 | else if (PTR_ERR(event) == -ENOENT) | 390 | else if (PTR_ERR(event) == -ENOENT) |
| 388 | printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu); | 391 | pr_warning("disabled (cpu%i): hardware events not enabled\n", |
| 392 | cpu); | ||
| 389 | else | 393 | else |
| 390 | printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); | 394 | pr_err("disabled (cpu%i): unable to create perf event: %ld\n", |
| 395 | cpu, PTR_ERR(event)); | ||
| 391 | return PTR_ERR(event); | 396 | return PTR_ERR(event); |
| 392 | 397 | ||
| 393 | /* success path */ | 398 | /* success path */ |
| @@ -439,9 +444,10 @@ static int watchdog_enable(int cpu) | |||
| 439 | 444 | ||
| 440 | /* create the watchdog thread */ | 445 | /* create the watchdog thread */ |
| 441 | if (!p) { | 446 | if (!p) { |
| 447 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
| 442 | p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); | 448 | p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); |
| 443 | if (IS_ERR(p)) { | 449 | if (IS_ERR(p)) { |
| 444 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); | 450 | pr_err("softlockup watchdog for %i failed\n", cpu); |
| 445 | if (!err) { | 451 | if (!err) { |
| 446 | /* if hardlockup hasn't already set this */ | 452 | /* if hardlockup hasn't already set this */ |
| 447 | err = PTR_ERR(p); | 453 | err = PTR_ERR(p); |
| @@ -450,6 +456,7 @@ static int watchdog_enable(int cpu) | |||
| 450 | } | 456 | } |
| 451 | goto out; | 457 | goto out; |
| 452 | } | 458 | } |
| 459 | sched_setscheduler(p, SCHED_FIFO, ¶m); | ||
| 453 | kthread_bind(p, cpu); | 460 | kthread_bind(p, cpu); |
| 454 | per_cpu(watchdog_touch_ts, cpu) = 0; | 461 | per_cpu(watchdog_touch_ts, cpu) = 0; |
| 455 | per_cpu(softlockup_watchdog, cpu) = p; | 462 | per_cpu(softlockup_watchdog, cpu) = p; |
| @@ -496,7 +503,7 @@ static void watchdog_enable_all_cpus(void) | |||
| 496 | watchdog_enabled = 1; | 503 | watchdog_enabled = 1; |
| 497 | 504 | ||
| 498 | if (!watchdog_enabled) | 505 | if (!watchdog_enabled) |
| 499 | printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); | 506 | pr_err("failed to be enabled on some cpus\n"); |
| 500 | 507 | ||
| 501 | } | 508 | } |
| 502 | 509 | ||
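The watchdog change moves the SCHED_FIFO elevation out of the thread itself and into watchdog_enable(): the creator sets the policy on the kthread before binding and waking it, and the thread only has to drop back to SCHED_NORMAL on exit. A sketch of that creation pattern in general form (the function and thread names here are hypothetical, not the watchdog code):

    #include <linux/kthread.h>
    #include <linux/sched.h>
    #include <linux/topology.h>	/* cpu_to_node() */

    static struct task_struct *start_fifo_worker(int (*fn)(void *), int cpu)
    {
    	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
    	struct task_struct *p;

    	p = kthread_create_on_node(fn, NULL, cpu_to_node(cpu),
    				   "fifo_worker/%d", cpu);
    	if (IS_ERR(p))
    		return p;

    	/* the creating context, not the thread, picks the RT policy */
    	sched_setscheduler(p, SCHED_FIFO, &param);
    	kthread_bind(p, cpu);
    	wake_up_process(p);
    	return p;
    }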
