diff options
Diffstat (limited to 'kernel')
47 files changed, 1801 insertions, 1140 deletions
diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer new file mode 100644 index 000000000000..a3bb4cb52539 --- /dev/null +++ b/kernel/Kconfig.freezer | |||
@@ -0,0 +1,2 @@ | |||
1 | config FREEZER | ||
2 | def_bool PM_SLEEP || CGROUP_FREEZER | ||
diff --git a/kernel/Makefile b/kernel/Makefile index 85f588a9d0b1..9a3ec66a9d84 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg | |||
24 | CFLAGS_REMOVE_sched.o = -mno-spe -pg | 24 | CFLAGS_REMOVE_sched.o = -mno-spe -pg |
25 | endif | 25 | endif |
26 | 26 | ||
27 | obj-$(CONFIG_FREEZER) += freezer.o | ||
27 | obj-$(CONFIG_PROFILING) += profile.o | 28 | obj-$(CONFIG_PROFILING) += profile.o |
28 | obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o | 29 | obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o |
29 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 30 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
@@ -55,6 +56,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o | |||
55 | obj-$(CONFIG_COMPAT) += compat.o | 56 | obj-$(CONFIG_COMPAT) += compat.o |
56 | obj-$(CONFIG_CGROUPS) += cgroup.o | 57 | obj-$(CONFIG_CGROUPS) += cgroup.o |
57 | obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o | 58 | obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o |
59 | obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o | ||
58 | obj-$(CONFIG_CPUSETS) += cpuset.o | 60 | obj-$(CONFIG_CPUSETS) += cpuset.o |
59 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o | 61 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o |
60 | obj-$(CONFIG_UTS_NS) += utsname.o | 62 | obj-$(CONFIG_UTS_NS) += utsname.o |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 8c6e1c17e6d3..046c1609606b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg) | |||
241 | struct cg_cgroup_link *link; | 241 | struct cg_cgroup_link *link; |
242 | struct cg_cgroup_link *saved_link; | 242 | struct cg_cgroup_link *saved_link; |
243 | 243 | ||
244 | write_lock(&css_set_lock); | ||
245 | hlist_del(&cg->hlist); | 244 | hlist_del(&cg->hlist); |
246 | css_set_count--; | 245 | css_set_count--; |
247 | 246 | ||
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg) | |||
251 | list_del(&link->cgrp_link_list); | 250 | list_del(&link->cgrp_link_list); |
252 | kfree(link); | 251 | kfree(link); |
253 | } | 252 | } |
254 | |||
255 | write_unlock(&css_set_lock); | ||
256 | } | 253 | } |
257 | 254 | ||
258 | static void __release_css_set(struct kref *k, int taskexit) | 255 | static void __put_css_set(struct css_set *cg, int taskexit) |
259 | { | 256 | { |
260 | int i; | 257 | int i; |
261 | struct css_set *cg = container_of(k, struct css_set, ref); | 258 | /* |
262 | 259 | * Ensure that the refcount doesn't hit zero while any readers | |
260 | * can see it. Similar to atomic_dec_and_lock(), but for an | ||
261 | * rwlock | ||
262 | */ | ||
263 | if (atomic_add_unless(&cg->refcount, -1, 1)) | ||
264 | return; | ||
265 | write_lock(&css_set_lock); | ||
266 | if (!atomic_dec_and_test(&cg->refcount)) { | ||
267 | write_unlock(&css_set_lock); | ||
268 | return; | ||
269 | } | ||
263 | unlink_css_set(cg); | 270 | unlink_css_set(cg); |
271 | write_unlock(&css_set_lock); | ||
264 | 272 | ||
265 | rcu_read_lock(); | 273 | rcu_read_lock(); |
266 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 274 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit) | |||
276 | kfree(cg); | 284 | kfree(cg); |
277 | } | 285 | } |
278 | 286 | ||
279 | static void release_css_set(struct kref *k) | ||
280 | { | ||
281 | __release_css_set(k, 0); | ||
282 | } | ||
283 | |||
284 | static void release_css_set_taskexit(struct kref *k) | ||
285 | { | ||
286 | __release_css_set(k, 1); | ||
287 | } | ||
288 | |||
289 | /* | 287 | /* |
290 | * refcounted get/put for css_set objects | 288 | * refcounted get/put for css_set objects |
291 | */ | 289 | */ |
292 | static inline void get_css_set(struct css_set *cg) | 290 | static inline void get_css_set(struct css_set *cg) |
293 | { | 291 | { |
294 | kref_get(&cg->ref); | 292 | atomic_inc(&cg->refcount); |
295 | } | 293 | } |
296 | 294 | ||
297 | static inline void put_css_set(struct css_set *cg) | 295 | static inline void put_css_set(struct css_set *cg) |
298 | { | 296 | { |
299 | kref_put(&cg->ref, release_css_set); | 297 | __put_css_set(cg, 0); |
300 | } | 298 | } |
301 | 299 | ||
302 | static inline void put_css_set_taskexit(struct css_set *cg) | 300 | static inline void put_css_set_taskexit(struct css_set *cg) |
303 | { | 301 | { |
304 | kref_put(&cg->ref, release_css_set_taskexit); | 302 | __put_css_set(cg, 1); |
305 | } | 303 | } |
306 | 304 | ||
307 | /* | 305 | /* |
@@ -427,7 +425,7 @@ static struct css_set *find_css_set( | |||
427 | return NULL; | 425 | return NULL; |
428 | } | 426 | } |
429 | 427 | ||
430 | kref_init(&res->ref); | 428 | atomic_set(&res->refcount, 1); |
431 | INIT_LIST_HEAD(&res->cg_links); | 429 | INIT_LIST_HEAD(&res->cg_links); |
432 | INIT_LIST_HEAD(&res->tasks); | 430 | INIT_LIST_HEAD(&res->tasks); |
433 | INIT_HLIST_NODE(&res->hlist); | 431 | INIT_HLIST_NODE(&res->hlist); |
@@ -870,6 +868,14 @@ static struct super_operations cgroup_ops = { | |||
870 | .remount_fs = cgroup_remount, | 868 | .remount_fs = cgroup_remount, |
871 | }; | 869 | }; |
872 | 870 | ||
871 | static void init_cgroup_housekeeping(struct cgroup *cgrp) | ||
872 | { | ||
873 | INIT_LIST_HEAD(&cgrp->sibling); | ||
874 | INIT_LIST_HEAD(&cgrp->children); | ||
875 | INIT_LIST_HEAD(&cgrp->css_sets); | ||
876 | INIT_LIST_HEAD(&cgrp->release_list); | ||
877 | init_rwsem(&cgrp->pids_mutex); | ||
878 | } | ||
873 | static void init_cgroup_root(struct cgroupfs_root *root) | 879 | static void init_cgroup_root(struct cgroupfs_root *root) |
874 | { | 880 | { |
875 | struct cgroup *cgrp = &root->top_cgroup; | 881 | struct cgroup *cgrp = &root->top_cgroup; |
@@ -878,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root) | |||
878 | root->number_of_cgroups = 1; | 884 | root->number_of_cgroups = 1; |
879 | cgrp->root = root; | 885 | cgrp->root = root; |
880 | cgrp->top_cgroup = cgrp; | 886 | cgrp->top_cgroup = cgrp; |
881 | INIT_LIST_HEAD(&cgrp->sibling); | 887 | init_cgroup_housekeeping(cgrp); |
882 | INIT_LIST_HEAD(&cgrp->children); | ||
883 | INIT_LIST_HEAD(&cgrp->css_sets); | ||
884 | INIT_LIST_HEAD(&cgrp->release_list); | ||
885 | } | 888 | } |
886 | 889 | ||
887 | static int cgroup_test_super(struct super_block *sb, void *data) | 890 | static int cgroup_test_super(struct super_block *sb, void *data) |
@@ -1728,7 +1731,7 @@ int cgroup_task_count(const struct cgroup *cgrp) | |||
1728 | 1731 | ||
1729 | read_lock(&css_set_lock); | 1732 | read_lock(&css_set_lock); |
1730 | list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { | 1733 | list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { |
1731 | count += atomic_read(&link->cg->ref.refcount); | 1734 | count += atomic_read(&link->cg->refcount); |
1732 | } | 1735 | } |
1733 | read_unlock(&css_set_lock); | 1736 | read_unlock(&css_set_lock); |
1734 | return count; | 1737 | return count; |
@@ -1997,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
1997 | * but we cannot guarantee that the information we produce is correct | 2000 | * but we cannot guarantee that the information we produce is correct |
1998 | * unless we produce it entirely atomically. | 2001 | * unless we produce it entirely atomically. |
1999 | * | 2002 | * |
2000 | * Upon tasks file open(), a struct ctr_struct is allocated, that | ||
2001 | * will have a pointer to an array (also allocated here). The struct | ||
2002 | * ctr_struct * is stored in file->private_data. Its resources will | ||
2003 | * be freed by release() when the file is closed. The array is used | ||
2004 | * to sprintf the PIDs and then used by read(). | ||
2005 | */ | 2003 | */ |
2006 | struct ctr_struct { | ||
2007 | char *buf; | ||
2008 | int bufsz; | ||
2009 | }; | ||
2010 | 2004 | ||
2011 | /* | 2005 | /* |
2012 | * Load into 'pidarray' up to 'npids' of the tasks using cgroup | 2006 | * Load into 'pidarray' up to 'npids' of the tasks using cgroup |
@@ -2088,42 +2082,132 @@ static int cmppid(const void *a, const void *b) | |||
2088 | return *(pid_t *)a - *(pid_t *)b; | 2082 | return *(pid_t *)a - *(pid_t *)b; |
2089 | } | 2083 | } |
2090 | 2084 | ||
2085 | |||
2091 | /* | 2086 | /* |
2092 | * Convert array 'a' of 'npids' pid_t's to a string of newline separated | 2087 | * seq_file methods for the "tasks" file. The seq_file position is the |
2093 | * decimal pids in 'buf'. Don't write more than 'sz' chars, but return | 2088 | * next pid to display; the seq_file iterator is a pointer to the pid |
2094 | * count 'cnt' of how many chars would be written if buf were large enough. | 2089 | * in the cgroup->tasks_pids array. |
2095 | */ | 2090 | */ |
2096 | static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids) | 2091 | |
2092 | static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) | ||
2097 | { | 2093 | { |
2098 | int cnt = 0; | 2094 | /* |
2099 | int i; | 2095 | * Initially we receive a position value that corresponds to |
2096 | * one more than the last pid shown (or 0 on the first call or | ||
2097 | * after a seek to the start). Use a binary-search to find the | ||
2098 | * next pid to display, if any | ||
2099 | */ | ||
2100 | struct cgroup *cgrp = s->private; | ||
2101 | int index = 0, pid = *pos; | ||
2102 | int *iter; | ||
2100 | 2103 | ||
2101 | for (i = 0; i < npids; i++) | 2104 | down_read(&cgrp->pids_mutex); |
2102 | cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]); | 2105 | if (pid) { |
2103 | return cnt; | 2106 | int end = cgrp->pids_length; |
2107 | int i; | ||
2108 | while (index < end) { | ||
2109 | int mid = (index + end) / 2; | ||
2110 | if (cgrp->tasks_pids[mid] == pid) { | ||
2111 | index = mid; | ||
2112 | break; | ||
2113 | } else if (cgrp->tasks_pids[mid] <= pid) | ||
2114 | index = mid + 1; | ||
2115 | else | ||
2116 | end = mid; | ||
2117 | } | ||
2118 | } | ||
2119 | /* If we're off the end of the array, we're done */ | ||
2120 | if (index >= cgrp->pids_length) | ||
2121 | return NULL; | ||
2122 | /* Update the abstract position to be the actual pid that we found */ | ||
2123 | iter = cgrp->tasks_pids + index; | ||
2124 | *pos = *iter; | ||
2125 | return iter; | ||
2126 | } | ||
2127 | |||
2128 | static void cgroup_tasks_stop(struct seq_file *s, void *v) | ||
2129 | { | ||
2130 | struct cgroup *cgrp = s->private; | ||
2131 | up_read(&cgrp->pids_mutex); | ||
2104 | } | 2132 | } |
2105 | 2133 | ||
2134 | static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) | ||
2135 | { | ||
2136 | struct cgroup *cgrp = s->private; | ||
2137 | int *p = v; | ||
2138 | int *end = cgrp->tasks_pids + cgrp->pids_length; | ||
2139 | |||
2140 | /* | ||
2141 | * Advance to the next pid in the array. If this goes off the | ||
2142 | * end, we're done | ||
2143 | */ | ||
2144 | p++; | ||
2145 | if (p >= end) { | ||
2146 | return NULL; | ||
2147 | } else { | ||
2148 | *pos = *p; | ||
2149 | return p; | ||
2150 | } | ||
2151 | } | ||
2152 | |||
2153 | static int cgroup_tasks_show(struct seq_file *s, void *v) | ||
2154 | { | ||
2155 | return seq_printf(s, "%d\n", *(int *)v); | ||
2156 | } | ||
2157 | |||
2158 | static struct seq_operations cgroup_tasks_seq_operations = { | ||
2159 | .start = cgroup_tasks_start, | ||
2160 | .stop = cgroup_tasks_stop, | ||
2161 | .next = cgroup_tasks_next, | ||
2162 | .show = cgroup_tasks_show, | ||
2163 | }; | ||
2164 | |||
2165 | static void release_cgroup_pid_array(struct cgroup *cgrp) | ||
2166 | { | ||
2167 | down_write(&cgrp->pids_mutex); | ||
2168 | BUG_ON(!cgrp->pids_use_count); | ||
2169 | if (!--cgrp->pids_use_count) { | ||
2170 | kfree(cgrp->tasks_pids); | ||
2171 | cgrp->tasks_pids = NULL; | ||
2172 | cgrp->pids_length = 0; | ||
2173 | } | ||
2174 | up_write(&cgrp->pids_mutex); | ||
2175 | } | ||
2176 | |||
2177 | static int cgroup_tasks_release(struct inode *inode, struct file *file) | ||
2178 | { | ||
2179 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | ||
2180 | |||
2181 | if (!(file->f_mode & FMODE_READ)) | ||
2182 | return 0; | ||
2183 | |||
2184 | release_cgroup_pid_array(cgrp); | ||
2185 | return seq_release(inode, file); | ||
2186 | } | ||
2187 | |||
2188 | static struct file_operations cgroup_tasks_operations = { | ||
2189 | .read = seq_read, | ||
2190 | .llseek = seq_lseek, | ||
2191 | .write = cgroup_file_write, | ||
2192 | .release = cgroup_tasks_release, | ||
2193 | }; | ||
2194 | |||
2106 | /* | 2195 | /* |
2107 | * Handle an open on 'tasks' file. Prepare a buffer listing the | 2196 | * Handle an open on 'tasks' file. Prepare an array containing the |
2108 | * process id's of tasks currently attached to the cgroup being opened. | 2197 | * process id's of tasks currently attached to the cgroup being opened. |
2109 | * | ||
2110 | * Does not require any specific cgroup mutexes, and does not take any. | ||
2111 | */ | 2198 | */ |
2199 | |||
2112 | static int cgroup_tasks_open(struct inode *unused, struct file *file) | 2200 | static int cgroup_tasks_open(struct inode *unused, struct file *file) |
2113 | { | 2201 | { |
2114 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2202 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
2115 | struct ctr_struct *ctr; | ||
2116 | pid_t *pidarray; | 2203 | pid_t *pidarray; |
2117 | int npids; | 2204 | int npids; |
2118 | char c; | 2205 | int retval; |
2119 | 2206 | ||
2207 | /* Nothing to do for write-only files */ | ||
2120 | if (!(file->f_mode & FMODE_READ)) | 2208 | if (!(file->f_mode & FMODE_READ)) |
2121 | return 0; | 2209 | return 0; |
2122 | 2210 | ||
2123 | ctr = kmalloc(sizeof(*ctr), GFP_KERNEL); | ||
2124 | if (!ctr) | ||
2125 | goto err0; | ||
2126 | |||
2127 | /* | 2211 | /* |
2128 | * If cgroup gets more users after we read count, we won't have | 2212 | * If cgroup gets more users after we read count, we won't have |
2129 | * enough space - tough. This race is indistinguishable to the | 2213 | * enough space - tough. This race is indistinguishable to the |
@@ -2131,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file) | |||
2131 | * show up until sometime later on. | 2215 | * show up until sometime later on. |
2132 | */ | 2216 | */ |
2133 | npids = cgroup_task_count(cgrp); | 2217 | npids = cgroup_task_count(cgrp); |
2134 | if (npids) { | 2218 | pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); |
2135 | pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); | 2219 | if (!pidarray) |
2136 | if (!pidarray) | 2220 | return -ENOMEM; |
2137 | goto err1; | 2221 | npids = pid_array_load(pidarray, npids, cgrp); |
2138 | 2222 | sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); | |
2139 | npids = pid_array_load(pidarray, npids, cgrp); | ||
2140 | sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); | ||
2141 | |||
2142 | /* Call pid_array_to_buf() twice, first just to get bufsz */ | ||
2143 | ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1; | ||
2144 | ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL); | ||
2145 | if (!ctr->buf) | ||
2146 | goto err2; | ||
2147 | ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids); | ||
2148 | |||
2149 | kfree(pidarray); | ||
2150 | } else { | ||
2151 | ctr->buf = NULL; | ||
2152 | ctr->bufsz = 0; | ||
2153 | } | ||
2154 | file->private_data = ctr; | ||
2155 | return 0; | ||
2156 | |||
2157 | err2: | ||
2158 | kfree(pidarray); | ||
2159 | err1: | ||
2160 | kfree(ctr); | ||
2161 | err0: | ||
2162 | return -ENOMEM; | ||
2163 | } | ||
2164 | 2223 | ||
2165 | static ssize_t cgroup_tasks_read(struct cgroup *cgrp, | 2224 | /* |
2166 | struct cftype *cft, | 2225 | * Store the array in the cgroup, freeing the old |
2167 | struct file *file, char __user *buf, | 2226 | * array if necessary |
2168 | size_t nbytes, loff_t *ppos) | 2227 | */ |
2169 | { | 2228 | down_write(&cgrp->pids_mutex); |
2170 | struct ctr_struct *ctr = file->private_data; | 2229 | kfree(cgrp->tasks_pids); |
2230 | cgrp->tasks_pids = pidarray; | ||
2231 | cgrp->pids_length = npids; | ||
2232 | cgrp->pids_use_count++; | ||
2233 | up_write(&cgrp->pids_mutex); | ||
2171 | 2234 | ||
2172 | return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz); | 2235 | file->f_op = &cgroup_tasks_operations; |
2173 | } | ||
2174 | 2236 | ||
2175 | static int cgroup_tasks_release(struct inode *unused_inode, | 2237 | retval = seq_open(file, &cgroup_tasks_seq_operations); |
2176 | struct file *file) | 2238 | if (retval) { |
2177 | { | 2239 | release_cgroup_pid_array(cgrp); |
2178 | struct ctr_struct *ctr; | 2240 | return retval; |
2179 | |||
2180 | if (file->f_mode & FMODE_READ) { | ||
2181 | ctr = file->private_data; | ||
2182 | kfree(ctr->buf); | ||
2183 | kfree(ctr); | ||
2184 | } | 2241 | } |
2242 | ((struct seq_file *)file->private_data)->private = cgrp; | ||
2185 | return 0; | 2243 | return 0; |
2186 | } | 2244 | } |
2187 | 2245 | ||
@@ -2210,7 +2268,6 @@ static struct cftype files[] = { | |||
2210 | { | 2268 | { |
2211 | .name = "tasks", | 2269 | .name = "tasks", |
2212 | .open = cgroup_tasks_open, | 2270 | .open = cgroup_tasks_open, |
2213 | .read = cgroup_tasks_read, | ||
2214 | .write_u64 = cgroup_tasks_write, | 2271 | .write_u64 = cgroup_tasks_write, |
2215 | .release = cgroup_tasks_release, | 2272 | .release = cgroup_tasks_release, |
2216 | .private = FILE_TASKLIST, | 2273 | .private = FILE_TASKLIST, |
@@ -2300,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
2300 | 2357 | ||
2301 | mutex_lock(&cgroup_mutex); | 2358 | mutex_lock(&cgroup_mutex); |
2302 | 2359 | ||
2303 | INIT_LIST_HEAD(&cgrp->sibling); | 2360 | init_cgroup_housekeeping(cgrp); |
2304 | INIT_LIST_HEAD(&cgrp->children); | ||
2305 | INIT_LIST_HEAD(&cgrp->css_sets); | ||
2306 | INIT_LIST_HEAD(&cgrp->release_list); | ||
2307 | 2361 | ||
2308 | cgrp->parent = parent; | 2362 | cgrp->parent = parent; |
2309 | cgrp->root = parent->root; | 2363 | cgrp->root = parent->root; |
@@ -2495,8 +2549,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2495 | int __init cgroup_init_early(void) | 2549 | int __init cgroup_init_early(void) |
2496 | { | 2550 | { |
2497 | int i; | 2551 | int i; |
2498 | kref_init(&init_css_set.ref); | 2552 | atomic_set(&init_css_set.refcount, 1); |
2499 | kref_get(&init_css_set.ref); | ||
2500 | INIT_LIST_HEAD(&init_css_set.cg_links); | 2553 | INIT_LIST_HEAD(&init_css_set.cg_links); |
2501 | INIT_LIST_HEAD(&init_css_set.tasks); | 2554 | INIT_LIST_HEAD(&init_css_set.tasks); |
2502 | INIT_HLIST_NODE(&init_css_set.hlist); | 2555 | INIT_HLIST_NODE(&init_css_set.hlist); |
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c index c3dc3aba4c02..daca6209202d 100644 --- a/kernel/cgroup_debug.c +++ b/kernel/cgroup_debug.c | |||
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont, | |||
57 | u64 count; | 57 | u64 count; |
58 | 58 | ||
59 | rcu_read_lock(); | 59 | rcu_read_lock(); |
60 | count = atomic_read(&current->cgroups->ref.refcount); | 60 | count = atomic_read(&current->cgroups->refcount); |
61 | rcu_read_unlock(); | 61 | rcu_read_unlock(); |
62 | return count; | 62 | return count; |
63 | } | 63 | } |
@@ -90,7 +90,7 @@ static struct cftype files[] = { | |||
90 | { | 90 | { |
91 | .name = "releasable", | 91 | .name = "releasable", |
92 | .read_u64 = releasable_read, | 92 | .read_u64 = releasable_read, |
93 | } | 93 | }, |
94 | }; | 94 | }; |
95 | 95 | ||
96 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) | 96 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c new file mode 100644 index 000000000000..e95056954498 --- /dev/null +++ b/kernel/cgroup_freezer.c | |||
@@ -0,0 +1,379 @@ | |||
1 | /* | ||
2 | * cgroup_freezer.c - control group freezer subsystem | ||
3 | * | ||
4 | * Copyright IBM Corporation, 2007 | ||
5 | * | ||
6 | * Author : Cedric Le Goater <clg@fr.ibm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
10 | * as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it would be useful, but | ||
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
15 | */ | ||
16 | |||
17 | #include <linux/module.h> | ||
18 | #include <linux/cgroup.h> | ||
19 | #include <linux/fs.h> | ||
20 | #include <linux/uaccess.h> | ||
21 | #include <linux/freezer.h> | ||
22 | #include <linux/seq_file.h> | ||
23 | |||
24 | enum freezer_state { | ||
25 | CGROUP_THAWED = 0, | ||
26 | CGROUP_FREEZING, | ||
27 | CGROUP_FROZEN, | ||
28 | }; | ||
29 | |||
30 | struct freezer { | ||
31 | struct cgroup_subsys_state css; | ||
32 | enum freezer_state state; | ||
33 | spinlock_t lock; /* protects _writes_ to state */ | ||
34 | }; | ||
35 | |||
36 | static inline struct freezer *cgroup_freezer( | ||
37 | struct cgroup *cgroup) | ||
38 | { | ||
39 | return container_of( | ||
40 | cgroup_subsys_state(cgroup, freezer_subsys_id), | ||
41 | struct freezer, css); | ||
42 | } | ||
43 | |||
44 | static inline struct freezer *task_freezer(struct task_struct *task) | ||
45 | { | ||
46 | return container_of(task_subsys_state(task, freezer_subsys_id), | ||
47 | struct freezer, css); | ||
48 | } | ||
49 | |||
50 | int cgroup_frozen(struct task_struct *task) | ||
51 | { | ||
52 | struct freezer *freezer; | ||
53 | enum freezer_state state; | ||
54 | |||
55 | task_lock(task); | ||
56 | freezer = task_freezer(task); | ||
57 | state = freezer->state; | ||
58 | task_unlock(task); | ||
59 | |||
60 | return state == CGROUP_FROZEN; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * cgroups_write_string() limits the size of freezer state strings to | ||
65 | * CGROUP_LOCAL_BUFFER_SIZE | ||
66 | */ | ||
67 | static const char *freezer_state_strs[] = { | ||
68 | "THAWED", | ||
69 | "FREEZING", | ||
70 | "FROZEN", | ||
71 | }; | ||
72 | |||
73 | /* | ||
74 | * State diagram | ||
75 | * Transitions are caused by userspace writes to the freezer.state file. | ||
76 | * The values in parentheses are state labels. The rest are edge labels. | |
77 | * | ||
78 | * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN) | ||
79 | * ^ ^ | | | ||
80 | * | \_______THAWED_______/ | | ||
81 | * \__________________________THAWED____________/ | ||
82 | */ | ||
83 | |||
84 | struct cgroup_subsys freezer_subsys; | ||
85 | |||
86 | /* Locks taken and their ordering | ||
87 | * ------------------------------ | ||
88 | * css_set_lock | ||
89 | * cgroup_mutex (AKA cgroup_lock) | ||
90 | * task->alloc_lock (AKA task_lock) | ||
91 | * freezer->lock | ||
92 | * task->sighand->siglock | ||
93 | * | ||
94 | * cgroup code forces css_set_lock to be taken before task->alloc_lock | ||
95 | * | ||
96 | * freezer_create(), freezer_destroy(): | ||
97 | * cgroup_mutex [ by cgroup core ] | ||
98 | * | ||
99 | * can_attach(): | ||
100 | * cgroup_mutex | ||
101 | * | ||
102 | * cgroup_frozen(): | ||
103 | * task->alloc_lock (to get task's cgroup) | ||
104 | * | ||
105 | * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): | ||
106 | * task->alloc_lock (to get task's cgroup) | ||
107 | * freezer->lock | ||
108 | * sighand->siglock (if the cgroup is freezing) | ||
109 | * | ||
110 | * freezer_read(): | ||
111 | * cgroup_mutex | ||
112 | * freezer->lock | ||
113 | * read_lock css_set_lock (cgroup iterator start) | ||
114 | * | ||
115 | * freezer_write() (freeze): | ||
116 | * cgroup_mutex | ||
117 | * freezer->lock | ||
118 | * read_lock css_set_lock (cgroup iterator start) | ||
119 | * sighand->siglock | ||
120 | * | ||
121 | * freezer_write() (unfreeze): | ||
122 | * cgroup_mutex | ||
123 | * freezer->lock | ||
124 | * read_lock css_set_lock (cgroup iterator start) | ||
125 | * task->alloc_lock (to prevent races with freeze_task()) | ||
126 | * sighand->siglock | ||
127 | */ | ||
128 | static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, | ||
129 | struct cgroup *cgroup) | ||
130 | { | ||
131 | struct freezer *freezer; | ||
132 | |||
133 | freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); | ||
134 | if (!freezer) | ||
135 | return ERR_PTR(-ENOMEM); | ||
136 | |||
137 | spin_lock_init(&freezer->lock); | ||
138 | freezer->state = CGROUP_THAWED; | ||
139 | return &freezer->css; | ||
140 | } | ||
141 | |||
142 | static void freezer_destroy(struct cgroup_subsys *ss, | ||
143 | struct cgroup *cgroup) | ||
144 | { | ||
145 | kfree(cgroup_freezer(cgroup)); | ||
146 | } | ||
147 | |||
148 | /* Task is frozen or will freeze immediately when next it gets woken */ | ||
149 | static bool is_task_frozen_enough(struct task_struct *task) | ||
150 | { | ||
151 | return frozen(task) || | ||
152 | (task_is_stopped_or_traced(task) && freezing(task)); | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * The call to cgroup_lock() in the freezer.state write method prevents | ||
157 | * a write to that file racing against an attach, and hence the | ||
158 | * can_attach() result will remain valid until the attach completes. | ||
159 | */ | ||
160 | static int freezer_can_attach(struct cgroup_subsys *ss, | ||
161 | struct cgroup *new_cgroup, | ||
162 | struct task_struct *task) | ||
163 | { | ||
164 | struct freezer *freezer; | ||
165 | int retval; | ||
166 | |||
167 | /* Anything frozen can't move or be moved to/from */ | ||
168 | |||
169 | if (is_task_frozen_enough(task)) | ||
170 | return -EBUSY; | ||
171 | |||
172 | freezer = cgroup_freezer(new_cgroup); | ||
173 | if (freezer->state == CGROUP_FROZEN) | ||
174 | return -EBUSY; | ||
175 | |||
176 | retval = 0; | ||
177 | task_lock(task); | ||
178 | freezer = task_freezer(task); | ||
179 | if (freezer->state == CGROUP_FROZEN) | ||
180 | retval = -EBUSY; | ||
181 | task_unlock(task); | ||
182 | return retval; | ||
183 | } | ||
184 | |||
185 | static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) | ||
186 | { | ||
187 | struct freezer *freezer; | ||
188 | |||
189 | task_lock(task); | ||
190 | freezer = task_freezer(task); | ||
191 | task_unlock(task); | ||
192 | |||
193 | BUG_ON(freezer->state == CGROUP_FROZEN); | ||
194 | spin_lock_irq(&freezer->lock); | ||
195 | /* Locking avoids race with FREEZING -> THAWED transitions. */ | ||
196 | if (freezer->state == CGROUP_FREEZING) | ||
197 | freeze_task(task, true); | ||
198 | spin_unlock_irq(&freezer->lock); | ||
199 | } | ||
200 | |||
201 | /* | ||
202 | * caller must hold freezer->lock | ||
203 | */ | ||
204 | static void update_freezer_state(struct cgroup *cgroup, | ||
205 | struct freezer *freezer) | ||
206 | { | ||
207 | struct cgroup_iter it; | ||
208 | struct task_struct *task; | ||
209 | unsigned int nfrozen = 0, ntotal = 0; | ||
210 | |||
211 | cgroup_iter_start(cgroup, &it); | ||
212 | while ((task = cgroup_iter_next(cgroup, &it))) { | ||
213 | ntotal++; | ||
214 | if (is_task_frozen_enough(task)) | ||
215 | nfrozen++; | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * Transition to FROZEN when no new tasks can be added ensures | ||
220 | * that we never exist in the FROZEN state while there are unfrozen | ||
221 | * tasks. | ||
222 | */ | ||
223 | if (nfrozen == ntotal) | ||
224 | freezer->state = CGROUP_FROZEN; | ||
225 | else if (nfrozen > 0) | ||
226 | freezer->state = CGROUP_FREEZING; | ||
227 | else | ||
228 | freezer->state = CGROUP_THAWED; | ||
229 | cgroup_iter_end(cgroup, &it); | ||
230 | } | ||
231 | |||
232 | static int freezer_read(struct cgroup *cgroup, struct cftype *cft, | ||
233 | struct seq_file *m) | ||
234 | { | ||
235 | struct freezer *freezer; | ||
236 | enum freezer_state state; | ||
237 | |||
238 | if (!cgroup_lock_live_group(cgroup)) | ||
239 | return -ENODEV; | ||
240 | |||
241 | freezer = cgroup_freezer(cgroup); | ||
242 | spin_lock_irq(&freezer->lock); | ||
243 | state = freezer->state; | ||
244 | if (state == CGROUP_FREEZING) { | ||
245 | /* We change from FREEZING to FROZEN lazily if the cgroup was | ||
246 | * only partially frozen when we exited write. */ | |
247 | update_freezer_state(cgroup, freezer); | ||
248 | state = freezer->state; | ||
249 | } | ||
250 | spin_unlock_irq(&freezer->lock); | ||
251 | cgroup_unlock(); | ||
252 | |||
253 | seq_puts(m, freezer_state_strs[state]); | ||
254 | seq_putc(m, '\n'); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) | ||
259 | { | ||
260 | struct cgroup_iter it; | ||
261 | struct task_struct *task; | ||
262 | unsigned int num_cant_freeze_now = 0; | ||
263 | |||
264 | freezer->state = CGROUP_FREEZING; | ||
265 | cgroup_iter_start(cgroup, &it); | ||
266 | while ((task = cgroup_iter_next(cgroup, &it))) { | ||
267 | if (!freeze_task(task, true)) | ||
268 | continue; | ||
269 | if (is_task_frozen_enough(task)) | ||
270 | continue; | ||
271 | if (!freezing(task) && !freezer_should_skip(task)) | ||
272 | num_cant_freeze_now++; | ||
273 | } | ||
274 | cgroup_iter_end(cgroup, &it); | ||
275 | |||
276 | return num_cant_freeze_now ? -EBUSY : 0; | ||
277 | } | ||
278 | |||
279 | static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) | ||
280 | { | ||
281 | struct cgroup_iter it; | ||
282 | struct task_struct *task; | ||
283 | |||
284 | cgroup_iter_start(cgroup, &it); | ||
285 | while ((task = cgroup_iter_next(cgroup, &it))) { | ||
286 | int do_wake; | ||
287 | |||
288 | task_lock(task); | ||
289 | do_wake = __thaw_process(task); | ||
290 | task_unlock(task); | ||
291 | if (do_wake) | ||
292 | wake_up_process(task); | ||
293 | } | ||
294 | cgroup_iter_end(cgroup, &it); | ||
295 | freezer->state = CGROUP_THAWED; | ||
296 | |||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | static int freezer_change_state(struct cgroup *cgroup, | ||
301 | enum freezer_state goal_state) | ||
302 | { | ||
303 | struct freezer *freezer; | ||
304 | int retval = 0; | ||
305 | |||
306 | freezer = cgroup_freezer(cgroup); | ||
307 | spin_lock_irq(&freezer->lock); | ||
308 | update_freezer_state(cgroup, freezer); | ||
309 | if (goal_state == freezer->state) | ||
310 | goto out; | ||
311 | switch (freezer->state) { | ||
312 | case CGROUP_THAWED: | ||
313 | retval = try_to_freeze_cgroup(cgroup, freezer); | ||
314 | break; | ||
315 | case CGROUP_FREEZING: | ||
316 | if (goal_state == CGROUP_FROZEN) { | ||
317 | /* Userspace is retrying after | ||
318 | * "/bin/echo FROZEN > freezer.state" returned -EBUSY */ | ||
319 | retval = try_to_freeze_cgroup(cgroup, freezer); | ||
320 | break; | ||
321 | } | ||
322 | /* state == FREEZING and goal_state == THAWED, so unfreeze */ | ||
323 | case CGROUP_FROZEN: | ||
324 | retval = unfreeze_cgroup(cgroup, freezer); | ||
325 | break; | ||
326 | default: | ||
327 | break; | ||
328 | } | ||
329 | out: | ||
330 | spin_unlock_irq(&freezer->lock); | ||
331 | |||
332 | return retval; | ||
333 | } | ||
334 | |||
335 | static int freezer_write(struct cgroup *cgroup, | ||
336 | struct cftype *cft, | ||
337 | const char *buffer) | ||
338 | { | ||
339 | int retval; | ||
340 | enum freezer_state goal_state; | ||
341 | |||
342 | if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0) | ||
343 | goal_state = CGROUP_THAWED; | ||
344 | else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0) | ||
345 | goal_state = CGROUP_FROZEN; | ||
346 | else | ||
347 | return -EIO; | ||
348 | |||
349 | if (!cgroup_lock_live_group(cgroup)) | ||
350 | return -ENODEV; | ||
351 | retval = freezer_change_state(cgroup, goal_state); | ||
352 | cgroup_unlock(); | ||
353 | return retval; | ||
354 | } | ||
355 | |||
356 | static struct cftype files[] = { | ||
357 | { | ||
358 | .name = "state", | ||
359 | .read_seq_string = freezer_read, | ||
360 | .write_string = freezer_write, | ||
361 | }, | ||
362 | }; | ||
363 | |||
364 | static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup) | ||
365 | { | ||
366 | return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files)); | ||
367 | } | ||
368 | |||
369 | struct cgroup_subsys freezer_subsys = { | ||
370 | .name = "freezer", | ||
371 | .create = freezer_create, | ||
372 | .destroy = freezer_destroy, | ||
373 | .populate = freezer_populate, | ||
374 | .subsys_id = freezer_subsys_id, | ||
375 | .can_attach = freezer_can_attach, | ||
376 | .attach = NULL, | ||
377 | .fork = freezer_fork, | ||
378 | .exit = NULL, | ||
379 | }; | ||
diff --git a/kernel/compat.c b/kernel/compat.c index 143990e48cb9..8eafe3eb50d9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/timex.h> | 23 | #include <linux/timex.h> |
24 | #include <linux/migrate.h> | 24 | #include <linux/migrate.h> |
25 | #include <linux/posix-timers.h> | 25 | #include <linux/posix-timers.h> |
26 | #include <linux/times.h> | ||
26 | 27 | ||
27 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
28 | 29 | ||
@@ -208,49 +209,23 @@ asmlinkage long compat_sys_setitimer(int which, | |||
208 | return 0; | 209 | return 0; |
209 | } | 210 | } |
210 | 211 | ||
212 | static compat_clock_t clock_t_to_compat_clock_t(clock_t x) | ||
213 | { /* helper for compat_sys_times(): applied to each struct tms field */ | ||
214 | return compat_jiffies_to_clock_t(clock_t_to_jiffies(x)); /* clock_t -> jiffies -> compat_clock_t */ | ||
215 | } | ||
216 | |||
211 | asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) | 217 | asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) |
212 | { | 218 | { |
213 | /* | ||
214 | * In the SMP world we might just be unlucky and have one of | ||
215 | * the times increment as we use it. Since the value is an | ||
216 | * atomically safe type this is just fine. Conceptually its | ||
217 | * as if the syscall took an instant longer to occur. | ||
218 | */ | ||
219 | if (tbuf) { | 219 | if (tbuf) { |
220 | struct tms tms; | ||
220 | struct compat_tms tmp; | 221 | struct compat_tms tmp; |
221 | struct task_struct *tsk = current; | 222 | |
222 | struct task_struct *t; | 223 | do_sys_times(&tms); |
223 | cputime_t utime, stime, cutime, cstime; | 224 | /* Convert our struct tms to the compat version. */ |
224 | 225 | tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime); | |
225 | read_lock(&tasklist_lock); | 226 | tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime); |
226 | utime = tsk->signal->utime; | 227 | tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime); |
227 | stime = tsk->signal->stime; | 228 | tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime); |
228 | t = tsk; | ||
229 | do { | ||
230 | utime = cputime_add(utime, t->utime); | ||
231 | stime = cputime_add(stime, t->stime); | ||
232 | t = next_thread(t); | ||
233 | } while (t != tsk); | ||
234 | |||
235 | /* | ||
236 | * While we have tasklist_lock read-locked, no dying thread | ||
237 | * can be updating current->signal->[us]time. Instead, | ||
238 | * we got their counts included in the live thread loop. | ||
239 | * However, another thread can come in right now and | ||
240 | * do a wait call that updates current->signal->c[us]time. | ||
241 | * To make sure we always see that pair updated atomically, | ||
242 | * we take the siglock around fetching them. | ||
243 | */ | ||
244 | spin_lock_irq(&tsk->sighand->siglock); | ||
245 | cutime = tsk->signal->cutime; | ||
246 | cstime = tsk->signal->cstime; | ||
247 | spin_unlock_irq(&tsk->sighand->siglock); | ||
248 | read_unlock(&tasklist_lock); | ||
249 | |||
250 | tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime)); | ||
251 | tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime)); | ||
252 | tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime)); | ||
253 | tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime)); | ||
254 | if (copy_to_user(tbuf, &tmp, sizeof(tmp))) | 229 | if (copy_to_user(tbuf, &tmp, sizeof(tmp))) |
255 | return -EFAULT; | 230 | return -EFAULT; |
256 | } | 231 | } |
diff --git a/kernel/configs.c b/kernel/configs.c index 4c345210ed8c..abaee684ecbf 100644 --- a/kernel/configs.c +++ b/kernel/configs.c | |||
@@ -54,9 +54,6 @@ | |||
54 | 54 | ||
55 | #ifdef CONFIG_IKCONFIG_PROC | 55 | #ifdef CONFIG_IKCONFIG_PROC |
56 | 56 | ||
57 | /**************************************************/ | ||
58 | /* globals and useful constants */ | ||
59 | |||
60 | static ssize_t | 57 | static ssize_t |
61 | ikconfig_read_current(struct file *file, char __user *buf, | 58 | ikconfig_read_current(struct file *file, char __user *buf, |
62 | size_t len, loff_t * offset) | 59 | size_t len, loff_t * offset) |
@@ -71,9 +68,6 @@ static const struct file_operations ikconfig_file_ops = { | |||
71 | .read = ikconfig_read_current, | 68 | .read = ikconfig_read_current, |
72 | }; | 69 | }; |
73 | 70 | ||
74 | /***************************************************/ | ||
75 | /* ikconfig_init: start up everything we need to */ | ||
76 | |||
77 | static int __init ikconfig_init(void) | 71 | static int __init ikconfig_init(void) |
78 | { | 72 | { |
79 | struct proc_dir_entry *entry; | 73 | struct proc_dir_entry *entry; |
@@ -89,9 +83,6 @@ static int __init ikconfig_init(void) | |||
89 | return 0; | 83 | return 0; |
90 | } | 84 | } |
91 | 85 | ||
92 | /***************************************************/ | ||
93 | /* ikconfig_cleanup: clean up our mess */ | ||
94 | |||
95 | static void __exit ikconfig_cleanup(void) | 86 | static void __exit ikconfig_cleanup(void) |
96 | { | 87 | { |
97 | remove_proc_entry("config.gz", NULL); | 88 | remove_proc_entry("config.gz", NULL); |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index eab7bd6628e0..3e00526f52ec 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1172,7 +1172,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, | |||
1172 | { | 1172 | { |
1173 | struct cpuset trialcs; | 1173 | struct cpuset trialcs; |
1174 | int err; | 1174 | int err; |
1175 | int cpus_nonempty, balance_flag_changed; | 1175 | int balance_flag_changed; |
1176 | 1176 | ||
1177 | trialcs = *cs; | 1177 | trialcs = *cs; |
1178 | if (turning_on) | 1178 | if (turning_on) |
@@ -1184,7 +1184,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, | |||
1184 | if (err < 0) | 1184 | if (err < 0) |
1185 | return err; | 1185 | return err; |
1186 | 1186 | ||
1187 | cpus_nonempty = !cpus_empty(trialcs.cpus_allowed); | ||
1188 | balance_flag_changed = (is_sched_load_balance(cs) != | 1187 | balance_flag_changed = (is_sched_load_balance(cs) != |
1189 | is_sched_load_balance(&trialcs)); | 1188 | is_sched_load_balance(&trialcs)); |
1190 | 1189 | ||
@@ -1192,7 +1191,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, | |||
1192 | cs->flags = trialcs.flags; | 1191 | cs->flags = trialcs.flags; |
1193 | mutex_unlock(&callback_mutex); | 1192 | mutex_unlock(&callback_mutex); |
1194 | 1193 | ||
1195 | if (cpus_nonempty && balance_flag_changed) | 1194 | if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed) |
1196 | async_rebuild_sched_domains(); | 1195 | async_rebuild_sched_domains(); |
1197 | 1196 | ||
1198 | return 0; | 1197 | return 0; |
@@ -2437,19 +2436,15 @@ const struct file_operations proc_cpuset_operations = { | |||
2437 | void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) | 2436 | void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) |
2438 | { | 2437 | { |
2439 | seq_printf(m, "Cpus_allowed:\t"); | 2438 | seq_printf(m, "Cpus_allowed:\t"); |
2440 | m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count, | 2439 | seq_cpumask(m, &task->cpus_allowed); |
2441 | task->cpus_allowed); | ||
2442 | seq_printf(m, "\n"); | 2440 | seq_printf(m, "\n"); |
2443 | seq_printf(m, "Cpus_allowed_list:\t"); | 2441 | seq_printf(m, "Cpus_allowed_list:\t"); |
2444 | m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count, | 2442 | seq_cpumask_list(m, &task->cpus_allowed); |
2445 | task->cpus_allowed); | ||
2446 | seq_printf(m, "\n"); | 2443 | seq_printf(m, "\n"); |
2447 | seq_printf(m, "Mems_allowed:\t"); | 2444 | seq_printf(m, "Mems_allowed:\t"); |
2448 | m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count, | 2445 | seq_nodemask(m, &task->mems_allowed); |
2449 | task->mems_allowed); | ||
2450 | seq_printf(m, "\n"); | 2446 | seq_printf(m, "\n"); |
2451 | seq_printf(m, "Mems_allowed_list:\t"); | 2447 | seq_printf(m, "Mems_allowed_list:\t"); |
2452 | m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count, | 2448 | seq_nodemask_list(m, &task->mems_allowed); |
2453 | task->mems_allowed); | ||
2454 | seq_printf(m, "\n"); | 2449 | seq_printf(m, "\n"); |
2455 | } | 2450 | } |
diff --git a/kernel/exit.c b/kernel/exit.c index 138887881a11..80137a5d9467 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -113,8 +113,6 @@ static void __exit_signal(struct task_struct *tsk) | |||
113 | * We won't ever get here for the group leader, since it | 113 | * We won't ever get here for the group leader, since it |
114 | * will have been the last reference on the signal_struct. | 114 | * will have been the last reference on the signal_struct. |
115 | */ | 115 | */ |
116 | sig->utime = cputime_add(sig->utime, task_utime(tsk)); | ||
117 | sig->stime = cputime_add(sig->stime, task_stime(tsk)); | ||
118 | sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); | 116 | sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); |
119 | sig->min_flt += tsk->min_flt; | 117 | sig->min_flt += tsk->min_flt; |
120 | sig->maj_flt += tsk->maj_flt; | 118 | sig->maj_flt += tsk->maj_flt; |
@@ -123,7 +121,6 @@ static void __exit_signal(struct task_struct *tsk) | |||
123 | sig->inblock += task_io_get_inblock(tsk); | 121 | sig->inblock += task_io_get_inblock(tsk); |
124 | sig->oublock += task_io_get_oublock(tsk); | 122 | sig->oublock += task_io_get_oublock(tsk); |
125 | task_io_accounting_add(&sig->ioac, &tsk->ioac); | 123 | task_io_accounting_add(&sig->ioac, &tsk->ioac); |
126 | sig->sum_sched_runtime += tsk->se.sum_exec_runtime; | ||
127 | sig = NULL; /* Marker for below. */ | 124 | sig = NULL; /* Marker for below. */ |
128 | } | 125 | } |
129 | 126 | ||
@@ -1307,6 +1304,7 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1307 | if (likely(!traced)) { | 1304 | if (likely(!traced)) { |
1308 | struct signal_struct *psig; | 1305 | struct signal_struct *psig; |
1309 | struct signal_struct *sig; | 1306 | struct signal_struct *sig; |
1307 | struct task_cputime cputime; | ||
1310 | 1308 | ||
1311 | /* | 1309 | /* |
1312 | * The resource counters for the group leader are in its | 1310 | * The resource counters for the group leader are in its |
@@ -1322,20 +1320,23 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1322 | * need to protect the access to p->parent->signal fields, | 1320 | * need to protect the access to p->parent->signal fields, |
1323 | * as other threads in the parent group can be right | 1321 | * as other threads in the parent group can be right |
1324 | * here reaping other children at the same time. | 1322 | * here reaping other children at the same time. |
1323 | * | ||
1324 | * We use thread_group_cputime() to get times for the thread | ||
1325 | * group, which consolidates times for all threads in the | ||
1326 | * group including the group leader. | ||
1325 | */ | 1327 | */ |
1326 | spin_lock_irq(&p->parent->sighand->siglock); | 1328 | spin_lock_irq(&p->parent->sighand->siglock); |
1327 | psig = p->parent->signal; | 1329 | psig = p->parent->signal; |
1328 | sig = p->signal; | 1330 | sig = p->signal; |
1331 | thread_group_cputime(p, &cputime); | ||
1329 | psig->cutime = | 1332 | psig->cutime = |
1330 | cputime_add(psig->cutime, | 1333 | cputime_add(psig->cutime, |
1331 | cputime_add(p->utime, | 1334 | cputime_add(cputime.utime, |
1332 | cputime_add(sig->utime, | 1335 | sig->cutime)); |
1333 | sig->cutime))); | ||
1334 | psig->cstime = | 1336 | psig->cstime = |
1335 | cputime_add(psig->cstime, | 1337 | cputime_add(psig->cstime, |
1336 | cputime_add(p->stime, | 1338 | cputime_add(cputime.stime, |
1337 | cputime_add(sig->stime, | 1339 | sig->cstime)); |
1338 | sig->cstime))); | ||
1339 | psig->cgtime = | 1340 | psig->cgtime = |
1340 | cputime_add(psig->cgtime, | 1341 | cputime_add(psig->cgtime, |
1341 | cputime_add(p->gtime, | 1342 | cputime_add(p->gtime, |
diff --git a/kernel/fork.c b/kernel/fork.c index cfaff92f61ff..4d093552dd6e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -760,15 +760,44 @@ void __cleanup_sighand(struct sighand_struct *sighand) | |||
760 | kmem_cache_free(sighand_cachep, sighand); | 760 | kmem_cache_free(sighand_cachep, sighand); |
761 | } | 761 | } |
762 | 762 | ||
763 | /* | ||
764 | * Set up the thread-group-wide state used for POSIX CPU timer handling. | ||
765 | */ | ||
766 | static void posix_cpu_timers_init_group(struct signal_struct *sig) | ||
767 | { | ||
768 | int i; | ||
769 | |||
770 | /* Thread group counters. */ | ||
771 | thread_group_cputime_init(sig); | ||
772 | |||
773 | /* Expiration times and increments (profiling pair, then virtual pair). */ | ||
774 | sig->it_prof_expires = cputime_zero; | ||
775 | sig->it_prof_incr = cputime_zero; | ||
776 | sig->it_virt_expires = cputime_zero; | ||
777 | sig->it_virt_incr = cputime_zero; | ||
778 | |||
779 | /* Cached expiration times. */ | ||
780 | sig->cputime_expires.sched_exp = 0; | ||
781 | sig->cputime_expires.virt_exp = cputime_zero; | ||
782 | sig->cputime_expires.prof_exp = cputime_zero; | ||
783 | |||
784 | /* The timer lists: initialize each of the three cpu_timers[] heads. */ | ||
785 | for (i = 0; i < 3; i++) | ||
786 | INIT_LIST_HEAD(&sig->cpu_timers[i]); | ||
787 | } | ||
788 | |||
763 | static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | 789 | static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) |
764 | { | 790 | { |
765 | struct signal_struct *sig; | 791 | struct signal_struct *sig; |
766 | int ret; | 792 | int ret; |
767 | 793 | ||
768 | if (clone_flags & CLONE_THREAD) { | 794 | if (clone_flags & CLONE_THREAD) { |
769 | atomic_inc(&current->signal->count); | 795 | ret = thread_group_cputime_clone_thread(current); |
770 | atomic_inc(&current->signal->live); | 796 | if (likely(!ret)) { |
771 | return 0; | 797 | atomic_inc(&current->signal->count); |
798 | atomic_inc(&current->signal->live); | ||
799 | } | ||
800 | return ret; | ||
772 | } | 801 | } |
773 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); | 802 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); |
774 | tsk->signal = sig; | 803 | tsk->signal = sig; |
@@ -796,40 +825,25 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
796 | sig->it_real_incr.tv64 = 0; | 825 | sig->it_real_incr.tv64 = 0; |
797 | sig->real_timer.function = it_real_fn; | 826 | sig->real_timer.function = it_real_fn; |
798 | 827 | ||
799 | sig->it_virt_expires = cputime_zero; | ||
800 | sig->it_virt_incr = cputime_zero; | ||
801 | sig->it_prof_expires = cputime_zero; | ||
802 | sig->it_prof_incr = cputime_zero; | ||
803 | |||
804 | sig->leader = 0; /* session leadership doesn't inherit */ | 828 | sig->leader = 0; /* session leadership doesn't inherit */ |
805 | sig->tty_old_pgrp = NULL; | 829 | sig->tty_old_pgrp = NULL; |
806 | sig->tty = NULL; | 830 | sig->tty = NULL; |
807 | 831 | ||
808 | sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; | 832 | sig->cutime = sig->cstime = cputime_zero; |
809 | sig->gtime = cputime_zero; | 833 | sig->gtime = cputime_zero; |
810 | sig->cgtime = cputime_zero; | 834 | sig->cgtime = cputime_zero; |
811 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; | 835 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; |
812 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; | 836 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; |
813 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; | 837 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; |
814 | task_io_accounting_init(&sig->ioac); | 838 | task_io_accounting_init(&sig->ioac); |
815 | sig->sum_sched_runtime = 0; | ||
816 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | ||
817 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | ||
818 | INIT_LIST_HEAD(&sig->cpu_timers[2]); | ||
819 | taskstats_tgid_init(sig); | 839 | taskstats_tgid_init(sig); |
820 | 840 | ||
821 | task_lock(current->group_leader); | 841 | task_lock(current->group_leader); |
822 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); | 842 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); |
823 | task_unlock(current->group_leader); | 843 | task_unlock(current->group_leader); |
824 | 844 | ||
825 | if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { | 845 | posix_cpu_timers_init_group(sig); |
826 | /* | 846 | |
827 | * New sole thread in the process gets an expiry time | ||
828 | * of the whole CPU time limit. | ||
829 | */ | ||
830 | tsk->it_prof_expires = | ||
831 | secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); | ||
832 | } | ||
833 | acct_init_pacct(&sig->pacct); | 847 | acct_init_pacct(&sig->pacct); |
834 | 848 | ||
835 | tty_audit_fork(sig); | 849 | tty_audit_fork(sig); |
@@ -839,6 +853,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
839 | 853 | ||
840 | void __cleanup_signal(struct signal_struct *sig) | 854 | void __cleanup_signal(struct signal_struct *sig) |
841 | { | 855 | { |
856 | thread_group_cputime_free(sig); | ||
842 | exit_thread_group_keys(sig); | 857 | exit_thread_group_keys(sig); |
843 | tty_kref_put(sig->tty); | 858 | tty_kref_put(sig->tty); |
844 | kmem_cache_free(signal_cachep, sig); | 859 | kmem_cache_free(signal_cachep, sig); |
@@ -889,6 +904,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p) | |||
889 | #endif /* CONFIG_MM_OWNER */ | 904 | #endif /* CONFIG_MM_OWNER */ |
890 | 905 | ||
891 | /* | 906 | /* |
907 | * Initialize POSIX timer handling for a single task. | ||
908 | */ | ||
909 | static void posix_cpu_timers_init(struct task_struct *tsk) | ||
910 | { | ||
911 | int i; | ||
912 | for (i = 0; i < 3; i++) /* the three per-task cpu_timers[] list heads */ | ||
913 | INIT_LIST_HEAD(&tsk->cpu_timers[i]); | ||
914 | tsk->cputime_expires.sched_exp = 0; | ||
915 | tsk->cputime_expires.virt_exp = cputime_zero; | ||
916 | tsk->cputime_expires.prof_exp = cputime_zero; | ||
917 | } | ||
918 | |||
919 | /* | ||
892 | * This creates a new process as a copy of the old one, | 920 | * This creates a new process as a copy of the old one, |
893 | * but does not actually start it yet. | 921 | * but does not actually start it yet. |
894 | * | 922 | * |
@@ -998,12 +1026,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
998 | task_io_accounting_init(&p->ioac); | 1026 | task_io_accounting_init(&p->ioac); |
999 | acct_clear_integrals(p); | 1027 | acct_clear_integrals(p); |
1000 | 1028 | ||
1001 | p->it_virt_expires = cputime_zero; | 1029 | posix_cpu_timers_init(p); |
1002 | p->it_prof_expires = cputime_zero; | ||
1003 | p->it_sched_expires = 0; | ||
1004 | INIT_LIST_HEAD(&p->cpu_timers[0]); | ||
1005 | INIT_LIST_HEAD(&p->cpu_timers[1]); | ||
1006 | INIT_LIST_HEAD(&p->cpu_timers[2]); | ||
1007 | 1030 | ||
1008 | p->lock_depth = -1; /* -1 = no lock */ | 1031 | p->lock_depth = -1; /* -1 = no lock */ |
1009 | do_posix_clock_monotonic_gettime(&p->start_time); | 1032 | do_posix_clock_monotonic_gettime(&p->start_time); |
@@ -1204,21 +1227,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1204 | if (clone_flags & CLONE_THREAD) { | 1227 | if (clone_flags & CLONE_THREAD) { |
1205 | p->group_leader = current->group_leader; | 1228 | p->group_leader = current->group_leader; |
1206 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); | 1229 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); |
1207 | |||
1208 | if (!cputime_eq(current->signal->it_virt_expires, | ||
1209 | cputime_zero) || | ||
1210 | !cputime_eq(current->signal->it_prof_expires, | ||
1211 | cputime_zero) || | ||
1212 | current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || | ||
1213 | !list_empty(&current->signal->cpu_timers[0]) || | ||
1214 | !list_empty(&current->signal->cpu_timers[1]) || | ||
1215 | !list_empty(&current->signal->cpu_timers[2])) { | ||
1216 | /* | ||
1217 | * Have child wake up on its first tick to check | ||
1218 | * for process CPU timers. | ||
1219 | */ | ||
1220 | p->it_prof_expires = jiffies_to_cputime(1); | ||
1221 | } | ||
1222 | } | 1230 | } |
1223 | 1231 | ||
1224 | if (likely(p->pid)) { | 1232 | if (likely(p->pid)) { |
diff --git a/kernel/freezer.c b/kernel/freezer.c new file mode 100644 index 000000000000..ba6248b323ef --- /dev/null +++ b/kernel/freezer.c | |||
@@ -0,0 +1,154 @@ | |||
1 | /* | ||
2 | * kernel/freezer.c - Function to freeze a process | ||
3 | * | ||
4 | * Originally from kernel/power/process.c | ||
5 | */ | ||
6 | |||
7 | #include <linux/interrupt.h> | ||
8 | #include <linux/suspend.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/syscalls.h> | ||
11 | #include <linux/freezer.h> | ||
12 | |||
13 | /* | ||
14 | * Freezing is complete: mark the current task PF_FROZEN (unless it is PF_NOFREEZE) and then clear its freeze flag. | ||
15 | */ | ||
16 | static inline void frozen_process(void) | ||
17 | { | ||
18 | if (!unlikely(current->flags & PF_NOFREEZE)) { /* PF_NOFREEZE tasks are never marked PF_FROZEN */ | ||
19 | current->flags |= PF_FROZEN; | ||
20 | wmb(); /* write barrier between setting PF_FROZEN and clearing the freeze flag below */ | ||
21 | } | ||
22 | clear_freeze_flag(current); /* cleared on both paths */ | ||
23 | } | ||
24 | |||
25 | /* Refrigerator is the place where frozen processes are stored :-). Returns at once if the current task is not freezing. */ | ||
26 | void refrigerator(void) | ||
27 | { | ||
28 | /* Hmm, should we be allowed to suspend when there are realtime | ||
29 | processes around? */ | ||
30 | long save; /* task state saved before parking, restored on the way out */ | ||
31 | |||
32 | task_lock(current); | ||
33 | if (freezing(current)) { | ||
34 | frozen_process(); /* mark ourselves frozen while holding task_lock() */ | ||
35 | task_unlock(current); | ||
36 | } else { | ||
37 | task_unlock(current); | ||
38 | return; /* not freezing: nothing to do */ | ||
39 | } | ||
40 | save = current->state; | ||
41 | pr_debug("%s entered refrigerator\n", current->comm); | ||
42 | |||
43 | spin_lock_irq(&current->sighand->siglock); | ||
44 | recalc_sigpending(); /* We sent fake signal, clean it up */ | ||
45 | spin_unlock_irq(&current->sighand->siglock); | ||
46 | |||
47 | for (;;) { /* sleep until this task is no longer marked frozen */ | ||
48 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
49 | if (!frozen(current)) | ||
50 | break; | ||
51 | schedule(); | ||
52 | } | ||
53 | pr_debug("%s left refrigerator\n", current->comm); | ||
54 | __set_current_state(save); | ||
55 | } | ||
56 | EXPORT_SYMBOL(refrigerator); | ||
57 | /* Wake @p through signal_wake_up() while holding its siglock. */ | ||
58 | static void fake_signal_wake_up(struct task_struct *p) | ||
59 | { | ||
60 | unsigned long irqflags; | ||
61 | |||
62 | spin_lock_irqsave(&p->sighand->siglock, irqflags); | ||
63 | signal_wake_up(p, 0); | ||
64 | spin_unlock_irqrestore(&p->sighand->siglock, irqflags); | ||
65 | } | ||
66 | |||
67 | /** | ||
68 | * freeze_task - send a freeze request to the given task | ||
69 | * @p: task to send the request to | ||
70 | * @sig_only: if set, the request will only be sent if the task has the | ||
71 | * PF_FREEZER_NOSIG flag unset | ||
72 | * Return value: 'false' if @sig_only is set and the task has | ||
73 | * PF_FREEZER_NOSIG set, or if the task is already frozen; 'true' otherwise | ||
74 | * | ||
75 | * The freeze request is sent by setting the task's TIF_FREEZE flag and | ||
76 | * either sending a fake signal to it or waking it up, depending on whether | ||
77 | * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task | ||
78 | * has PF_FREEZER_NOSIG set (i.e. it is a typical kernel thread), its | ||
79 | * TIF_FREEZE flag will not be set. | ||
80 | */ | ||
81 | bool freeze_task(struct task_struct *p, bool sig_only) | ||
82 | { | ||
83 | /* | ||
84 | * We first check if the task is freezing and next if it has already | ||
85 | * been frozen to avoid the race with frozen_process() which first marks | ||
86 | * the task as frozen and next clears its TIF_FREEZE. | ||
87 | */ | ||
88 | if (!freezing(p)) { | ||
89 | rmb(); /* read barrier between the freezing() and frozen() checks */ | ||
90 | if (frozen(p)) | ||
91 | return false; | ||
92 | |||
93 | if (!sig_only || should_send_signal(p)) /* flag is set unless @sig_only excludes this task */ | ||
94 | set_freeze_flag(p); | ||
95 | else | ||
96 | return false; | ||
97 | } | ||
98 | |||
99 | if (should_send_signal(p)) { /* presumably true when PF_FREEZER_NOSIG is unset - confirm against its definition */ | ||
100 | if (!signal_pending(p)) /* no fake signal needed if one is already pending */ | ||
101 | fake_signal_wake_up(p); | ||
102 | } else if (sig_only) { | ||
103 | return false; | ||
104 | } else { | ||
105 | wake_up_state(p, TASK_INTERRUPTIBLE); /* non-signal path: wake the task directly */ | ||
106 | } | ||
107 | |||
108 | return true; | ||
109 | } | ||
110 | /* If @p is freezing, clear its freeze flag and recompute its pending-signal state. */ | ||
111 | void cancel_freezing(struct task_struct *p) | ||
112 | { | ||
113 | unsigned long irqflags; | ||
114 | |||
115 | if (!freezing(p)) | ||
116 | return; /* not freezing: nothing to undo */ | ||
117 | pr_debug(" clean up: %s\n", p->comm); | ||
118 | clear_freeze_flag(p); | ||
119 | spin_lock_irqsave(&p->sighand->siglock, irqflags); | ||
120 | recalc_sigpending_and_wake(p); | ||
121 | spin_unlock_irqrestore(&p->sighand->siglock, irqflags); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * Thaw a task: drop PF_FROZEN if it is frozen, otherwise clear its freeze | ||
126 | * flag. Returns 1 if the task was frozen, 0 if it was not. | ||
127 | * | ||
128 | * thaw_process() calls this under task_lock() to close a race with | ||
129 | * refrigerator() that may occur if the freezing of tasks fails: without the | ||
130 | * lock, thaw_tasks() might run before a task in refrigerator() could call | ||
131 | * frozen_process(), and that task would be frozen with no one to thaw it. | ||
132 | */ | ||
133 | int __thaw_process(struct task_struct *p) | ||
134 | { | ||
135 | if (!frozen(p)) { | ||
136 | clear_freeze_flag(p); | ||
137 | return 0; | ||
138 | } | ||
139 | p->flags &= ~PF_FROZEN; | ||
140 | return 1; | ||
141 | } | ||
142 | /* Run __thaw_process() under task_lock(); wake @p only if it was frozen. Returns 1 if woken, else 0. */ | ||
143 | int thaw_process(struct task_struct *p) | ||
144 | { | ||
145 | int thawed; | ||
146 | |||
147 | task_lock(p); | ||
148 | thawed = (__thaw_process(p) == 1); | ||
149 | task_unlock(p); | ||
150 | if (thawed) | ||
151 | wake_up_process(p); /* woken after the task lock is dropped */ | ||
152 | return thawed; | ||
153 | } | ||
154 | EXPORT_SYMBOL(thaw_process); | ||
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cdec83e722fa..95978f48e039 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -1403,9 +1403,7 @@ void hrtimer_run_queues(void) | |||
1403 | if (!base->first) | 1403 | if (!base->first) |
1404 | continue; | 1404 | continue; |
1405 | 1405 | ||
1406 | if (base->get_softirq_time) | 1406 | if (gettime) { |
1407 | base->softirq_time = base->get_softirq_time(); | ||
1408 | else if (gettime) { | ||
1409 | hrtimer_get_softirq_time(cpu_base); | 1407 | hrtimer_get_softirq_time(cpu_base); |
1410 | gettime = 0; | 1408 | gettime = 0; |
1411 | } | 1409 | } |
@@ -1688,9 +1686,11 @@ static void migrate_hrtimers(int cpu) | |||
1688 | new_base = &get_cpu_var(hrtimer_bases); | 1686 | new_base = &get_cpu_var(hrtimer_bases); |
1689 | 1687 | ||
1690 | tick_cancel_sched_timer(cpu); | 1688 | tick_cancel_sched_timer(cpu); |
1691 | 1689 | /* | |
1692 | local_irq_disable(); | 1690 | * The caller is globally serialized and nobody else |
1693 | spin_lock(&new_base->lock); | 1691 | * takes two locks at once, deadlock is not possible. |
1692 | */ | ||
1693 | spin_lock_irq(&new_base->lock); | ||
1694 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | 1694 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); |
1695 | 1695 | ||
1696 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | 1696 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
@@ -1703,8 +1703,7 @@ static void migrate_hrtimers(int cpu) | |||
1703 | raise = 1; | 1703 | raise = 1; |
1704 | 1704 | ||
1705 | spin_unlock(&old_base->lock); | 1705 | spin_unlock(&old_base->lock); |
1706 | spin_unlock(&new_base->lock); | 1706 | spin_unlock_irq(&new_base->lock); |
1707 | local_irq_enable(); | ||
1708 | put_cpu_var(hrtimer_bases); | 1707 | put_cpu_var(hrtimer_bases); |
1709 | 1708 | ||
1710 | if (raise) | 1709 | if (raise) |
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 533068cfb607..cc0f7321b8ce 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c | |||
@@ -30,17 +30,16 @@ static DEFINE_MUTEX(probing_active); | |||
30 | unsigned long probe_irq_on(void) | 30 | unsigned long probe_irq_on(void) |
31 | { | 31 | { |
32 | struct irq_desc *desc; | 32 | struct irq_desc *desc; |
33 | unsigned long mask; | 33 | unsigned long mask = 0; |
34 | unsigned int i; | 34 | unsigned int status; |
35 | int i; | ||
35 | 36 | ||
36 | mutex_lock(&probing_active); | 37 | mutex_lock(&probing_active); |
37 | /* | 38 | /* |
38 | * something may have generated an irq long ago and we want to | 39 | * something may have generated an irq long ago and we want to |
39 | * flush such a longstanding irq before considering it as spurious. | 40 | * flush such a longstanding irq before considering it as spurious. |
40 | */ | 41 | */ |
41 | for (i = NR_IRQS-1; i > 0; i--) { | 42 | for_each_irq_desc_reverse(i, desc) { |
42 | desc = irq_desc + i; | ||
43 | |||
44 | spin_lock_irq(&desc->lock); | 43 | spin_lock_irq(&desc->lock); |
45 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { | 44 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { |
46 | /* | 45 | /* |
@@ -68,9 +67,7 @@ unsigned long probe_irq_on(void) | |||
68 | * (we must startup again here because if a longstanding irq | 67 | * (we must startup again here because if a longstanding irq |
69 | * happened in the previous stage, it may have masked itself) | 68 | * happened in the previous stage, it may have masked itself) |
70 | */ | 69 | */ |
71 | for (i = NR_IRQS-1; i > 0; i--) { | 70 | for_each_irq_desc_reverse(i, desc) { |
72 | desc = irq_desc + i; | ||
73 | |||
74 | spin_lock_irq(&desc->lock); | 71 | spin_lock_irq(&desc->lock); |
75 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { | 72 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { |
76 | desc->status |= IRQ_AUTODETECT | IRQ_WAITING; | 73 | desc->status |= IRQ_AUTODETECT | IRQ_WAITING; |
@@ -88,11 +85,7 @@ unsigned long probe_irq_on(void) | |||
88 | /* | 85 | /* |
89 | * Now filter out any obviously spurious interrupts | 86 | * Now filter out any obviously spurious interrupts |
90 | */ | 87 | */ |
91 | mask = 0; | 88 | for_each_irq_desc(i, desc) { |
92 | for (i = 0; i < NR_IRQS; i++) { | ||
93 | unsigned int status; | ||
94 | |||
95 | desc = irq_desc + i; | ||
96 | spin_lock_irq(&desc->lock); | 89 | spin_lock_irq(&desc->lock); |
97 | status = desc->status; | 90 | status = desc->status; |
98 | 91 | ||
@@ -126,14 +119,11 @@ EXPORT_SYMBOL(probe_irq_on); | |||
126 | */ | 119 | */ |
127 | unsigned int probe_irq_mask(unsigned long val) | 120 | unsigned int probe_irq_mask(unsigned long val) |
128 | { | 121 | { |
129 | unsigned int mask; | 122 | unsigned int status, mask = 0; |
123 | struct irq_desc *desc; | ||
130 | int i; | 124 | int i; |
131 | 125 | ||
132 | mask = 0; | 126 | for_each_irq_desc(i, desc) { |
133 | for (i = 0; i < NR_IRQS; i++) { | ||
134 | struct irq_desc *desc = irq_desc + i; | ||
135 | unsigned int status; | ||
136 | |||
137 | spin_lock_irq(&desc->lock); | 127 | spin_lock_irq(&desc->lock); |
138 | status = desc->status; | 128 | status = desc->status; |
139 | 129 | ||
@@ -171,20 +161,19 @@ EXPORT_SYMBOL(probe_irq_mask); | |||
171 | */ | 161 | */ |
172 | int probe_irq_off(unsigned long val) | 162 | int probe_irq_off(unsigned long val) |
173 | { | 163 | { |
174 | int i, irq_found = 0, nr_irqs = 0; | 164 | int i, irq_found = 0, nr_of_irqs = 0; |
175 | 165 | struct irq_desc *desc; | |
176 | for (i = 0; i < NR_IRQS; i++) { | 166 | unsigned int status; |
177 | struct irq_desc *desc = irq_desc + i; | ||
178 | unsigned int status; | ||
179 | 167 | ||
168 | for_each_irq_desc(i, desc) { | ||
180 | spin_lock_irq(&desc->lock); | 169 | spin_lock_irq(&desc->lock); |
181 | status = desc->status; | 170 | status = desc->status; |
182 | 171 | ||
183 | if (status & IRQ_AUTODETECT) { | 172 | if (status & IRQ_AUTODETECT) { |
184 | if (!(status & IRQ_WAITING)) { | 173 | if (!(status & IRQ_WAITING)) { |
185 | if (!nr_irqs) | 174 | if (!nr_of_irqs) |
186 | irq_found = i; | 175 | irq_found = i; |
187 | nr_irqs++; | 176 | nr_of_irqs++; |
188 | } | 177 | } |
189 | desc->status = status & ~IRQ_AUTODETECT; | 178 | desc->status = status & ~IRQ_AUTODETECT; |
190 | desc->chip->shutdown(i); | 179 | desc->chip->shutdown(i); |
@@ -193,7 +182,7 @@ int probe_irq_off(unsigned long val) | |||
193 | } | 182 | } |
194 | mutex_unlock(&probing_active); | 183 | mutex_unlock(&probing_active); |
195 | 184 | ||
196 | if (nr_irqs > 1) | 185 | if (nr_of_irqs > 1) |
197 | irq_found = -irq_found; | 186 | irq_found = -irq_found; |
198 | 187 | ||
199 | return irq_found; | 188 | return irq_found; |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 3cd441ebf5d2..4895fde4eb93 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -24,16 +24,15 @@ | |||
24 | */ | 24 | */ |
25 | void dynamic_irq_init(unsigned int irq) | 25 | void dynamic_irq_init(unsigned int irq) |
26 | { | 26 | { |
27 | struct irq_desc *desc; | 27 | struct irq_desc *desc = irq_to_desc(irq); |
28 | unsigned long flags; | 28 | unsigned long flags; |
29 | 29 | ||
30 | if (irq >= NR_IRQS) { | 30 | if (!desc) { |
31 | WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); | 31 | WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); |
32 | return; | 32 | return; |
33 | } | 33 | } |
34 | 34 | ||
35 | /* Ensure we don't have left over values from a previous use of this irq */ | 35 | /* Ensure we don't have left over values from a previous use of this irq */ |
36 | desc = irq_desc + irq; | ||
37 | spin_lock_irqsave(&desc->lock, flags); | 36 | spin_lock_irqsave(&desc->lock, flags); |
38 | desc->status = IRQ_DISABLED; | 37 | desc->status = IRQ_DISABLED; |
39 | desc->chip = &no_irq_chip; | 38 | desc->chip = &no_irq_chip; |
@@ -57,15 +56,14 @@ void dynamic_irq_init(unsigned int irq) | |||
57 | */ | 56 | */ |
58 | void dynamic_irq_cleanup(unsigned int irq) | 57 | void dynamic_irq_cleanup(unsigned int irq) |
59 | { | 58 | { |
60 | struct irq_desc *desc; | 59 | struct irq_desc *desc = irq_to_desc(irq); |
61 | unsigned long flags; | 60 | unsigned long flags; |
62 | 61 | ||
63 | if (irq >= NR_IRQS) { | 62 | if (!desc) { |
64 | WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq); | 63 | WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq); |
65 | return; | 64 | return; |
66 | } | 65 | } |
67 | 66 | ||
68 | desc = irq_desc + irq; | ||
69 | spin_lock_irqsave(&desc->lock, flags); | 67 | spin_lock_irqsave(&desc->lock, flags); |
70 | if (desc->action) { | 68 | if (desc->action) { |
71 | spin_unlock_irqrestore(&desc->lock, flags); | 69 | spin_unlock_irqrestore(&desc->lock, flags); |
@@ -89,10 +87,10 @@ void dynamic_irq_cleanup(unsigned int irq) | |||
89 | */ | 87 | */ |
90 | int set_irq_chip(unsigned int irq, struct irq_chip *chip) | 88 | int set_irq_chip(unsigned int irq, struct irq_chip *chip) |
91 | { | 89 | { |
92 | struct irq_desc *desc; | 90 | struct irq_desc *desc = irq_to_desc(irq); |
93 | unsigned long flags; | 91 | unsigned long flags; |
94 | 92 | ||
95 | if (irq >= NR_IRQS) { | 93 | if (!desc) { |
96 | WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq); | 94 | WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq); |
97 | return -EINVAL; | 95 | return -EINVAL; |
98 | } | 96 | } |
@@ -100,7 +98,6 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip) | |||
100 | if (!chip) | 98 | if (!chip) |
101 | chip = &no_irq_chip; | 99 | chip = &no_irq_chip; |
102 | 100 | ||
103 | desc = irq_desc + irq; | ||
104 | spin_lock_irqsave(&desc->lock, flags); | 101 | spin_lock_irqsave(&desc->lock, flags); |
105 | irq_chip_set_defaults(chip); | 102 | irq_chip_set_defaults(chip); |
106 | desc->chip = chip; | 103 | desc->chip = chip; |
@@ -111,27 +108,27 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip) | |||
111 | EXPORT_SYMBOL(set_irq_chip); | 108 | EXPORT_SYMBOL(set_irq_chip); |
112 | 109 | ||
113 | /** | 110 | /** |
114 | * set_irq_type - set the irq type for an irq | 111 | * set_irq_type - set the irq trigger type for an irq |
115 | * @irq: irq number | 112 | * @irq: irq number |
116 | * @type: interrupt type - see include/linux/interrupt.h | 113 | * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h |
117 | */ | 114 | */ |
118 | int set_irq_type(unsigned int irq, unsigned int type) | 115 | int set_irq_type(unsigned int irq, unsigned int type) |
119 | { | 116 | { |
120 | struct irq_desc *desc; | 117 | struct irq_desc *desc = irq_to_desc(irq); |
121 | unsigned long flags; | 118 | unsigned long flags; |
122 | int ret = -ENXIO; | 119 | int ret = -ENXIO; |
123 | 120 | ||
124 | if (irq >= NR_IRQS) { | 121 | if (!desc) { |
125 | printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq); | 122 | printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq); |
126 | return -ENODEV; | 123 | return -ENODEV; |
127 | } | 124 | } |
128 | 125 | ||
129 | desc = irq_desc + irq; | 126 | if (type == IRQ_TYPE_NONE) |
130 | if (desc->chip->set_type) { | 127 | return 0; |
131 | spin_lock_irqsave(&desc->lock, flags); | 128 | |
132 | ret = desc->chip->set_type(irq, type); | 129 | spin_lock_irqsave(&desc->lock, flags); |
133 | spin_unlock_irqrestore(&desc->lock, flags); | 130 | ret = __irq_set_trigger(desc, irq, flags); |
134 | } | 131 | spin_unlock_irqrestore(&desc->lock, flags); |
135 | return ret; | 132 | return ret; |
136 | } | 133 | } |
137 | EXPORT_SYMBOL(set_irq_type); | 134 | EXPORT_SYMBOL(set_irq_type); |
@@ -145,16 +142,15 @@ EXPORT_SYMBOL(set_irq_type); | |||
145 | */ | 142 | */ |
146 | int set_irq_data(unsigned int irq, void *data) | 143 | int set_irq_data(unsigned int irq, void *data) |
147 | { | 144 | { |
148 | struct irq_desc *desc; | 145 | struct irq_desc *desc = irq_to_desc(irq); |
149 | unsigned long flags; | 146 | unsigned long flags; |
150 | 147 | ||
151 | if (irq >= NR_IRQS) { | 148 | if (!desc) { |
152 | printk(KERN_ERR | 149 | printk(KERN_ERR |
153 | "Trying to install controller data for IRQ%d\n", irq); | 150 | "Trying to install controller data for IRQ%d\n", irq); |
154 | return -EINVAL; | 151 | return -EINVAL; |
155 | } | 152 | } |
156 | 153 | ||
157 | desc = irq_desc + irq; | ||
158 | spin_lock_irqsave(&desc->lock, flags); | 154 | spin_lock_irqsave(&desc->lock, flags); |
159 | desc->handler_data = data; | 155 | desc->handler_data = data; |
160 | spin_unlock_irqrestore(&desc->lock, flags); | 156 | spin_unlock_irqrestore(&desc->lock, flags); |
@@ -171,15 +167,15 @@ EXPORT_SYMBOL(set_irq_data); | |||
171 | */ | 167 | */ |
172 | int set_irq_msi(unsigned int irq, struct msi_desc *entry) | 168 | int set_irq_msi(unsigned int irq, struct msi_desc *entry) |
173 | { | 169 | { |
174 | struct irq_desc *desc; | 170 | struct irq_desc *desc = irq_to_desc(irq); |
175 | unsigned long flags; | 171 | unsigned long flags; |
176 | 172 | ||
177 | if (irq >= NR_IRQS) { | 173 | if (!desc) { |
178 | printk(KERN_ERR | 174 | printk(KERN_ERR |
179 | "Trying to install msi data for IRQ%d\n", irq); | 175 | "Trying to install msi data for IRQ%d\n", irq); |
180 | return -EINVAL; | 176 | return -EINVAL; |
181 | } | 177 | } |
182 | desc = irq_desc + irq; | 178 | |
183 | spin_lock_irqsave(&desc->lock, flags); | 179 | spin_lock_irqsave(&desc->lock, flags); |
184 | desc->msi_desc = entry; | 180 | desc->msi_desc = entry; |
185 | if (entry) | 181 | if (entry) |
@@ -197,10 +193,16 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry) | |||
197 | */ | 193 | */ |
198 | int set_irq_chip_data(unsigned int irq, void *data) | 194 | int set_irq_chip_data(unsigned int irq, void *data) |
199 | { | 195 | { |
200 | struct irq_desc *desc = irq_desc + irq; | 196 | struct irq_desc *desc = irq_to_desc(irq); |
201 | unsigned long flags; | 197 | unsigned long flags; |
202 | 198 | ||
203 | if (irq >= NR_IRQS || !desc->chip) { | 199 | if (!desc) { |
200 | printk(KERN_ERR | ||
201 | "Trying to install chip data for IRQ%d\n", irq); | ||
202 | return -EINVAL; | ||
203 | } | ||
204 | |||
205 | if (!desc->chip) { | ||
204 | printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq); | 206 | printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq); |
205 | return -EINVAL; | 207 | return -EINVAL; |
206 | } | 208 | } |
@@ -218,7 +220,7 @@ EXPORT_SYMBOL(set_irq_chip_data); | |||
218 | */ | 220 | */ |
219 | static void default_enable(unsigned int irq) | 221 | static void default_enable(unsigned int irq) |
220 | { | 222 | { |
221 | struct irq_desc *desc = irq_desc + irq; | 223 | struct irq_desc *desc = irq_to_desc(irq); |
222 | 224 | ||
223 | desc->chip->unmask(irq); | 225 | desc->chip->unmask(irq); |
224 | desc->status &= ~IRQ_MASKED; | 226 | desc->status &= ~IRQ_MASKED; |
@@ -236,8 +238,9 @@ static void default_disable(unsigned int irq) | |||
236 | */ | 238 | */ |
237 | static unsigned int default_startup(unsigned int irq) | 239 | static unsigned int default_startup(unsigned int irq) |
238 | { | 240 | { |
239 | irq_desc[irq].chip->enable(irq); | 241 | struct irq_desc *desc = irq_to_desc(irq); |
240 | 242 | ||
243 | desc->chip->enable(irq); | ||
241 | return 0; | 244 | return 0; |
242 | } | 245 | } |
243 | 246 | ||
@@ -246,7 +249,7 @@ static unsigned int default_startup(unsigned int irq) | |||
246 | */ | 249 | */ |
247 | static void default_shutdown(unsigned int irq) | 250 | static void default_shutdown(unsigned int irq) |
248 | { | 251 | { |
249 | struct irq_desc *desc = irq_desc + irq; | 252 | struct irq_desc *desc = irq_to_desc(irq); |
250 | 253 | ||
251 | desc->chip->mask(irq); | 254 | desc->chip->mask(irq); |
252 | desc->status |= IRQ_MASKED; | 255 | desc->status |= IRQ_MASKED; |
@@ -305,14 +308,13 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) | |||
305 | { | 308 | { |
306 | struct irqaction *action; | 309 | struct irqaction *action; |
307 | irqreturn_t action_ret; | 310 | irqreturn_t action_ret; |
308 | const unsigned int cpu = smp_processor_id(); | ||
309 | 311 | ||
310 | spin_lock(&desc->lock); | 312 | spin_lock(&desc->lock); |
311 | 313 | ||
312 | if (unlikely(desc->status & IRQ_INPROGRESS)) | 314 | if (unlikely(desc->status & IRQ_INPROGRESS)) |
313 | goto out_unlock; | 315 | goto out_unlock; |
314 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); | 316 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); |
315 | kstat_cpu(cpu).irqs[irq]++; | 317 | kstat_incr_irqs_this_cpu(irq, desc); |
316 | 318 | ||
317 | action = desc->action; | 319 | action = desc->action; |
318 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) | 320 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) |
@@ -344,7 +346,6 @@ out_unlock: | |||
344 | void | 346 | void |
345 | handle_level_irq(unsigned int irq, struct irq_desc *desc) | 347 | handle_level_irq(unsigned int irq, struct irq_desc *desc) |
346 | { | 348 | { |
347 | unsigned int cpu = smp_processor_id(); | ||
348 | struct irqaction *action; | 349 | struct irqaction *action; |
349 | irqreturn_t action_ret; | 350 | irqreturn_t action_ret; |
350 | 351 | ||
@@ -354,7 +355,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) | |||
354 | if (unlikely(desc->status & IRQ_INPROGRESS)) | 355 | if (unlikely(desc->status & IRQ_INPROGRESS)) |
355 | goto out_unlock; | 356 | goto out_unlock; |
356 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); | 357 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); |
357 | kstat_cpu(cpu).irqs[irq]++; | 358 | kstat_incr_irqs_this_cpu(irq, desc); |
358 | 359 | ||
359 | /* | 360 | /* |
360 | * If its disabled or no action available | 361 | * If its disabled or no action available |
@@ -392,7 +393,6 @@ out_unlock: | |||
392 | void | 393 | void |
393 | handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | 394 | handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) |
394 | { | 395 | { |
395 | unsigned int cpu = smp_processor_id(); | ||
396 | struct irqaction *action; | 396 | struct irqaction *action; |
397 | irqreturn_t action_ret; | 397 | irqreturn_t action_ret; |
398 | 398 | ||
@@ -402,7 +402,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
402 | goto out; | 402 | goto out; |
403 | 403 | ||
404 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); | 404 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); |
405 | kstat_cpu(cpu).irqs[irq]++; | 405 | kstat_incr_irqs_this_cpu(irq, desc); |
406 | 406 | ||
407 | /* | 407 | /* |
408 | * If its disabled or no action available | 408 | * If its disabled or no action available |
@@ -451,8 +451,6 @@ out: | |||
451 | void | 451 | void |
452 | handle_edge_irq(unsigned int irq, struct irq_desc *desc) | 452 | handle_edge_irq(unsigned int irq, struct irq_desc *desc) |
453 | { | 453 | { |
454 | const unsigned int cpu = smp_processor_id(); | ||
455 | |||
456 | spin_lock(&desc->lock); | 454 | spin_lock(&desc->lock); |
457 | 455 | ||
458 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); | 456 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); |
@@ -468,8 +466,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
468 | mask_ack_irq(desc, irq); | 466 | mask_ack_irq(desc, irq); |
469 | goto out_unlock; | 467 | goto out_unlock; |
470 | } | 468 | } |
471 | 469 | kstat_incr_irqs_this_cpu(irq, desc); | |
472 | kstat_cpu(cpu).irqs[irq]++; | ||
473 | 470 | ||
474 | /* Start handling the irq */ | 471 | /* Start handling the irq */ |
475 | desc->chip->ack(irq); | 472 | desc->chip->ack(irq); |
@@ -524,7 +521,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | |||
524 | { | 521 | { |
525 | irqreturn_t action_ret; | 522 | irqreturn_t action_ret; |
526 | 523 | ||
527 | kstat_this_cpu.irqs[irq]++; | 524 | kstat_incr_irqs_this_cpu(irq, desc); |
528 | 525 | ||
529 | if (desc->chip->ack) | 526 | if (desc->chip->ack) |
530 | desc->chip->ack(irq); | 527 | desc->chip->ack(irq); |
@@ -541,17 +538,15 @@ void | |||
541 | __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | 538 | __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, |
542 | const char *name) | 539 | const char *name) |
543 | { | 540 | { |
544 | struct irq_desc *desc; | 541 | struct irq_desc *desc = irq_to_desc(irq); |
545 | unsigned long flags; | 542 | unsigned long flags; |
546 | 543 | ||
547 | if (irq >= NR_IRQS) { | 544 | if (!desc) { |
548 | printk(KERN_ERR | 545 | printk(KERN_ERR |
549 | "Trying to install type control for IRQ%d\n", irq); | 546 | "Trying to install type control for IRQ%d\n", irq); |
550 | return; | 547 | return; |
551 | } | 548 | } |
552 | 549 | ||
553 | desc = irq_desc + irq; | ||
554 | |||
555 | if (!handle) | 550 | if (!handle) |
556 | handle = handle_bad_irq; | 551 | handle = handle_bad_irq; |
557 | else if (desc->chip == &no_irq_chip) { | 552 | else if (desc->chip == &no_irq_chip) { |
@@ -583,7 +578,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
583 | desc->status &= ~IRQ_DISABLED; | 578 | desc->status &= ~IRQ_DISABLED; |
584 | desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; | 579 | desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; |
585 | desc->depth = 0; | 580 | desc->depth = 0; |
586 | desc->chip->unmask(irq); | 581 | desc->chip->startup(irq); |
587 | } | 582 | } |
588 | spin_unlock_irqrestore(&desc->lock, flags); | 583 | spin_unlock_irqrestore(&desc->lock, flags); |
589 | } | 584 | } |
@@ -606,17 +601,14 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, | |||
606 | 601 | ||
607 | void __init set_irq_noprobe(unsigned int irq) | 602 | void __init set_irq_noprobe(unsigned int irq) |
608 | { | 603 | { |
609 | struct irq_desc *desc; | 604 | struct irq_desc *desc = irq_to_desc(irq); |
610 | unsigned long flags; | 605 | unsigned long flags; |
611 | 606 | ||
612 | if (irq >= NR_IRQS) { | 607 | if (!desc) { |
613 | printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq); | 608 | printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq); |
614 | |||
615 | return; | 609 | return; |
616 | } | 610 | } |
617 | 611 | ||
618 | desc = irq_desc + irq; | ||
619 | |||
620 | spin_lock_irqsave(&desc->lock, flags); | 612 | spin_lock_irqsave(&desc->lock, flags); |
621 | desc->status |= IRQ_NOPROBE; | 613 | desc->status |= IRQ_NOPROBE; |
622 | spin_unlock_irqrestore(&desc->lock, flags); | 614 | spin_unlock_irqrestore(&desc->lock, flags); |
@@ -624,17 +616,14 @@ void __init set_irq_noprobe(unsigned int irq) | |||
624 | 616 | ||
625 | void __init set_irq_probe(unsigned int irq) | 617 | void __init set_irq_probe(unsigned int irq) |
626 | { | 618 | { |
627 | struct irq_desc *desc; | 619 | struct irq_desc *desc = irq_to_desc(irq); |
628 | unsigned long flags; | 620 | unsigned long flags; |
629 | 621 | ||
630 | if (irq >= NR_IRQS) { | 622 | if (!desc) { |
631 | printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq); | 623 | printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq); |
632 | |||
633 | return; | 624 | return; |
634 | } | 625 | } |
635 | 626 | ||
636 | desc = irq_desc + irq; | ||
637 | |||
638 | spin_lock_irqsave(&desc->lock, flags); | 627 | spin_lock_irqsave(&desc->lock, flags); |
639 | desc->status &= ~IRQ_NOPROBE; | 628 | desc->status &= ~IRQ_NOPROBE; |
640 | spin_unlock_irqrestore(&desc->lock, flags); | 629 | spin_unlock_irqrestore(&desc->lock, flags); |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 5fa6198e9139..c815b42d0f5b 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -25,11 +25,10 @@ | |||
25 | * | 25 | * |
26 | * Handles spurious and unhandled IRQ's. It also prints a debugmessage. | 26 | * Handles spurious and unhandled IRQ's. It also prints a debugmessage. |
27 | */ | 27 | */ |
28 | void | 28 | void handle_bad_irq(unsigned int irq, struct irq_desc *desc) |
29 | handle_bad_irq(unsigned int irq, struct irq_desc *desc) | ||
30 | { | 29 | { |
31 | print_irq_desc(irq, desc); | 30 | print_irq_desc(irq, desc); |
32 | kstat_this_cpu.irqs[irq]++; | 31 | kstat_incr_irqs_this_cpu(irq, desc); |
33 | ack_bad_irq(irq); | 32 | ack_bad_irq(irq); |
34 | } | 33 | } |
35 | 34 | ||
@@ -47,6 +46,9 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc) | |||
47 | * | 46 | * |
48 | * Controller mappings for all interrupt sources: | 47 | * Controller mappings for all interrupt sources: |
49 | */ | 48 | */ |
49 | int nr_irqs = NR_IRQS; | ||
50 | EXPORT_SYMBOL_GPL(nr_irqs); | ||
51 | |||
50 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | 52 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { |
51 | [0 ... NR_IRQS-1] = { | 53 | [0 ... NR_IRQS-1] = { |
52 | .status = IRQ_DISABLED, | 54 | .status = IRQ_DISABLED, |
@@ -66,7 +68,9 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | |||
66 | */ | 68 | */ |
67 | static void ack_bad(unsigned int irq) | 69 | static void ack_bad(unsigned int irq) |
68 | { | 70 | { |
69 | print_irq_desc(irq, irq_desc + irq); | 71 | struct irq_desc *desc = irq_to_desc(irq); |
72 | |||
73 | print_irq_desc(irq, desc); | ||
70 | ack_bad_irq(irq); | 74 | ack_bad_irq(irq); |
71 | } | 75 | } |
72 | 76 | ||
@@ -131,8 +135,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) | |||
131 | irqreturn_t ret, retval = IRQ_NONE; | 135 | irqreturn_t ret, retval = IRQ_NONE; |
132 | unsigned int status = 0; | 136 | unsigned int status = 0; |
133 | 137 | ||
134 | handle_dynamic_tick(action); | ||
135 | |||
136 | if (!(action->flags & IRQF_DISABLED)) | 138 | if (!(action->flags & IRQF_DISABLED)) |
137 | local_irq_enable_in_hardirq(); | 139 | local_irq_enable_in_hardirq(); |
138 | 140 | ||
@@ -165,11 +167,12 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) | |||
165 | */ | 167 | */ |
166 | unsigned int __do_IRQ(unsigned int irq) | 168 | unsigned int __do_IRQ(unsigned int irq) |
167 | { | 169 | { |
168 | struct irq_desc *desc = irq_desc + irq; | 170 | struct irq_desc *desc = irq_to_desc(irq); |
169 | struct irqaction *action; | 171 | struct irqaction *action; |
170 | unsigned int status; | 172 | unsigned int status; |
171 | 173 | ||
172 | kstat_this_cpu.irqs[irq]++; | 174 | kstat_incr_irqs_this_cpu(irq, desc); |
175 | |||
173 | if (CHECK_IRQ_PER_CPU(desc->status)) { | 176 | if (CHECK_IRQ_PER_CPU(desc->status)) { |
174 | irqreturn_t action_ret; | 177 | irqreturn_t action_ret; |
175 | 178 | ||
@@ -256,8 +259,8 @@ out: | |||
256 | } | 259 | } |
257 | #endif | 260 | #endif |
258 | 261 | ||
259 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
260 | 262 | ||
263 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
261 | /* | 264 | /* |
262 | * lockdep: we want to handle all irq_desc locks as a single lock-class: | 265 | * lockdep: we want to handle all irq_desc locks as a single lock-class: |
263 | */ | 266 | */ |
@@ -265,10 +268,10 @@ static struct lock_class_key irq_desc_lock_class; | |||
265 | 268 | ||
266 | void early_init_irq_lock_class(void) | 269 | void early_init_irq_lock_class(void) |
267 | { | 270 | { |
271 | struct irq_desc *desc; | ||
268 | int i; | 272 | int i; |
269 | 273 | ||
270 | for (i = 0; i < NR_IRQS; i++) | 274 | for_each_irq_desc(i, desc) |
271 | lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class); | 275 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); |
272 | } | 276 | } |
273 | |||
274 | #endif | 277 | #endif |
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 08a849a22447..c9767e641980 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h | |||
@@ -10,12 +10,15 @@ extern void irq_chip_set_defaults(struct irq_chip *chip); | |||
10 | /* Set default handler: */ | 10 | /* Set default handler: */ |
11 | extern void compat_irq_chip_set_default_handler(struct irq_desc *desc); | 11 | extern void compat_irq_chip_set_default_handler(struct irq_desc *desc); |
12 | 12 | ||
13 | extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | ||
14 | unsigned long flags); | ||
15 | |||
13 | #ifdef CONFIG_PROC_FS | 16 | #ifdef CONFIG_PROC_FS |
14 | extern void register_irq_proc(unsigned int irq); | 17 | extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); |
15 | extern void register_handler_proc(unsigned int irq, struct irqaction *action); | 18 | extern void register_handler_proc(unsigned int irq, struct irqaction *action); |
16 | extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); | 19 | extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); |
17 | #else | 20 | #else |
18 | static inline void register_irq_proc(unsigned int irq) { } | 21 | static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { } |
19 | static inline void register_handler_proc(unsigned int irq, | 22 | static inline void register_handler_proc(unsigned int irq, |
20 | struct irqaction *action) { } | 23 | struct irqaction *action) { } |
21 | static inline void unregister_handler_proc(unsigned int irq, | 24 | static inline void unregister_handler_proc(unsigned int irq, |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 60c49e324390..c498a1b8c621 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -31,10 +31,10 @@ cpumask_t irq_default_affinity = CPU_MASK_ALL; | |||
31 | */ | 31 | */ |
32 | void synchronize_irq(unsigned int irq) | 32 | void synchronize_irq(unsigned int irq) |
33 | { | 33 | { |
34 | struct irq_desc *desc = irq_desc + irq; | 34 | struct irq_desc *desc = irq_to_desc(irq); |
35 | unsigned int status; | 35 | unsigned int status; |
36 | 36 | ||
37 | if (irq >= NR_IRQS) | 37 | if (!desc) |
38 | return; | 38 | return; |
39 | 39 | ||
40 | do { | 40 | do { |
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(synchronize_irq); | |||
64 | */ | 64 | */ |
65 | int irq_can_set_affinity(unsigned int irq) | 65 | int irq_can_set_affinity(unsigned int irq) |
66 | { | 66 | { |
67 | struct irq_desc *desc = irq_desc + irq; | 67 | struct irq_desc *desc = irq_to_desc(irq); |
68 | 68 | ||
69 | if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip || | 69 | if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip || |
70 | !desc->chip->set_affinity) | 70 | !desc->chip->set_affinity) |
@@ -81,18 +81,17 @@ int irq_can_set_affinity(unsigned int irq) | |||
81 | */ | 81 | */ |
82 | int irq_set_affinity(unsigned int irq, cpumask_t cpumask) | 82 | int irq_set_affinity(unsigned int irq, cpumask_t cpumask) |
83 | { | 83 | { |
84 | struct irq_desc *desc = irq_desc + irq; | 84 | struct irq_desc *desc = irq_to_desc(irq); |
85 | 85 | ||
86 | if (!desc->chip->set_affinity) | 86 | if (!desc->chip->set_affinity) |
87 | return -EINVAL; | 87 | return -EINVAL; |
88 | 88 | ||
89 | set_balance_irq_affinity(irq, cpumask); | ||
90 | |||
91 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 89 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
92 | if (desc->status & IRQ_MOVE_PCNTXT) { | 90 | if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { |
93 | unsigned long flags; | 91 | unsigned long flags; |
94 | 92 | ||
95 | spin_lock_irqsave(&desc->lock, flags); | 93 | spin_lock_irqsave(&desc->lock, flags); |
94 | desc->affinity = cpumask; | ||
96 | desc->chip->set_affinity(irq, cpumask); | 95 | desc->chip->set_affinity(irq, cpumask); |
97 | spin_unlock_irqrestore(&desc->lock, flags); | 96 | spin_unlock_irqrestore(&desc->lock, flags); |
98 | } else | 97 | } else |
@@ -111,16 +110,17 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask) | |||
111 | int irq_select_affinity(unsigned int irq) | 110 | int irq_select_affinity(unsigned int irq) |
112 | { | 111 | { |
113 | cpumask_t mask; | 112 | cpumask_t mask; |
113 | struct irq_desc *desc; | ||
114 | 114 | ||
115 | if (!irq_can_set_affinity(irq)) | 115 | if (!irq_can_set_affinity(irq)) |
116 | return 0; | 116 | return 0; |
117 | 117 | ||
118 | cpus_and(mask, cpu_online_map, irq_default_affinity); | 118 | cpus_and(mask, cpu_online_map, irq_default_affinity); |
119 | 119 | ||
120 | irq_desc[irq].affinity = mask; | 120 | desc = irq_to_desc(irq); |
121 | irq_desc[irq].chip->set_affinity(irq, mask); | 121 | desc->affinity = mask; |
122 | desc->chip->set_affinity(irq, mask); | ||
122 | 123 | ||
123 | set_balance_irq_affinity(irq, mask); | ||
124 | return 0; | 124 | return 0; |
125 | } | 125 | } |
126 | #endif | 126 | #endif |
@@ -140,10 +140,10 @@ int irq_select_affinity(unsigned int irq) | |||
140 | */ | 140 | */ |
141 | void disable_irq_nosync(unsigned int irq) | 141 | void disable_irq_nosync(unsigned int irq) |
142 | { | 142 | { |
143 | struct irq_desc *desc = irq_desc + irq; | 143 | struct irq_desc *desc = irq_to_desc(irq); |
144 | unsigned long flags; | 144 | unsigned long flags; |
145 | 145 | ||
146 | if (irq >= NR_IRQS) | 146 | if (!desc) |
147 | return; | 147 | return; |
148 | 148 | ||
149 | spin_lock_irqsave(&desc->lock, flags); | 149 | spin_lock_irqsave(&desc->lock, flags); |
@@ -169,9 +169,9 @@ EXPORT_SYMBOL(disable_irq_nosync); | |||
169 | */ | 169 | */ |
170 | void disable_irq(unsigned int irq) | 170 | void disable_irq(unsigned int irq) |
171 | { | 171 | { |
172 | struct irq_desc *desc = irq_desc + irq; | 172 | struct irq_desc *desc = irq_to_desc(irq); |
173 | 173 | ||
174 | if (irq >= NR_IRQS) | 174 | if (!desc) |
175 | return; | 175 | return; |
176 | 176 | ||
177 | disable_irq_nosync(irq); | 177 | disable_irq_nosync(irq); |
@@ -211,10 +211,10 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq) | |||
211 | */ | 211 | */ |
212 | void enable_irq(unsigned int irq) | 212 | void enable_irq(unsigned int irq) |
213 | { | 213 | { |
214 | struct irq_desc *desc = irq_desc + irq; | 214 | struct irq_desc *desc = irq_to_desc(irq); |
215 | unsigned long flags; | 215 | unsigned long flags; |
216 | 216 | ||
217 | if (irq >= NR_IRQS) | 217 | if (!desc) |
218 | return; | 218 | return; |
219 | 219 | ||
220 | spin_lock_irqsave(&desc->lock, flags); | 220 | spin_lock_irqsave(&desc->lock, flags); |
@@ -223,9 +223,9 @@ void enable_irq(unsigned int irq) | |||
223 | } | 223 | } |
224 | EXPORT_SYMBOL(enable_irq); | 224 | EXPORT_SYMBOL(enable_irq); |
225 | 225 | ||
226 | int set_irq_wake_real(unsigned int irq, unsigned int on) | 226 | static int set_irq_wake_real(unsigned int irq, unsigned int on) |
227 | { | 227 | { |
228 | struct irq_desc *desc = irq_desc + irq; | 228 | struct irq_desc *desc = irq_to_desc(irq); |
229 | int ret = -ENXIO; | 229 | int ret = -ENXIO; |
230 | 230 | ||
231 | if (desc->chip->set_wake) | 231 | if (desc->chip->set_wake) |
@@ -248,7 +248,7 @@ int set_irq_wake_real(unsigned int irq, unsigned int on) | |||
248 | */ | 248 | */ |
249 | int set_irq_wake(unsigned int irq, unsigned int on) | 249 | int set_irq_wake(unsigned int irq, unsigned int on) |
250 | { | 250 | { |
251 | struct irq_desc *desc = irq_desc + irq; | 251 | struct irq_desc *desc = irq_to_desc(irq); |
252 | unsigned long flags; | 252 | unsigned long flags; |
253 | int ret = 0; | 253 | int ret = 0; |
254 | 254 | ||
@@ -288,12 +288,16 @@ EXPORT_SYMBOL(set_irq_wake); | |||
288 | */ | 288 | */ |
289 | int can_request_irq(unsigned int irq, unsigned long irqflags) | 289 | int can_request_irq(unsigned int irq, unsigned long irqflags) |
290 | { | 290 | { |
291 | struct irq_desc *desc = irq_to_desc(irq); | ||
291 | struct irqaction *action; | 292 | struct irqaction *action; |
292 | 293 | ||
293 | if (irq >= NR_IRQS || irq_desc[irq].status & IRQ_NOREQUEST) | 294 | if (!desc) |
295 | return 0; | ||
296 | |||
297 | if (desc->status & IRQ_NOREQUEST) | ||
294 | return 0; | 298 | return 0; |
295 | 299 | ||
296 | action = irq_desc[irq].action; | 300 | action = desc->action; |
297 | if (action) | 301 | if (action) |
298 | if (irqflags & action->flags & IRQF_SHARED) | 302 | if (irqflags & action->flags & IRQF_SHARED) |
299 | action = NULL; | 303 | action = NULL; |
@@ -312,10 +316,11 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc) | |||
312 | desc->handle_irq = NULL; | 316 | desc->handle_irq = NULL; |
313 | } | 317 | } |
314 | 318 | ||
315 | static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq, | 319 | int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, |
316 | unsigned long flags) | 320 | unsigned long flags) |
317 | { | 321 | { |
318 | int ret; | 322 | int ret; |
323 | struct irq_chip *chip = desc->chip; | ||
319 | 324 | ||
320 | if (!chip || !chip->set_type) { | 325 | if (!chip || !chip->set_type) { |
321 | /* | 326 | /* |
@@ -333,6 +338,11 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq, | |||
333 | pr_err("setting trigger mode %d for irq %u failed (%pF)\n", | 338 | pr_err("setting trigger mode %d for irq %u failed (%pF)\n", |
334 | (int)(flags & IRQF_TRIGGER_MASK), | 339 | (int)(flags & IRQF_TRIGGER_MASK), |
335 | irq, chip->set_type); | 340 | irq, chip->set_type); |
341 | else { | ||
342 | /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */ | ||
343 | desc->status &= ~IRQ_TYPE_SENSE_MASK; | ||
344 | desc->status |= flags & IRQ_TYPE_SENSE_MASK; | ||
345 | } | ||
336 | 346 | ||
337 | return ret; | 347 | return ret; |
338 | } | 348 | } |
@@ -341,16 +351,16 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq, | |||
341 | * Internal function to register an irqaction - typically used to | 351 | * Internal function to register an irqaction - typically used to |
342 | * allocate special interrupts that are part of the architecture. | 352 | * allocate special interrupts that are part of the architecture. |
343 | */ | 353 | */ |
344 | int setup_irq(unsigned int irq, struct irqaction *new) | 354 | static int |
355 | __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new) | ||
345 | { | 356 | { |
346 | struct irq_desc *desc = irq_desc + irq; | ||
347 | struct irqaction *old, **p; | 357 | struct irqaction *old, **p; |
348 | const char *old_name = NULL; | 358 | const char *old_name = NULL; |
349 | unsigned long flags; | 359 | unsigned long flags; |
350 | int shared = 0; | 360 | int shared = 0; |
351 | int ret; | 361 | int ret; |
352 | 362 | ||
353 | if (irq >= NR_IRQS) | 363 | if (!desc) |
354 | return -EINVAL; | 364 | return -EINVAL; |
355 | 365 | ||
356 | if (desc->chip == &no_irq_chip) | 366 | if (desc->chip == &no_irq_chip) |
@@ -411,7 +421,7 @@ int setup_irq(unsigned int irq, struct irqaction *new) | |||
411 | 421 | ||
412 | /* Setup the type (level, edge polarity) if configured: */ | 422 | /* Setup the type (level, edge polarity) if configured: */ |
413 | if (new->flags & IRQF_TRIGGER_MASK) { | 423 | if (new->flags & IRQF_TRIGGER_MASK) { |
414 | ret = __irq_set_trigger(desc->chip, irq, new->flags); | 424 | ret = __irq_set_trigger(desc, irq, new->flags); |
415 | 425 | ||
416 | if (ret) { | 426 | if (ret) { |
417 | spin_unlock_irqrestore(&desc->lock, flags); | 427 | spin_unlock_irqrestore(&desc->lock, flags); |
@@ -430,16 +440,21 @@ int setup_irq(unsigned int irq, struct irqaction *new) | |||
430 | if (!(desc->status & IRQ_NOAUTOEN)) { | 440 | if (!(desc->status & IRQ_NOAUTOEN)) { |
431 | desc->depth = 0; | 441 | desc->depth = 0; |
432 | desc->status &= ~IRQ_DISABLED; | 442 | desc->status &= ~IRQ_DISABLED; |
433 | if (desc->chip->startup) | 443 | desc->chip->startup(irq); |
434 | desc->chip->startup(irq); | ||
435 | else | ||
436 | desc->chip->enable(irq); | ||
437 | } else | 444 | } else |
438 | /* Undo nested disables: */ | 445 | /* Undo nested disables: */ |
439 | desc->depth = 1; | 446 | desc->depth = 1; |
440 | 447 | ||
441 | /* Set default affinity mask once everything is setup */ | 448 | /* Set default affinity mask once everything is setup */ |
442 | irq_select_affinity(irq); | 449 | irq_select_affinity(irq); |
450 | |||
451 | } else if ((new->flags & IRQF_TRIGGER_MASK) | ||
452 | && (new->flags & IRQF_TRIGGER_MASK) | ||
453 | != (desc->status & IRQ_TYPE_SENSE_MASK)) { | ||
454 | /* hope the handler works with the actual trigger mode... */ | ||
455 | pr_warning("IRQ %d uses trigger mode %d; requested %d\n", | ||
456 | irq, (int)(desc->status & IRQ_TYPE_SENSE_MASK), | ||
457 | (int)(new->flags & IRQF_TRIGGER_MASK)); | ||
443 | } | 458 | } |
444 | 459 | ||
445 | *p = new; | 460 | *p = new; |
@@ -464,7 +479,7 @@ int setup_irq(unsigned int irq, struct irqaction *new) | |||
464 | spin_unlock_irqrestore(&desc->lock, flags); | 479 | spin_unlock_irqrestore(&desc->lock, flags); |
465 | 480 | ||
466 | new->irq = irq; | 481 | new->irq = irq; |
467 | register_irq_proc(irq); | 482 | register_irq_proc(irq, desc); |
468 | new->dir = NULL; | 483 | new->dir = NULL; |
469 | register_handler_proc(irq, new); | 484 | register_handler_proc(irq, new); |
470 | 485 | ||
@@ -484,6 +499,20 @@ mismatch: | |||
484 | } | 499 | } |
485 | 500 | ||
486 | /** | 501 | /** |
502 | * setup_irq - setup an interrupt | ||
503 | * @irq: Interrupt line to setup | ||
504 | * @act: irqaction for the interrupt | ||
505 | * | ||
506 | * Used to statically setup interrupts in the early boot process. | ||
507 | */ | ||
508 | int setup_irq(unsigned int irq, struct irqaction *act) | ||
509 | { | ||
510 | struct irq_desc *desc = irq_to_desc(irq); | ||
511 | |||
512 | return __setup_irq(irq, desc, act); | ||
513 | } | ||
514 | |||
515 | /** | ||
487 | * free_irq - free an interrupt | 516 | * free_irq - free an interrupt |
488 | * @irq: Interrupt line to free | 517 | * @irq: Interrupt line to free |
489 | * @dev_id: Device identity to free | 518 | * @dev_id: Device identity to free |
@@ -499,15 +528,15 @@ mismatch: | |||
499 | */ | 528 | */ |
500 | void free_irq(unsigned int irq, void *dev_id) | 529 | void free_irq(unsigned int irq, void *dev_id) |
501 | { | 530 | { |
502 | struct irq_desc *desc; | 531 | struct irq_desc *desc = irq_to_desc(irq); |
503 | struct irqaction **p; | 532 | struct irqaction **p; |
504 | unsigned long flags; | 533 | unsigned long flags; |
505 | 534 | ||
506 | WARN_ON(in_interrupt()); | 535 | WARN_ON(in_interrupt()); |
507 | if (irq >= NR_IRQS) | 536 | |
537 | if (!desc) | ||
508 | return; | 538 | return; |
509 | 539 | ||
510 | desc = irq_desc + irq; | ||
511 | spin_lock_irqsave(&desc->lock, flags); | 540 | spin_lock_irqsave(&desc->lock, flags); |
512 | p = &desc->action; | 541 | p = &desc->action; |
513 | for (;;) { | 542 | for (;;) { |
@@ -596,12 +625,14 @@ EXPORT_SYMBOL(free_irq); | |||
596 | * IRQF_SHARED Interrupt is shared | 625 | * IRQF_SHARED Interrupt is shared |
597 | * IRQF_DISABLED Disable local interrupts while processing | 626 | * IRQF_DISABLED Disable local interrupts while processing |
598 | * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy | 627 | * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy |
628 | * IRQF_TRIGGER_* Specify active edge(s) or level | ||
599 | * | 629 | * |
600 | */ | 630 | */ |
601 | int request_irq(unsigned int irq, irq_handler_t handler, | 631 | int request_irq(unsigned int irq, irq_handler_t handler, |
602 | unsigned long irqflags, const char *devname, void *dev_id) | 632 | unsigned long irqflags, const char *devname, void *dev_id) |
603 | { | 633 | { |
604 | struct irqaction *action; | 634 | struct irqaction *action; |
635 | struct irq_desc *desc; | ||
605 | int retval; | 636 | int retval; |
606 | 637 | ||
607 | #ifdef CONFIG_LOCKDEP | 638 | #ifdef CONFIG_LOCKDEP |
@@ -618,9 +649,12 @@ int request_irq(unsigned int irq, irq_handler_t handler, | |||
618 | */ | 649 | */ |
619 | if ((irqflags & IRQF_SHARED) && !dev_id) | 650 | if ((irqflags & IRQF_SHARED) && !dev_id) |
620 | return -EINVAL; | 651 | return -EINVAL; |
621 | if (irq >= NR_IRQS) | 652 | |
653 | desc = irq_to_desc(irq); | ||
654 | if (!desc) | ||
622 | return -EINVAL; | 655 | return -EINVAL; |
623 | if (irq_desc[irq].status & IRQ_NOREQUEST) | 656 | |
657 | if (desc->status & IRQ_NOREQUEST) | ||
624 | return -EINVAL; | 658 | return -EINVAL; |
625 | if (!handler) | 659 | if (!handler) |
626 | return -EINVAL; | 660 | return -EINVAL; |
@@ -636,26 +670,29 @@ int request_irq(unsigned int irq, irq_handler_t handler, | |||
636 | action->next = NULL; | 670 | action->next = NULL; |
637 | action->dev_id = dev_id; | 671 | action->dev_id = dev_id; |
638 | 672 | ||
673 | retval = __setup_irq(irq, desc, action); | ||
674 | if (retval) | ||
675 | kfree(action); | ||
676 | |||
639 | #ifdef CONFIG_DEBUG_SHIRQ | 677 | #ifdef CONFIG_DEBUG_SHIRQ |
640 | if (irqflags & IRQF_SHARED) { | 678 | if (irqflags & IRQF_SHARED) { |
641 | /* | 679 | /* |
642 | * It's a shared IRQ -- the driver ought to be prepared for it | 680 | * It's a shared IRQ -- the driver ought to be prepared for it |
643 | * to happen immediately, so let's make sure.... | 681 | * to happen immediately, so let's make sure.... |
644 | * We do this before actually registering it, to make sure that | 682 | * We disable the irq to make sure that a 'real' IRQ doesn't |
645 | * a 'real' IRQ doesn't run in parallel with our fake | 683 | * run in parallel with our fake. |
646 | */ | 684 | */ |
647 | unsigned long flags; | 685 | unsigned long flags; |
648 | 686 | ||
687 | disable_irq(irq); | ||
649 | local_irq_save(flags); | 688 | local_irq_save(flags); |
689 | |||
650 | handler(irq, dev_id); | 690 | handler(irq, dev_id); |
691 | |||
651 | local_irq_restore(flags); | 692 | local_irq_restore(flags); |
693 | enable_irq(irq); | ||
652 | } | 694 | } |
653 | #endif | 695 | #endif |
654 | |||
655 | retval = setup_irq(irq, action); | ||
656 | if (retval) | ||
657 | kfree(action); | ||
658 | |||
659 | return retval; | 696 | return retval; |
660 | } | 697 | } |
661 | EXPORT_SYMBOL(request_irq); | 698 | EXPORT_SYMBOL(request_irq); |
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 77b7acc875c5..90b920d3f52b 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
@@ -3,18 +3,18 @@ | |||
3 | 3 | ||
4 | void set_pending_irq(unsigned int irq, cpumask_t mask) | 4 | void set_pending_irq(unsigned int irq, cpumask_t mask) |
5 | { | 5 | { |
6 | struct irq_desc *desc = irq_desc + irq; | 6 | struct irq_desc *desc = irq_to_desc(irq); |
7 | unsigned long flags; | 7 | unsigned long flags; |
8 | 8 | ||
9 | spin_lock_irqsave(&desc->lock, flags); | 9 | spin_lock_irqsave(&desc->lock, flags); |
10 | desc->status |= IRQ_MOVE_PENDING; | 10 | desc->status |= IRQ_MOVE_PENDING; |
11 | irq_desc[irq].pending_mask = mask; | 11 | desc->pending_mask = mask; |
12 | spin_unlock_irqrestore(&desc->lock, flags); | 12 | spin_unlock_irqrestore(&desc->lock, flags); |
13 | } | 13 | } |
14 | 14 | ||
15 | void move_masked_irq(int irq) | 15 | void move_masked_irq(int irq) |
16 | { | 16 | { |
17 | struct irq_desc *desc = irq_desc + irq; | 17 | struct irq_desc *desc = irq_to_desc(irq); |
18 | cpumask_t tmp; | 18 | cpumask_t tmp; |
19 | 19 | ||
20 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) | 20 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) |
@@ -30,7 +30,7 @@ void move_masked_irq(int irq) | |||
30 | 30 | ||
31 | desc->status &= ~IRQ_MOVE_PENDING; | 31 | desc->status &= ~IRQ_MOVE_PENDING; |
32 | 32 | ||
33 | if (unlikely(cpus_empty(irq_desc[irq].pending_mask))) | 33 | if (unlikely(cpus_empty(desc->pending_mask))) |
34 | return; | 34 | return; |
35 | 35 | ||
36 | if (!desc->chip->set_affinity) | 36 | if (!desc->chip->set_affinity) |
@@ -38,7 +38,7 @@ void move_masked_irq(int irq) | |||
38 | 38 | ||
39 | assert_spin_locked(&desc->lock); | 39 | assert_spin_locked(&desc->lock); |
40 | 40 | ||
41 | cpus_and(tmp, irq_desc[irq].pending_mask, cpu_online_map); | 41 | cpus_and(tmp, desc->pending_mask, cpu_online_map); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * If there was a valid mask to work with, please | 44 | * If there was a valid mask to work with, please |
@@ -55,12 +55,12 @@ void move_masked_irq(int irq) | |||
55 | if (likely(!cpus_empty(tmp))) { | 55 | if (likely(!cpus_empty(tmp))) { |
56 | desc->chip->set_affinity(irq,tmp); | 56 | desc->chip->set_affinity(irq,tmp); |
57 | } | 57 | } |
58 | cpus_clear(irq_desc[irq].pending_mask); | 58 | cpus_clear(desc->pending_mask); |
59 | } | 59 | } |
60 | 60 | ||
61 | void move_native_irq(int irq) | 61 | void move_native_irq(int irq) |
62 | { | 62 | { |
63 | struct irq_desc *desc = irq_desc + irq; | 63 | struct irq_desc *desc = irq_to_desc(irq); |
64 | 64 | ||
65 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) | 65 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) |
66 | return; | 66 | return; |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index a09dd29c2fd7..fac014a81b24 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir; | |||
19 | 19 | ||
20 | static int irq_affinity_proc_show(struct seq_file *m, void *v) | 20 | static int irq_affinity_proc_show(struct seq_file *m, void *v) |
21 | { | 21 | { |
22 | struct irq_desc *desc = irq_desc + (long)m->private; | 22 | struct irq_desc *desc = irq_to_desc((long)m->private); |
23 | cpumask_t *mask = &desc->affinity; | 23 | cpumask_t *mask = &desc->affinity; |
24 | 24 | ||
25 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 25 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
@@ -43,7 +43,7 @@ static ssize_t irq_affinity_proc_write(struct file *file, | |||
43 | cpumask_t new_value; | 43 | cpumask_t new_value; |
44 | int err; | 44 | int err; |
45 | 45 | ||
46 | if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || | 46 | if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity || |
47 | irq_balancing_disabled(irq)) | 47 | irq_balancing_disabled(irq)) |
48 | return -EIO; | 48 | return -EIO; |
49 | 49 | ||
@@ -132,20 +132,20 @@ static const struct file_operations default_affinity_proc_fops = { | |||
132 | static int irq_spurious_read(char *page, char **start, off_t off, | 132 | static int irq_spurious_read(char *page, char **start, off_t off, |
133 | int count, int *eof, void *data) | 133 | int count, int *eof, void *data) |
134 | { | 134 | { |
135 | struct irq_desc *d = &irq_desc[(long) data]; | 135 | struct irq_desc *desc = irq_to_desc((long) data); |
136 | return sprintf(page, "count %u\n" | 136 | return sprintf(page, "count %u\n" |
137 | "unhandled %u\n" | 137 | "unhandled %u\n" |
138 | "last_unhandled %u ms\n", | 138 | "last_unhandled %u ms\n", |
139 | d->irq_count, | 139 | desc->irq_count, |
140 | d->irqs_unhandled, | 140 | desc->irqs_unhandled, |
141 | jiffies_to_msecs(d->last_unhandled)); | 141 | jiffies_to_msecs(desc->last_unhandled)); |
142 | } | 142 | } |
143 | 143 | ||
144 | #define MAX_NAMELEN 128 | 144 | #define MAX_NAMELEN 128 |
145 | 145 | ||
146 | static int name_unique(unsigned int irq, struct irqaction *new_action) | 146 | static int name_unique(unsigned int irq, struct irqaction *new_action) |
147 | { | 147 | { |
148 | struct irq_desc *desc = irq_desc + irq; | 148 | struct irq_desc *desc = irq_to_desc(irq); |
149 | struct irqaction *action; | 149 | struct irqaction *action; |
150 | unsigned long flags; | 150 | unsigned long flags; |
151 | int ret = 1; | 151 | int ret = 1; |
@@ -165,8 +165,9 @@ static int name_unique(unsigned int irq, struct irqaction *new_action) | |||
165 | void register_handler_proc(unsigned int irq, struct irqaction *action) | 165 | void register_handler_proc(unsigned int irq, struct irqaction *action) |
166 | { | 166 | { |
167 | char name [MAX_NAMELEN]; | 167 | char name [MAX_NAMELEN]; |
168 | struct irq_desc *desc = irq_to_desc(irq); | ||
168 | 169 | ||
169 | if (!irq_desc[irq].dir || action->dir || !action->name || | 170 | if (!desc->dir || action->dir || !action->name || |
170 | !name_unique(irq, action)) | 171 | !name_unique(irq, action)) |
171 | return; | 172 | return; |
172 | 173 | ||
@@ -174,36 +175,34 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) | |||
174 | snprintf(name, MAX_NAMELEN, "%s", action->name); | 175 | snprintf(name, MAX_NAMELEN, "%s", action->name); |
175 | 176 | ||
176 | /* create /proc/irq/1234/handler/ */ | 177 | /* create /proc/irq/1234/handler/ */ |
177 | action->dir = proc_mkdir(name, irq_desc[irq].dir); | 178 | action->dir = proc_mkdir(name, desc->dir); |
178 | } | 179 | } |
179 | 180 | ||
180 | #undef MAX_NAMELEN | 181 | #undef MAX_NAMELEN |
181 | 182 | ||
182 | #define MAX_NAMELEN 10 | 183 | #define MAX_NAMELEN 10 |
183 | 184 | ||
184 | void register_irq_proc(unsigned int irq) | 185 | void register_irq_proc(unsigned int irq, struct irq_desc *desc) |
185 | { | 186 | { |
186 | char name [MAX_NAMELEN]; | 187 | char name [MAX_NAMELEN]; |
187 | struct proc_dir_entry *entry; | 188 | struct proc_dir_entry *entry; |
188 | 189 | ||
189 | if (!root_irq_dir || | 190 | if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir) |
190 | (irq_desc[irq].chip == &no_irq_chip) || | ||
191 | irq_desc[irq].dir) | ||
192 | return; | 191 | return; |
193 | 192 | ||
194 | memset(name, 0, MAX_NAMELEN); | 193 | memset(name, 0, MAX_NAMELEN); |
195 | sprintf(name, "%d", irq); | 194 | sprintf(name, "%d", irq); |
196 | 195 | ||
197 | /* create /proc/irq/1234 */ | 196 | /* create /proc/irq/1234 */ |
198 | irq_desc[irq].dir = proc_mkdir(name, root_irq_dir); | 197 | desc->dir = proc_mkdir(name, root_irq_dir); |
199 | 198 | ||
200 | #ifdef CONFIG_SMP | 199 | #ifdef CONFIG_SMP |
201 | /* create /proc/irq/<irq>/smp_affinity */ | 200 | /* create /proc/irq/<irq>/smp_affinity */ |
202 | proc_create_data("smp_affinity", 0600, irq_desc[irq].dir, | 201 | proc_create_data("smp_affinity", 0600, desc->dir, |
203 | &irq_affinity_proc_fops, (void *)(long)irq); | 202 | &irq_affinity_proc_fops, (void *)(long)irq); |
204 | #endif | 203 | #endif |
205 | 204 | ||
206 | entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir); | 205 | entry = create_proc_entry("spurious", 0444, desc->dir); |
207 | if (entry) { | 206 | if (entry) { |
208 | entry->data = (void *)(long)irq; | 207 | entry->data = (void *)(long)irq; |
209 | entry->read_proc = irq_spurious_read; | 208 | entry->read_proc = irq_spurious_read; |
@@ -214,8 +213,11 @@ void register_irq_proc(unsigned int irq) | |||
214 | 213 | ||
215 | void unregister_handler_proc(unsigned int irq, struct irqaction *action) | 214 | void unregister_handler_proc(unsigned int irq, struct irqaction *action) |
216 | { | 215 | { |
217 | if (action->dir) | 216 | if (action->dir) { |
218 | remove_proc_entry(action->dir->name, irq_desc[irq].dir); | 217 | struct irq_desc *desc = irq_to_desc(irq); |
218 | |||
219 | remove_proc_entry(action->dir->name, desc->dir); | ||
220 | } | ||
219 | } | 221 | } |
220 | 222 | ||
221 | void register_default_affinity_proc(void) | 223 | void register_default_affinity_proc(void) |
@@ -228,7 +230,8 @@ void register_default_affinity_proc(void) | |||
228 | 230 | ||
229 | void init_irq_proc(void) | 231 | void init_irq_proc(void) |
230 | { | 232 | { |
231 | int i; | 233 | unsigned int irq; |
234 | struct irq_desc *desc; | ||
232 | 235 | ||
233 | /* create /proc/irq */ | 236 | /* create /proc/irq */ |
234 | root_irq_dir = proc_mkdir("irq", NULL); | 237 | root_irq_dir = proc_mkdir("irq", NULL); |
@@ -240,7 +243,7 @@ void init_irq_proc(void) | |||
240 | /* | 243 | /* |
241 | * Create entries for all existing IRQs. | 244 | * Create entries for all existing IRQs. |
242 | */ | 245 | */ |
243 | for (i = 0; i < NR_IRQS; i++) | 246 | for_each_irq_desc(irq, desc) |
244 | register_irq_proc(i); | 247 | register_irq_proc(irq, desc); |
245 | } | 248 | } |
246 | 249 | ||
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index a8046791ba2d..89c7117acf2b 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c | |||
@@ -33,10 +33,10 @@ static void resend_irqs(unsigned long arg) | |||
33 | struct irq_desc *desc; | 33 | struct irq_desc *desc; |
34 | int irq; | 34 | int irq; |
35 | 35 | ||
36 | while (!bitmap_empty(irqs_resend, NR_IRQS)) { | 36 | while (!bitmap_empty(irqs_resend, nr_irqs)) { |
37 | irq = find_first_bit(irqs_resend, NR_IRQS); | 37 | irq = find_first_bit(irqs_resend, nr_irqs); |
38 | clear_bit(irq, irqs_resend); | 38 | clear_bit(irq, irqs_resend); |
39 | desc = irq_desc + irq; | 39 | desc = irq_to_desc(irq); |
40 | local_irq_disable(); | 40 | local_irq_disable(); |
41 | desc->handle_irq(irq, desc); | 41 | desc->handle_irq(irq, desc); |
42 | local_irq_enable(); | 42 | local_irq_enable(); |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index c66d3f10e853..dd364c11e56e 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -12,83 +12,122 @@ | |||
12 | #include <linux/kallsyms.h> | 12 | #include <linux/kallsyms.h> |
13 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
15 | #include <linux/timer.h> | ||
15 | 16 | ||
16 | static int irqfixup __read_mostly; | 17 | static int irqfixup __read_mostly; |
17 | 18 | ||
19 | #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) | ||
20 | static void poll_spurious_irqs(unsigned long dummy); | ||
21 | static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0); | ||
22 | |||
18 | /* | 23 | /* |
19 | * Recovery handler for misrouted interrupts. | 24 | * Recovery handler for misrouted interrupts. |
20 | */ | 25 | */ |
21 | static int misrouted_irq(int irq) | 26 | static int try_one_irq(int irq, struct irq_desc *desc) |
22 | { | 27 | { |
23 | int i; | 28 | struct irqaction *action; |
24 | int ok = 0; | 29 | int ok = 0, work = 0; |
25 | int work = 0; /* Did we do work for a real IRQ */ | ||
26 | |||
27 | for (i = 1; i < NR_IRQS; i++) { | ||
28 | struct irq_desc *desc = irq_desc + i; | ||
29 | struct irqaction *action; | ||
30 | |||
31 | if (i == irq) /* Already tried */ | ||
32 | continue; | ||
33 | 30 | ||
34 | spin_lock(&desc->lock); | 31 | spin_lock(&desc->lock); |
35 | /* Already running on another processor */ | 32 | /* Already running on another processor */ |
36 | if (desc->status & IRQ_INPROGRESS) { | 33 | if (desc->status & IRQ_INPROGRESS) { |
37 | /* | 34 | /* |
38 | * Already running: If it is shared get the other | 35 | * Already running: If it is shared get the other |
39 | * CPU to go looking for our mystery interrupt too | 36 | * CPU to go looking for our mystery interrupt too |
40 | */ | 37 | */ |
41 | if (desc->action && (desc->action->flags & IRQF_SHARED)) | 38 | if (desc->action && (desc->action->flags & IRQF_SHARED)) |
42 | desc->status |= IRQ_PENDING; | 39 | desc->status |= IRQ_PENDING; |
43 | spin_unlock(&desc->lock); | ||
44 | continue; | ||
45 | } | ||
46 | /* Honour the normal IRQ locking */ | ||
47 | desc->status |= IRQ_INPROGRESS; | ||
48 | action = desc->action; | ||
49 | spin_unlock(&desc->lock); | 40 | spin_unlock(&desc->lock); |
41 | return ok; | ||
42 | } | ||
43 | /* Honour the normal IRQ locking */ | ||
44 | desc->status |= IRQ_INPROGRESS; | ||
45 | action = desc->action; | ||
46 | spin_unlock(&desc->lock); | ||
50 | 47 | ||
51 | while (action) { | 48 | while (action) { |
52 | /* Only shared IRQ handlers are safe to call */ | 49 | /* Only shared IRQ handlers are safe to call */ |
53 | if (action->flags & IRQF_SHARED) { | 50 | if (action->flags & IRQF_SHARED) { |
54 | if (action->handler(i, action->dev_id) == | 51 | if (action->handler(irq, action->dev_id) == |
55 | IRQ_HANDLED) | 52 | IRQ_HANDLED) |
56 | ok = 1; | 53 | ok = 1; |
57 | } | ||
58 | action = action->next; | ||
59 | } | 54 | } |
60 | local_irq_disable(); | 55 | action = action->next; |
61 | /* Now clean up the flags */ | 56 | } |
62 | spin_lock(&desc->lock); | 57 | local_irq_disable(); |
63 | action = desc->action; | 58 | /* Now clean up the flags */ |
59 | spin_lock(&desc->lock); | ||
60 | action = desc->action; | ||
64 | 61 | ||
62 | /* | ||
63 | * While we were looking for a fixup someone queued a real | ||
64 | * IRQ clashing with our walk: | ||
65 | */ | ||
66 | while ((desc->status & IRQ_PENDING) && action) { | ||
65 | /* | 67 | /* |
66 | * While we were looking for a fixup someone queued a real | 68 | * Perform real IRQ processing for the IRQ we deferred |
67 | * IRQ clashing with our walk: | ||
68 | */ | ||
69 | while ((desc->status & IRQ_PENDING) && action) { | ||
70 | /* | ||
71 | * Perform real IRQ processing for the IRQ we deferred | ||
72 | */ | ||
73 | work = 1; | ||
74 | spin_unlock(&desc->lock); | ||
75 | handle_IRQ_event(i, action); | ||
76 | spin_lock(&desc->lock); | ||
77 | desc->status &= ~IRQ_PENDING; | ||
78 | } | ||
79 | desc->status &= ~IRQ_INPROGRESS; | ||
80 | /* | ||
81 | * If we did actual work for the real IRQ line we must let the | ||
82 | * IRQ controller clean up too | ||
83 | */ | 69 | */ |
84 | if (work && desc->chip && desc->chip->end) | 70 | work = 1; |
85 | desc->chip->end(i); | ||
86 | spin_unlock(&desc->lock); | 71 | spin_unlock(&desc->lock); |
72 | handle_IRQ_event(irq, action); | ||
73 | spin_lock(&desc->lock); | ||
74 | desc->status &= ~IRQ_PENDING; | ||
75 | } | ||
76 | desc->status &= ~IRQ_INPROGRESS; | ||
77 | /* | ||
78 | * If we did actual work for the real IRQ line we must let the | ||
79 | * IRQ controller clean up too | ||
80 | */ | ||
81 | if (work && desc->chip && desc->chip->end) | ||
82 | desc->chip->end(irq); | ||
83 | spin_unlock(&desc->lock); | ||
84 | |||
85 | return ok; | ||
86 | } | ||
87 | |||
88 | static int misrouted_irq(int irq) | ||
89 | { | ||
90 | struct irq_desc *desc; | ||
91 | int i, ok = 0; | ||
92 | |||
93 | for_each_irq_desc(i, desc) { | ||
94 | if (!i) | ||
95 | continue; | ||
96 | |||
97 | if (i == irq) /* Already tried */ | ||
98 | continue; | ||
99 | |||
100 | if (try_one_irq(i, desc)) | ||
101 | ok = 1; | ||
87 | } | 102 | } |
88 | /* So the caller can adjust the irq error counts */ | 103 | /* So the caller can adjust the irq error counts */ |
89 | return ok; | 104 | return ok; |
90 | } | 105 | } |
91 | 106 | ||
107 | static void poll_spurious_irqs(unsigned long dummy) | ||
108 | { | ||
109 | struct irq_desc *desc; | ||
110 | int i; | ||
111 | |||
112 | for_each_irq_desc(i, desc) { | ||
113 | unsigned int status; | ||
114 | |||
115 | if (!i) | ||
116 | continue; | ||
117 | |||
118 | /* Racy but it doesn't matter */ | ||
119 | status = desc->status; | ||
120 | barrier(); | ||
121 | if (!(status & IRQ_SPURIOUS_DISABLED)) | ||
122 | continue; | ||
123 | |||
124 | try_one_irq(i, desc); | ||
125 | } | ||
126 | |||
127 | mod_timer(&poll_spurious_irq_timer, | ||
128 | jiffies + POLL_SPURIOUS_IRQ_INTERVAL); | ||
129 | } | ||
130 | |||
92 | /* | 131 | /* |
93 | * If 99,900 of the previous 100,000 interrupts have not been handled | 132 | * If 99,900 of the previous 100,000 interrupts have not been handled |
94 | * then assume that the IRQ is stuck in some manner. Drop a diagnostic | 133 | * then assume that the IRQ is stuck in some manner. Drop a diagnostic |
@@ -137,7 +176,9 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret) | |||
137 | } | 176 | } |
138 | } | 177 | } |
139 | 178 | ||
140 | static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret) | 179 | static inline int |
180 | try_misrouted_irq(unsigned int irq, struct irq_desc *desc, | ||
181 | irqreturn_t action_ret) | ||
141 | { | 182 | { |
142 | struct irqaction *action; | 183 | struct irqaction *action; |
143 | 184 | ||
@@ -212,6 +253,9 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
212 | desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; | 253 | desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; |
213 | desc->depth++; | 254 | desc->depth++; |
214 | desc->chip->disable(irq); | 255 | desc->chip->disable(irq); |
256 | |||
257 | mod_timer(&poll_spurious_irq_timer, | ||
258 | jiffies + POLL_SPURIOUS_IRQ_INTERVAL); | ||
215 | } | 259 | } |
216 | desc->irqs_unhandled = 0; | 260 | desc->irqs_unhandled = 0; |
217 | } | 261 | } |
@@ -241,7 +285,7 @@ static int __init irqfixup_setup(char *str) | |||
241 | 285 | ||
242 | __setup("irqfixup", irqfixup_setup); | 286 | __setup("irqfixup", irqfixup_setup); |
243 | module_param(irqfixup, int, 0644); | 287 | module_param(irqfixup, int, 0644); |
244 | MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode"); | 288 | MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode, 2: irqpoll mode"); |
245 | 289 | ||
246 | static int __init irqpoll_setup(char *str) | 290 | static int __init irqpoll_setup(char *str) |
247 | { | 291 | { |
diff --git a/kernel/itimer.c b/kernel/itimer.c index ab982747d9bd..db7c358b9a02 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c | |||
@@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value) | |||
55 | spin_unlock_irq(&tsk->sighand->siglock); | 55 | spin_unlock_irq(&tsk->sighand->siglock); |
56 | break; | 56 | break; |
57 | case ITIMER_VIRTUAL: | 57 | case ITIMER_VIRTUAL: |
58 | read_lock(&tasklist_lock); | ||
59 | spin_lock_irq(&tsk->sighand->siglock); | 58 | spin_lock_irq(&tsk->sighand->siglock); |
60 | cval = tsk->signal->it_virt_expires; | 59 | cval = tsk->signal->it_virt_expires; |
61 | cinterval = tsk->signal->it_virt_incr; | 60 | cinterval = tsk->signal->it_virt_incr; |
62 | if (!cputime_eq(cval, cputime_zero)) { | 61 | if (!cputime_eq(cval, cputime_zero)) { |
63 | struct task_struct *t = tsk; | 62 | struct task_cputime cputime; |
64 | cputime_t utime = tsk->signal->utime; | 63 | cputime_t utime; |
65 | do { | 64 | |
66 | utime = cputime_add(utime, t->utime); | 65 | thread_group_cputime(tsk, &cputime); |
67 | t = next_thread(t); | 66 | utime = cputime.utime; |
68 | } while (t != tsk); | ||
69 | if (cputime_le(cval, utime)) { /* about to fire */ | 67 | if (cputime_le(cval, utime)) { /* about to fire */ |
70 | cval = jiffies_to_cputime(1); | 68 | cval = jiffies_to_cputime(1); |
71 | } else { | 69 | } else { |
@@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value) | |||
73 | } | 71 | } |
74 | } | 72 | } |
75 | spin_unlock_irq(&tsk->sighand->siglock); | 73 | spin_unlock_irq(&tsk->sighand->siglock); |
76 | read_unlock(&tasklist_lock); | ||
77 | cputime_to_timeval(cval, &value->it_value); | 74 | cputime_to_timeval(cval, &value->it_value); |
78 | cputime_to_timeval(cinterval, &value->it_interval); | 75 | cputime_to_timeval(cinterval, &value->it_interval); |
79 | break; | 76 | break; |
80 | case ITIMER_PROF: | 77 | case ITIMER_PROF: |
81 | read_lock(&tasklist_lock); | ||
82 | spin_lock_irq(&tsk->sighand->siglock); | 78 | spin_lock_irq(&tsk->sighand->siglock); |
83 | cval = tsk->signal->it_prof_expires; | 79 | cval = tsk->signal->it_prof_expires; |
84 | cinterval = tsk->signal->it_prof_incr; | 80 | cinterval = tsk->signal->it_prof_incr; |
85 | if (!cputime_eq(cval, cputime_zero)) { | 81 | if (!cputime_eq(cval, cputime_zero)) { |
86 | struct task_struct *t = tsk; | 82 | struct task_cputime times; |
87 | cputime_t ptime = cputime_add(tsk->signal->utime, | 83 | cputime_t ptime; |
88 | tsk->signal->stime); | 84 | |
89 | do { | 85 | thread_group_cputime(tsk, ×); |
90 | ptime = cputime_add(ptime, | 86 | ptime = cputime_add(times.utime, times.stime); |
91 | cputime_add(t->utime, | ||
92 | t->stime)); | ||
93 | t = next_thread(t); | ||
94 | } while (t != tsk); | ||
95 | if (cputime_le(cval, ptime)) { /* about to fire */ | 87 | if (cputime_le(cval, ptime)) { /* about to fire */ |
96 | cval = jiffies_to_cputime(1); | 88 | cval = jiffies_to_cputime(1); |
97 | } else { | 89 | } else { |
@@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value) | |||
99 | } | 91 | } |
100 | } | 92 | } |
101 | spin_unlock_irq(&tsk->sighand->siglock); | 93 | spin_unlock_irq(&tsk->sighand->siglock); |
102 | read_unlock(&tasklist_lock); | ||
103 | cputime_to_timeval(cval, &value->it_value); | 94 | cputime_to_timeval(cval, &value->it_value); |
104 | cputime_to_timeval(cinterval, &value->it_interval); | 95 | cputime_to_timeval(cinterval, &value->it_interval); |
105 | break; | 96 | break; |
@@ -185,7 +176,6 @@ again: | |||
185 | case ITIMER_VIRTUAL: | 176 | case ITIMER_VIRTUAL: |
186 | nval = timeval_to_cputime(&value->it_value); | 177 | nval = timeval_to_cputime(&value->it_value); |
187 | ninterval = timeval_to_cputime(&value->it_interval); | 178 | ninterval = timeval_to_cputime(&value->it_interval); |
188 | read_lock(&tasklist_lock); | ||
189 | spin_lock_irq(&tsk->sighand->siglock); | 179 | spin_lock_irq(&tsk->sighand->siglock); |
190 | cval = tsk->signal->it_virt_expires; | 180 | cval = tsk->signal->it_virt_expires; |
191 | cinterval = tsk->signal->it_virt_incr; | 181 | cinterval = tsk->signal->it_virt_incr; |
@@ -200,7 +190,6 @@ again: | |||
200 | tsk->signal->it_virt_expires = nval; | 190 | tsk->signal->it_virt_expires = nval; |
201 | tsk->signal->it_virt_incr = ninterval; | 191 | tsk->signal->it_virt_incr = ninterval; |
202 | spin_unlock_irq(&tsk->sighand->siglock); | 192 | spin_unlock_irq(&tsk->sighand->siglock); |
203 | read_unlock(&tasklist_lock); | ||
204 | if (ovalue) { | 193 | if (ovalue) { |
205 | cputime_to_timeval(cval, &ovalue->it_value); | 194 | cputime_to_timeval(cval, &ovalue->it_value); |
206 | cputime_to_timeval(cinterval, &ovalue->it_interval); | 195 | cputime_to_timeval(cinterval, &ovalue->it_interval); |
@@ -209,7 +198,6 @@ again: | |||
209 | case ITIMER_PROF: | 198 | case ITIMER_PROF: |
210 | nval = timeval_to_cputime(&value->it_value); | 199 | nval = timeval_to_cputime(&value->it_value); |
211 | ninterval = timeval_to_cputime(&value->it_interval); | 200 | ninterval = timeval_to_cputime(&value->it_interval); |
212 | read_lock(&tasklist_lock); | ||
213 | spin_lock_irq(&tsk->sighand->siglock); | 201 | spin_lock_irq(&tsk->sighand->siglock); |
214 | cval = tsk->signal->it_prof_expires; | 202 | cval = tsk->signal->it_prof_expires; |
215 | cinterval = tsk->signal->it_prof_incr; | 203 | cinterval = tsk->signal->it_prof_incr; |
@@ -224,7 +212,6 @@ again: | |||
224 | tsk->signal->it_prof_expires = nval; | 212 | tsk->signal->it_prof_expires = nval; |
225 | tsk->signal->it_prof_incr = ninterval; | 213 | tsk->signal->it_prof_incr = ninterval; |
226 | spin_unlock_irq(&tsk->sighand->siglock); | 214 | spin_unlock_irq(&tsk->sighand->siglock); |
227 | read_unlock(&tasklist_lock); | ||
228 | if (ovalue) { | 215 | if (ovalue) { |
229 | cputime_to_timeval(cval, &ovalue->it_value); | 216 | cputime_to_timeval(cval, &ovalue->it_value); |
230 | cputime_to_timeval(cinterval, &ovalue->it_interval); | 217 | cputime_to_timeval(cinterval, &ovalue->it_interval); |
diff --git a/kernel/kexec.c b/kernel/kexec.c index aef265325cd3..ac0fde7b54d0 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/pm.h> | 30 | #include <linux/pm.h> |
31 | #include <linux/cpu.h> | 31 | #include <linux/cpu.h> |
32 | #include <linux/console.h> | 32 | #include <linux/console.h> |
33 | #include <linux/vmalloc.h> | ||
33 | 34 | ||
34 | #include <asm/page.h> | 35 | #include <asm/page.h> |
35 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
@@ -1371,6 +1372,7 @@ static int __init crash_save_vmcoreinfo_init(void) | |||
1371 | VMCOREINFO_SYMBOL(node_online_map); | 1372 | VMCOREINFO_SYMBOL(node_online_map); |
1372 | VMCOREINFO_SYMBOL(swapper_pg_dir); | 1373 | VMCOREINFO_SYMBOL(swapper_pg_dir); |
1373 | VMCOREINFO_SYMBOL(_stext); | 1374 | VMCOREINFO_SYMBOL(_stext); |
1375 | VMCOREINFO_SYMBOL(vmlist); | ||
1374 | 1376 | ||
1375 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 1377 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
1376 | VMCOREINFO_SYMBOL(mem_map); | 1378 | VMCOREINFO_SYMBOL(mem_map); |
@@ -1406,6 +1408,7 @@ static int __init crash_save_vmcoreinfo_init(void) | |||
1406 | VMCOREINFO_OFFSET(free_area, free_list); | 1408 | VMCOREINFO_OFFSET(free_area, free_list); |
1407 | VMCOREINFO_OFFSET(list_head, next); | 1409 | VMCOREINFO_OFFSET(list_head, next); |
1408 | VMCOREINFO_OFFSET(list_head, prev); | 1410 | VMCOREINFO_OFFSET(list_head, prev); |
1411 | VMCOREINFO_OFFSET(vm_struct, addr); | ||
1409 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); | 1412 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); |
1410 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); | 1413 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); |
1411 | VMCOREINFO_NUMBER(NR_FREE_PAGES); | 1414 | VMCOREINFO_NUMBER(NR_FREE_PAGES); |
diff --git a/kernel/kthread.c b/kernel/kthread.c index 50598e29439a..8e7a7ce3ed0a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -172,12 +172,11 @@ EXPORT_SYMBOL(kthread_create); | |||
172 | */ | 172 | */ |
173 | void kthread_bind(struct task_struct *k, unsigned int cpu) | 173 | void kthread_bind(struct task_struct *k, unsigned int cpu) |
174 | { | 174 | { |
175 | if (k->state != TASK_UNINTERRUPTIBLE) { | 175 | /* Must have done schedule() in kthread() before we set_task_cpu */ |
176 | if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) { | ||
176 | WARN_ON(1); | 177 | WARN_ON(1); |
177 | return; | 178 | return; |
178 | } | 179 | } |
179 | /* Must have done schedule() in kthread() before we set_task_cpu */ | ||
180 | wait_task_inactive(k, 0); | ||
181 | set_task_cpu(k, cpu); | 180 | set_task_cpu(k, cpu); |
182 | k->cpus_allowed = cpumask_of_cpu(cpu); | 181 | k->cpus_allowed = cpumask_of_cpu(cpu); |
183 | k->rt.nr_cpus_allowed = 1; | 182 | k->rt.nr_cpus_allowed = 1; |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index c42a03aef36f..153dcb2639c3 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -7,6 +7,93 @@ | |||
7 | #include <linux/errno.h> | 7 | #include <linux/errno.h> |
8 | #include <linux/math64.h> | 8 | #include <linux/math64.h> |
9 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | #include <linux/kernel_stat.h> | ||
11 | |||
12 | /* | ||
13 | * Allocate the thread_group_cputime structure appropriately and fill in the | ||
14 | * current values of the fields. Called from copy_signal() via | ||
15 | * thread_group_cputime_clone_thread() when adding a second or subsequent | ||
16 | * thread to a thread group. Assumes interrupts are enabled when called. | ||
17 | */ | ||
18 | int thread_group_cputime_alloc(struct task_struct *tsk) | ||
19 | { | ||
20 | struct signal_struct *sig = tsk->signal; | ||
21 | struct task_cputime *cputime; | ||
22 | |||
23 | /* | ||
24 | * If we have multiple threads and we don't already have a | ||
25 | * per-CPU task_cputime struct (checked in the caller), allocate | ||
26 | * one and fill it in with the times accumulated so far. We may | ||
27 | * race with another thread so recheck after we pick up the sighand | ||
28 | * lock. | ||
29 | */ | ||
30 | cputime = alloc_percpu(struct task_cputime); | ||
31 | if (cputime == NULL) | ||
32 | return -ENOMEM; | ||
33 | spin_lock_irq(&tsk->sighand->siglock); | ||
34 | if (sig->cputime.totals) { | ||
35 | spin_unlock_irq(&tsk->sighand->siglock); | ||
36 | free_percpu(cputime); | ||
37 | return 0; | ||
38 | } | ||
39 | sig->cputime.totals = cputime; | ||
40 | cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id()); | ||
41 | cputime->utime = tsk->utime; | ||
42 | cputime->stime = tsk->stime; | ||
43 | cputime->sum_exec_runtime = tsk->se.sum_exec_runtime; | ||
44 | spin_unlock_irq(&tsk->sighand->siglock); | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | /** | ||
49 | * thread_group_cputime - Sum the thread group time fields across all CPUs. | ||
50 | * | ||
51 | * @tsk: The task we use to identify the thread group. | ||
52 | * @times: task_cputime structure in which we return the summed fields. | ||
53 | * | ||
54 | * Walk the list of CPUs to sum the per-CPU time fields in the thread group | ||
55 | * time structure. | ||
56 | */ | ||
57 | void thread_group_cputime( | ||
58 | struct task_struct *tsk, | ||
59 | struct task_cputime *times) | ||
60 | { | ||
61 | struct signal_struct *sig; | ||
62 | int i; | ||
63 | struct task_cputime *tot; | ||
64 | |||
65 | sig = tsk->signal; | ||
66 | if (unlikely(!sig) || !sig->cputime.totals) { | ||
67 | times->utime = tsk->utime; | ||
68 | times->stime = tsk->stime; | ||
69 | times->sum_exec_runtime = tsk->se.sum_exec_runtime; | ||
70 | return; | ||
71 | } | ||
72 | times->stime = times->utime = cputime_zero; | ||
73 | times->sum_exec_runtime = 0; | ||
74 | for_each_possible_cpu(i) { | ||
75 | tot = per_cpu_ptr(tsk->signal->cputime.totals, i); | ||
76 | times->utime = cputime_add(times->utime, tot->utime); | ||
77 | times->stime = cputime_add(times->stime, tot->stime); | ||
78 | times->sum_exec_runtime += tot->sum_exec_runtime; | ||
79 | } | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Called after updating RLIMIT_CPU to set timer expiration if necessary. | ||
84 | */ | ||
85 | void update_rlimit_cpu(unsigned long rlim_new) | ||
86 | { | ||
87 | cputime_t cputime; | ||
88 | |||
89 | cputime = secs_to_cputime(rlim_new); | ||
90 | if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || | ||
91 | cputime_lt(current->signal->it_prof_expires, cputime)) { | ||
92 | spin_lock_irq(&current->sighand->siglock); | ||
93 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); | ||
94 | spin_unlock_irq(&current->sighand->siglock); | ||
95 | } | ||
96 | } | ||
10 | 97 | ||
11 | static int check_clock(const clockid_t which_clock) | 98 | static int check_clock(const clockid_t which_clock) |
12 | { | 99 | { |
@@ -158,10 +245,6 @@ static inline cputime_t virt_ticks(struct task_struct *p) | |||
158 | { | 245 | { |
159 | return p->utime; | 246 | return p->utime; |
160 | } | 247 | } |
161 | static inline unsigned long long sched_ns(struct task_struct *p) | ||
162 | { | ||
163 | return task_sched_runtime(p); | ||
164 | } | ||
165 | 248 | ||
166 | int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) | 249 | int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) |
167 | { | 250 | { |
@@ -211,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | |||
211 | cpu->cpu = virt_ticks(p); | 294 | cpu->cpu = virt_ticks(p); |
212 | break; | 295 | break; |
213 | case CPUCLOCK_SCHED: | 296 | case CPUCLOCK_SCHED: |
214 | cpu->sched = sched_ns(p); | 297 | cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p); |
215 | break; | 298 | break; |
216 | } | 299 | } |
217 | return 0; | 300 | return 0; |
@@ -220,59 +303,30 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | |||
220 | /* | 303 | /* |
221 | * Sample a process (thread group) clock for the given group_leader task. | 304 | * Sample a process (thread group) clock for the given group_leader task. |
222 | * Must be called with tasklist_lock held for reading. | 305 | * Must be called with tasklist_lock held for reading. |
223 | * Must be called with tasklist_lock held for reading, and p->sighand->siglock. | ||
224 | */ | 306 | */ |
225 | static int cpu_clock_sample_group_locked(unsigned int clock_idx, | 307 | static int cpu_clock_sample_group(const clockid_t which_clock, |
226 | struct task_struct *p, | 308 | struct task_struct *p, |
227 | union cpu_time_count *cpu) | 309 | union cpu_time_count *cpu) |
228 | { | 310 | { |
229 | struct task_struct *t = p; | 311 | struct task_cputime cputime; |
230 | switch (clock_idx) { | 312 | |
313 | thread_group_cputime(p, &cputime); | ||
314 | switch (which_clock) { | ||
231 | default: | 315 | default: |
232 | return -EINVAL; | 316 | return -EINVAL; |
233 | case CPUCLOCK_PROF: | 317 | case CPUCLOCK_PROF: |
234 | cpu->cpu = cputime_add(p->signal->utime, p->signal->stime); | 318 | cpu->cpu = cputime_add(cputime.utime, cputime.stime); |
235 | do { | ||
236 | cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t)); | ||
237 | t = next_thread(t); | ||
238 | } while (t != p); | ||
239 | break; | 319 | break; |
240 | case CPUCLOCK_VIRT: | 320 | case CPUCLOCK_VIRT: |
241 | cpu->cpu = p->signal->utime; | 321 | cpu->cpu = cputime.utime; |
242 | do { | ||
243 | cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t)); | ||
244 | t = next_thread(t); | ||
245 | } while (t != p); | ||
246 | break; | 322 | break; |
247 | case CPUCLOCK_SCHED: | 323 | case CPUCLOCK_SCHED: |
248 | cpu->sched = p->signal->sum_sched_runtime; | 324 | cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); |
249 | /* Add in each other live thread. */ | ||
250 | while ((t = next_thread(t)) != p) { | ||
251 | cpu->sched += t->se.sum_exec_runtime; | ||
252 | } | ||
253 | cpu->sched += sched_ns(p); | ||
254 | break; | 325 | break; |
255 | } | 326 | } |
256 | return 0; | 327 | return 0; |
257 | } | 328 | } |
258 | 329 | ||
259 | /* | ||
260 | * Sample a process (thread group) clock for the given group_leader task. | ||
261 | * Must be called with tasklist_lock held for reading. | ||
262 | */ | ||
263 | static int cpu_clock_sample_group(const clockid_t which_clock, | ||
264 | struct task_struct *p, | ||
265 | union cpu_time_count *cpu) | ||
266 | { | ||
267 | int ret; | ||
268 | unsigned long flags; | ||
269 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
270 | ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p, | ||
271 | cpu); | ||
272 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
273 | return ret; | ||
274 | } | ||
275 | |||
276 | 330 | ||
277 | int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | 331 | int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) |
278 | { | 332 | { |
@@ -471,80 +525,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk) | |||
471 | } | 525 | } |
472 | void posix_cpu_timers_exit_group(struct task_struct *tsk) | 526 | void posix_cpu_timers_exit_group(struct task_struct *tsk) |
473 | { | 527 | { |
474 | cleanup_timers(tsk->signal->cpu_timers, | 528 | struct task_cputime cputime; |
475 | cputime_add(tsk->utime, tsk->signal->utime), | ||
476 | cputime_add(tsk->stime, tsk->signal->stime), | ||
477 | tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime); | ||
478 | } | ||
479 | 529 | ||
480 | 530 | thread_group_cputime(tsk, &cputime); | |
481 | /* | 531 | cleanup_timers(tsk->signal->cpu_timers, |
482 | * Set the expiry times of all the threads in the process so one of them | 532 | cputime.utime, cputime.stime, cputime.sum_exec_runtime); |
483 | * will go off before the process cumulative expiry total is reached. | ||
484 | */ | ||
485 | static void process_timer_rebalance(struct task_struct *p, | ||
486 | unsigned int clock_idx, | ||
487 | union cpu_time_count expires, | ||
488 | union cpu_time_count val) | ||
489 | { | ||
490 | cputime_t ticks, left; | ||
491 | unsigned long long ns, nsleft; | ||
492 | struct task_struct *t = p; | ||
493 | unsigned int nthreads = atomic_read(&p->signal->live); | ||
494 | |||
495 | if (!nthreads) | ||
496 | return; | ||
497 | |||
498 | switch (clock_idx) { | ||
499 | default: | ||
500 | BUG(); | ||
501 | break; | ||
502 | case CPUCLOCK_PROF: | ||
503 | left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu), | ||
504 | nthreads); | ||
505 | do { | ||
506 | if (likely(!(t->flags & PF_EXITING))) { | ||
507 | ticks = cputime_add(prof_ticks(t), left); | ||
508 | if (cputime_eq(t->it_prof_expires, | ||
509 | cputime_zero) || | ||
510 | cputime_gt(t->it_prof_expires, ticks)) { | ||
511 | t->it_prof_expires = ticks; | ||
512 | } | ||
513 | } | ||
514 | t = next_thread(t); | ||
515 | } while (t != p); | ||
516 | break; | ||
517 | case CPUCLOCK_VIRT: | ||
518 | left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu), | ||
519 | nthreads); | ||
520 | do { | ||
521 | if (likely(!(t->flags & PF_EXITING))) { | ||
522 | ticks = cputime_add(virt_ticks(t), left); | ||
523 | if (cputime_eq(t->it_virt_expires, | ||
524 | cputime_zero) || | ||
525 | cputime_gt(t->it_virt_expires, ticks)) { | ||
526 | t->it_virt_expires = ticks; | ||
527 | } | ||
528 | } | ||
529 | t = next_thread(t); | ||
530 | } while (t != p); | ||
531 | break; | ||
532 | case CPUCLOCK_SCHED: | ||
533 | nsleft = expires.sched - val.sched; | ||
534 | do_div(nsleft, nthreads); | ||
535 | nsleft = max_t(unsigned long long, nsleft, 1); | ||
536 | do { | ||
537 | if (likely(!(t->flags & PF_EXITING))) { | ||
538 | ns = t->se.sum_exec_runtime + nsleft; | ||
539 | if (t->it_sched_expires == 0 || | ||
540 | t->it_sched_expires > ns) { | ||
541 | t->it_sched_expires = ns; | ||
542 | } | ||
543 | } | ||
544 | t = next_thread(t); | ||
545 | } while (t != p); | ||
546 | break; | ||
547 | } | ||
548 | } | 533 | } |
549 | 534 | ||
550 | static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) | 535 | static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) |
@@ -608,29 +593,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
608 | default: | 593 | default: |
609 | BUG(); | 594 | BUG(); |
610 | case CPUCLOCK_PROF: | 595 | case CPUCLOCK_PROF: |
611 | if (cputime_eq(p->it_prof_expires, | 596 | if (cputime_eq(p->cputime_expires.prof_exp, |
612 | cputime_zero) || | 597 | cputime_zero) || |
613 | cputime_gt(p->it_prof_expires, | 598 | cputime_gt(p->cputime_expires.prof_exp, |
614 | nt->expires.cpu)) | 599 | nt->expires.cpu)) |
615 | p->it_prof_expires = nt->expires.cpu; | 600 | p->cputime_expires.prof_exp = |
601 | nt->expires.cpu; | ||
616 | break; | 602 | break; |
617 | case CPUCLOCK_VIRT: | 603 | case CPUCLOCK_VIRT: |
618 | if (cputime_eq(p->it_virt_expires, | 604 | if (cputime_eq(p->cputime_expires.virt_exp, |
619 | cputime_zero) || | 605 | cputime_zero) || |
620 | cputime_gt(p->it_virt_expires, | 606 | cputime_gt(p->cputime_expires.virt_exp, |
621 | nt->expires.cpu)) | 607 | nt->expires.cpu)) |
622 | p->it_virt_expires = nt->expires.cpu; | 608 | p->cputime_expires.virt_exp = |
609 | nt->expires.cpu; | ||
623 | break; | 610 | break; |
624 | case CPUCLOCK_SCHED: | 611 | case CPUCLOCK_SCHED: |
625 | if (p->it_sched_expires == 0 || | 612 | if (p->cputime_expires.sched_exp == 0 || |
626 | p->it_sched_expires > nt->expires.sched) | 613 | p->cputime_expires.sched_exp > |
627 | p->it_sched_expires = nt->expires.sched; | 614 | nt->expires.sched) |
615 | p->cputime_expires.sched_exp = | ||
616 | nt->expires.sched; | ||
628 | break; | 617 | break; |
629 | } | 618 | } |
630 | } else { | 619 | } else { |
631 | /* | 620 | /* |
632 | * For a process timer, we must balance | 621 | * For a process timer, set the cached expiration time. |
633 | * all the live threads' expirations. | ||
634 | */ | 622 | */ |
635 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | 623 | switch (CPUCLOCK_WHICH(timer->it_clock)) { |
636 | default: | 624 | default: |
@@ -641,7 +629,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
641 | cputime_lt(p->signal->it_virt_expires, | 629 | cputime_lt(p->signal->it_virt_expires, |
642 | timer->it.cpu.expires.cpu)) | 630 | timer->it.cpu.expires.cpu)) |
643 | break; | 631 | break; |
644 | goto rebalance; | 632 | p->signal->cputime_expires.virt_exp = |
633 | timer->it.cpu.expires.cpu; | ||
634 | break; | ||
645 | case CPUCLOCK_PROF: | 635 | case CPUCLOCK_PROF: |
646 | if (!cputime_eq(p->signal->it_prof_expires, | 636 | if (!cputime_eq(p->signal->it_prof_expires, |
647 | cputime_zero) && | 637 | cputime_zero) && |
@@ -652,13 +642,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
652 | if (i != RLIM_INFINITY && | 642 | if (i != RLIM_INFINITY && |
653 | i <= cputime_to_secs(timer->it.cpu.expires.cpu)) | 643 | i <= cputime_to_secs(timer->it.cpu.expires.cpu)) |
654 | break; | 644 | break; |
655 | goto rebalance; | 645 | p->signal->cputime_expires.prof_exp = |
646 | timer->it.cpu.expires.cpu; | ||
647 | break; | ||
656 | case CPUCLOCK_SCHED: | 648 | case CPUCLOCK_SCHED: |
657 | rebalance: | 649 | p->signal->cputime_expires.sched_exp = |
658 | process_timer_rebalance( | 650 | timer->it.cpu.expires.sched; |
659 | timer->it.cpu.task, | ||
660 | CPUCLOCK_WHICH(timer->it_clock), | ||
661 | timer->it.cpu.expires, now); | ||
662 | break; | 651 | break; |
663 | } | 652 | } |
664 | } | 653 | } |
@@ -969,13 +958,13 @@ static void check_thread_timers(struct task_struct *tsk, | |||
969 | struct signal_struct *const sig = tsk->signal; | 958 | struct signal_struct *const sig = tsk->signal; |
970 | 959 | ||
971 | maxfire = 20; | 960 | maxfire = 20; |
972 | tsk->it_prof_expires = cputime_zero; | 961 | tsk->cputime_expires.prof_exp = cputime_zero; |
973 | while (!list_empty(timers)) { | 962 | while (!list_empty(timers)) { |
974 | struct cpu_timer_list *t = list_first_entry(timers, | 963 | struct cpu_timer_list *t = list_first_entry(timers, |
975 | struct cpu_timer_list, | 964 | struct cpu_timer_list, |
976 | entry); | 965 | entry); |
977 | if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { | 966 | if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { |
978 | tsk->it_prof_expires = t->expires.cpu; | 967 | tsk->cputime_expires.prof_exp = t->expires.cpu; |
979 | break; | 968 | break; |
980 | } | 969 | } |
981 | t->firing = 1; | 970 | t->firing = 1; |
@@ -984,13 +973,13 @@ static void check_thread_timers(struct task_struct *tsk, | |||
984 | 973 | ||
985 | ++timers; | 974 | ++timers; |
986 | maxfire = 20; | 975 | maxfire = 20; |
987 | tsk->it_virt_expires = cputime_zero; | 976 | tsk->cputime_expires.virt_exp = cputime_zero; |
988 | while (!list_empty(timers)) { | 977 | while (!list_empty(timers)) { |
989 | struct cpu_timer_list *t = list_first_entry(timers, | 978 | struct cpu_timer_list *t = list_first_entry(timers, |
990 | struct cpu_timer_list, | 979 | struct cpu_timer_list, |
991 | entry); | 980 | entry); |
992 | if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { | 981 | if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { |
993 | tsk->it_virt_expires = t->expires.cpu; | 982 | tsk->cputime_expires.virt_exp = t->expires.cpu; |
994 | break; | 983 | break; |
995 | } | 984 | } |
996 | t->firing = 1; | 985 | t->firing = 1; |
@@ -999,13 +988,13 @@ static void check_thread_timers(struct task_struct *tsk, | |||
999 | 988 | ||
1000 | ++timers; | 989 | ++timers; |
1001 | maxfire = 20; | 990 | maxfire = 20; |
1002 | tsk->it_sched_expires = 0; | 991 | tsk->cputime_expires.sched_exp = 0; |
1003 | while (!list_empty(timers)) { | 992 | while (!list_empty(timers)) { |
1004 | struct cpu_timer_list *t = list_first_entry(timers, | 993 | struct cpu_timer_list *t = list_first_entry(timers, |
1005 | struct cpu_timer_list, | 994 | struct cpu_timer_list, |
1006 | entry); | 995 | entry); |
1007 | if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { | 996 | if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { |
1008 | tsk->it_sched_expires = t->expires.sched; | 997 | tsk->cputime_expires.sched_exp = t->expires.sched; |
1009 | break; | 998 | break; |
1010 | } | 999 | } |
1011 | t->firing = 1; | 1000 | t->firing = 1; |
@@ -1055,10 +1044,10 @@ static void check_process_timers(struct task_struct *tsk, | |||
1055 | { | 1044 | { |
1056 | int maxfire; | 1045 | int maxfire; |
1057 | struct signal_struct *const sig = tsk->signal; | 1046 | struct signal_struct *const sig = tsk->signal; |
1058 | cputime_t utime, stime, ptime, virt_expires, prof_expires; | 1047 | cputime_t utime, ptime, virt_expires, prof_expires; |
1059 | unsigned long long sum_sched_runtime, sched_expires; | 1048 | unsigned long long sum_sched_runtime, sched_expires; |
1060 | struct task_struct *t; | ||
1061 | struct list_head *timers = sig->cpu_timers; | 1049 | struct list_head *timers = sig->cpu_timers; |
1050 | struct task_cputime cputime; | ||
1062 | 1051 | ||
1063 | /* | 1052 | /* |
1064 | * Don't sample the current process CPU clocks if there are no timers. | 1053 | * Don't sample the current process CPU clocks if there are no timers. |
@@ -1074,18 +1063,10 @@ static void check_process_timers(struct task_struct *tsk, | |||
1074 | /* | 1063 | /* |
1075 | * Collect the current process totals. | 1064 | * Collect the current process totals. |
1076 | */ | 1065 | */ |
1077 | utime = sig->utime; | 1066 | thread_group_cputime(tsk, &cputime); |
1078 | stime = sig->stime; | 1067 | utime = cputime.utime; |
1079 | sum_sched_runtime = sig->sum_sched_runtime; | 1068 | ptime = cputime_add(utime, cputime.stime); |
1080 | t = tsk; | 1069 | sum_sched_runtime = cputime.sum_exec_runtime; |
1081 | do { | ||
1082 | utime = cputime_add(utime, t->utime); | ||
1083 | stime = cputime_add(stime, t->stime); | ||
1084 | sum_sched_runtime += t->se.sum_exec_runtime; | ||
1085 | t = next_thread(t); | ||
1086 | } while (t != tsk); | ||
1087 | ptime = cputime_add(utime, stime); | ||
1088 | |||
1089 | maxfire = 20; | 1070 | maxfire = 20; |
1090 | prof_expires = cputime_zero; | 1071 | prof_expires = cputime_zero; |
1091 | while (!list_empty(timers)) { | 1072 | while (!list_empty(timers)) { |
@@ -1193,60 +1174,18 @@ static void check_process_timers(struct task_struct *tsk, | |||
1193 | } | 1174 | } |
1194 | } | 1175 | } |
1195 | 1176 | ||
1196 | if (!cputime_eq(prof_expires, cputime_zero) || | 1177 | if (!cputime_eq(prof_expires, cputime_zero) && |
1197 | !cputime_eq(virt_expires, cputime_zero) || | 1178 | (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) || |
1198 | sched_expires != 0) { | 1179 | cputime_gt(sig->cputime_expires.prof_exp, prof_expires))) |
1199 | /* | 1180 | sig->cputime_expires.prof_exp = prof_expires; |
1200 | * Rebalance the threads' expiry times for the remaining | 1181 | if (!cputime_eq(virt_expires, cputime_zero) && |
1201 | * process CPU timers. | 1182 | (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) || |
1202 | */ | 1183 | cputime_gt(sig->cputime_expires.virt_exp, virt_expires))) |
1203 | 1184 | sig->cputime_expires.virt_exp = virt_expires; | |
1204 | cputime_t prof_left, virt_left, ticks; | 1185 | if (sched_expires != 0 && |
1205 | unsigned long long sched_left, sched; | 1186 | (sig->cputime_expires.sched_exp == 0 || |
1206 | const unsigned int nthreads = atomic_read(&sig->live); | 1187 | sig->cputime_expires.sched_exp > sched_expires)) |
1207 | 1188 | sig->cputime_expires.sched_exp = sched_expires; | |
1208 | if (!nthreads) | ||
1209 | return; | ||
1210 | |||
1211 | prof_left = cputime_sub(prof_expires, utime); | ||
1212 | prof_left = cputime_sub(prof_left, stime); | ||
1213 | prof_left = cputime_div_non_zero(prof_left, nthreads); | ||
1214 | virt_left = cputime_sub(virt_expires, utime); | ||
1215 | virt_left = cputime_div_non_zero(virt_left, nthreads); | ||
1216 | if (sched_expires) { | ||
1217 | sched_left = sched_expires - sum_sched_runtime; | ||
1218 | do_div(sched_left, nthreads); | ||
1219 | sched_left = max_t(unsigned long long, sched_left, 1); | ||
1220 | } else { | ||
1221 | sched_left = 0; | ||
1222 | } | ||
1223 | t = tsk; | ||
1224 | do { | ||
1225 | if (unlikely(t->flags & PF_EXITING)) | ||
1226 | continue; | ||
1227 | |||
1228 | ticks = cputime_add(cputime_add(t->utime, t->stime), | ||
1229 | prof_left); | ||
1230 | if (!cputime_eq(prof_expires, cputime_zero) && | ||
1231 | (cputime_eq(t->it_prof_expires, cputime_zero) || | ||
1232 | cputime_gt(t->it_prof_expires, ticks))) { | ||
1233 | t->it_prof_expires = ticks; | ||
1234 | } | ||
1235 | |||
1236 | ticks = cputime_add(t->utime, virt_left); | ||
1237 | if (!cputime_eq(virt_expires, cputime_zero) && | ||
1238 | (cputime_eq(t->it_virt_expires, cputime_zero) || | ||
1239 | cputime_gt(t->it_virt_expires, ticks))) { | ||
1240 | t->it_virt_expires = ticks; | ||
1241 | } | ||
1242 | |||
1243 | sched = t->se.sum_exec_runtime + sched_left; | ||
1244 | if (sched_expires && (t->it_sched_expires == 0 || | ||
1245 | t->it_sched_expires > sched)) { | ||
1246 | t->it_sched_expires = sched; | ||
1247 | } | ||
1248 | } while ((t = next_thread(t)) != tsk); | ||
1249 | } | ||
1250 | } | 1189 | } |
1251 | 1190 | ||
1252 | /* | 1191 | /* |
@@ -1314,6 +1253,86 @@ out: | |||
1314 | ++timer->it_requeue_pending; | 1253 | ++timer->it_requeue_pending; |
1315 | } | 1254 | } |
1316 | 1255 | ||
1256 | /** | ||
1257 | * task_cputime_zero - Check a task_cputime struct for all zero fields. | ||
1258 | * | ||
1259 | * @cputime: The struct to compare. | ||
1260 | * | ||
1261 | * Checks @cputime to see if all fields are zero. Returns true if all fields | ||
1262 | * are zero, false if any field is nonzero. | ||
1263 | */ | ||
1264 | static inline int task_cputime_zero(const struct task_cputime *cputime) | ||
1265 | { | ||
1266 | if (cputime_eq(cputime->utime, cputime_zero) && | ||
1267 | cputime_eq(cputime->stime, cputime_zero) && | ||
1268 | cputime->sum_exec_runtime == 0) | ||
1269 | return 1; | ||
1270 | return 0; | ||
1271 | } | ||
1272 | |||
1273 | /** | ||
1274 | * task_cputime_expired - Compare two task_cputime entities. | ||
1275 | * | ||
1276 | * @sample: The task_cputime structure to be checked for expiration. | ||
1277 | * @expires: Expiration times, against which @sample will be checked. | ||
1278 | * | ||
1279 | * Checks @sample against @expires to see if any field of @sample has expired. | ||
1280 | * Returns true if any field of the former is greater than the corresponding | ||
1281 | * field of the latter if the latter field is set. Otherwise returns false. | ||
1282 | */ | ||
1283 | static inline int task_cputime_expired(const struct task_cputime *sample, | ||
1284 | const struct task_cputime *expires) | ||
1285 | { | ||
1286 | if (!cputime_eq(expires->utime, cputime_zero) && | ||
1287 | cputime_ge(sample->utime, expires->utime)) | ||
1288 | return 1; | ||
1289 | if (!cputime_eq(expires->stime, cputime_zero) && | ||
1290 | cputime_ge(cputime_add(sample->utime, sample->stime), | ||
1291 | expires->stime)) | ||
1292 | return 1; | ||
1293 | if (expires->sum_exec_runtime != 0 && | ||
1294 | sample->sum_exec_runtime >= expires->sum_exec_runtime) | ||
1295 | return 1; | ||
1296 | return 0; | ||
1297 | } | ||
1298 | |||
1299 | /** | ||
1300 | * fastpath_timer_check - POSIX CPU timers fast path. | ||
1301 | * | ||
1302 | * @tsk: The task (thread) being checked. | ||
1303 | * | ||
1304 | * Check the task and thread group timers. If both are zero (there are no | ||
1305 | * timers set) return false. Otherwise snapshot the task and thread group | ||
1306 | * timers and compare them with the corresponding expiration times. Return | ||
1307 | * true if a timer has expired, else return false. | ||
1308 | */ | ||
1309 | static inline int fastpath_timer_check(struct task_struct *tsk) | ||
1310 | { | ||
1311 | struct signal_struct *sig = tsk->signal; | ||
1312 | |||
1313 | if (unlikely(!sig)) | ||
1314 | return 0; | ||
1315 | |||
1316 | if (!task_cputime_zero(&tsk->cputime_expires)) { | ||
1317 | struct task_cputime task_sample = { | ||
1318 | .utime = tsk->utime, | ||
1319 | .stime = tsk->stime, | ||
1320 | .sum_exec_runtime = tsk->se.sum_exec_runtime | ||
1321 | }; | ||
1322 | |||
1323 | if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) | ||
1324 | return 1; | ||
1325 | } | ||
1326 | if (!task_cputime_zero(&sig->cputime_expires)) { | ||
1327 | struct task_cputime group_sample; | ||
1328 | |||
1329 | thread_group_cputime(tsk, &group_sample); | ||
1330 | if (task_cputime_expired(&group_sample, &sig->cputime_expires)) | ||
1331 | return 1; | ||
1332 | } | ||
1333 | return 0; | ||
1334 | } | ||
1335 | |||
1317 | /* | 1336 | /* |
1318 | * This is called from the timer interrupt handler. The irq handler has | 1337 | * This is called from the timer interrupt handler. The irq handler has |
1319 | * already updated our counts. We need to check if any timers fire now. | 1338 | * already updated our counts. We need to check if any timers fire now. |
@@ -1326,42 +1345,31 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1326 | 1345 | ||
1327 | BUG_ON(!irqs_disabled()); | 1346 | BUG_ON(!irqs_disabled()); |
1328 | 1347 | ||
1329 | #define UNEXPIRED(clock) \ | 1348 | /* |
1330 | (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ | 1349 | * The fast path checks that there are no expired thread or thread |
1331 | cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires)) | 1350 | * group timers. If that's so, just return. |
1332 | 1351 | */ | |
1333 | if (UNEXPIRED(prof) && UNEXPIRED(virt) && | 1352 | if (!fastpath_timer_check(tsk)) |
1334 | (tsk->it_sched_expires == 0 || | ||
1335 | tsk->se.sum_exec_runtime < tsk->it_sched_expires)) | ||
1336 | return; | 1353 | return; |
1337 | 1354 | ||
1338 | #undef UNEXPIRED | 1355 | spin_lock(&tsk->sighand->siglock); |
1339 | |||
1340 | /* | 1356 | /* |
1341 | * Double-check with locks held. | 1357 | * Here we take off tsk->signal->cpu_timers[N] and |
1358 | * tsk->cpu_timers[N] all the timers that are firing, and | ||
1359 | * put them on the firing list. | ||
1342 | */ | 1360 | */ |
1343 | read_lock(&tasklist_lock); | 1361 | check_thread_timers(tsk, &firing); |
1344 | if (likely(tsk->signal != NULL)) { | 1362 | check_process_timers(tsk, &firing); |
1345 | spin_lock(&tsk->sighand->siglock); | ||
1346 | 1363 | ||
1347 | /* | 1364 | /* |
1348 | * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] | 1365 | * We must release these locks before taking any timer's lock. |
1349 | * all the timers that are firing, and put them on the firing list. | 1366 | * There is a potential race with timer deletion here, as the |
1350 | */ | 1367 | * siglock now protects our private firing list. We have set |
1351 | check_thread_timers(tsk, &firing); | 1368 | * the firing flag in each timer, so that a deletion attempt |
1352 | check_process_timers(tsk, &firing); | 1369 | * that gets the timer lock before we do will give it up and |
1353 | 1370 | * spin until we've taken care of that timer below. | |
1354 | /* | 1371 | */ |
1355 | * We must release these locks before taking any timer's lock. | 1372 | spin_unlock(&tsk->sighand->siglock); |
1356 | * There is a potential race with timer deletion here, as the | ||
1357 | * siglock now protects our private firing list. We have set | ||
1358 | * the firing flag in each timer, so that a deletion attempt | ||
1359 | * that gets the timer lock before we do will give it up and | ||
1360 | * spin until we've taken care of that timer below. | ||
1361 | */ | ||
1362 | spin_unlock(&tsk->sighand->siglock); | ||
1363 | } | ||
1364 | read_unlock(&tasklist_lock); | ||
1365 | 1373 | ||
1366 | /* | 1374 | /* |
1367 | * Now that all the timers on our list have the firing flag, | 1375 | * Now that all the timers on our list have the firing flag, |
@@ -1389,10 +1397,9 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1389 | 1397 | ||
1390 | /* | 1398 | /* |
1391 | * Set one of the process-wide special case CPU timers. | 1399 | * Set one of the process-wide special case CPU timers. |
1392 | * The tasklist_lock and tsk->sighand->siglock must be held by the caller. | 1400 | * The tsk->sighand->siglock must be held by the caller. |
1393 | * The oldval argument is null for the RLIMIT_CPU timer, where *newval is | 1401 | * The *newval argument is relative and we update it to be absolute, *oldval |
1394 | * absolute; non-null for ITIMER_*, where *newval is relative and we update | 1402 | * is absolute and we update it to be relative. |
1395 | * it to be absolute, *oldval is absolute and we update it to be relative. | ||
1396 | */ | 1403 | */ |
1397 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | 1404 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, |
1398 | cputime_t *newval, cputime_t *oldval) | 1405 | cputime_t *newval, cputime_t *oldval) |
@@ -1401,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
1401 | struct list_head *head; | 1408 | struct list_head *head; |
1402 | 1409 | ||
1403 | BUG_ON(clock_idx == CPUCLOCK_SCHED); | 1410 | BUG_ON(clock_idx == CPUCLOCK_SCHED); |
1404 | cpu_clock_sample_group_locked(clock_idx, tsk, &now); | 1411 | cpu_clock_sample_group(clock_idx, tsk, &now); |
1405 | 1412 | ||
1406 | if (oldval) { | 1413 | if (oldval) { |
1407 | if (!cputime_eq(*oldval, cputime_zero)) { | 1414 | if (!cputime_eq(*oldval, cputime_zero)) { |
@@ -1435,13 +1442,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
1435 | cputime_ge(list_first_entry(head, | 1442 | cputime_ge(list_first_entry(head, |
1436 | struct cpu_timer_list, entry)->expires.cpu, | 1443 | struct cpu_timer_list, entry)->expires.cpu, |
1437 | *newval)) { | 1444 | *newval)) { |
1438 | /* | 1445 | switch (clock_idx) { |
1439 | * Rejigger each thread's expiry time so that one will | 1446 | case CPUCLOCK_PROF: |
1440 | * notice before we hit the process-cumulative expiry time. | 1447 | tsk->signal->cputime_expires.prof_exp = *newval; |
1441 | */ | 1448 | break; |
1442 | union cpu_time_count expires = { .sched = 0 }; | 1449 | case CPUCLOCK_VIRT: |
1443 | expires.cpu = *newval; | 1450 | tsk->signal->cputime_expires.virt_exp = *newval; |
1444 | process_timer_rebalance(tsk, clock_idx, expires, now); | 1451 | break; |
1452 | } | ||
1445 | } | 1453 | } |
1446 | } | 1454 | } |
1447 | 1455 | ||
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 5131e5471169..b931d7cedbfa 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -223,6 +223,15 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp) | |||
223 | } | 223 | } |
224 | 224 | ||
225 | /* | 225 | /* |
226 | * Get monotonic time for posix timers | ||
227 | */ | ||
228 | static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) | ||
229 | { | ||
230 | getrawmonotonic(tp); | ||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | /* | ||
226 | * Initialize everything, well, just everything in Posix clocks/timers ;) | 235 | * Initialize everything, well, just everything in Posix clocks/timers ;) |
227 | */ | 236 | */ |
228 | static __init int init_posix_timers(void) | 237 | static __init int init_posix_timers(void) |
@@ -235,9 +244,15 @@ static __init int init_posix_timers(void) | |||
235 | .clock_get = posix_ktime_get_ts, | 244 | .clock_get = posix_ktime_get_ts, |
236 | .clock_set = do_posix_clock_nosettime, | 245 | .clock_set = do_posix_clock_nosettime, |
237 | }; | 246 | }; |
247 | struct k_clock clock_monotonic_raw = { | ||
248 | .clock_getres = hrtimer_get_res, | ||
249 | .clock_get = posix_get_monotonic_raw, | ||
250 | .clock_set = do_posix_clock_nosettime, | ||
251 | }; | ||
238 | 252 | ||
239 | register_posix_clock(CLOCK_REALTIME, &clock_realtime); | 253 | register_posix_clock(CLOCK_REALTIME, &clock_realtime); |
240 | register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); | 254 | register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); |
255 | register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); | ||
241 | 256 | ||
242 | posix_timers_cache = kmem_cache_create("posix_timers_cache", | 257 | posix_timers_cache = kmem_cache_create("posix_timers_cache", |
243 | sizeof (struct k_itimer), 0, SLAB_PANIC, | 258 | sizeof (struct k_itimer), 0, SLAB_PANIC, |
@@ -298,6 +313,7 @@ void do_schedule_next_timer(struct siginfo *info) | |||
298 | 313 | ||
299 | int posix_timer_event(struct k_itimer *timr, int si_private) | 314 | int posix_timer_event(struct k_itimer *timr, int si_private) |
300 | { | 315 | { |
316 | int shared, ret; | ||
301 | /* | 317 | /* |
302 | * FIXME: if ->sigq is queued we can race with | 318 | * FIXME: if ->sigq is queued we can race with |
303 | * dequeue_signal()->do_schedule_next_timer(). | 319 | * dequeue_signal()->do_schedule_next_timer(). |
@@ -311,25 +327,10 @@ int posix_timer_event(struct k_itimer *timr, int si_private) | |||
311 | */ | 327 | */ |
312 | timr->sigq->info.si_sys_private = si_private; | 328 | timr->sigq->info.si_sys_private = si_private; |
313 | 329 | ||
314 | timr->sigq->info.si_signo = timr->it_sigev_signo; | 330 | shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID); |
315 | timr->sigq->info.si_code = SI_TIMER; | 331 | ret = send_sigqueue(timr->sigq, timr->it_process, shared); |
316 | timr->sigq->info.si_tid = timr->it_id; | 332 | /* If we failed to send the signal the timer stops. */ |
317 | timr->sigq->info.si_value = timr->it_sigev_value; | 333 | return ret > 0; |
318 | |||
319 | if (timr->it_sigev_notify & SIGEV_THREAD_ID) { | ||
320 | struct task_struct *leader; | ||
321 | int ret = send_sigqueue(timr->sigq, timr->it_process, 0); | ||
322 | |||
323 | if (likely(ret >= 0)) | ||
324 | return ret; | ||
325 | |||
326 | timr->it_sigev_notify = SIGEV_SIGNAL; | ||
327 | leader = timr->it_process->group_leader; | ||
328 | put_task_struct(timr->it_process); | ||
329 | timr->it_process = leader; | ||
330 | } | ||
331 | |||
332 | return send_sigqueue(timr->sigq, timr->it_process, 1); | ||
333 | } | 334 | } |
334 | EXPORT_SYMBOL_GPL(posix_timer_event); | 335 | EXPORT_SYMBOL_GPL(posix_timer_event); |
335 | 336 | ||
@@ -468,11 +469,9 @@ sys_timer_create(const clockid_t which_clock, | |||
468 | struct sigevent __user *timer_event_spec, | 469 | struct sigevent __user *timer_event_spec, |
469 | timer_t __user * created_timer_id) | 470 | timer_t __user * created_timer_id) |
470 | { | 471 | { |
471 | int error = 0; | 472 | struct k_itimer *new_timer; |
472 | struct k_itimer *new_timer = NULL; | 473 | int error, new_timer_id; |
473 | int new_timer_id; | 474 | struct task_struct *process; |
474 | struct task_struct *process = NULL; | ||
475 | unsigned long flags; | ||
476 | sigevent_t event; | 475 | sigevent_t event; |
477 | int it_id_set = IT_ID_NOT_SET; | 476 | int it_id_set = IT_ID_NOT_SET; |
478 | 477 | ||
@@ -490,12 +489,11 @@ sys_timer_create(const clockid_t which_clock, | |||
490 | goto out; | 489 | goto out; |
491 | } | 490 | } |
492 | spin_lock_irq(&idr_lock); | 491 | spin_lock_irq(&idr_lock); |
493 | error = idr_get_new(&posix_timers_id, (void *) new_timer, | 492 | error = idr_get_new(&posix_timers_id, new_timer, &new_timer_id); |
494 | &new_timer_id); | ||
495 | spin_unlock_irq(&idr_lock); | 493 | spin_unlock_irq(&idr_lock); |
496 | if (error == -EAGAIN) | 494 | if (error) { |
497 | goto retry; | 495 | if (error == -EAGAIN) |
498 | else if (error) { | 496 | goto retry; |
499 | /* | 497 | /* |
500 | * Weird looking, but we return EAGAIN if the IDR is | 498 | * Weird looking, but we return EAGAIN if the IDR is |
501 | * full (proper POSIX return value for this) | 499 | * full (proper POSIX return value for this) |
@@ -526,67 +524,43 @@ sys_timer_create(const clockid_t which_clock, | |||
526 | error = -EFAULT; | 524 | error = -EFAULT; |
527 | goto out; | 525 | goto out; |
528 | } | 526 | } |
529 | new_timer->it_sigev_notify = event.sigev_notify; | 527 | rcu_read_lock(); |
530 | new_timer->it_sigev_signo = event.sigev_signo; | 528 | process = good_sigevent(&event); |
531 | new_timer->it_sigev_value = event.sigev_value; | 529 | if (process) |
532 | 530 | get_task_struct(process); | |
533 | read_lock(&tasklist_lock); | 531 | rcu_read_unlock(); |
534 | if ((process = good_sigevent(&event))) { | ||
535 | /* | ||
536 | * We may be setting up this process for another | ||
537 | * thread. It may be exiting. To catch this | ||
538 | * case the we check the PF_EXITING flag. If | ||
539 | * the flag is not set, the siglock will catch | ||
540 | * him before it is too late (in exit_itimers). | ||
541 | * | ||
542 | * The exec case is a bit more invloved but easy | ||
543 | * to code. If the process is in our thread | ||
544 | * group (and it must be or we would not allow | ||
545 | * it here) and is doing an exec, it will cause | ||
546 | * us to be killed. In this case it will wait | ||
547 | * for us to die which means we can finish this | ||
548 | * linkage with our last gasp. I.e. no code :) | ||
549 | */ | ||
550 | spin_lock_irqsave(&process->sighand->siglock, flags); | ||
551 | if (!(process->flags & PF_EXITING)) { | ||
552 | new_timer->it_process = process; | ||
553 | list_add(&new_timer->list, | ||
554 | &process->signal->posix_timers); | ||
555 | if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) | ||
556 | get_task_struct(process); | ||
557 | spin_unlock_irqrestore(&process->sighand->siglock, flags); | ||
558 | } else { | ||
559 | spin_unlock_irqrestore(&process->sighand->siglock, flags); | ||
560 | process = NULL; | ||
561 | } | ||
562 | } | ||
563 | read_unlock(&tasklist_lock); | ||
564 | if (!process) { | 532 | if (!process) { |
565 | error = -EINVAL; | 533 | error = -EINVAL; |
566 | goto out; | 534 | goto out; |
567 | } | 535 | } |
568 | } else { | 536 | } else { |
569 | new_timer->it_sigev_notify = SIGEV_SIGNAL; | 537 | event.sigev_notify = SIGEV_SIGNAL; |
570 | new_timer->it_sigev_signo = SIGALRM; | 538 | event.sigev_signo = SIGALRM; |
571 | new_timer->it_sigev_value.sival_int = new_timer->it_id; | 539 | event.sigev_value.sival_int = new_timer->it_id; |
572 | process = current->group_leader; | 540 | process = current->group_leader; |
573 | spin_lock_irqsave(&process->sighand->siglock, flags); | 541 | get_task_struct(process); |
574 | new_timer->it_process = process; | ||
575 | list_add(&new_timer->list, &process->signal->posix_timers); | ||
576 | spin_unlock_irqrestore(&process->sighand->siglock, flags); | ||
577 | } | 542 | } |
578 | 543 | ||
544 | new_timer->it_sigev_notify = event.sigev_notify; | ||
545 | new_timer->sigq->info.si_signo = event.sigev_signo; | ||
546 | new_timer->sigq->info.si_value = event.sigev_value; | ||
547 | new_timer->sigq->info.si_tid = new_timer->it_id; | ||
548 | new_timer->sigq->info.si_code = SI_TIMER; | ||
549 | |||
550 | spin_lock_irq(&current->sighand->siglock); | ||
551 | new_timer->it_process = process; | ||
552 | list_add(&new_timer->list, &current->signal->posix_timers); | ||
553 | spin_unlock_irq(&current->sighand->siglock); | ||
554 | |||
555 | return 0; | ||
579 | /* | 556 | /* |
580 | * In the case of the timer belonging to another task, after | 557 | * In the case of the timer belonging to another task, after |
581 | * the task is unlocked, the timer is owned by the other task | 558 | * the task is unlocked, the timer is owned by the other task |
582 | * and may cease to exist at any time. Don't use or modify | 559 | * and may cease to exist at any time. Don't use or modify |
583 | * new_timer after the unlock call. | 560 | * new_timer after the unlock call. |
584 | */ | 561 | */ |
585 | |||
586 | out: | 562 | out: |
587 | if (error) | 563 | release_posix_timer(new_timer, it_id_set); |
588 | release_posix_timer(new_timer, it_id_set); | ||
589 | |||
590 | return error; | 564 | return error; |
591 | } | 565 | } |
592 | 566 | ||
@@ -597,7 +571,7 @@ out: | |||
597 | * the find to the timer lock. To avoid a dead lock, the timer id MUST | 571 | * the find to the timer lock. To avoid a dead lock, the timer id MUST |
598 | * be release with out holding the timer lock. | 572 | * be release with out holding the timer lock. |
599 | */ | 573 | */ |
600 | static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) | 574 | static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags) |
601 | { | 575 | { |
602 | struct k_itimer *timr; | 576 | struct k_itimer *timr; |
603 | /* | 577 | /* |
@@ -605,23 +579,20 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) | |||
605 | * flags part over to the timer lock. Must not let interrupts in | 579 | * flags part over to the timer lock. Must not let interrupts in |
606 | * while we are moving the lock. | 580 | * while we are moving the lock. |
607 | */ | 581 | */ |
608 | |||
609 | spin_lock_irqsave(&idr_lock, *flags); | 582 | spin_lock_irqsave(&idr_lock, *flags); |
610 | timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id); | 583 | timr = idr_find(&posix_timers_id, (int)timer_id); |
611 | if (timr) { | 584 | if (timr) { |
612 | spin_lock(&timr->it_lock); | 585 | spin_lock(&timr->it_lock); |
613 | 586 | if (timr->it_process && | |
614 | if ((timr->it_id != timer_id) || !(timr->it_process) || | 587 | same_thread_group(timr->it_process, current)) { |
615 | !same_thread_group(timr->it_process, current)) { | ||
616 | spin_unlock(&timr->it_lock); | ||
617 | spin_unlock_irqrestore(&idr_lock, *flags); | ||
618 | timr = NULL; | ||
619 | } else | ||
620 | spin_unlock(&idr_lock); | 588 | spin_unlock(&idr_lock); |
621 | } else | 589 | return timr; |
622 | spin_unlock_irqrestore(&idr_lock, *flags); | 590 | } |
591 | spin_unlock(&timr->it_lock); | ||
592 | } | ||
593 | spin_unlock_irqrestore(&idr_lock, *flags); | ||
623 | 594 | ||
624 | return timr; | 595 | return NULL; |
625 | } | 596 | } |
626 | 597 | ||
627 | /* | 598 | /* |
@@ -862,8 +833,7 @@ retry_delete: | |||
862 | * This keeps any tasks waiting on the spin lock from thinking | 833 | * This keeps any tasks waiting on the spin lock from thinking |
863 | * they got something (see the lock code above). | 834 | * they got something (see the lock code above). |
864 | */ | 835 | */ |
865 | if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) | 836 | put_task_struct(timer->it_process); |
866 | put_task_struct(timer->it_process); | ||
867 | timer->it_process = NULL; | 837 | timer->it_process = NULL; |
868 | 838 | ||
869 | unlock_timer(timer, flags); | 839 | unlock_timer(timer, flags); |
@@ -890,8 +860,7 @@ retry_delete: | |||
890 | * This keeps any tasks waiting on the spin lock from thinking | 860 | * This keeps any tasks waiting on the spin lock from thinking |
891 | * they got something (see the lock code above). | 861 | * they got something (see the lock code above). |
892 | */ | 862 | */ |
893 | if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) | 863 | put_task_struct(timer->it_process); |
894 | put_task_struct(timer->it_process); | ||
895 | timer->it_process = NULL; | 864 | timer->it_process = NULL; |
896 | 865 | ||
897 | unlock_timer(timer, flags); | 866 | unlock_timer(timer, flags); |
diff --git a/kernel/power/process.c b/kernel/power/process.c index 278946aecaf0..ca634019497a 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -28,121 +28,6 @@ static inline int freezeable(struct task_struct * p) | |||
28 | return 1; | 28 | return 1; |
29 | } | 29 | } |
30 | 30 | ||
31 | /* | ||
32 | * freezing is complete, mark current process as frozen | ||
33 | */ | ||
34 | static inline void frozen_process(void) | ||
35 | { | ||
36 | if (!unlikely(current->flags & PF_NOFREEZE)) { | ||
37 | current->flags |= PF_FROZEN; | ||
38 | wmb(); | ||
39 | } | ||
40 | clear_freeze_flag(current); | ||
41 | } | ||
42 | |||
43 | /* Refrigerator is place where frozen processes are stored :-). */ | ||
44 | void refrigerator(void) | ||
45 | { | ||
46 | /* Hmm, should we be allowed to suspend when there are realtime | ||
47 | processes around? */ | ||
48 | long save; | ||
49 | |||
50 | task_lock(current); | ||
51 | if (freezing(current)) { | ||
52 | frozen_process(); | ||
53 | task_unlock(current); | ||
54 | } else { | ||
55 | task_unlock(current); | ||
56 | return; | ||
57 | } | ||
58 | save = current->state; | ||
59 | pr_debug("%s entered refrigerator\n", current->comm); | ||
60 | |||
61 | spin_lock_irq(&current->sighand->siglock); | ||
62 | recalc_sigpending(); /* We sent fake signal, clean it up */ | ||
63 | spin_unlock_irq(&current->sighand->siglock); | ||
64 | |||
65 | for (;;) { | ||
66 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
67 | if (!frozen(current)) | ||
68 | break; | ||
69 | schedule(); | ||
70 | } | ||
71 | pr_debug("%s left refrigerator\n", current->comm); | ||
72 | __set_current_state(save); | ||
73 | } | ||
74 | |||
75 | static void fake_signal_wake_up(struct task_struct *p) | ||
76 | { | ||
77 | unsigned long flags; | ||
78 | |||
79 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
80 | signal_wake_up(p, 0); | ||
81 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
82 | } | ||
83 | |||
84 | static inline bool should_send_signal(struct task_struct *p) | ||
85 | { | ||
86 | return !(p->flags & PF_FREEZER_NOSIG); | ||
87 | } | ||
88 | |||
89 | /** | ||
90 | * freeze_task - send a freeze request to given task | ||
91 | * @p: task to send the request to | ||
92 | * @sig_only: if set, the request will only be sent if the task has the | ||
93 | * PF_FREEZER_NOSIG flag unset | ||
94 | * Return value: 'false', if @sig_only is set and the task has | ||
95 | * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise | ||
96 | * | ||
97 | * The freeze request is sent by setting the tasks's TIF_FREEZE flag and | ||
98 | * either sending a fake signal to it or waking it up, depending on whether | ||
99 | * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task | ||
100 | * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its | ||
101 | * TIF_FREEZE flag will not be set. | ||
102 | */ | ||
103 | static bool freeze_task(struct task_struct *p, bool sig_only) | ||
104 | { | ||
105 | /* | ||
106 | * We first check if the task is freezing and next if it has already | ||
107 | * been frozen to avoid the race with frozen_process() which first marks | ||
108 | * the task as frozen and next clears its TIF_FREEZE. | ||
109 | */ | ||
110 | if (!freezing(p)) { | ||
111 | rmb(); | ||
112 | if (frozen(p)) | ||
113 | return false; | ||
114 | |||
115 | if (!sig_only || should_send_signal(p)) | ||
116 | set_freeze_flag(p); | ||
117 | else | ||
118 | return false; | ||
119 | } | ||
120 | |||
121 | if (should_send_signal(p)) { | ||
122 | if (!signal_pending(p)) | ||
123 | fake_signal_wake_up(p); | ||
124 | } else if (sig_only) { | ||
125 | return false; | ||
126 | } else { | ||
127 | wake_up_state(p, TASK_INTERRUPTIBLE); | ||
128 | } | ||
129 | |||
130 | return true; | ||
131 | } | ||
132 | |||
133 | static void cancel_freezing(struct task_struct *p) | ||
134 | { | ||
135 | unsigned long flags; | ||
136 | |||
137 | if (freezing(p)) { | ||
138 | pr_debug(" clean up: %s\n", p->comm); | ||
139 | clear_freeze_flag(p); | ||
140 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
141 | recalc_sigpending_and_wake(p); | ||
142 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
143 | } | ||
144 | } | ||
145 | |||
146 | static int try_to_freeze_tasks(bool sig_only) | 31 | static int try_to_freeze_tasks(bool sig_only) |
147 | { | 32 | { |
148 | struct task_struct *g, *p; | 33 | struct task_struct *g, *p; |
@@ -250,6 +135,9 @@ static void thaw_tasks(bool nosig_only) | |||
250 | if (nosig_only && should_send_signal(p)) | 135 | if (nosig_only && should_send_signal(p)) |
251 | continue; | 136 | continue; |
252 | 137 | ||
138 | if (cgroup_frozen(p)) | ||
139 | continue; | ||
140 | |||
253 | thaw_process(p); | 141 | thaw_process(p); |
254 | } while_each_thread(g, p); | 142 | } while_each_thread(g, p); |
255 | read_unlock(&tasklist_lock); | 143 | read_unlock(&tasklist_lock); |
@@ -264,4 +152,3 @@ void thaw_processes(void) | |||
264 | printk("done.\n"); | 152 | printk("done.\n"); |
265 | } | 153 | } |
266 | 154 | ||
267 | EXPORT_SYMBOL(refrigerator); | ||
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 356699a96d56..1e68e4c39e2c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -45,7 +45,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) | |||
45 | * TASK_TRACED, resume it now. | 45 | * TASK_TRACED, resume it now. |
46 | * Requires that irqs be disabled. | 46 | * Requires that irqs be disabled. |
47 | */ | 47 | */ |
48 | void ptrace_untrace(struct task_struct *child) | 48 | static void ptrace_untrace(struct task_struct *child) |
49 | { | 49 | { |
50 | spin_lock(&child->sighand->siglock); | 50 | spin_lock(&child->sighand->siglock); |
51 | if (task_is_traced(child)) { | 51 | if (task_is_traced(child)) { |
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index ca4bbbe04aa4..59236e8b9daa 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c | |||
@@ -54,9 +54,9 @@ | |||
54 | #include <linux/cpu.h> | 54 | #include <linux/cpu.h> |
55 | #include <linux/random.h> | 55 | #include <linux/random.h> |
56 | #include <linux/delay.h> | 56 | #include <linux/delay.h> |
57 | #include <linux/byteorder/swabb.h> | ||
58 | #include <linux/cpumask.h> | 57 | #include <linux/cpumask.h> |
59 | #include <linux/rcupreempt_trace.h> | 58 | #include <linux/rcupreempt_trace.h> |
59 | #include <asm/byteorder.h> | ||
60 | 60 | ||
61 | /* | 61 | /* |
62 | * PREEMPT_RCU data structures. | 62 | * PREEMPT_RCU data structures. |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 90b5b123f7a1..85cb90588a55 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -42,10 +42,10 @@ | |||
42 | #include <linux/freezer.h> | 42 | #include <linux/freezer.h> |
43 | #include <linux/cpu.h> | 43 | #include <linux/cpu.h> |
44 | #include <linux/delay.h> | 44 | #include <linux/delay.h> |
45 | #include <linux/byteorder/swabb.h> | ||
46 | #include <linux/stat.h> | 45 | #include <linux/stat.h> |
47 | #include <linux/srcu.h> | 46 | #include <linux/srcu.h> |
48 | #include <linux/slab.h> | 47 | #include <linux/slab.h> |
48 | #include <asm/byteorder.h> | ||
49 | 49 | ||
50 | MODULE_LICENSE("GPL"); | 50 | MODULE_LICENSE("GPL"); |
51 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " | 51 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " |
diff --git a/kernel/sched.c b/kernel/sched.c index 3d1ad130c24e..d906f72b42d2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -4047,23 +4047,26 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); | |||
4047 | EXPORT_PER_CPU_SYMBOL(kstat); | 4047 | EXPORT_PER_CPU_SYMBOL(kstat); |
4048 | 4048 | ||
4049 | /* | 4049 | /* |
4050 | * Return p->sum_exec_runtime plus any more ns on the sched_clock | 4050 | * Return any ns on the sched_clock that have not yet been banked in |
4051 | * that have not yet been banked in case the task is currently running. | 4051 | * @p in case that task is currently running. |
4052 | */ | 4052 | */ |
4053 | unsigned long long task_sched_runtime(struct task_struct *p) | 4053 | unsigned long long task_delta_exec(struct task_struct *p) |
4054 | { | 4054 | { |
4055 | unsigned long flags; | 4055 | unsigned long flags; |
4056 | u64 ns, delta_exec; | ||
4057 | struct rq *rq; | 4056 | struct rq *rq; |
4057 | u64 ns = 0; | ||
4058 | 4058 | ||
4059 | rq = task_rq_lock(p, &flags); | 4059 | rq = task_rq_lock(p, &flags); |
4060 | ns = p->se.sum_exec_runtime; | 4060 | |
4061 | if (task_current(rq, p)) { | 4061 | if (task_current(rq, p)) { |
4062 | u64 delta_exec; | ||
4063 | |||
4062 | update_rq_clock(rq); | 4064 | update_rq_clock(rq); |
4063 | delta_exec = rq->clock - p->se.exec_start; | 4065 | delta_exec = rq->clock - p->se.exec_start; |
4064 | if ((s64)delta_exec > 0) | 4066 | if ((s64)delta_exec > 0) |
4065 | ns += delta_exec; | 4067 | ns = delta_exec; |
4066 | } | 4068 | } |
4069 | |||
4067 | task_rq_unlock(rq, &flags); | 4070 | task_rq_unlock(rq, &flags); |
4068 | 4071 | ||
4069 | return ns; | 4072 | return ns; |
@@ -4080,6 +4083,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime) | |||
4080 | cputime64_t tmp; | 4083 | cputime64_t tmp; |
4081 | 4084 | ||
4082 | p->utime = cputime_add(p->utime, cputime); | 4085 | p->utime = cputime_add(p->utime, cputime); |
4086 | account_group_user_time(p, cputime); | ||
4083 | 4087 | ||
4084 | /* Add user time to cpustat. */ | 4088 | /* Add user time to cpustat. */ |
4085 | tmp = cputime_to_cputime64(cputime); | 4089 | tmp = cputime_to_cputime64(cputime); |
@@ -4104,6 +4108,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime) | |||
4104 | tmp = cputime_to_cputime64(cputime); | 4108 | tmp = cputime_to_cputime64(cputime); |
4105 | 4109 | ||
4106 | p->utime = cputime_add(p->utime, cputime); | 4110 | p->utime = cputime_add(p->utime, cputime); |
4111 | account_group_user_time(p, cputime); | ||
4107 | p->gtime = cputime_add(p->gtime, cputime); | 4112 | p->gtime = cputime_add(p->gtime, cputime); |
4108 | 4113 | ||
4109 | cpustat->user = cputime64_add(cpustat->user, tmp); | 4114 | cpustat->user = cputime64_add(cpustat->user, tmp); |
@@ -4139,6 +4144,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
4139 | } | 4144 | } |
4140 | 4145 | ||
4141 | p->stime = cputime_add(p->stime, cputime); | 4146 | p->stime = cputime_add(p->stime, cputime); |
4147 | account_group_system_time(p, cputime); | ||
4142 | 4148 | ||
4143 | /* Add system time to cpustat. */ | 4149 | /* Add system time to cpustat. */ |
4144 | tmp = cputime_to_cputime64(cputime); | 4150 | tmp = cputime_to_cputime64(cputime); |
@@ -4180,6 +4186,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal) | |||
4180 | 4186 | ||
4181 | if (p == rq->idle) { | 4187 | if (p == rq->idle) { |
4182 | p->stime = cputime_add(p->stime, steal); | 4188 | p->stime = cputime_add(p->stime, steal); |
4189 | account_group_system_time(p, steal); | ||
4183 | if (atomic_read(&rq->nr_iowait) > 0) | 4190 | if (atomic_read(&rq->nr_iowait) > 0) |
4184 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); | 4191 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); |
4185 | else | 4192 | else |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 18fd17172eb6..f604dae71316 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -449,6 +449,7 @@ static void update_curr(struct cfs_rq *cfs_rq) | |||
449 | struct task_struct *curtask = task_of(curr); | 449 | struct task_struct *curtask = task_of(curr); |
450 | 450 | ||
451 | cpuacct_charge(curtask, delta_exec); | 451 | cpuacct_charge(curtask, delta_exec); |
452 | account_group_exec_runtime(curtask, delta_exec); | ||
452 | } | 453 | } |
453 | } | 454 | } |
454 | 455 | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index cdf5740ab03e..b446dc87494f 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -526,6 +526,8 @@ static void update_curr_rt(struct rq *rq) | |||
526 | schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); | 526 | schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); |
527 | 527 | ||
528 | curr->se.sum_exec_runtime += delta_exec; | 528 | curr->se.sum_exec_runtime += delta_exec; |
529 | account_group_exec_runtime(curr, delta_exec); | ||
530 | |||
529 | curr->se.exec_start = rq->clock; | 531 | curr->se.exec_start = rq->clock; |
530 | cpuacct_charge(curr, delta_exec); | 532 | cpuacct_charge(curr, delta_exec); |
531 | 533 | ||
@@ -1458,7 +1460,7 @@ static void watchdog(struct rq *rq, struct task_struct *p) | |||
1458 | p->rt.timeout++; | 1460 | p->rt.timeout++; |
1459 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); | 1461 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); |
1460 | if (p->rt.timeout > next) | 1462 | if (p->rt.timeout > next) |
1461 | p->it_sched_expires = p->se.sum_exec_runtime; | 1463 | p->cputime_expires.sched_exp = p->se.sum_exec_runtime; |
1462 | } | 1464 | } |
1463 | } | 1465 | } |
1464 | 1466 | ||
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 8385d43987e2..b8c156979cf2 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -270,3 +270,89 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
270 | #define sched_info_switch(t, next) do { } while (0) | 270 | #define sched_info_switch(t, next) do { } while (0) |
271 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ | 271 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ |
272 | 272 | ||
273 | /* | ||
274 | * The following are functions that support scheduler-internal time accounting. | ||
275 | * These functions are generally called at the timer tick. None of this depends | ||
276 | * on CONFIG_SCHEDSTATS. | ||
277 | */ | ||
278 | |||
279 | /** | ||
280 | * account_group_user_time - Maintain utime for a thread group. | ||
281 | * | ||
282 | * @tsk: Pointer to task structure. | ||
283 | * @cputime: Time value by which to increment the utime field of the | ||
284 | * thread_group_cputime structure. | ||
285 | * | ||
286 | * If thread group time is being maintained, get the structure for the | ||
287 | * running CPU and update the utime field there. | ||
288 | */ | ||
289 | static inline void account_group_user_time(struct task_struct *tsk, | ||
290 | cputime_t cputime) | ||
291 | { | ||
292 | struct signal_struct *sig; | ||
293 | |||
294 | sig = tsk->signal; | ||
295 | if (unlikely(!sig)) | ||
296 | return; | ||
297 | if (sig->cputime.totals) { | ||
298 | struct task_cputime *times; | ||
299 | |||
300 | times = per_cpu_ptr(sig->cputime.totals, get_cpu()); | ||
301 | times->utime = cputime_add(times->utime, cputime); | ||
302 | put_cpu_no_resched(); | ||
303 | } | ||
304 | } | ||
305 | |||
306 | /** | ||
307 | * account_group_system_time - Maintain stime for a thread group. | ||
308 | * | ||
309 | * @tsk: Pointer to task structure. | ||
310 | * @cputime: Time value by which to increment the stime field of the | ||
311 | * thread_group_cputime structure. | ||
312 | * | ||
313 | * If thread group time is being maintained, get the structure for the | ||
314 | * running CPU and update the stime field there. | ||
315 | */ | ||
316 | static inline void account_group_system_time(struct task_struct *tsk, | ||
317 | cputime_t cputime) | ||
318 | { | ||
319 | struct signal_struct *sig; | ||
320 | |||
321 | sig = tsk->signal; | ||
322 | if (unlikely(!sig)) | ||
323 | return; | ||
324 | if (sig->cputime.totals) { | ||
325 | struct task_cputime *times; | ||
326 | |||
327 | times = per_cpu_ptr(sig->cputime.totals, get_cpu()); | ||
328 | times->stime = cputime_add(times->stime, cputime); | ||
329 | put_cpu_no_resched(); | ||
330 | } | ||
331 | } | ||
332 | |||
333 | /** | ||
334 | * account_group_exec_runtime - Maintain exec runtime for a thread group. | ||
335 | * | ||
336 | * @tsk: Pointer to task structure. | ||
337 | * @ns: Time value by which to increment the sum_exec_runtime field | ||
338 | * of the thread_group_cputime structure. | ||
339 | * | ||
340 | * If thread group time is being maintained, get the structure for the | ||
341 | * running CPU and update the sum_exec_runtime field there. | ||
342 | */ | ||
343 | static inline void account_group_exec_runtime(struct task_struct *tsk, | ||
344 | unsigned long long ns) | ||
345 | { | ||
346 | struct signal_struct *sig; | ||
347 | |||
348 | sig = tsk->signal; | ||
349 | if (unlikely(!sig)) | ||
350 | return; | ||
351 | if (sig->cputime.totals) { | ||
352 | struct task_cputime *times; | ||
353 | |||
354 | times = per_cpu_ptr(sig->cputime.totals, get_cpu()); | ||
355 | times->sum_exec_runtime += ns; | ||
356 | put_cpu_no_resched(); | ||
357 | } | ||
358 | } | ||
diff --git a/kernel/signal.c b/kernel/signal.c index bf40ecc87b26..105217da5c82 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1341,6 +1341,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) | |||
1341 | struct siginfo info; | 1341 | struct siginfo info; |
1342 | unsigned long flags; | 1342 | unsigned long flags; |
1343 | struct sighand_struct *psig; | 1343 | struct sighand_struct *psig; |
1344 | struct task_cputime cputime; | ||
1344 | int ret = sig; | 1345 | int ret = sig; |
1345 | 1346 | ||
1346 | BUG_ON(sig == -1); | 1347 | BUG_ON(sig == -1); |
@@ -1371,10 +1372,9 @@ int do_notify_parent(struct task_struct *tsk, int sig) | |||
1371 | 1372 | ||
1372 | info.si_uid = tsk->uid; | 1373 | info.si_uid = tsk->uid; |
1373 | 1374 | ||
1374 | info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime, | 1375 | thread_group_cputime(tsk, &cputime); |
1375 | tsk->signal->utime)); | 1376 | info.si_utime = cputime_to_jiffies(cputime.utime); |
1376 | info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime, | 1377 | info.si_stime = cputime_to_jiffies(cputime.stime); |
1377 | tsk->signal->stime)); | ||
1378 | 1378 | ||
1379 | info.si_status = tsk->exit_code & 0x7f; | 1379 | info.si_status = tsk->exit_code & 0x7f; |
1380 | if (tsk->exit_code & 0x80) | 1380 | if (tsk->exit_code & 0x80) |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 83ba21a13bd4..7110daeb9a90 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -267,16 +267,12 @@ asmlinkage void do_softirq(void) | |||
267 | */ | 267 | */ |
268 | void irq_enter(void) | 268 | void irq_enter(void) |
269 | { | 269 | { |
270 | #ifdef CONFIG_NO_HZ | ||
271 | int cpu = smp_processor_id(); | 270 | int cpu = smp_processor_id(); |
271 | |||
272 | if (idle_cpu(cpu) && !in_interrupt()) | 272 | if (idle_cpu(cpu) && !in_interrupt()) |
273 | tick_nohz_stop_idle(cpu); | 273 | tick_check_idle(cpu); |
274 | #endif | 274 | |
275 | __irq_enter(); | 275 | __irq_enter(); |
276 | #ifdef CONFIG_NO_HZ | ||
277 | if (idle_cpu(cpu)) | ||
278 | tick_nohz_update_jiffies(); | ||
279 | #endif | ||
280 | } | 276 | } |
281 | 277 | ||
282 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED | 278 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED |
diff --git a/kernel/sys.c b/kernel/sys.c index 0bc8fa3c2288..53879cdae483 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid) | |||
853 | return old_fsgid; | 853 | return old_fsgid; |
854 | } | 854 | } |
855 | 855 | ||
856 | void do_sys_times(struct tms *tms) | ||
857 | { | ||
858 | struct task_cputime cputime; | ||
859 | cputime_t cutime, cstime; | ||
860 | |||
861 | spin_lock_irq(&current->sighand->siglock); | ||
862 | thread_group_cputime(current, &cputime); | ||
863 | cutime = current->signal->cutime; | ||
864 | cstime = current->signal->cstime; | ||
865 | spin_unlock_irq(&current->sighand->siglock); | ||
866 | tms->tms_utime = cputime_to_clock_t(cputime.utime); | ||
867 | tms->tms_stime = cputime_to_clock_t(cputime.stime); | ||
868 | tms->tms_cutime = cputime_to_clock_t(cutime); | ||
869 | tms->tms_cstime = cputime_to_clock_t(cstime); | ||
870 | } | ||
871 | |||
856 | asmlinkage long sys_times(struct tms __user * tbuf) | 872 | asmlinkage long sys_times(struct tms __user * tbuf) |
857 | { | 873 | { |
858 | /* | ||
859 | * In the SMP world we might just be unlucky and have one of | ||
860 | * the times increment as we use it. Since the value is an | ||
861 | * atomically safe type this is just fine. Conceptually its | ||
862 | * as if the syscall took an instant longer to occur. | ||
863 | */ | ||
864 | if (tbuf) { | 874 | if (tbuf) { |
865 | struct tms tmp; | 875 | struct tms tmp; |
866 | struct task_struct *tsk = current; | 876 | |
867 | struct task_struct *t; | 877 | do_sys_times(&tmp); |
868 | cputime_t utime, stime, cutime, cstime; | ||
869 | |||
870 | spin_lock_irq(&tsk->sighand->siglock); | ||
871 | utime = tsk->signal->utime; | ||
872 | stime = tsk->signal->stime; | ||
873 | t = tsk; | ||
874 | do { | ||
875 | utime = cputime_add(utime, t->utime); | ||
876 | stime = cputime_add(stime, t->stime); | ||
877 | t = next_thread(t); | ||
878 | } while (t != tsk); | ||
879 | |||
880 | cutime = tsk->signal->cutime; | ||
881 | cstime = tsk->signal->cstime; | ||
882 | spin_unlock_irq(&tsk->sighand->siglock); | ||
883 | |||
884 | tmp.tms_utime = cputime_to_clock_t(utime); | ||
885 | tmp.tms_stime = cputime_to_clock_t(stime); | ||
886 | tmp.tms_cutime = cputime_to_clock_t(cutime); | ||
887 | tmp.tms_cstime = cputime_to_clock_t(cstime); | ||
888 | if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) | 878 | if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) |
889 | return -EFAULT; | 879 | return -EFAULT; |
890 | } | 880 | } |
@@ -1449,7 +1439,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r | |||
1449 | asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | 1439 | asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) |
1450 | { | 1440 | { |
1451 | struct rlimit new_rlim, *old_rlim; | 1441 | struct rlimit new_rlim, *old_rlim; |
1452 | unsigned long it_prof_secs; | ||
1453 | int retval; | 1442 | int retval; |
1454 | 1443 | ||
1455 | if (resource >= RLIM_NLIMITS) | 1444 | if (resource >= RLIM_NLIMITS) |
@@ -1503,18 +1492,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | |||
1503 | if (new_rlim.rlim_cur == RLIM_INFINITY) | 1492 | if (new_rlim.rlim_cur == RLIM_INFINITY) |
1504 | goto out; | 1493 | goto out; |
1505 | 1494 | ||
1506 | it_prof_secs = cputime_to_secs(current->signal->it_prof_expires); | 1495 | update_rlimit_cpu(new_rlim.rlim_cur); |
1507 | if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) { | ||
1508 | unsigned long rlim_cur = new_rlim.rlim_cur; | ||
1509 | cputime_t cputime; | ||
1510 | |||
1511 | cputime = secs_to_cputime(rlim_cur); | ||
1512 | read_lock(&tasklist_lock); | ||
1513 | spin_lock_irq(&current->sighand->siglock); | ||
1514 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); | ||
1515 | spin_unlock_irq(&current->sighand->siglock); | ||
1516 | read_unlock(&tasklist_lock); | ||
1517 | } | ||
1518 | out: | 1496 | out: |
1519 | return 0; | 1497 | return 0; |
1520 | } | 1498 | } |
@@ -1552,11 +1530,8 @@ out: | |||
1552 | * | 1530 | * |
1553 | */ | 1531 | */ |
1554 | 1532 | ||
1555 | static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, | 1533 | static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) |
1556 | cputime_t *utimep, cputime_t *stimep) | ||
1557 | { | 1534 | { |
1558 | *utimep = cputime_add(*utimep, t->utime); | ||
1559 | *stimep = cputime_add(*stimep, t->stime); | ||
1560 | r->ru_nvcsw += t->nvcsw; | 1535 | r->ru_nvcsw += t->nvcsw; |
1561 | r->ru_nivcsw += t->nivcsw; | 1536 | r->ru_nivcsw += t->nivcsw; |
1562 | r->ru_minflt += t->min_flt; | 1537 | r->ru_minflt += t->min_flt; |
@@ -1570,12 +1545,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1570 | struct task_struct *t; | 1545 | struct task_struct *t; |
1571 | unsigned long flags; | 1546 | unsigned long flags; |
1572 | cputime_t utime, stime; | 1547 | cputime_t utime, stime; |
1548 | struct task_cputime cputime; | ||
1573 | 1549 | ||
1574 | memset((char *) r, 0, sizeof *r); | 1550 | memset((char *) r, 0, sizeof *r); |
1575 | utime = stime = cputime_zero; | 1551 | utime = stime = cputime_zero; |
1576 | 1552 | ||
1577 | if (who == RUSAGE_THREAD) { | 1553 | if (who == RUSAGE_THREAD) { |
1578 | accumulate_thread_rusage(p, r, &utime, &stime); | 1554 | accumulate_thread_rusage(p, r); |
1579 | goto out; | 1555 | goto out; |
1580 | } | 1556 | } |
1581 | 1557 | ||
@@ -1598,8 +1574,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1598 | break; | 1574 | break; |
1599 | 1575 | ||
1600 | case RUSAGE_SELF: | 1576 | case RUSAGE_SELF: |
1601 | utime = cputime_add(utime, p->signal->utime); | 1577 | thread_group_cputime(p, &cputime); |
1602 | stime = cputime_add(stime, p->signal->stime); | 1578 | utime = cputime_add(utime, cputime.utime); |
1579 | stime = cputime_add(stime, cputime.stime); | ||
1603 | r->ru_nvcsw += p->signal->nvcsw; | 1580 | r->ru_nvcsw += p->signal->nvcsw; |
1604 | r->ru_nivcsw += p->signal->nivcsw; | 1581 | r->ru_nivcsw += p->signal->nivcsw; |
1605 | r->ru_minflt += p->signal->min_flt; | 1582 | r->ru_minflt += p->signal->min_flt; |
@@ -1608,7 +1585,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1608 | r->ru_oublock += p->signal->oublock; | 1585 | r->ru_oublock += p->signal->oublock; |
1609 | t = p; | 1586 | t = p; |
1610 | do { | 1587 | do { |
1611 | accumulate_thread_rusage(t, r, &utime, &stime); | 1588 | accumulate_thread_rusage(t, r); |
1612 | t = next_thread(t); | 1589 | t = next_thread(t); |
1613 | } while (t != p); | 1590 | } while (t != p); |
1614 | break; | 1591 | break; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 619eb9f3acd8..edb1075f80d2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = { | |||
833 | .proc_handler = &proc_dointvec, | 833 | .proc_handler = &proc_dointvec, |
834 | }, | 834 | }, |
835 | #endif | 835 | #endif |
836 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
837 | { | ||
838 | .ctl_name = CTL_UNNUMBERED, | ||
839 | .procname = "scan_unevictable_pages", | ||
840 | .data = &scan_unevictable_pages, | ||
841 | .maxlen = sizeof(scan_unevictable_pages), | ||
842 | .mode = 0644, | ||
843 | .proc_handler = &scan_unevictable_handler, | ||
844 | }, | ||
845 | #endif | ||
836 | /* | 846 | /* |
837 | * NOTE: do not add new entries to this table unless you have read | 847 | * NOTE: do not add new entries to this table unless you have read |
838 | * Documentation/sysctl/ctl_unnumbered.txt | 848 | * Documentation/sysctl/ctl_unnumbered.txt |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 093d4acf993b..9ed2eec97526 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -325,6 +325,9 @@ int clocksource_register(struct clocksource *c) | |||
325 | unsigned long flags; | 325 | unsigned long flags; |
326 | int ret; | 326 | int ret; |
327 | 327 | ||
328 | /* save mult_orig on registration */ | ||
329 | c->mult_orig = c->mult; | ||
330 | |||
328 | spin_lock_irqsave(&clocksource_lock, flags); | 331 | spin_lock_irqsave(&clocksource_lock, flags); |
329 | ret = clocksource_enqueue(c); | 332 | ret = clocksource_enqueue(c); |
330 | if (!ret) | 333 | if (!ret) |
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 4c256fdb8875..1ca99557e929 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
@@ -61,6 +61,7 @@ struct clocksource clocksource_jiffies = { | |||
61 | .read = jiffies_read, | 61 | .read = jiffies_read, |
62 | .mask = 0xffffffff, /*32bits*/ | 62 | .mask = 0xffffffff, /*32bits*/ |
63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ | 63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ |
64 | .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT, | ||
64 | .shift = JIFFIES_SHIFT, | 65 | .shift = JIFFIES_SHIFT, |
65 | }; | 66 | }; |
66 | 67 | ||
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 1ad46f3df6e7..1a20715bfd6e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -10,13 +10,13 @@ | |||
10 | 10 | ||
11 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
12 | #include <linux/time.h> | 12 | #include <linux/time.h> |
13 | #include <linux/timer.h> | ||
14 | #include <linux/timex.h> | 13 | #include <linux/timex.h> |
15 | #include <linux/jiffies.h> | 14 | #include <linux/jiffies.h> |
16 | #include <linux/hrtimer.h> | 15 | #include <linux/hrtimer.h> |
17 | #include <linux/capability.h> | 16 | #include <linux/capability.h> |
18 | #include <linux/math64.h> | 17 | #include <linux/math64.h> |
19 | #include <linux/clocksource.h> | 18 | #include <linux/clocksource.h> |
19 | #include <linux/workqueue.h> | ||
20 | #include <asm/timex.h> | 20 | #include <asm/timex.h> |
21 | 21 | ||
22 | /* | 22 | /* |
@@ -218,11 +218,11 @@ void second_overflow(void) | |||
218 | /* Disable the cmos update - used by virtualization and embedded */ | 218 | /* Disable the cmos update - used by virtualization and embedded */ |
219 | int no_sync_cmos_clock __read_mostly; | 219 | int no_sync_cmos_clock __read_mostly; |
220 | 220 | ||
221 | static void sync_cmos_clock(unsigned long dummy); | 221 | static void sync_cmos_clock(struct work_struct *work); |
222 | 222 | ||
223 | static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); | 223 | static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); |
224 | 224 | ||
225 | static void sync_cmos_clock(unsigned long dummy) | 225 | static void sync_cmos_clock(struct work_struct *work) |
226 | { | 226 | { |
227 | struct timespec now, next; | 227 | struct timespec now, next; |
228 | int fail = 1; | 228 | int fail = 1; |
@@ -258,13 +258,13 @@ static void sync_cmos_clock(unsigned long dummy) | |||
258 | next.tv_sec++; | 258 | next.tv_sec++; |
259 | next.tv_nsec -= NSEC_PER_SEC; | 259 | next.tv_nsec -= NSEC_PER_SEC; |
260 | } | 260 | } |
261 | mod_timer(&sync_cmos_timer, jiffies + timespec_to_jiffies(&next)); | 261 | schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next)); |
262 | } | 262 | } |
263 | 263 | ||
264 | static void notify_cmos_timer(void) | 264 | static void notify_cmos_timer(void) |
265 | { | 265 | { |
266 | if (!no_sync_cmos_clock) | 266 | if (!no_sync_cmos_clock) |
267 | mod_timer(&sync_cmos_timer, jiffies + 1); | 267 | schedule_delayed_work(&sync_cmos_work, 0); |
268 | } | 268 | } |
269 | 269 | ||
270 | #else | 270 | #else |
@@ -277,38 +277,50 @@ static inline void notify_cmos_timer(void) { } | |||
277 | int do_adjtimex(struct timex *txc) | 277 | int do_adjtimex(struct timex *txc) |
278 | { | 278 | { |
279 | struct timespec ts; | 279 | struct timespec ts; |
280 | long save_adjust, sec; | ||
281 | int result; | 280 | int result; |
282 | 281 | ||
283 | /* In order to modify anything, you gotta be super-user! */ | 282 | /* Validate the data before disabling interrupts */ |
284 | if (txc->modes && !capable(CAP_SYS_TIME)) | 283 | if (txc->modes & ADJ_ADJTIME) { |
285 | return -EPERM; | ||
286 | |||
287 | /* Now we validate the data before disabling interrupts */ | ||
288 | |||
289 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) { | ||
290 | /* singleshot must not be used with any other mode bits */ | 284 | /* singleshot must not be used with any other mode bits */ |
291 | if (txc->modes & ~ADJ_OFFSET_SS_READ) | 285 | if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) |
292 | return -EINVAL; | 286 | return -EINVAL; |
287 | if (!(txc->modes & ADJ_OFFSET_READONLY) && | ||
288 | !capable(CAP_SYS_TIME)) | ||
289 | return -EPERM; | ||
290 | } else { | ||
291 | /* In order to modify anything, you gotta be super-user! */ | ||
292 | if (txc->modes && !capable(CAP_SYS_TIME)) | ||
293 | return -EPERM; | ||
294 | |||
295 | /* if the quartz is off by more than 10% something is VERY wrong! */ | ||
296 | if (txc->modes & ADJ_TICK && | ||
297 | (txc->tick < 900000/USER_HZ || | ||
298 | txc->tick > 1100000/USER_HZ)) | ||
299 | return -EINVAL; | ||
300 | |||
301 | if (txc->modes & ADJ_STATUS && time_state != TIME_OK) | ||
302 | hrtimer_cancel(&leap_timer); | ||
293 | } | 303 | } |
294 | 304 | ||
295 | /* if the quartz is off by more than 10% something is VERY wrong ! */ | ||
296 | if (txc->modes & ADJ_TICK) | ||
297 | if (txc->tick < 900000/USER_HZ || | ||
298 | txc->tick > 1100000/USER_HZ) | ||
299 | return -EINVAL; | ||
300 | |||
301 | if (time_state != TIME_OK && txc->modes & ADJ_STATUS) | ||
302 | hrtimer_cancel(&leap_timer); | ||
303 | getnstimeofday(&ts); | 305 | getnstimeofday(&ts); |
304 | 306 | ||
305 | write_seqlock_irq(&xtime_lock); | 307 | write_seqlock_irq(&xtime_lock); |
306 | 308 | ||
307 | /* Save for later - semantics of adjtime is to return old value */ | ||
308 | save_adjust = time_adjust; | ||
309 | |||
310 | /* If there are input parameters, then process them */ | 309 | /* If there are input parameters, then process them */ |
310 | if (txc->modes & ADJ_ADJTIME) { | ||
311 | long save_adjust = time_adjust; | ||
312 | |||
313 | if (!(txc->modes & ADJ_OFFSET_READONLY)) { | ||
314 | /* adjtime() is independent from ntp_adjtime() */ | ||
315 | time_adjust = txc->offset; | ||
316 | ntp_update_frequency(); | ||
317 | } | ||
318 | txc->offset = save_adjust; | ||
319 | goto adj_done; | ||
320 | } | ||
311 | if (txc->modes) { | 321 | if (txc->modes) { |
322 | long sec; | ||
323 | |||
312 | if (txc->modes & ADJ_STATUS) { | 324 | if (txc->modes & ADJ_STATUS) { |
313 | if ((time_status & STA_PLL) && | 325 | if ((time_status & STA_PLL) && |
314 | !(txc->status & STA_PLL)) { | 326 | !(txc->status & STA_PLL)) { |
@@ -375,13 +387,8 @@ int do_adjtimex(struct timex *txc) | |||
375 | if (txc->modes & ADJ_TAI && txc->constant > 0) | 387 | if (txc->modes & ADJ_TAI && txc->constant > 0) |
376 | time_tai = txc->constant; | 388 | time_tai = txc->constant; |
377 | 389 | ||
378 | if (txc->modes & ADJ_OFFSET) { | 390 | if (txc->modes & ADJ_OFFSET) |
379 | if (txc->modes == ADJ_OFFSET_SINGLESHOT) | 391 | ntp_update_offset(txc->offset); |
380 | /* adjtime() is independent from ntp_adjtime() */ | ||
381 | time_adjust = txc->offset; | ||
382 | else | ||
383 | ntp_update_offset(txc->offset); | ||
384 | } | ||
385 | if (txc->modes & ADJ_TICK) | 392 | if (txc->modes & ADJ_TICK) |
386 | tick_usec = txc->tick; | 393 | tick_usec = txc->tick; |
387 | 394 | ||
@@ -389,22 +396,18 @@ int do_adjtimex(struct timex *txc) | |||
389 | ntp_update_frequency(); | 396 | ntp_update_frequency(); |
390 | } | 397 | } |
391 | 398 | ||
399 | txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, | ||
400 | NTP_SCALE_SHIFT); | ||
401 | if (!(time_status & STA_NANO)) | ||
402 | txc->offset /= NSEC_PER_USEC; | ||
403 | |||
404 | adj_done: | ||
392 | result = time_state; /* mostly `TIME_OK' */ | 405 | result = time_state; /* mostly `TIME_OK' */ |
393 | if (time_status & (STA_UNSYNC|STA_CLOCKERR)) | 406 | if (time_status & (STA_UNSYNC|STA_CLOCKERR)) |
394 | result = TIME_ERROR; | 407 | result = TIME_ERROR; |
395 | 408 | ||
396 | if ((txc->modes == ADJ_OFFSET_SINGLESHOT) || | 409 | txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * |
397 | (txc->modes == ADJ_OFFSET_SS_READ)) | 410 | (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT); |
398 | txc->offset = save_adjust; | ||
399 | else { | ||
400 | txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, | ||
401 | NTP_SCALE_SHIFT); | ||
402 | if (!(time_status & STA_NANO)) | ||
403 | txc->offset /= NSEC_PER_USEC; | ||
404 | } | ||
405 | txc->freq = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) * | ||
406 | (s64)PPM_SCALE_INV, | ||
407 | NTP_SCALE_SHIFT); | ||
408 | txc->maxerror = time_maxerror; | 411 | txc->maxerror = time_maxerror; |
409 | txc->esterror = time_esterror; | 412 | txc->esterror = time_esterror; |
410 | txc->status = time_status; | 413 | txc->status = time_status; |
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index cb01cd8f919b..f98a1b7b16e9 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
@@ -384,6 +384,19 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc) | |||
384 | } | 384 | } |
385 | 385 | ||
386 | /* | 386 | /* |
387 | * Called from irq_enter() when idle was interrupted to reenable the | ||
388 | * per cpu device. | ||
389 | */ | ||
390 | void tick_check_oneshot_broadcast(int cpu) | ||
391 | { | ||
392 | if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) { | ||
393 | struct tick_device *td = &per_cpu(tick_cpu_device, cpu); | ||
394 | |||
395 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); | ||
396 | } | ||
397 | } | ||
398 | |||
399 | /* | ||
387 | * Handle oneshot mode broadcasting | 400 | * Handle oneshot mode broadcasting |
388 | */ | 401 | */ |
389 | static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) | 402 | static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) |
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 469248782c23..b1c05bf75ee0 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h | |||
@@ -36,6 +36,7 @@ extern void tick_broadcast_switch_to_oneshot(void); | |||
36 | extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); | 36 | extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); |
37 | extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); | 37 | extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); |
38 | extern int tick_broadcast_oneshot_active(void); | 38 | extern int tick_broadcast_oneshot_active(void); |
39 | extern void tick_check_oneshot_broadcast(int cpu); | ||
39 | # else /* BROADCAST */ | 40 | # else /* BROADCAST */ |
40 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | 41 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) |
41 | { | 42 | { |
@@ -45,6 +46,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { } | |||
45 | static inline void tick_broadcast_switch_to_oneshot(void) { } | 46 | static inline void tick_broadcast_switch_to_oneshot(void) { } |
46 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } | 47 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } |
47 | static inline int tick_broadcast_oneshot_active(void) { return 0; } | 48 | static inline int tick_broadcast_oneshot_active(void) { return 0; } |
49 | static inline void tick_check_oneshot_broadcast(int cpu) { } | ||
48 | # endif /* !BROADCAST */ | 50 | # endif /* !BROADCAST */ |
49 | 51 | ||
50 | #else /* !ONESHOT */ | 52 | #else /* !ONESHOT */ |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b711ffcb106c..0581c11fe6c6 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -155,7 +155,7 @@ void tick_nohz_update_jiffies(void) | |||
155 | touch_softlockup_watchdog(); | 155 | touch_softlockup_watchdog(); |
156 | } | 156 | } |
157 | 157 | ||
158 | void tick_nohz_stop_idle(int cpu) | 158 | static void tick_nohz_stop_idle(int cpu) |
159 | { | 159 | { |
160 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 160 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
161 | 161 | ||
@@ -377,6 +377,32 @@ ktime_t tick_nohz_get_sleep_length(void) | |||
377 | return ts->sleep_length; | 377 | return ts->sleep_length; |
378 | } | 378 | } |
379 | 379 | ||
380 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | ||
381 | { | ||
382 | hrtimer_cancel(&ts->sched_timer); | ||
383 | ts->sched_timer.expires = ts->idle_tick; | ||
384 | |||
385 | while (1) { | ||
386 | /* Forward the time to expire in the future */ | ||
387 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
388 | |||
389 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | ||
390 | hrtimer_start(&ts->sched_timer, | ||
391 | ts->sched_timer.expires, | ||
392 | HRTIMER_MODE_ABS); | ||
393 | /* Check, if the timer was already in the past */ | ||
394 | if (hrtimer_active(&ts->sched_timer)) | ||
395 | break; | ||
396 | } else { | ||
397 | if (!tick_program_event(ts->sched_timer.expires, 0)) | ||
398 | break; | ||
399 | } | ||
400 | /* Update jiffies and reread time */ | ||
401 | tick_do_update_jiffies64(now); | ||
402 | now = ktime_get(); | ||
403 | } | ||
404 | } | ||
405 | |||
380 | /** | 406 | /** |
381 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task | 407 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task |
382 | * | 408 | * |
@@ -430,28 +456,7 @@ void tick_nohz_restart_sched_tick(void) | |||
430 | */ | 456 | */ |
431 | ts->tick_stopped = 0; | 457 | ts->tick_stopped = 0; |
432 | ts->idle_exittime = now; | 458 | ts->idle_exittime = now; |
433 | hrtimer_cancel(&ts->sched_timer); | 459 | tick_nohz_restart(ts, now); |
434 | ts->sched_timer.expires = ts->idle_tick; | ||
435 | |||
436 | while (1) { | ||
437 | /* Forward the time to expire in the future */ | ||
438 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
439 | |||
440 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | ||
441 | hrtimer_start(&ts->sched_timer, | ||
442 | ts->sched_timer.expires, | ||
443 | HRTIMER_MODE_ABS); | ||
444 | /* Check, if the timer was already in the past */ | ||
445 | if (hrtimer_active(&ts->sched_timer)) | ||
446 | break; | ||
447 | } else { | ||
448 | if (!tick_program_event(ts->sched_timer.expires, 0)) | ||
449 | break; | ||
450 | } | ||
451 | /* Update jiffies and reread time */ | ||
452 | tick_do_update_jiffies64(now); | ||
453 | now = ktime_get(); | ||
454 | } | ||
455 | local_irq_enable(); | 460 | local_irq_enable(); |
456 | } | 461 | } |
457 | 462 | ||
@@ -503,10 +508,6 @@ static void tick_nohz_handler(struct clock_event_device *dev) | |||
503 | update_process_times(user_mode(regs)); | 508 | update_process_times(user_mode(regs)); |
504 | profile_tick(CPU_PROFILING); | 509 | profile_tick(CPU_PROFILING); |
505 | 510 | ||
506 | /* Do not restart, when we are in the idle loop */ | ||
507 | if (ts->tick_stopped) | ||
508 | return; | ||
509 | |||
510 | while (tick_nohz_reprogram(ts, now)) { | 511 | while (tick_nohz_reprogram(ts, now)) { |
511 | now = ktime_get(); | 512 | now = ktime_get(); |
512 | tick_do_update_jiffies64(now); | 513 | tick_do_update_jiffies64(now); |
@@ -552,6 +553,27 @@ static void tick_nohz_switch_to_nohz(void) | |||
552 | smp_processor_id()); | 553 | smp_processor_id()); |
553 | } | 554 | } |
554 | 555 | ||
556 | /* | ||
557 | * When NOHZ is enabled and the tick is stopped, we need to kick the | ||
558 | * tick timer from irq_enter() so that the jiffies update is kept | ||
559 | * alive during long running softirqs. That's ugly as hell, but | ||
560 | * correctness is key even if we need to fix the offending softirq in | ||
561 | * the first place. | ||
562 | * | ||
563 | * Note, this is different to tick_nohz_restart. We just kick the | ||
564 | * timer and do not touch the other magic bits which need to be done | ||
565 | * when idle is left. | ||
566 | */ | ||
567 | static void tick_nohz_kick_tick(int cpu) | ||
568 | { | ||
569 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
570 | |||
571 | if (!ts->tick_stopped) | ||
572 | return; | ||
573 | |||
574 | tick_nohz_restart(ts, ktime_get()); | ||
575 | } | ||
576 | |||
555 | #else | 577 | #else |
556 | 578 | ||
557 | static inline void tick_nohz_switch_to_nohz(void) { } | 579 | static inline void tick_nohz_switch_to_nohz(void) { } |
@@ -559,6 +581,19 @@ static inline void tick_nohz_switch_to_nohz(void) { } | |||
559 | #endif /* NO_HZ */ | 581 | #endif /* NO_HZ */ |
560 | 582 | ||
561 | /* | 583 | /* |
584 | * Called from irq_enter to notify about the possible interruption of idle() | ||
585 | */ | ||
586 | void tick_check_idle(int cpu) | ||
587 | { | ||
588 | tick_check_oneshot_broadcast(cpu); | ||
589 | #ifdef CONFIG_NO_HZ | ||
590 | tick_nohz_stop_idle(cpu); | ||
591 | tick_nohz_update_jiffies(); | ||
592 | tick_nohz_kick_tick(cpu); | ||
593 | #endif | ||
594 | } | ||
595 | |||
596 | /* | ||
562 | * High resolution timer specific code | 597 | * High resolution timer specific code |
563 | */ | 598 | */ |
564 | #ifdef CONFIG_HIGH_RES_TIMERS | 599 | #ifdef CONFIG_HIGH_RES_TIMERS |
@@ -611,10 +646,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | |||
611 | profile_tick(CPU_PROFILING); | 646 | profile_tick(CPU_PROFILING); |
612 | } | 647 | } |
613 | 648 | ||
614 | /* Do not restart, when we are in the idle loop */ | ||
615 | if (ts->tick_stopped) | ||
616 | return HRTIMER_NORESTART; | ||
617 | |||
618 | hrtimer_forward(timer, now, tick_period); | 649 | hrtimer_forward(timer, now, tick_period); |
619 | 650 | ||
620 | return HRTIMER_RESTART; | 651 | return HRTIMER_RESTART; |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e91c29f961c9..e7acfb482a68 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -58,27 +58,26 @@ struct clocksource *clock; | |||
58 | 58 | ||
59 | #ifdef CONFIG_GENERIC_TIME | 59 | #ifdef CONFIG_GENERIC_TIME |
60 | /** | 60 | /** |
61 | * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook | 61 | * clocksource_forward_now - update clock to the current time |
62 | * | 62 | * |
63 | * private function, must hold xtime_lock lock when being | 63 | * Forward the current clock to update its state since the last call to |
64 | * called. Returns the number of nanoseconds since the | 64 | * update_wall_time(). This is useful before significant clock changes, |
65 | * last call to update_wall_time() (adjusted by NTP scaling) | 65 | * as it avoids having to deal with this time offset explicitly. |
66 | */ | 66 | */ |
67 | static inline s64 __get_nsec_offset(void) | 67 | static void clocksource_forward_now(void) |
68 | { | 68 | { |
69 | cycle_t cycle_now, cycle_delta; | 69 | cycle_t cycle_now, cycle_delta; |
70 | s64 ns_offset; | 70 | s64 nsec; |
71 | 71 | ||
72 | /* read clocksource: */ | ||
73 | cycle_now = clocksource_read(clock); | 72 | cycle_now = clocksource_read(clock); |
74 | |||
75 | /* calculate the delta since the last update_wall_time: */ | ||
76 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | 73 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; |
74 | clock->cycle_last = cycle_now; | ||
77 | 75 | ||
78 | /* convert to nanoseconds: */ | 76 | nsec = cyc2ns(clock, cycle_delta); |
79 | ns_offset = cyc2ns(clock, cycle_delta); | 77 | timespec_add_ns(&xtime, nsec); |
80 | 78 | ||
81 | return ns_offset; | 79 | nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; |
80 | clock->raw_time.tv_nsec += nsec; | ||
82 | } | 81 | } |
83 | 82 | ||
84 | /** | 83 | /** |
@@ -89,6 +88,7 @@ static inline s64 __get_nsec_offset(void) | |||
89 | */ | 88 | */ |
90 | void getnstimeofday(struct timespec *ts) | 89 | void getnstimeofday(struct timespec *ts) |
91 | { | 90 | { |
91 | cycle_t cycle_now, cycle_delta; | ||
92 | unsigned long seq; | 92 | unsigned long seq; |
93 | s64 nsecs; | 93 | s64 nsecs; |
94 | 94 | ||
@@ -96,7 +96,15 @@ void getnstimeofday(struct timespec *ts) | |||
96 | seq = read_seqbegin(&xtime_lock); | 96 | seq = read_seqbegin(&xtime_lock); |
97 | 97 | ||
98 | *ts = xtime; | 98 | *ts = xtime; |
99 | nsecs = __get_nsec_offset(); | 99 | |
100 | /* read clocksource: */ | ||
101 | cycle_now = clocksource_read(clock); | ||
102 | |||
103 | /* calculate the delta since the last update_wall_time: */ | ||
104 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | ||
105 | |||
106 | /* convert to nanoseconds: */ | ||
107 | nsecs = cyc2ns(clock, cycle_delta); | ||
100 | 108 | ||
101 | } while (read_seqretry(&xtime_lock, seq)); | 109 | } while (read_seqretry(&xtime_lock, seq)); |
102 | 110 | ||
@@ -129,22 +137,22 @@ EXPORT_SYMBOL(do_gettimeofday); | |||
129 | */ | 137 | */ |
130 | int do_settimeofday(struct timespec *tv) | 138 | int do_settimeofday(struct timespec *tv) |
131 | { | 139 | { |
140 | struct timespec ts_delta; | ||
132 | unsigned long flags; | 141 | unsigned long flags; |
133 | time_t wtm_sec, sec = tv->tv_sec; | ||
134 | long wtm_nsec, nsec = tv->tv_nsec; | ||
135 | 142 | ||
136 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | 143 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) |
137 | return -EINVAL; | 144 | return -EINVAL; |
138 | 145 | ||
139 | write_seqlock_irqsave(&xtime_lock, flags); | 146 | write_seqlock_irqsave(&xtime_lock, flags); |
140 | 147 | ||
141 | nsec -= __get_nsec_offset(); | 148 | clocksource_forward_now(); |
149 | |||
150 | ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec; | ||
151 | ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec; | ||
152 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts_delta); | ||
142 | 153 | ||
143 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | 154 | xtime = *tv; |
144 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
145 | 155 | ||
146 | set_normalized_timespec(&xtime, sec, nsec); | ||
147 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
148 | update_xtime_cache(0); | 156 | update_xtime_cache(0); |
149 | 157 | ||
150 | clock->error = 0; | 158 | clock->error = 0; |
@@ -170,22 +178,19 @@ EXPORT_SYMBOL(do_settimeofday); | |||
170 | static void change_clocksource(void) | 178 | static void change_clocksource(void) |
171 | { | 179 | { |
172 | struct clocksource *new; | 180 | struct clocksource *new; |
173 | cycle_t now; | ||
174 | u64 nsec; | ||
175 | 181 | ||
176 | new = clocksource_get_next(); | 182 | new = clocksource_get_next(); |
177 | 183 | ||
178 | if (clock == new) | 184 | if (clock == new) |
179 | return; | 185 | return; |
180 | 186 | ||
181 | new->cycle_last = 0; | 187 | clocksource_forward_now(); |
182 | now = clocksource_read(new); | ||
183 | nsec = __get_nsec_offset(); | ||
184 | timespec_add_ns(&xtime, nsec); | ||
185 | 188 | ||
186 | clock = new; | 189 | new->raw_time = clock->raw_time; |
187 | clock->cycle_last = now; | ||
188 | 190 | ||
191 | clock = new; | ||
192 | clock->cycle_last = 0; | ||
193 | clock->cycle_last = clocksource_read(new); | ||
189 | clock->error = 0; | 194 | clock->error = 0; |
190 | clock->xtime_nsec = 0; | 195 | clock->xtime_nsec = 0; |
191 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); | 196 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
@@ -200,11 +205,44 @@ static void change_clocksource(void) | |||
200 | */ | 205 | */ |
201 | } | 206 | } |
202 | #else | 207 | #else |
208 | static inline void clocksource_forward_now(void) { } | ||
203 | static inline void change_clocksource(void) { } | 209 | static inline void change_clocksource(void) { } |
204 | static inline s64 __get_nsec_offset(void) { return 0; } | ||
205 | #endif | 210 | #endif |
206 | 211 | ||
207 | /** | 212 | /** |
213 | * getrawmonotonic - Returns the raw monotonic time in a timespec | ||
214 | * @ts: pointer to the timespec to be set | ||
215 | * | ||
216 | * Returns the raw monotonic time (completely un-modified by ntp) | ||
217 | */ | ||
218 | void getrawmonotonic(struct timespec *ts) | ||
219 | { | ||
220 | unsigned long seq; | ||
221 | s64 nsecs; | ||
222 | cycle_t cycle_now, cycle_delta; | ||
223 | |||
224 | do { | ||
225 | seq = read_seqbegin(&xtime_lock); | ||
226 | |||
227 | /* read clocksource: */ | ||
228 | cycle_now = clocksource_read(clock); | ||
229 | |||
230 | /* calculate the delta since the last update_wall_time: */ | ||
231 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | ||
232 | |||
233 | /* convert to nanoseconds: */ | ||
234 | nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; | ||
235 | |||
236 | *ts = clock->raw_time; | ||
237 | |||
238 | } while (read_seqretry(&xtime_lock, seq)); | ||
239 | |||
240 | timespec_add_ns(ts, nsecs); | ||
241 | } | ||
242 | EXPORT_SYMBOL(getrawmonotonic); | ||
243 | |||
244 | |||
245 | /** | ||
208 | * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres | 246 | * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres |
209 | */ | 247 | */ |
210 | int timekeeping_valid_for_hres(void) | 248 | int timekeeping_valid_for_hres(void) |
@@ -265,8 +303,6 @@ void __init timekeeping_init(void) | |||
265 | static int timekeeping_suspended; | 303 | static int timekeeping_suspended; |
266 | /* time in seconds when suspend began */ | 304 | /* time in seconds when suspend began */ |
267 | static unsigned long timekeeping_suspend_time; | 305 | static unsigned long timekeeping_suspend_time; |
268 | /* xtime offset when we went into suspend */ | ||
269 | static s64 timekeeping_suspend_nsecs; | ||
270 | 306 | ||
271 | /** | 307 | /** |
272 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 308 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
@@ -292,8 +328,6 @@ static int timekeeping_resume(struct sys_device *dev) | |||
292 | wall_to_monotonic.tv_sec -= sleep_length; | 328 | wall_to_monotonic.tv_sec -= sleep_length; |
293 | total_sleep_time += sleep_length; | 329 | total_sleep_time += sleep_length; |
294 | } | 330 | } |
295 | /* Make sure that we have the correct xtime reference */ | ||
296 | timespec_add_ns(&xtime, timekeeping_suspend_nsecs); | ||
297 | update_xtime_cache(0); | 331 | update_xtime_cache(0); |
298 | /* re-base the last cycle value */ | 332 | /* re-base the last cycle value */ |
299 | clock->cycle_last = 0; | 333 | clock->cycle_last = 0; |
@@ -319,8 +353,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | |||
319 | timekeeping_suspend_time = read_persistent_clock(); | 353 | timekeeping_suspend_time = read_persistent_clock(); |
320 | 354 | ||
321 | write_seqlock_irqsave(&xtime_lock, flags); | 355 | write_seqlock_irqsave(&xtime_lock, flags); |
322 | /* Get the current xtime offset */ | 356 | clocksource_forward_now(); |
323 | timekeeping_suspend_nsecs = __get_nsec_offset(); | ||
324 | timekeeping_suspended = 1; | 357 | timekeeping_suspended = 1; |
325 | write_sequnlock_irqrestore(&xtime_lock, flags); | 358 | write_sequnlock_irqrestore(&xtime_lock, flags); |
326 | 359 | ||
@@ -454,23 +487,29 @@ void update_wall_time(void) | |||
454 | #else | 487 | #else |
455 | offset = clock->cycle_interval; | 488 | offset = clock->cycle_interval; |
456 | #endif | 489 | #endif |
457 | clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; | 490 | clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift; |
458 | 491 | ||
459 | /* normally this loop will run just once, however in the | 492 | /* normally this loop will run just once, however in the |
460 | * case of lost or late ticks, it will accumulate correctly. | 493 | * case of lost or late ticks, it will accumulate correctly. |
461 | */ | 494 | */ |
462 | while (offset >= clock->cycle_interval) { | 495 | while (offset >= clock->cycle_interval) { |
463 | /* accumulate one interval */ | 496 | /* accumulate one interval */ |
464 | clock->xtime_nsec += clock->xtime_interval; | ||
465 | clock->cycle_last += clock->cycle_interval; | ||
466 | offset -= clock->cycle_interval; | 497 | offset -= clock->cycle_interval; |
498 | clock->cycle_last += clock->cycle_interval; | ||
467 | 499 | ||
500 | clock->xtime_nsec += clock->xtime_interval; | ||
468 | if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { | 501 | if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { |
469 | clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; | 502 | clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; |
470 | xtime.tv_sec++; | 503 | xtime.tv_sec++; |
471 | second_overflow(); | 504 | second_overflow(); |
472 | } | 505 | } |
473 | 506 | ||
507 | clock->raw_time.tv_nsec += clock->raw_interval; | ||
508 | if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) { | ||
509 | clock->raw_time.tv_nsec -= NSEC_PER_SEC; | ||
510 | clock->raw_time.tv_sec++; | ||
511 | } | ||
512 | |||
474 | /* accumulate error between NTP and clock interval */ | 513 | /* accumulate error between NTP and clock interval */ |
475 | clock->error += tick_length; | 514 | clock->error += tick_length; |
476 | clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); | 515 | clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); |
@@ -479,9 +518,12 @@ void update_wall_time(void) | |||
479 | /* correct the clock when NTP error is too big */ | 518 | /* correct the clock when NTP error is too big */ |
480 | clocksource_adjust(offset); | 519 | clocksource_adjust(offset); |
481 | 520 | ||
482 | /* store full nanoseconds into xtime */ | 521 | /* store full nanoseconds into xtime after rounding it up and |
483 | xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; | 522 | * add the remainder to the error difference. |
523 | */ | ||
524 | xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1; | ||
484 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; | 525 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; |
526 | clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift); | ||
485 | 527 | ||
486 | update_xtime_cache(cyc2ns(clock, offset)); | 528 | update_xtime_cache(cyc2ns(clock, offset)); |
487 | 529 | ||
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index a40e20fd0001..f6426911e35a 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -47,13 +47,14 @@ static void print_name_offset(struct seq_file *m, void *sym) | |||
47 | } | 47 | } |
48 | 48 | ||
49 | static void | 49 | static void |
50 | print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) | 50 | print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer, |
51 | int idx, u64 now) | ||
51 | { | 52 | { |
52 | #ifdef CONFIG_TIMER_STATS | 53 | #ifdef CONFIG_TIMER_STATS |
53 | char tmp[TASK_COMM_LEN + 1]; | 54 | char tmp[TASK_COMM_LEN + 1]; |
54 | #endif | 55 | #endif |
55 | SEQ_printf(m, " #%d: ", idx); | 56 | SEQ_printf(m, " #%d: ", idx); |
56 | print_name_offset(m, timer); | 57 | print_name_offset(m, taddr); |
57 | SEQ_printf(m, ", "); | 58 | SEQ_printf(m, ", "); |
58 | print_name_offset(m, timer->function); | 59 | print_name_offset(m, timer->function); |
59 | SEQ_printf(m, ", S:%02lx", timer->state); | 60 | SEQ_printf(m, ", S:%02lx", timer->state); |
@@ -99,7 +100,7 @@ next_one: | |||
99 | tmp = *timer; | 100 | tmp = *timer; |
100 | spin_unlock_irqrestore(&base->cpu_base->lock, flags); | 101 | spin_unlock_irqrestore(&base->cpu_base->lock, flags); |
101 | 102 | ||
102 | print_timer(m, &tmp, i, now); | 103 | print_timer(m, timer, &tmp, i, now); |
103 | next++; | 104 | next++; |
104 | goto next_one; | 105 | goto next_one; |
105 | } | 106 | } |
@@ -109,6 +110,7 @@ next_one: | |||
109 | static void | 110 | static void |
110 | print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) | 111 | print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) |
111 | { | 112 | { |
113 | SEQ_printf(m, " .base: %p\n", base); | ||
112 | SEQ_printf(m, " .index: %d\n", | 114 | SEQ_printf(m, " .index: %d\n", |
113 | base->index); | 115 | base->index); |
114 | SEQ_printf(m, " .resolution: %Lu nsecs\n", | 116 | SEQ_printf(m, " .resolution: %Lu nsecs\n", |
@@ -183,12 +185,16 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) | |||
183 | 185 | ||
184 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | 186 | #ifdef CONFIG_GENERIC_CLOCKEVENTS |
185 | static void | 187 | static void |
186 | print_tickdevice(struct seq_file *m, struct tick_device *td) | 188 | print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) |
187 | { | 189 | { |
188 | struct clock_event_device *dev = td->evtdev; | 190 | struct clock_event_device *dev = td->evtdev; |
189 | 191 | ||
190 | SEQ_printf(m, "\n"); | 192 | SEQ_printf(m, "\n"); |
191 | SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); | 193 | SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); |
194 | if (cpu < 0) | ||
195 | SEQ_printf(m, "Broadcast device\n"); | ||
196 | else | ||
197 | SEQ_printf(m, "Per CPU device: %d\n", cpu); | ||
192 | 198 | ||
193 | SEQ_printf(m, "Clock Event Device: "); | 199 | SEQ_printf(m, "Clock Event Device: "); |
194 | if (!dev) { | 200 | if (!dev) { |
@@ -222,7 +228,7 @@ static void timer_list_show_tickdevices(struct seq_file *m) | |||
222 | int cpu; | 228 | int cpu; |
223 | 229 | ||
224 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | 230 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST |
225 | print_tickdevice(m, tick_get_broadcast_device()); | 231 | print_tickdevice(m, tick_get_broadcast_device(), -1); |
226 | SEQ_printf(m, "tick_broadcast_mask: %08lx\n", | 232 | SEQ_printf(m, "tick_broadcast_mask: %08lx\n", |
227 | tick_get_broadcast_mask()->bits[0]); | 233 | tick_get_broadcast_mask()->bits[0]); |
228 | #ifdef CONFIG_TICK_ONESHOT | 234 | #ifdef CONFIG_TICK_ONESHOT |
@@ -232,7 +238,7 @@ static void timer_list_show_tickdevices(struct seq_file *m) | |||
232 | SEQ_printf(m, "\n"); | 238 | SEQ_printf(m, "\n"); |
233 | #endif | 239 | #endif |
234 | for_each_online_cpu(cpu) | 240 | for_each_online_cpu(cpu) |
235 | print_tickdevice(m, tick_get_device(cpu)); | 241 | print_tickdevice(m, tick_get_device(cpu), cpu); |
236 | SEQ_printf(m, "\n"); | 242 | SEQ_printf(m, "\n"); |
237 | } | 243 | } |
238 | #else | 244 | #else |
@@ -244,7 +250,7 @@ static int timer_list_show(struct seq_file *m, void *v) | |||
244 | u64 now = ktime_to_ns(ktime_get()); | 250 | u64 now = ktime_to_ns(ktime_get()); |
245 | int cpu; | 251 | int cpu; |
246 | 252 | ||
247 | SEQ_printf(m, "Timer List Version: v0.3\n"); | 253 | SEQ_printf(m, "Timer List Version: v0.4\n"); |
248 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); | 254 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); |
249 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); | 255 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); |
250 | 256 | ||
diff --git a/kernel/timer.c b/kernel/timer.c index 510fe69351ca..56becf373c58 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -1436,9 +1436,11 @@ static void __cpuinit migrate_timers(int cpu) | |||
1436 | BUG_ON(cpu_online(cpu)); | 1436 | BUG_ON(cpu_online(cpu)); |
1437 | old_base = per_cpu(tvec_bases, cpu); | 1437 | old_base = per_cpu(tvec_bases, cpu); |
1438 | new_base = get_cpu_var(tvec_bases); | 1438 | new_base = get_cpu_var(tvec_bases); |
1439 | 1439 | /* | |
1440 | local_irq_disable(); | 1440 | * The caller is globally serialized and nobody else |
1441 | spin_lock(&new_base->lock); | 1441 | * takes two locks at once, deadlock is not possible. |
1442 | */ | ||
1443 | spin_lock_irq(&new_base->lock); | ||
1442 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | 1444 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); |
1443 | 1445 | ||
1444 | BUG_ON(old_base->running_timer); | 1446 | BUG_ON(old_base->running_timer); |
@@ -1453,8 +1455,7 @@ static void __cpuinit migrate_timers(int cpu) | |||
1453 | } | 1455 | } |
1454 | 1456 | ||
1455 | spin_unlock(&old_base->lock); | 1457 | spin_unlock(&old_base->lock); |
1456 | spin_unlock(&new_base->lock); | 1458 | spin_unlock_irq(&new_base->lock); |
1457 | local_irq_enable(); | ||
1458 | put_cpu_var(tvec_bases); | 1459 | put_cpu_var(tvec_bases); |
1459 | } | 1460 | } |
1460 | #endif /* CONFIG_HOTPLUG_CPU */ | 1461 | #endif /* CONFIG_HOTPLUG_CPU */ |