From 0e241ffd306c0896bb9959be7faa4d4cfcb706d9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 24 Jul 2008 16:58:42 -0700 Subject: locking: fix mutex @key parameter kernel-doc notation Fix @key parameter to mutex_init() and one of its callers. Warning(linux-2.6.26-git11//drivers/base/class.c:210): No description found for parameter 'key' Signed-off-by: Randy Dunlap Acked-by: Greg Kroah-Hartman Signed-off-by: Ingo Molnar --- kernel/mutex.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/mutex.c b/kernel/mutex.c index bcdc9ac8ef60..12c779dc65d4 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -34,6 +34,7 @@ /*** * mutex_init - initialize the mutex * @lock: the mutex to be initialized + * @key: the lock_class_key for the class; used by mutex lock debugging * * Initialize the mutex to unlocked state. * -- cgit v1.2.2 From 1a4e564b7db999fbe5d88318c96ac8747699d417 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 29 Jul 2008 22:32:57 -0700 Subject: resource: add resource_size() Avoid one-off errors by introducing a resource_size() function. Signed-off-by: Magnus Damm Cc: Ben Dooks Cc: Jean Delvare Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 74af2d7cb5a1..f5b518eabefe 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -490,7 +490,7 @@ resource_size_t resource_alignment(struct resource *res) { switch (res->flags & (IORESOURCE_SIZEALIGN | IORESOURCE_STARTALIGN)) { case IORESOURCE_SIZEALIGN: - return res->end - res->start + 1; + return resource_size(res); case IORESOURCE_STARTALIGN: return res->start; default: -- cgit v1.2.2 From 5a3eb9f6b7c598529f832b8baa6458ab1cbab2c6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 29 Jul 2008 22:33:18 -0700 Subject: cgroup: fix possible memory leak There's a leak if copy_from_user() returns failure. Signed-off-by: Li Zefan Cc: Paul Menage Cc: Cedric Le Goater Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 657f8f8d93a5..28debe4e1488 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1424,14 +1424,17 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft, if (buffer == NULL) return -ENOMEM; } - if (nbytes && copy_from_user(buffer, userbuf, nbytes)) - return -EFAULT; + if (nbytes && copy_from_user(buffer, userbuf, nbytes)) { + retval = -EFAULT; + goto out; + } buffer[nbytes] = 0; /* nul-terminate */ strstrip(buffer); retval = cft->write_string(cgrp, cft, buffer); if (!retval) retval = nbytes; +out: if (buffer != local_buffer) kfree(buffer); return retval; -- cgit v1.2.2 From 36553434f475a84b653e25e74490ee8df43b86d5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 29 Jul 2008 22:33:19 -0700 Subject: cgroup: remove duplicate code in allocate_cg_link() - just call free_cg_links() in allocate_cg_links() - the list will get initialized in allocate_cg_links(), so don't init it twice Signed-off-by: Li Zefan Cc: Paul Menage Cc: Cedric Le Goater Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 28debe4e1488..249a517591b8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -355,6 +355,17 @@ static struct css_set *find_existing_css_set( return NULL; } +static void free_cg_links(struct list_head *tmp) +{ + struct cg_cgroup_link *link; + struct cg_cgroup_link *saved_link; + + list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) { + list_del(&link->cgrp_link_list); + kfree(link); + } +} + /* * allocate_cg_links() allocates "count" cg_cgroup_link structures * and chains them on tmp through their cgrp_link_list fields. Returns 0 on @@ -363,17 +374,12 @@ static struct css_set *find_existing_css_set( static int allocate_cg_links(int count, struct list_head *tmp) { struct cg_cgroup_link *link; - struct cg_cgroup_link *saved_link; int i; INIT_LIST_HEAD(tmp); for (i = 0; i < count; i++) { link = kmalloc(sizeof(*link), GFP_KERNEL); if (!link) { - list_for_each_entry_safe(link, saved_link, tmp, - cgrp_link_list) { - list_del(&link->cgrp_link_list); - kfree(link); - } + free_cg_links(tmp); return -ENOMEM; } list_add(&link->cgrp_link_list, tmp); @@ -381,17 +387,6 @@ static int allocate_cg_links(int count, struct list_head *tmp) return 0; } -static void free_cg_links(struct list_head *tmp) -{ - struct cg_cgroup_link *link; - struct cg_cgroup_link *saved_link; - - list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) { - list_del(&link->cgrp_link_list); - kfree(link); - } -} - /* * find_css_set() takes an existing cgroup group and a * cgroup object, and returns a css_set object that's @@ -956,7 +951,6 @@ static int cgroup_get_sb(struct file_system_type *fs_type, struct super_block *sb; struct cgroupfs_root *root; struct list_head tmp_cg_links; - INIT_LIST_HEAD(&tmp_cg_links); /* First find the desired set of subsystems */ ret = parse_cgroupfs_options(data, &opts); -- cgit v1.2.2 From 55b6fd0162ace1e0f1b52c8c092565c115127ef6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 29 Jul 2008 22:33:20 -0700 Subject: cgroup: uninline cgroup_has_css_refs() It's not small enough, and has 2 call sites. text data bss dec hex filename 12813 1676 4832 19321 4b79 cgroup.o.orig 12775 1676 4832 19283 4b53 cgroup.o Signed-off-by: Li Zefan Cc: Paul Menage Cc: Cedric Le Goater Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 249a517591b8..13932abde159 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2368,7 +2368,7 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode) return cgroup_create(c_parent, dentry, mode | S_IFDIR); } -static inline int cgroup_has_css_refs(struct cgroup *cgrp) +static int cgroup_has_css_refs(struct cgroup *cgrp) { /* Check the reference count on each subsystem. Since we * already established that there are no tasks in the -- cgit v1.2.2 From 8d1e6266f512b3a94ef6d33528ff385f1aea0392 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 29 Jul 2008 22:33:21 -0700 Subject: cpuset: a bit cleanup for scan_for_empty_cpusets() clean up hierarchy traversal code Signed-off-by: Li Zefan Cc: Paul Menage Cc: Cedric Le Goater Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Paul Jackson Cc: Cliff Wickman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 91cf85b36dd5..3624dc0e95ed 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1833,24 +1833,21 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) */ static void scan_for_empty_cpusets(const struct cpuset *root) { + LIST_HEAD(queue); struct cpuset *cp; /* scans cpusets being updated */ struct cpuset *child; /* scans child cpusets of cp */ - struct list_head queue; struct cgroup *cont; nodemask_t oldmems; - INIT_LIST_HEAD(&queue); - list_add_tail((struct list_head *)&root->stack_list, &queue); while (!list_empty(&queue)) { - cp = container_of(queue.next, struct cpuset, stack_list); + cp = list_first_entry(&queue, struct cpuset, stack_list); list_del(queue.next); list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { child = cgroup_cs(cont); list_add_tail(&child->stack_list, &queue); } - cont = cp->css.cgroup; /* Continue past cpusets with all cpus, mems online */ if (cpus_subset(cp->cpus_allowed, cpu_online_map) && -- cgit v1.2.2 From f5393693e96393131a4a2e2743f883986d508503 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 29 Jul 2008 22:33:22 -0700 Subject: cpuset: speed up sched domain partition All child cpusets contain a subset of the parent's cpus, so we can skip them when partitioning sched domains. This decreases 'csa' greately for cpusets with multi-level hierarchy. Signed-off-by: Lai Jiangshan Signed-off-by: Li Zefan Cc: Paul Menage Cc: Cedric Le Goater Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Reviewed-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3624dc0e95ed..c818c36b0c5f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -486,13 +486,38 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) static void update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) { - if (!dattr) - return; if (dattr->relax_domain_level < c->relax_domain_level) dattr->relax_domain_level = c->relax_domain_level; return; } +static void +update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) +{ + LIST_HEAD(q); + + list_add(&c->stack_list, &q); + while (!list_empty(&q)) { + struct cpuset *cp; + struct cgroup *cont; + struct cpuset *child; + + cp = list_first_entry(&q, struct cpuset, stack_list); + list_del(q.next); + + if (cpus_empty(cp->cpus_allowed)) + continue; + + if (is_sched_load_balance(cp)) + update_domain_attr(dattr, cp); + + list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { + child = cgroup_cs(cont); + list_add_tail(&child->stack_list, &q); + } + } +} + /* * rebuild_sched_domains() * @@ -614,8 +639,16 @@ void rebuild_sched_domains(void) if (cpus_empty(cp->cpus_allowed)) continue; - if (is_sched_load_balance(cp)) + /* + * All child cpusets contain a subset of the parent's cpus, so + * just skip them, and then we call update_domain_attr_tree() + * to calc relax_domain_level of the corresponding sched + * domain. + */ + if (is_sched_load_balance(cp)) { csa[csn++] = cp; + continue; + } list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { child = cgroup_cs(cont); @@ -686,7 +719,7 @@ restart: cpus_or(*dp, *dp, b->cpus_allowed); b->pn = -1; if (dattr) - update_domain_attr(dattr + update_domain_attr_tree(dattr + nslot, b); } } -- cgit v1.2.2 From 93a6557558a13f9ff35213efeca483f353c39dd3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 29 Jul 2008 22:33:23 -0700 Subject: cpuset: fix wrong calculation of relax domain level When multiple cpusets are overlapping in their 'cpus' and hence they form a single sched domain, the largest sched_relax_domain_level among those should be used. But when top_cpuset's sched_load_balance is set, its sched_relax_domain_level is used regardless other sub-cpusets'. This patch fixes it by walking the cpuset hierarchy to find the largest sched_relax_domain_level. Signed-off-by: Lai Jiangshan Signed-off-by: Li Zefan Cc: Paul Menage Cc: Cedric Le Goater Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Reviewed-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c818c36b0c5f..7a82e9033a7f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -616,7 +616,7 @@ void rebuild_sched_domains(void) dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); if (dattr) { *dattr = SD_ATTR_INIT; - update_domain_attr(dattr, &top_cpuset); + update_domain_attr_tree(dattr, &top_cpuset); } *doms = top_cpuset.cpus_allowed; goto rebuild; -- cgit v1.2.2 From aeed682421a5ebfbf46940e30c3d1caf3bc64304 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 29 Jul 2008 22:33:24 -0700 Subject: cpuset: clean up cpuset hierarchy traversal code Use cpuset.stack_list rather than kfifo, so we avoid memory allocation for kfifo. Signed-off-by: Li Zefan Signed-off-by: Lai Jiangshan Cc: Paul Menage Cc: Cedric Le Goater Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7a82e9033a7f..d5ab79cf516d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -54,7 +54,6 @@ #include #include #include -#include #include #include @@ -557,7 +556,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * So the reverse nesting would risk an ABBA deadlock. * * The three key local variables below are: - * q - a kfifo queue of cpuset pointers, used to implement a + * q - a linked-list queue of cpuset pointers, used to implement a * top-down scan of all cpusets. This scan loads a pointer * to each cpuset marked is_sched_load_balance into the * array 'csa'. For our purposes, rebuilding the schedulers @@ -592,7 +591,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) void rebuild_sched_domains(void) { - struct kfifo *q; /* queue of cpusets to be scanned */ + LIST_HEAD(q); /* queue of cpusets to be scanned*/ struct cpuset *cp; /* scans q */ struct cpuset **csa; /* array of all cpuset ptrs */ int csn; /* how many cpuset ptrs in csa so far */ @@ -602,7 +601,6 @@ void rebuild_sched_domains(void) int ndoms; /* number of sched domains in result */ int nslot; /* next empty doms[] cpumask_t slot */ - q = NULL; csa = NULL; doms = NULL; dattr = NULL; @@ -622,20 +620,19 @@ void rebuild_sched_domains(void) goto rebuild; } - q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL); - if (IS_ERR(q)) - goto done; csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL); if (!csa) goto done; csn = 0; - cp = &top_cpuset; - __kfifo_put(q, (void *)&cp, sizeof(cp)); - while (__kfifo_get(q, (void *)&cp, sizeof(cp))) { + list_add(&top_cpuset.stack_list, &q); + while (!list_empty(&q)) { struct cgroup *cont; struct cpuset *child; /* scans child cpusets of cp */ + cp = list_first_entry(&q, struct cpuset, stack_list); + list_del(q.next); + if (cpus_empty(cp->cpus_allowed)) continue; @@ -652,7 +649,7 @@ void rebuild_sched_domains(void) list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { child = cgroup_cs(cont); - __kfifo_put(q, (void *)&child, sizeof(cp)); + list_add_tail(&child->stack_list, &q); } } @@ -735,8 +732,6 @@ rebuild: put_online_cpus(); done: - if (q && !IS_ERR(q)) - kfifo_free(q); kfree(csa); /* Don't kfree(doms) -- partition_sched_domains() does that. */ /* Don't kfree(dattr) -- partition_sched_domains() does that. */ -- cgit v1.2.2 From 5def9a3a22e09c99717f41ab7f07ec9e1a1f3ec8 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 29 Jul 2008 22:33:31 -0700 Subject: markers: fix markers read barrier for multiple probes Paul pointed out two incorrect read barriers in the marker handler code in the path where multiple probes are connected. Those are ordering reads of "ptype" (single or multi probe marker), "multi" array pointer, and "multi" array data access. It should be ordered like this : read ptype smp_rmb() read multi array pointer smp_read_barrier_depends() access data referenced by multi array pointer The code with a single probe connected (optimized case, does not have to allocate an array) has correct memory ordering. It applies to kernel 2.6.26.x, 2.6.25.x and linux-next. Signed-off-by: Mathieu Desnoyers Cc: "Paul E. McKenney" Cc: [2.6.25.x, 2.6.26.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/marker.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/marker.c b/kernel/marker.c index 971da5317903..7d1faecd7a51 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -125,6 +125,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...) } else { struct marker_probe_closure *multi; int i; + /* + * Read mdata->ptype before mdata->multi. + */ + smp_rmb(); + multi = mdata->multi; /* * multi points to an array, therefore accessing the array * depends on reading multi. However, even in this case, @@ -133,7 +138,6 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...) * in the fast path, so put the explicit barrier here. */ smp_read_barrier_depends(); - multi = mdata->multi; for (i = 0; multi[i].func; i++) { va_start(args, call_private); multi[i].func(multi[i].probe_private, call_private, @@ -174,6 +178,11 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) } else { struct marker_probe_closure *multi; int i; + /* + * Read mdata->ptype before mdata->multi. + */ + smp_rmb(); + multi = mdata->multi; /* * multi points to an array, therefore accessing the array * depends on reading multi. However, even in this case, @@ -182,7 +191,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) * in the fast path, so put the explicit barrier here. */ smp_read_barrier_depends(); - multi = mdata->multi; for (i = 0; multi[i].func; i++) multi[i].func(multi[i].probe_private, call_private, mdata->format, &args); -- cgit v1.2.2 From 641de9d8f505db055d451b50e6e38117f84e79bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 29 Jul 2008 22:33:38 -0700 Subject: printk: fix comment for printk ratelimiting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment assumed the burst to be one and the ratelimit used to be named printk_ratelimit_jiffies. Signed-off-by: Uwe Kleine-König Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index a7f7559c5f6c..b51b1567bb55 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1309,14 +1309,14 @@ void tty_write_message(struct tty_struct *tty, char *msg) #if defined CONFIG_PRINTK -DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); /* * printk rate limiting, lifted from the networking subsystem. * - * This enforces a rate limit: not more than one kernel message - * every printk_ratelimit_jiffies to make a denial-of-service - * attack impossible. + * This enforces a rate limit: not more than 10 kernel messages + * every 5s to make a denial-of-service attack impossible. */ +DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); + int printk_ratelimit(void) { return __ratelimit(&printk_ratelimit_state); -- cgit v1.2.2 From 6af8bf3d86d55c98af6e453cb920ddc30867e5c7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 29 Jul 2008 22:33:49 -0700 Subject: workqueues: add comments to __create_workqueue_key() Dmitry Adamushko pointed out that the error handling in __create_workqueue_key() is not clear, add the comment. Signed-off-by: Oleg Nesterov Cc: Dmitry Adamushko Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/workqueue.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ec7e4f62aaff..4a26a1382df0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -830,10 +830,21 @@ struct workqueue_struct *__create_workqueue_key(const char *name, start_workqueue_thread(cwq, -1); } else { cpu_maps_update_begin(); + /* + * We must place this wq on list even if the code below fails. + * cpu_down(cpu) can remove cpu from cpu_populated_map before + * destroy_workqueue() takes the lock, in that case we leak + * cwq[cpu]->thread. + */ spin_lock(&workqueue_lock); list_add(&wq->list, &workqueues); spin_unlock(&workqueue_lock); - + /* + * We must initialize cwqs for each possible cpu even if we + * are going to call destroy_workqueue() finally. Otherwise + * cpu_up() can hit the uninitialized cwq once we drop the + * lock. + */ for_each_possible_cpu(cpu) { cwq = init_cpu_workqueue(wq, cpu); if (err || !cpu_online(cpu)) -- cgit v1.2.2 From f718cd4add5aea9d379faff92f162571e356cc5f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 29 Jul 2008 22:33:52 -0700 Subject: sched: make scheduler sysfs attributes sysdev class devices They are really class devices, but were incorrectly declared. This leads to crashes with the recent changes that makes non normal sysdevs use a different prototype. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Andi Kleen Cc: Ingo Molnar Cc: Pierre Ossman Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 0236958addcb..21f7da94662e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7671,34 +7671,34 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) } #ifdef CONFIG_SCHED_MC -static ssize_t sched_mc_power_savings_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *page) +static ssize_t sched_mc_power_savings_show(struct sysdev_class *class, + char *page) { return sprintf(page, "%u\n", sched_mc_power_savings); } -static ssize_t sched_mc_power_savings_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t sched_mc_power_savings_store(struct sysdev_class *class, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 0); } -static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, - sched_mc_power_savings_store); +static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644, + sched_mc_power_savings_show, + sched_mc_power_savings_store); #endif #ifdef CONFIG_SCHED_SMT -static ssize_t sched_smt_power_savings_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *page) +static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev, + char *page) { return sprintf(page, "%u\n", sched_smt_power_savings); } -static ssize_t sched_smt_power_savings_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 1); } -static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, +static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644, + sched_smt_power_savings_show, sched_smt_power_savings_store); #endif -- cgit v1.2.2 From a9b60bf4c29e07a5a2f26a6f74937972fee9b58b Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 1 Aug 2008 08:39:34 -0500 Subject: kgdb: fix kgdb_validate_break_address to perform a mem write A regression to the kgdb core was found in the case of using the CONFIG_DEBUG_RODATA kernel option. When this option is on, a breakpoint cannot be written into any readonly memory page. When an external debugger requests a breakpoint to get set, the kgdb_validate_break_address() was only checking to see if the address to place the breakpoint was readable and lacked a write check. This patch changes the validate routine to try reading (via the breakpoint set request) and also to try immediately writing the break point. If either fails, an error is correctly returned and the debugger behaves correctly. Then an end user can make the descision to use hardware breakpoints. Also update the documentation to reflect that using CONFIG_DEBUG_RODATA will inhibit the use of software breakpoints. Signed-off-by: Jason Wessel --- kernel/kgdb.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 3ec23c3ec97f..c0d45b2c4d79 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -166,13 +166,6 @@ early_param("nokgdbroundup", opt_nokgdbroundup); * Weak aliases for breakpoint management, * can be overriden by architectures when needed: */ -int __weak kgdb_validate_break_address(unsigned long addr) -{ - char tmp_variable[BREAK_INSTR_SIZE]; - - return probe_kernel_read(tmp_variable, (char *)addr, BREAK_INSTR_SIZE); -} - int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) { int err; @@ -191,6 +184,25 @@ int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle) (char *)bundle, BREAK_INSTR_SIZE); } +int __weak kgdb_validate_break_address(unsigned long addr) +{ + char tmp_variable[BREAK_INSTR_SIZE]; + int err; + /* Validate setting the breakpoint and then removing it. In the + * remove fails, the kernel needs to emit a bad message because we + * are deep trouble not being able to put things back the way we + * found them. + */ + err = kgdb_arch_set_breakpoint(addr, tmp_variable); + if (err) + return err; + err = kgdb_arch_remove_breakpoint(addr, tmp_variable); + if (err) + printk(KERN_ERR "KGDB: Critical breakpoint error, kernel " + "memory destroyed at: %lx", addr); + return err; +} + unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs) { return instruction_pointer(regs); -- cgit v1.2.2 From 25fc999913839a45cbb48ac7872e67f7521e7ed9 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 1 Aug 2008 08:39:35 -0500 Subject: kgdb: fix gdb serial thread queries The command "info threads" did not work correctly with kgdb. It would result in a silent kernel hang if used. This patach addresses several problems. - Fix use of deprecated NR_CPUS - Fix kgdb to not walk linearly through the pid space - Correctly implement shadow pids - Change the threads per query to a #define - Fix kgdb_hex2long to work with negated values The threads 0 and -1 are reserved to represent the current task. That means that CPU 0 will start with a shadow thread id of -2, and CPU 1 will have a shadow thread id of -3, etc... From the debugger you can switch to a shadow thread to see what one of the other cpus was doing, however it is not possible to execute run control operations on any other cpu execept the cpu executing the kgdb_handle_exception(). Signed-off-by: Jason Wessel --- kernel/kgdb.c | 68 +++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/kgdb.c b/kernel/kgdb.c index c0d45b2c4d79..eaa21fc9ad1d 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -56,12 +56,14 @@ static int kgdb_break_asap; +#define KGDB_MAX_THREAD_QUERY 17 struct kgdb_state { int ex_vector; int signo; int err_code; int cpu; int pass_exception; + unsigned long thr_query; unsigned long threadid; long kgdb_usethreadid; struct pt_regs *linux_regs; @@ -445,9 +447,14 @@ int kgdb_hex2long(char **ptr, unsigned long *long_val) { int hex_val; int num = 0; + int negate = 0; *long_val = 0; + if (**ptr == '-') { + negate = 1; + (*ptr)++; + } while (**ptr) { hex_val = hex(**ptr); if (hex_val < 0) @@ -458,6 +465,9 @@ int kgdb_hex2long(char **ptr, unsigned long *long_val) (*ptr)++; } + if (negate) + *long_val = -*long_val; + return num; } @@ -527,10 +537,16 @@ static void int_to_threadref(unsigned char *id, int value) static struct task_struct *getthread(struct pt_regs *regs, int tid) { /* - * Non-positive TIDs are remapped idle tasks: + * Non-positive TIDs are remapped to the cpu shadow information */ - if (tid <= 0) - return idle_task(-tid); + if (tid == 0 || tid == -1) + tid = -atomic_read(&kgdb_active) - 2; + if (tid < 0) { + if (kgdb_info[-tid - 2].task) + return kgdb_info[-tid - 2].task; + else + return idle_task(-tid - 2); + } /* * find_task_by_pid_ns() does not take the tasklist lock anymore @@ -737,14 +753,15 @@ setundefined: } /* - * Remap normal tasks to their real PID, idle tasks to -1 ... -NR_CPUs: + * Remap normal tasks to their real PID, + * CPU shadow threads are mapped to -CPU - 2 */ static inline int shadow_pid(int realpid) { if (realpid) return realpid; - return -1-raw_smp_processor_id(); + return -raw_smp_processor_id() - 2; } static char gdbmsgbuf[BUFMAX + 1]; @@ -838,7 +855,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks) local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo; } else { local_debuggerinfo = NULL; - for (i = 0; i < NR_CPUS; i++) { + for_each_online_cpu(i) { /* * Try to find the task on some other * or possibly this node if we do not @@ -972,10 +989,13 @@ static int gdb_cmd_reboot(struct kgdb_state *ks) /* Handle the 'q' query packets */ static void gdb_cmd_query(struct kgdb_state *ks) { - struct task_struct *thread; + struct task_struct *g; + struct task_struct *p; unsigned char thref[8]; char *ptr; int i; + int cpu; + int finished = 0; switch (remcom_in_buffer[1]) { case 's': @@ -985,22 +1005,34 @@ static void gdb_cmd_query(struct kgdb_state *ks) break; } - if (remcom_in_buffer[1] == 'f') - ks->threadid = 1; - + i = 0; remcom_out_buffer[0] = 'm'; ptr = remcom_out_buffer + 1; - - for (i = 0; i < 17; ks->threadid++) { - thread = getthread(ks->linux_regs, ks->threadid); - if (thread) { - int_to_threadref(thref, ks->threadid); + if (remcom_in_buffer[1] == 'f') { + /* Each cpu is a shadow thread */ + for_each_online_cpu(cpu) { + ks->thr_query = 0; + int_to_threadref(thref, -cpu - 2); pack_threadid(ptr, thref); ptr += BUF_THREAD_ID_SIZE; *(ptr++) = ','; i++; } } + + do_each_thread(g, p) { + if (i >= ks->thr_query && !finished) { + int_to_threadref(thref, p->pid); + pack_threadid(ptr, thref); + ptr += BUF_THREAD_ID_SIZE; + *(ptr++) = ','; + ks->thr_query++; + if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0) + finished = 1; + } + i++; + } while_each_thread(g, p); + *(--ptr) = '\0'; break; @@ -1023,15 +1055,15 @@ static void gdb_cmd_query(struct kgdb_state *ks) error_packet(remcom_out_buffer, -EINVAL); break; } - if (ks->threadid > 0) { + if ((int)ks->threadid > 0) { kgdb_mem2hex(getthread(ks->linux_regs, ks->threadid)->comm, remcom_out_buffer, 16); } else { static char tmpstr[23 + BUF_THREAD_ID_SIZE]; - sprintf(tmpstr, "Shadow task %d for pid 0", - (int)(-ks->threadid-1)); + sprintf(tmpstr, "shadowCPU%d", + (int)(-ks->threadid - 2)); kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr)); } break; -- cgit v1.2.2 From ee1d315663ee0b494898f813a266d6244b263b4f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 7 Jul 2008 10:49:45 -0400 Subject: [PATCH] Audit: Collect signal info when SIGUSR2 is sent to auditd Makes the kernel audit subsystem collect information about the sending process when that process sends SIGUSR2 to the userspace audit daemon. SIGUSR2 is a new interesting signal to auditd telling auditd that it should try to start logging to disk again and the error condition which caused it to stop logging to disk (usually out of space) has been rectified. Signed-off-by: Eric Paris Signed-off-by: Al Viro --- kernel/auditsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 4699950e65bd..580a5389fd99 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2375,7 +2375,7 @@ int __audit_signal_info(int sig, struct task_struct *t) struct audit_context *ctx = tsk->audit_context; if (audit_pid && t->tgid == audit_pid) { - if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) { + if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { audit_sig_pid = tsk->pid; if (tsk->loginuid != -1) audit_sig_uid = tsk->loginuid; -- cgit v1.2.2 From 1d6c9649e236caa2e93e3647256216e57172b011 Mon Sep 17 00:00:00 2001 From: Vesa-Matti J Kari Date: Wed, 23 Jul 2008 00:06:13 +0300 Subject: kernel/audit.c control character detection is off-by-one Hello, According to my understanding there is an off-by-one bug in the function: audit_string_contains_control() in: kernel/audit.c Patch is included. I do not know from how many places the function is called from, but for example, SELinux Access Vector Cache tries to log untrusted filenames via call path: avc_audit() audit_log_untrustedstring() audit_log_n_untrustedstring() audit_string_contains_control() If audit_string_contains_control() detects control characters, then the string is hex-encoded. But the hex=0x7f dec=127, DEL-character, is not detected. I guess this could have at least some minor security implications, since a user can create a filename with 0x7f in it, causing logged filename to possibly look different when someone reads it on the terminal. Signed-off-by: Vesa-Matti Kari Signed-off-by: Al Viro --- kernel/audit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index e092f1c0ce30..6d903182c6b7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1366,7 +1366,7 @@ int audit_string_contains_control(const char *string, size_t len) { const unsigned char *p; for (p = string; p < (const unsigned char *)string + len && *p; p++) { - if (*p == '"' || *p < 0x21 || *p > 0x7f) + if (*p == '"' || *p < 0x21 || *p > 0x7e) return 1; } return 0; -- cgit v1.2.2 From 036bbf76ad9f83781590623111b80ba0b82930ac Mon Sep 17 00:00:00 2001 From: zhangxiliang Date: Fri, 1 Aug 2008 09:47:01 +0800 Subject: Re: [PATCH] the loginuid field should be output in all AUDIT_CONFIG_CHANGE audit messages > shouldn't these be using the "audit_get_loginuid(current)" and if we > are going to output loginuid we also should be outputting sessionid Thanks for your detailed explanation. I have made a new patch for outputing "loginuid" and "sessionid" by audit_get_loginuid(current) and audit_get_sessionid(current). If there are some deficiencies, please give me your indication. Signed-off-by: Zhang Xiliang Signed-off-by: Al Viro --- kernel/auditfilter.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 98c50cc671bb..b7d354e2b0ef 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1022,8 +1022,11 @@ static void audit_update_watch(struct audit_parent *parent, struct audit_buffer *ab; ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); + audit_log_format(ab, "auid=%u ses=%u", + audit_get_loginuid(current), + audit_get_sessionid(current)); audit_log_format(ab, - "op=updated rules specifying path="); + " op=updated rules specifying path="); audit_log_untrustedstring(ab, owatch->path); audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino); @@ -1058,7 +1061,10 @@ static void audit_remove_parent_watches(struct audit_parent *parent) struct audit_buffer *ab; ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); - audit_log_format(ab, "op=remove rule path="); + audit_log_format(ab, "auid=%u ses=%u", + audit_get_loginuid(current), + audit_get_sessionid(current)); + audit_log_format(ab, " op=remove rule path="); audit_log_untrustedstring(ab, w->path); if (r->filterkey) { audit_log_format(ab, " key="); -- cgit v1.2.2 From 980dfb0db340b95094732d78b55311f2c539c1af Mon Sep 17 00:00:00 2001 From: zhangxiliang Date: Fri, 1 Aug 2008 19:15:47 +0800 Subject: [PATCH] Fix the kernel panic of audit_filter_task when key field is set When calling audit_filter_task(), it calls audit_filter_rules() with audit_context is NULL. If the key field is set, the result in audit_filter_rules() will be set to 1 and ctx->filterkey will be set to key. But the ctx is NULL in this condition, so kernel will panic. Signed-off-by: Zhang Xiliang Signed-off-by: Al Viro --- kernel/auditsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 580a5389fd99..496c3dd37276 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -610,7 +610,7 @@ static int audit_filter_rules(struct task_struct *tsk, if (!result) return 0; } - if (rule->filterkey) + if (rule->filterkey && ctx) ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC); switch (rule->action) { case AUDIT_NEVER: *state = AUDIT_DISABLED; break; -- cgit v1.2.2 From 20c6aaa39ab735c7ed78e4e5a214d250efae0a6e Mon Sep 17 00:00:00 2001 From: zhangxiliang Date: Thu, 31 Jul 2008 10:11:19 +0800 Subject: [PATCH] Fix the bug of using AUDIT_STATUS_RATE_LIMIT when set fail, no error output. When the "status_get->mask" is "AUDIT_STATUS_RATE_LIMIT || AUDIT_STATUS_BACKLOG_LIMIT". If "audit_set_rate_limit" fails and "audit_set_backlog_limit" succeeds, the "err" value will be greater than or equal to 0. It will miss the failure of rate set. Signed-off-by: Zhang Xiliang Acked-by: Eric Paris Signed-off-by: Al Viro --- kernel/audit.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 6d903182c6b7..4414e93d8750 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -707,12 +707,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (status_get->mask & AUDIT_STATUS_ENABLED) { err = audit_set_enabled(status_get->enabled, loginuid, sessionid, sid); - if (err < 0) return err; + if (err < 0) + return err; } if (status_get->mask & AUDIT_STATUS_FAILURE) { err = audit_set_failure(status_get->failure, loginuid, sessionid, sid); - if (err < 0) return err; + if (err < 0) + return err; } if (status_get->mask & AUDIT_STATUS_PID) { int new_pid = status_get->pid; @@ -725,9 +727,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_pid = new_pid; audit_nlk_pid = NETLINK_CB(skb).pid; } - if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) + if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(status_get->rate_limit, loginuid, sessionid, sid); + if (err < 0) + return err; + } if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) err = audit_set_backlog_limit(status_get->backlog_limit, loginuid, sessionid, sid); -- cgit v1.2.2 From 5c7edcd7ee6b77b88252fe4096dce1a46a60c829 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 31 Jul 2008 02:04:09 -0700 Subject: tracehook: fix exit_signal=0 case My commit 2b2a1ff64afbadac842bbc58c5166962cf4f7664 introduced a regression (sorry about that) for the odd case of exit_signal=0 (e.g. clone_flags=0). This is not a normal use, but it's used by a case in the glibc test suite. Dying with exit_signal=0 sends no signal, but it's supposed to wake up a parent's blocked wait*() calls (unlike the delayed_group_leader case). This fixes tracehook_notify_death() and its caller to distinguish a "signal 0" wakeup from the delayed_group_leader case (with no wakeup). Signed-off-by: Roland McGrath Tested-by: Serge Hallyn Signed-off-by: Linus Torvalds --- kernel/exit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index eb4d6470d1d0..38ec40630149 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -911,10 +911,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead) tsk->exit_signal = SIGCHLD; signal = tracehook_notify_death(tsk, &cookie, group_dead); - if (signal > 0) + if (signal >= 0) signal = do_notify_parent(tsk, signal); - tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE; + tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE; /* mt-exec, de_thread() is waiting for us */ if (thread_group_leader(tsk) && @@ -927,7 +927,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) tracehook_report_death(tsk, signal, cookie, group_dead); /* If the process is dead, release it - nobody will wait for it */ - if (signal < 0) + if (signal == DEATH_REAP) release_task(tsk); } -- cgit v1.2.2 From 1a61c88defcd611bd148d6c960b498e1b8bbbe00 Mon Sep 17 00:00:00 2001 From: zhangxiliang Date: Sat, 2 Aug 2008 10:56:37 +0800 Subject: Re: [PATCH] Fix the kernel panic of audit_filter_task when key field is set Sorry, I miss a blank between if and "(". And I add "unlikely" to check "ctx" in audit_match_perm() and audit_match_filetype(). This is a new patch for it. Signed-off-by: Zhang Xiliang Signed-off-by: Al Viro --- kernel/auditsc.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 496c3dd37276..972f8e61d36a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -243,6 +243,9 @@ static inline int open_arg(int flags, int mask) static int audit_match_perm(struct audit_context *ctx, int mask) { + if (unlikely(!ctx)) + return 0; + unsigned n = ctx->major; switch (audit_classify_syscall(ctx->arch, n)) { case 0: /* native */ @@ -284,6 +287,10 @@ static int audit_match_filetype(struct audit_context *ctx, int which) { unsigned index = which & ~S_IFMT; mode_t mode = which & S_IFMT; + + if (unlikely(!ctx)) + return 0; + if (index >= ctx->name_count) return 0; if (ctx->names[index].ino == -1) -- cgit v1.2.2 From 725aad24c3ba96a7c06448c14c265a466cdbd663 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 3 Aug 2008 09:33:03 -0700 Subject: __sched_setscheduler: don't do any policy checks when not "user" The "user" parameter to __sched_setscheduler indicates whether the change is being done on behalf of a user process or not. If not, we shouldn't apply any permissions checks, so don't call security_task_setscheduler(). Signed-off-by: Jeremy Fitzhardinge Tested-by: Steve Wise Cc: Rusty Russell Cc: "Rafael J. Wysocki" Signed-off-by: Linus Torvalds --- kernel/sched.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 21f7da94662e..04160d277e7a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5004,19 +5004,21 @@ recheck: return -EPERM; } + if (user) { #ifdef CONFIG_RT_GROUP_SCHED - /* - * Do not allow realtime tasks into groups that have no runtime - * assigned. - */ - if (user - && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) - return -EPERM; + /* + * Do not allow realtime tasks into groups that have no runtime + * assigned. + */ + if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) + return -EPERM; #endif - retval = security_task_setscheduler(p, policy, param); - if (retval) - return retval; + retval = security_task_setscheduler(p, policy, param); + if (retval) + return retval; + } + /* * make sure no PI-waiters arrive (or leave) while we are * changing the priority of the task: -- cgit v1.2.2 From 32194450330be327f3b25bf6b66298bd122599e9 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 5 Aug 2008 13:01:10 -0700 Subject: relay: fix "full buffer with exactly full last subbuffer" accounting problem In relay's current read implementation, if the buffer is completely full but hasn't triggered the buffer-full condition (i.e. the last write didn't cross the subbuffer boundary) and the last subbuffer is exactly full, the subbuffer accounting code erroneously finds nothing available. This patch fixes the problem. Signed-off-by: Tom Zanussi Cc: Eduard - Gabriel Munteanu Cc: Pekka Enberg Cc: Jens Axboe Cc: Mathieu Desnoyers Cc: Andrea Righi Cc: [2.6.25.x, 2.6.26.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/relay.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/relay.c b/kernel/relay.c index 04006ef970b8..8d13a7855c08 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -944,6 +944,10 @@ static void relay_file_read_consume(struct rchan_buf *buf, size_t n_subbufs = buf->chan->n_subbufs; size_t read_subbuf; + if (buf->subbufs_produced == buf->subbufs_consumed && + buf->offset == buf->bytes_consumed) + return; + if (buf->bytes_consumed + bytes_consumed > subbuf_size) { relay_subbufs_consumed(buf->chan, buf->cpu, 1); buf->bytes_consumed = 0; @@ -975,6 +979,8 @@ static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos) relay_file_read_consume(buf, read_pos, 0); + consumed = buf->subbufs_consumed; + if (unlikely(buf->offset > subbuf_size)) { if (produced == consumed) return 0; @@ -993,8 +999,12 @@ static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos) if (consumed > produced) produced += n_subbufs * subbuf_size; - if (consumed == produced) + if (consumed == produced) { + if (buf->offset == subbuf_size && + buf->subbufs_produced > buf->subbufs_consumed) + return 1; return 0; + } return 1; } -- cgit v1.2.2 From 5b2becc8cffdccdd60c63099f592ddd35aa6c34f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 5 Aug 2008 13:01:13 -0700 Subject: semaphore: __down_common: use signal_pending_state() Change __down_common() to use signal_pending_state() instead of open coding. The changes in kernel/semaphore.o are just artifacts, the state checks are optimized away. Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/semaphore.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/semaphore.c b/kernel/semaphore.c index aaaeae8244e7..94a62c0d4ade 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -212,9 +212,7 @@ static inline int __sched __down_common(struct semaphore *sem, long state, waiter.up = 0; for (;;) { - if (state == TASK_INTERRUPTIBLE && signal_pending(task)) - goto interrupted; - if (state == TASK_KILLABLE && fatal_signal_pending(task)) + if (signal_pending_state(state, task)) goto interrupted; if (timeout <= 0) goto timed_out; -- cgit v1.2.2 From c69ad71bcdecbaab82cfacb1dc967bd7fd967a3b Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 5 Aug 2008 13:01:14 -0700 Subject: genirq: better warning on irqchip->set_type() failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While I'm glad to finally see the hole fixed whereby passing an invalid IRQ trigger type to request_irq() would be ignored, the current diagnostic isn't quite useful. Fixed by also listing the trigger type which was rejected. Signed-off-by: David Brownell Acked-by: Uwe Kleine-König Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/irq/manage.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 152abfd3589f..0314074fa232 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -323,7 +323,8 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq, ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK); if (ret) - pr_err("setting flow type for irq %u failed (%pF)\n", + pr_err("setting trigger mode %d for irq %u failed (%pF)\n", + (int)(flags & IRQF_TRIGGER_MASK), irq, chip->set_type); return ret; -- cgit v1.2.2 From d2dc1f4adb4b5b02d87e49e115e5107f4da790c0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 5 Aug 2008 13:01:31 -0700 Subject: dma: fix order calculation in dma_mark_declared_memory_occupied() get_order() takes byte-sized input, not a page-granular one. Irrespective of this fix I'm inclined to believe that this doesn't work right anyway - bitmap_allocate_region() has an implicit assumption of 'pos' being suitable for 'order', which this function doesn't seem to enforce (and since it's being called with a byte-granular value there's no reason to believe that the callers would make sure device_addr is passed accordingly - it's also not documented that way). Signed-off-by: Jan Beulich Cc: James E.J. Bottomley Cc: Ingo Molnar Cc: Dmitry Baryshkov Cc: Jesse Barnes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/dma-coherent.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c index 7517115a8cce..91e96950cd52 100644 --- a/kernel/dma-coherent.c +++ b/kernel/dma-coherent.c @@ -77,15 +77,14 @@ void *dma_mark_declared_memory_occupied(struct device *dev, { struct dma_coherent_mem *mem = dev->dma_mem; int pos, err; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); - pages >>= PAGE_SHIFT; + size += device_addr & ~PAGE_MASK; if (!mem) return ERR_PTR(-EINVAL); pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); + err = bitmap_allocate_region(mem->bitmap, pos, get_order(size)); if (err != 0) return ERR_PTR(err); return mem->virt_base + (pos << PAGE_SHIFT); -- cgit v1.2.2 From bf1db69fbf4ff511e88736ce2e6318846f34492b Mon Sep 17 00:00:00 2001 From: Richard Hughes Date: Tue, 5 Aug 2008 13:01:35 -0700 Subject: pm_qos: spelling fixes A documentation cleanup patch. With a minor tweak to clarify units for kbs. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: mark gross Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/pm_qos_params.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index 8cb757026386..da9c2dda6a4e 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -24,7 +24,7 @@ * requirement that the application has is cleaned up when closes the file * pointer or exits the pm_qos_object will get an opportunity to clean up. * - * mark gross mgross@linux.intel.com + * Mark Gross */ #include @@ -211,8 +211,8 @@ EXPORT_SYMBOL_GPL(pm_qos_requirement); * @value: defines the qos request * * This function inserts a new entry in the pm_qos_class list of requested qos - * performance charactoistics. It recomputes the agregate QoS expectations for - * the pm_qos_class of parrameters. + * performance characteristics. It recomputes the aggregate QoS expectations + * for the pm_qos_class of parameters. */ int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value) { @@ -250,10 +250,10 @@ EXPORT_SYMBOL_GPL(pm_qos_add_requirement); * @name: identifies the request * @value: defines the qos request * - * Updates an existing qos requierement for the pm_qos_class of parameters along + * Updates an existing qos requirement for the pm_qos_class of parameters along * with updating the target pm_qos_class value. * - * If the named request isn't in the lest then no change is made. + * If the named request isn't in the list then no change is made. */ int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value) { @@ -287,7 +287,7 @@ EXPORT_SYMBOL_GPL(pm_qos_update_requirement); * @pm_qos_class: identifies which list of qos request to us * @name: identifies the request * - * Will remove named qos request from pm_qos_class list of parrameters and + * Will remove named qos request from pm_qos_class list of parameters and * recompute the current target value for the pm_qos_class. */ void pm_qos_remove_requirement(int pm_qos_class, char *name) @@ -319,7 +319,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); * @notifier: notifier block managed by caller. * * will register the notifier into a notification chain that gets called - * uppon changes to the pm_qos_class target value. + * upon changes to the pm_qos_class target value. */ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) { @@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(pm_qos_add_notifier); * @notifier: notifier block to be removed. * * will remove the notifier from the notification chain that gets called - * uppon changes to the pm_qos_class target value. + * upon changes to the pm_qos_class target value. */ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) { -- cgit v1.2.2