From 9c44bc03fff44ff04237a7d92e35304a0e50c331 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:04 +0200 Subject: softlockup: allow panic on lockup allow users to configure the softlockup detector to generate a panic instead of a warning message. high-availability systems might opt for this strict method (combined with panic_timeout= boot option/sysctl), instead of generating softlockup warnings ad infinitum. also, automated tests work better if the system reboots reliably (into a safe kernel) in case of a lockup. The full spectrum of configurability is supported: boot option, sysctl option and Kconfig option. it's default-disabled. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sysctl.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 29116652dca8..2d3b388c402d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -727,6 +727,17 @@ static struct ctl_table kern_table[] = { }, #endif #ifdef CONFIG_DETECT_SOFTLOCKUP + { + .ctl_name = CTL_UNNUMBERED, + .procname = "softlockup_panic", + .data = &softlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, { .ctl_name = CTL_UNNUMBERED, .procname = "softlockup_thresh", -- cgit v1.2.2 From 9383d9679056e6cc4e7ff70f31da945a268238f4 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Mon, 12 May 2008 21:21:14 +0200 Subject: softlockup: fix softlockup_thresh unaligned access and disable detection at runtime Fix unaligned access errors when setting softlockup_thresh on 64 bit platforms. Allow softlockup detection to be disabled by setting softlockup_thresh <= 0. Detect that boot time softlockup detection has been disabled earlier in softlockup_tick. Signed-off-by: Dimitri Sivanich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sysctl.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2d3b388c402d..31c19a79738d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -84,12 +84,13 @@ extern int latencytop_enabled; extern int sysctl_nr_open_min, sysctl_nr_open_max; /* Constants used for minimum and maximum */ -#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) +#ifdef CONFIG_HIGHMEM static int one = 1; #endif #ifdef CONFIG_DETECT_SOFTLOCKUP static int sixty = 60; +static int neg_one = -1; #endif #ifdef CONFIG_MMU @@ -742,11 +743,11 @@ static struct ctl_table kern_table[] = { .ctl_name = CTL_UNNUMBERED, .procname = "softlockup_thresh", .data = &softlockup_thresh, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &proc_dointvec_minmax, .strategy = &sysctl_intvec, - .extra1 = &one, + .extra1 = &neg_one, .extra2 = &sixty, }, { -- cgit v1.2.2 From 1c4cd6dd1d0fd3057bb6b8c87460049497889d1b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:14 +0200 Subject: softlockup: fix softlockup_thresh fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 31c19a79738d..a829dc8d7a9e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -84,7 +84,7 @@ extern int latencytop_enabled; extern int sysctl_nr_open_min, sysctl_nr_open_max; /* Constants used for minimum and maximum */ -#ifdef CONFIG_HIGHMEM +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) static int one = 1; #endif -- cgit v1.2.2 From 4dca10a96041f78bed11ce9e4a5cfde813ec4ccb Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 7 Jul 2008 18:37:04 -0700 Subject: softlockup: fix invalid proc_handler for softlockup_panic The type of softlockup_panic is int, but the proc_handler is proc_doulongvec_minmax(). This handler is for unsigned long. This handler should be proc_dointvec_minmax(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3e56d2f91414..ab59ac008caf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -741,7 +741,7 @@ static struct ctl_table kern_table[] = { .data = &softlockup_panic, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &proc_dointvec_minmax, .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &one, -- cgit v1.2.2 From a1ef5adb4cad43460ebba23c5a78cf4a55bb6a5b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 8 Jul 2008 19:00:17 +0200 Subject: remove CONFIG_KMOD from core kernel code Always compile request_module when the kernel allows modules. Signed-off-by: Johannes Berg Signed-off-by: Rusty Russell --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6b16e16428d8..b859e6b5a767 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -110,7 +110,7 @@ static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES extern char modprobe_path[]; #endif #ifdef CONFIG_CHR_DEV_SG @@ -475,7 +475,7 @@ static struct ctl_table kern_table[] = { .proc_handler = &ftrace_enable_sysctl, }, #endif -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES { .ctl_name = KERN_MODPROBE, .procname = "modprobe", -- cgit v1.2.2 From c748e1340e0de3fa7fed86f8bdf499be9242afff Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 23 Jul 2008 21:27:03 -0700 Subject: mm/vmstat.c: proper externs This patch adds proper extern declarations for five variables in include/linux/vmstat.h Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2a7b9d88706b..1f7b3b76a166 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,6 @@ extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; extern int compat_log; extern int maps_protect; -extern int sysctl_stat_interval; extern int latencytop_enabled; extern int sysctl_nr_open_min, sysctl_nr_open_max; #ifdef CONFIG_RCU_TORTURE_TEST -- cgit v1.2.2 From e5ff215941d59f8ae6bf58f6428dc5c26745a612 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 23 Jul 2008 21:27:42 -0700 Subject: hugetlb: multiple hstates for multiple page sizes Add basic support for more than one hstate in hugetlbfs. This is the key to supporting multiple hugetlbfs page sizes at once. - Rather than a single hstate, we now have an array, with an iterator - default_hstate continues to be the struct hstate which we use by default - Add functions for architectures to register new hstates [akpm@linux-foundation.org: coding-style fixes] Acked-by: Adam Litke Acked-by: Nishanth Aravamudan Signed-off-by: Andi Kleen Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1f7b3b76a166..1a8299d1fe59 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -959,7 +959,7 @@ static struct ctl_table vm_table[] = { #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", - .data = &max_huge_pages, + .data = NULL, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &hugetlb_sysctl_handler, @@ -985,10 +985,12 @@ static struct ctl_table vm_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nr_overcommit_hugepages", - .data = &sysctl_overcommit_huge_pages, - .maxlen = sizeof(sysctl_overcommit_huge_pages), + .data = NULL, + .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &hugetlb_overcommit_handler, + .extra1 = (void *)&hugetlb_zero, + .extra2 = (void *)&hugetlb_infinity, }, #endif { -- cgit v1.2.2 From 717115e1a5856b57af0f71e1df7149108294fc10 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 25 Jul 2008 01:45:58 -0700 Subject: printk ratelimiting rewrite All ratelimit user use same jiffies and burst params, so some messages (callbacks) will be lost. For example: a call printk_ratelimit(5 * HZ, 1) b call printk_ratelimit(5 * HZ, 1) before the 5*HZ timeout of a, then b will will be supressed. - rewrite __ratelimit, and use a ratelimit_state as parameter. Thanks for hints from andrew. - Add WARN_ON_RATELIMIT, update rcupreempt.h - remove __printk_ratelimit - use __ratelimit in net_ratelimit Signed-off-by: Dave Young Cc: "David S. Miller" Cc: "Paul E. McKenney" Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a8299d1fe59..35a50db9b6ce 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -624,7 +624,7 @@ static struct ctl_table kern_table[] = { { .ctl_name = KERN_PRINTK_RATELIMIT, .procname = "printk_ratelimit", - .data = &printk_ratelimit_jiffies, + .data = &printk_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -633,7 +633,7 @@ static struct ctl_table kern_table[] = { { .ctl_name = KERN_PRINTK_RATELIMIT_BURST, .procname = "printk_ratelimit_burst", - .data = &printk_ratelimit_burst, + .data = &printk_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, -- cgit v1.2.2 From 734550921e9b7ab924a43aa3d0bd4239dac4fbf1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 14 Jul 2008 21:22:20 -0400 Subject: [PATCH] beginning of sysctl cleanup - ctl_table_set New object: set of sysctls [currently - root and per-net-ns]. Contains: pointer to parent set, list of tables and "should I see this set?" method (->is_seen(set)). Current lists of tables are subsumed by that; net-ns contains such a beast. ->lookup() for ctl_table_root returns pointer to ctl_table_set instead of that to ->list of that ctl_table_set. [folded compile fixes by rdd for configs without sysctl] Signed-off-by: Al Viro --- kernel/sysctl.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 35a50db9b6ce..8ee4a0619fbb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -160,12 +160,13 @@ static struct ctl_table root_table[]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.header_list), + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, }; static struct ctl_table_root sysctl_table_root = { .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .header_list = LIST_HEAD_INIT(root_table_header.ctl_entry), + .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), }; static struct ctl_table kern_table[]; @@ -1403,14 +1404,20 @@ void sysctl_head_finish(struct ctl_table_header *head) spin_unlock(&sysctl_lock); } +static struct ctl_table_set * +lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = &root->default_set; + if (root->lookup) + set = root->lookup(root, namespaces); + return set; +} + static struct list_head * lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) { - struct list_head *header_list; - header_list = &root->header_list; - if (root->lookup) - header_list = root->lookup(root, namespaces); - return header_list; + struct ctl_table_set *set = lookup_header_set(root, namespaces); + return &set->list; } struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, @@ -1720,7 +1727,6 @@ struct ctl_table_header *__register_sysctl_paths( struct nsproxy *namespaces, const struct ctl_path *path, struct ctl_table *table) { - struct list_head *header_list; struct ctl_table_header *header; struct ctl_table *new, **prevp; unsigned int n, npath; @@ -1772,8 +1778,8 @@ struct ctl_table_header *__register_sysctl_paths( } #endif spin_lock(&sysctl_lock); - header_list = lookup_header_list(root, namespaces); - list_add_tail(&header->ctl_entry, header_list); + header->set = lookup_header_set(root, namespaces); + list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); return header; @@ -1832,6 +1838,15 @@ void unregister_sysctl_table(struct ctl_table_header * header) kfree(header); } +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ + INIT_LIST_HEAD(&p->list); + p->parent = parent ? parent : &sysctl_table_root.default_set; + p->is_seen = is_seen; +} + #else /* !CONFIG_SYSCTL */ struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { @@ -1848,6 +1863,12 @@ void unregister_sysctl_table(struct ctl_table_header * table) { } +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ +} + #endif /* CONFIG_SYSCTL */ /* -- cgit v1.2.2 From f7e6ced4061da509f737541ca4dbd44d83a6e82f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 01:44:23 -0400 Subject: [PATCH] allow delayed freeing of ctl_table_header Refcount the sucker; instead of freeing it by the end of unregistration just drop the refcount and free only when it hits zero. Make sure that we _always_ make ->unregistering non-NULL in start_unregistering(). That allows anybody to get a reference to such puppy, preventing its freeing and reuse. It does *not* block unregistration. Anybody who holds such a reference can * try to grab a "use" reference (ctl_head_grab()); that will succeeds if and only if it hadn't entered unregistration yet. If it succeeds, we can use it in all normal ways until we release the "use" reference (with ctl_head_finish()). Note that this relies on having ->unregistering become non-NULL in all cases when one starts to unregister the sucker. * keep pointers to ctl_table entries; they *can* be freed if the entire thing is unregistered. However, if ctl_head_grab() succeeds, we know that unregistration had not happened (and will not happen until ctl_head_finish()) and such pointers can be used safely. IOW, now we can have inodes under /proc/sys keep references to ctl_table entries, protecting them with references to ctl_table_header and grabbing the latter for the duration of operations that require access to ctl_table. That won't cause deadlocks, since unregistration will not be stopped by mere keeping a reference to ctl_table_header. Signed-off-by: Al Viro --- kernel/sysctl.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8ee4a0619fbb..60d9357e7172 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1387,6 +1387,9 @@ static void start_unregistering(struct ctl_table_header *p) spin_unlock(&sysctl_lock); wait_for_completion(&wait); spin_lock(&sysctl_lock); + } else { + /* anything non-NULL; we'll never dereference it */ + p->unregistering = ERR_PTR(-EINVAL); } /* * do not remove from the list until nobody holds it; walking the @@ -1395,6 +1398,32 @@ static void start_unregistering(struct ctl_table_header *p) list_del_init(&p->ctl_entry); } +void sysctl_head_get(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + head->count++; + spin_unlock(&sysctl_lock); +} + +void sysctl_head_put(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + if (!--head->count) + kfree(head); + spin_unlock(&sysctl_lock); +} + +struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) +{ + if (!head) + BUG(); + spin_lock(&sysctl_lock); + if (!use_table(head)) + head = ERR_PTR(-ENOENT); + spin_unlock(&sysctl_lock); + return head; +} + void sysctl_head_finish(struct ctl_table_header *head) { if (!head) @@ -1771,6 +1800,7 @@ struct ctl_table_header *__register_sysctl_paths( header->unregistering = NULL; header->root = root; sysctl_set_parent(NULL, header->ctl_table); + header->count = 1; #ifdef CONFIG_SYSCTL_SYSCALL_CHECK if (sysctl_check_table(namespaces, header->ctl_table)) { kfree(header); @@ -1834,8 +1864,9 @@ void unregister_sysctl_table(struct ctl_table_header * header) spin_lock(&sysctl_lock); start_unregistering(header); + if (!--header->count) + kfree(header); spin_unlock(&sysctl_lock); - kfree(header); } void setup_sysctl_set(struct ctl_table_set *p, @@ -1869,6 +1900,10 @@ void setup_sysctl_set(struct ctl_table_set *p, { } +void sysctl_head_put(struct ctl_table_header *head) +{ +} + #endif /* CONFIG_SYSCTL */ /* -- cgit v1.2.2 From ae7edecc9b8810770a8e5cb9a466ea4bdcfa8401 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 06:33:31 -0400 Subject: [PATCH] sysctl: keep track of tree relationships In a sense, that's the heart of the series. It's based on the following property of the trees we are actually asked to add: they can be split into stem that is already covered by registered trees and crown that is entirely new. IOW, if a/b and a/c/d are introduced by our tree, then a/c is also introduced by it. That allows to associate tree and table entry with each node in the union; while directory nodes might be covered by many trees, only one will cover the node by its crown. And that will allow much saner logics for /proc/sys in the next patches. This patch introduces the data structures needed to keep track of that. When adding a sysctl table, we find a "parent" one. Which is to say, find the deepest node on its stem that already is present in one of the tables from our table set or its ancestor sets. That table will be our parent and that node in it - attachment point. Add our table to list anchored in parent, have it refer the parent and contents of attachment point. Also remember where its crown lives. Signed-off-by: Al Viro --- kernel/sysctl.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 60d9357e7172..c9a0af887033 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1680,6 +1680,52 @@ static __init int sysctl_init(void) core_initcall(sysctl_init); +static int is_branch_in(struct ctl_table *branch, struct ctl_table *table) +{ + struct ctl_table *p; + const char *s = branch->procname; + + /* branch should have named subdirectory as its first element */ + if (!s || !branch->child) + return 0; + + /* ... and nothing else */ + if (branch[1].procname || branch[1].ctl_name) + return 0; + + /* table should contain subdirectory with the same name */ + for (p = table; p->procname || p->ctl_name; p++) { + if (!p->child) + continue; + if (p->procname && strcmp(p->procname, s) == 0) + return 1; + } + return 0; +} + +/* see if attaching q to p would be an improvement */ +static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) +{ + struct ctl_table *to = p->ctl_table, *by = q->ctl_table; + int is_better = 0; + int not_in_parent = !p->attached_by; + + while (is_branch_in(by, to)) { + if (by == q->attached_by) + is_better = 1; + if (to == p->attached_by) + not_in_parent = 1; + by = by->child; + to = to->child; + } + + if (is_better && not_in_parent) { + q->attached_by = by; + q->attached_to = to; + q->parent = p; + } +} + /** * __register_sysctl_paths - register a sysctl hierarchy * @root: List of sysctl headers to register on @@ -1759,6 +1805,7 @@ struct ctl_table_header *__register_sysctl_paths( struct ctl_table_header *header; struct ctl_table *new, **prevp; unsigned int n, npath; + struct ctl_table_set *set; /* Count the path components */ for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath) @@ -1809,6 +1856,18 @@ struct ctl_table_header *__register_sysctl_paths( #endif spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); + header->attached_by = header->ctl_table; + header->attached_to = root_table; + header->parent = &root_table_header; + for (set = header->set; set; set = set->parent) { + struct ctl_table_header *p; + list_for_each_entry(p, &set->list, ctl_entry) { + if (p->unregistering) + continue; + try_attach(p, header); + } + } + header->parent->count++; list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); @@ -1864,6 +1923,10 @@ void unregister_sysctl_table(struct ctl_table_header * header) spin_lock(&sysctl_lock); start_unregistering(header); + if (!--header->parent->count) { + WARN_ON(1); + kfree(header->parent); + } if (!--header->count) kfree(header); spin_unlock(&sysctl_lock); -- cgit v1.2.2 From 9043476f726802f4b00c96d0c4f418dde48d1304 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 08:54:06 -0400 Subject: [PATCH] sanitize proc_sysctl * keep references to ctl_table_head and ctl_table in /proc/sys inodes * grab the former during operations, use the latter for access to entry if that succeeds * have ->d_compare() check if table should be seen for one who does lookup; that allows us to avoid flipping inodes - if we have the same name resolve to different things, we'll just keep several dentries and ->d_compare() will reject the wrong ones. * have ->lookup() and ->readdir() scan the table of our inode first, then walk all ctl_table_header and scan ->attached_by for those that are attached to our directory. * implement ->getattr(). * get rid of insane amounts of tree-walking * get rid of the need to know dentry in ->permission() and of the contortions induced by that. Signed-off-by: Al Viro --- kernel/sysctl.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c9a0af887033..ff5abcca5ddf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1932,6 +1932,21 @@ void unregister_sysctl_table(struct ctl_table_header * header) spin_unlock(&sysctl_lock); } +int sysctl_is_seen(struct ctl_table_header *p) +{ + struct ctl_table_set *set = p->set; + int res; + spin_lock(&sysctl_lock); + if (p->unregistering) + res = 0; + else if (!set->is_seen) + res = 1; + else + res = set->is_seen(set); + spin_unlock(&sysctl_lock); + return res; +} + void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)) -- cgit v1.2.2 From e6305c43eda10ebfd2ad9e35d6e172ccc7bb3695 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 21:03:57 -0400 Subject: [PATCH] sanitize ->permission() prototype * kill nameidata * argument; map the 3 bits in ->flags anybody cares about to new MAY_... ones and pass with the mask. * kill redundant gfs2_iop_permission() * sanitize ecryptfs_permission() * fix remaining places where ->permission() instances might barf on new MAY_... found in mask. The obvious next target in that direction is permission(9) folded fix for nfs_permission() breakage from Miklos Szeredi Signed-off-by: Al Viro --- kernel/sysctl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ff5abcca5ddf..911d846f0503 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1516,9 +1516,9 @@ static int do_sysctl_strategy(struct ctl_table_root *root, int op = 0, rc; if (oldval) - op |= 004; + op |= MAY_READ; if (newval) - op |= 002; + op |= MAY_WRITE; if (sysctl_perm(root, table, op)) return -EPERM; @@ -1560,7 +1560,7 @@ repeat: if (n == table->ctl_name) { int error; if (table->child) { - if (sysctl_perm(root, table, 001)) + if (sysctl_perm(root, table, MAY_EXEC)) return -EPERM; name++; nlen--; @@ -1635,7 +1635,7 @@ static int test_perm(int mode, int op) mode >>= 6; else if (in_egroup_p(0)) mode >>= 3; - if ((mode & op & 0007) == op) + if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) return 0; return -EACCES; } @@ -1645,7 +1645,7 @@ int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) int error; int mode; - error = security_sysctl(table, op); + error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC)); if (error) return error; -- cgit v1.2.2 From bfbcf034798b2ca45338cee5049b5694b7ddc865 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Jul 2008 06:31:22 +0100 Subject: lost sysctl fix try_attach() should walk into the matching subdirectory, not the first one... Signed-off-by: Al Viro Tested-by: Valdis.Kletnieks@vt.edu Tested-by: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 911d846f0503..fe4713347275 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1680,43 +1680,45 @@ static __init int sysctl_init(void) core_initcall(sysctl_init); -static int is_branch_in(struct ctl_table *branch, struct ctl_table *table) +static struct ctl_table *is_branch_in(struct ctl_table *branch, + struct ctl_table *table) { struct ctl_table *p; const char *s = branch->procname; /* branch should have named subdirectory as its first element */ if (!s || !branch->child) - return 0; + return NULL; /* ... and nothing else */ if (branch[1].procname || branch[1].ctl_name) - return 0; + return NULL; /* table should contain subdirectory with the same name */ for (p = table; p->procname || p->ctl_name; p++) { if (!p->child) continue; if (p->procname && strcmp(p->procname, s) == 0) - return 1; + return p; } - return 0; + return NULL; } /* see if attaching q to p would be an improvement */ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) { struct ctl_table *to = p->ctl_table, *by = q->ctl_table; + struct ctl_table *next; int is_better = 0; int not_in_parent = !p->attached_by; - while (is_branch_in(by, to)) { + while ((next = is_branch_in(by, to)) != NULL) { if (by == q->attached_by) is_better = 1; if (to == p->attached_by) not_in_parent = 1; by = by->child; - to = to->child; + to = next->child; } if (is_better && not_in_parent) { -- cgit v1.2.2