diff options
| author | Paul Menage <menage@google.com> | 2008-04-04 17:29:57 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-04 17:46:26 -0400 |
| commit | 8bab8dded67d026c39367bbd5e27d2f6c556c38e (patch) | |
| tree | d80f8f85f1da496c56bfa8575f0b59eba7c2ef55 | |
| parent | 3a143125ddc4e2e0ca1e67fb4bedd45c36e59cc7 (diff) | |
cgroups: add cgroup support for enabling controllers at boot time
The effects of cgroup_disable=foo are:
- foo isn't auto-mounted if you mount all cgroups in a single hierarchy
- foo isn't visible as an individually mountable subsystem
As a result there will only ever be one call to foo->create(), at init time;
all processes will stay in this group, and the group will never be mounted on
a visible hierarchy. Any additional effects (e.g. not allocating metadata)
are up to the foo subsystem.
This doesn't handle early_init subsystems (their "disabled" bit isn't set),
but it could easily be extended to do so if any of the early_init systems
wanted it - I think it would just involve some nastier parameter processing
since it would occur before the command-line argument parser had been run.
Hugh said:
Ballpark figures, I'm trying to get this question out rather than
processing the exact numbers: CONFIG_CGROUP_MEM_RES_CTLR adds 15% overhead
to the affected paths, booting with cgroup_disable=memory cuts that back to
1% overhead (due to slightly bigger struct page).
I'm no expert on distros, they may have no interest whatever in
CONFIG_CGROUP_MEM_RES_CTLR=y; and the rest of us can easily build with or
without it, or apply the cgroup_disable=memory patches.
Unix bench's execl test result on x86_64 was
== just after boot without mounting any cgroup fs.==
mem_cgroup=off : Execl Throughput 43.0 3150.1 732.6
mem_cgroup=on : Execl Throughput 43.0 2932.6 682.0
==
[lizf@cn.fujitsu.com: fix boot option parsing]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Sudhir Kumar <skumar@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | Documentation/kernel-parameters.txt | 4 | ||||
| -rw-r--r-- | include/linux/cgroup.h | 1 | ||||
| -rw-r--r-- | kernel/cgroup.c | 42 |
3 files changed, 42 insertions, 5 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4cd1a5da80a4..32e9297ef747 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -375,6 +375,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
| 375 | ccw_timeout_log [S390] | 375 | ccw_timeout_log [S390] |
| 376 | See Documentation/s390/CommonIO for details. | 376 | See Documentation/s390/CommonIO for details. |
| 377 | 377 | ||
| 378 | cgroup_disable= [KNL] Disable a particular controller | ||
| 379 | Format: {name of the controller(s) to disable} | ||
| 380 | {Currently supported controllers - "memory"} | ||
| 381 | |||
| 378 | checkreqprot [SELINUX] Set initial checkreqprot flag value. | 382 | checkreqprot [SELINUX] Set initial checkreqprot flag value. |
| 379 | Format: { "0" | "1" } | 383 | Format: { "0" | "1" } |
| 380 | See security/selinux/Kconfig help text. | 384 | See security/selinux/Kconfig help text. |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 028ba3b523b1..a6a6035a4e1e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
| @@ -256,6 +256,7 @@ struct cgroup_subsys { | |||
| 256 | void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); | 256 | void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); |
| 257 | int subsys_id; | 257 | int subsys_id; |
| 258 | int active; | 258 | int active; |
| 259 | int disabled; | ||
| 259 | int early_init; | 260 | int early_init; |
| 260 | #define MAX_CGROUP_TYPE_NAMELEN 32 | 261 | #define MAX_CGROUP_TYPE_NAMELEN 32 |
| 261 | const char *name; | 262 | const char *name; |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 53d86b4b0ce0..62f1a5231fe9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -782,7 +782,14 @@ static int parse_cgroupfs_options(char *data, | |||
| 782 | if (!*token) | 782 | if (!*token) |
| 783 | return -EINVAL; | 783 | return -EINVAL; |
| 784 | if (!strcmp(token, "all")) { | 784 | if (!strcmp(token, "all")) { |
| 785 | opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; | 785 | /* Add all non-disabled subsystems */ |
| 786 | int i; | ||
| 787 | opts->subsys_bits = 0; | ||
| 788 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
| 789 | struct cgroup_subsys *ss = subsys[i]; | ||
| 790 | if (!ss->disabled) | ||
| 791 | opts->subsys_bits |= 1ul << i; | ||
| 792 | } | ||
| 786 | } else if (!strcmp(token, "noprefix")) { | 793 | } else if (!strcmp(token, "noprefix")) { |
| 787 | set_bit(ROOT_NOPREFIX, &opts->flags); | 794 | set_bit(ROOT_NOPREFIX, &opts->flags); |
| 788 | } else if (!strncmp(token, "release_agent=", 14)) { | 795 | } else if (!strncmp(token, "release_agent=", 14)) { |
| @@ -800,7 +807,8 @@ static int parse_cgroupfs_options(char *data, | |||
| 800 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 807 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 801 | ss = subsys[i]; | 808 | ss = subsys[i]; |
| 802 | if (!strcmp(token, ss->name)) { | 809 | if (!strcmp(token, ss->name)) { |
| 803 | set_bit(i, &opts->subsys_bits); | 810 | if (!ss->disabled) |
| 811 | set_bit(i, &opts->subsys_bits); | ||
| 804 | break; | 812 | break; |
| 805 | } | 813 | } |
| 806 | } | 814 | } |
| @@ -2600,13 +2608,13 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) | |||
| 2600 | { | 2608 | { |
| 2601 | int i; | 2609 | int i; |
| 2602 | 2610 | ||
| 2603 | seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n"); | 2611 | seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); |
| 2604 | mutex_lock(&cgroup_mutex); | 2612 | mutex_lock(&cgroup_mutex); |
| 2605 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 2613 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 2606 | struct cgroup_subsys *ss = subsys[i]; | 2614 | struct cgroup_subsys *ss = subsys[i]; |
| 2607 | seq_printf(m, "%s\t%lu\t%d\n", | 2615 | seq_printf(m, "%s\t%lu\t%d\t%d\n", |
| 2608 | ss->name, ss->root->subsys_bits, | 2616 | ss->name, ss->root->subsys_bits, |
| 2609 | ss->root->number_of_cgroups); | 2617 | ss->root->number_of_cgroups, !ss->disabled); |
| 2610 | } | 2618 | } |
| 2611 | mutex_unlock(&cgroup_mutex); | 2619 | mutex_unlock(&cgroup_mutex); |
| 2612 | return 0; | 2620 | return 0; |
| @@ -3010,3 +3018,27 @@ static void cgroup_release_agent(struct work_struct *work) | |||
| 3010 | spin_unlock(&release_list_lock); | 3018 | spin_unlock(&release_list_lock); |
| 3011 | mutex_unlock(&cgroup_mutex); | 3019 | mutex_unlock(&cgroup_mutex); |
| 3012 | } | 3020 | } |
| 3021 | |||
| 3022 | static int __init cgroup_disable(char *str) | ||
| 3023 | { | ||
| 3024 | int i; | ||
| 3025 | char *token; | ||
| 3026 | |||
| 3027 | while ((token = strsep(&str, ",")) != NULL) { | ||
| 3028 | if (!*token) | ||
| 3029 | continue; | ||
| 3030 | |||
| 3031 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
| 3032 | struct cgroup_subsys *ss = subsys[i]; | ||
| 3033 | |||
| 3034 | if (!strcmp(token, ss->name)) { | ||
| 3035 | ss->disabled = 1; | ||
| 3036 | printk(KERN_INFO "Disabling %s control group" | ||
| 3037 | " subsystem\n", ss->name); | ||
| 3038 | break; | ||
| 3039 | } | ||
| 3040 | } | ||
| 3041 | } | ||
| 3042 | return 1; | ||
| 3043 | } | ||
| 3044 | __setup("cgroup_disable=", cgroup_disable); | ||
