diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-12 17:08:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-12 17:08:19 -0400 |
commit | 7b47a9e7c8f672b6fb0b77fca11a63a8a77f5a91 (patch) | |
tree | cf05645120ba2323c36acefdea6e62addf320f8c /fs/proc | |
parent | dbc2fba3fc46084f502aec53183995a632998dcd (diff) | |
parent | c99c2171fc61476afac0dfb59fb2c447a01fb1e0 (diff) |
Merge branch 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs mount infrastructure updates from Al Viro:
"The rest of core infrastructure; no new syscalls in that pile, but the
old parts are switched to new infrastructure. At that point
conversions of individual filesystems can happen independently; some
are done here (afs, cgroup, procfs, etc.), there's also a large series
outside of that pile dealing with NFS (quite a bit of option-parsing
stuff is getting used there - it's one of the most convoluted
filesystems in terms of mount-related logic), but the NFS bits are
fodder for the next cycle.
It has been seriously simplified since the last cycle; documentation is
probably the weakest bit at the moment - I considered dropping the
commit introducing Documentation/filesystems/mount_api.txt (cutting
the size increase by a quarter ;-), but decided that it would be better
to fix it up after -rc1 instead.
That pile allows followup work to be done in independent branches, which
should make life much easier for the next cycle. The fs/super.c size
increase is unpleasant; there's a followup series that allows it to be
shrunk considerably, but I decided to leave that until the next
cycle"
* 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (41 commits)
afs: Use fs_context to pass parameters over automount
afs: Add fs_context support
vfs: Add some logging to the core users of the fs_context log
vfs: Implement logging through fs_context
vfs: Provide documentation for new mount API
vfs: Remove kern_mount_data()
hugetlbfs: Convert to fs_context
cpuset: Use fs_context
kernfs, sysfs, cgroup, intel_rdt: Support fs_context
cgroup: store a reference to cgroup_ns into cgroup_fs_context
cgroup1_get_tree(): separate "get cgroup_root to use" into a separate helper
cgroup_do_mount(): massage calling conventions
cgroup: stash cgroup_root reference into cgroup_fs_context
cgroup2: switch to option-by-option parsing
cgroup1: switch to option-by-option parsing
cgroup: take options parsing into ->parse_monolithic()
cgroup: fold cgroup1_mount() into cgroup1_get_tree()
cgroup: start switching to fs_context
ipc: Convert mqueue fs to fs_context
proc: Add fs_context support to procfs
...
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/inode.c | 52 | ||||
-rw-r--r-- | fs/proc/internal.h | 5 | ||||
-rw-r--r-- | fs/proc/root.c | 236 |
3 files changed, 176 insertions, 117 deletions
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index da649ccd6804..fc7e38def174 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <linux/seq_file.h> | 24 | #include <linux/seq_file.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/mount.h> | 26 | #include <linux/mount.h> |
27 | #include <linux/magic.h> | ||
28 | 27 | ||
29 | #include <linux/uaccess.h> | 28 | #include <linux/uaccess.h> |
30 | 29 | ||
@@ -122,13 +121,12 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) | |||
122 | return 0; | 121 | return 0; |
123 | } | 122 | } |
124 | 123 | ||
125 | static const struct super_operations proc_sops = { | 124 | const struct super_operations proc_sops = { |
126 | .alloc_inode = proc_alloc_inode, | 125 | .alloc_inode = proc_alloc_inode, |
127 | .destroy_inode = proc_destroy_inode, | 126 | .destroy_inode = proc_destroy_inode, |
128 | .drop_inode = generic_delete_inode, | 127 | .drop_inode = generic_delete_inode, |
129 | .evict_inode = proc_evict_inode, | 128 | .evict_inode = proc_evict_inode, |
130 | .statfs = simple_statfs, | 129 | .statfs = simple_statfs, |
131 | .remount_fs = proc_remount, | ||
132 | .show_options = proc_show_options, | 130 | .show_options = proc_show_options, |
133 | }; | 131 | }; |
134 | 132 | ||
@@ -488,51 +486,3 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | |||
488 | pde_put(de); | 486 | pde_put(de); |
489 | return inode; | 487 | return inode; |
490 | } | 488 | } |
491 | |||
492 | int proc_fill_super(struct super_block *s, void *data, int silent) | ||
493 | { | ||
494 | struct pid_namespace *ns = get_pid_ns(s->s_fs_info); | ||
495 | struct inode *root_inode; | ||
496 | int ret; | ||
497 | |||
498 | if (!proc_parse_options(data, ns)) | ||
499 | return -EINVAL; | ||
500 | |||
501 | /* User space would break if executables or devices appear on proc */ | ||
502 | s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; | ||
503 | s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; | ||
504 | s->s_blocksize = 1024; | ||
505 | s->s_blocksize_bits = 10; | ||
506 | s->s_magic = PROC_SUPER_MAGIC; | ||
507 | s->s_op = &proc_sops; | ||
508 | s->s_time_gran = 1; | ||
509 | |||
510 | /* | ||
511 | * procfs isn't actually a stacking filesystem; however, there is | ||
512 | * too much magic going on inside it to permit stacking things on | ||
513 | * top of it | ||
514 | */ | ||
515 | s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; | ||
516 | |||
517 | /* procfs dentries and inodes don't require IO to create */ | ||
518 | s->s_shrink.seeks = 0; | ||
519 | |||
520 | pde_get(&proc_root); | ||
521 | root_inode = proc_get_inode(s, &proc_root); | ||
522 | if (!root_inode) { | ||
523 | pr_err("proc_fill_super: get root inode failed\n"); | ||
524 | return -ENOMEM; | ||
525 | } | ||
526 | |||
527 | s->s_root = d_make_root(root_inode); | ||
528 | if (!s->s_root) { | ||
529 | pr_err("proc_fill_super: allocate dentry failed\n"); | ||
530 | return -ENOMEM; | ||
531 | } | ||
532 | |||
533 | ret = proc_setup_self(s); | ||
534 | if (ret) { | ||
535 | return ret; | ||
536 | } | ||
537 | return proc_setup_thread_self(s); | ||
538 | } | ||
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index ea575375f210..d1671e97f7fe 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -207,13 +207,12 @@ struct pde_opener { | |||
207 | struct completion *c; | 207 | struct completion *c; |
208 | } __randomize_layout; | 208 | } __randomize_layout; |
209 | extern const struct inode_operations proc_link_inode_operations; | 209 | extern const struct inode_operations proc_link_inode_operations; |
210 | |||
211 | extern const struct inode_operations proc_pid_link_inode_operations; | 210 | extern const struct inode_operations proc_pid_link_inode_operations; |
211 | extern const struct super_operations proc_sops; | ||
212 | 212 | ||
213 | void proc_init_kmemcache(void); | 213 | void proc_init_kmemcache(void); |
214 | void set_proc_pid_nlink(void); | 214 | void set_proc_pid_nlink(void); |
215 | extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | 215 | extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); |
216 | extern int proc_fill_super(struct super_block *, void *data, int flags); | ||
217 | extern void proc_entry_rundown(struct proc_dir_entry *); | 216 | extern void proc_entry_rundown(struct proc_dir_entry *); |
218 | 217 | ||
219 | /* | 218 | /* |
@@ -271,10 +270,8 @@ static inline void proc_tty_init(void) {} | |||
271 | * root.c | 270 | * root.c |
272 | */ | 271 | */ |
273 | extern struct proc_dir_entry proc_root; | 272 | extern struct proc_dir_entry proc_root; |
274 | extern int proc_parse_options(char *options, struct pid_namespace *pid); | ||
275 | 273 | ||
276 | extern void proc_self_init(void); | 274 | extern void proc_self_init(void); |
277 | extern int proc_remount(struct super_block *, int *, char *); | ||
278 | 275 | ||
279 | /* | 276 | /* |
280 | * task_[no]mmu.c | 277 | * task_[no]mmu.c |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 621e6ec322ca..8b145e7b9661 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -19,86 +19,178 @@ | |||
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/user_namespace.h> | 21 | #include <linux/user_namespace.h> |
22 | #include <linux/fs_context.h> | ||
22 | #include <linux/mount.h> | 23 | #include <linux/mount.h> |
23 | #include <linux/pid_namespace.h> | 24 | #include <linux/pid_namespace.h> |
24 | #include <linux/parser.h> | 25 | #include <linux/fs_parser.h> |
25 | #include <linux/cred.h> | 26 | #include <linux/cred.h> |
27 | #include <linux/magic.h> | ||
28 | #include <linux/slab.h> | ||
26 | 29 | ||
27 | #include "internal.h" | 30 | #include "internal.h" |
28 | 31 | ||
29 | enum { | 32 | struct proc_fs_context { |
30 | Opt_gid, Opt_hidepid, Opt_err, | 33 | struct pid_namespace *pid_ns; |
34 | unsigned int mask; | ||
35 | int hidepid; | ||
36 | int gid; | ||
31 | }; | 37 | }; |
32 | 38 | ||
33 | static const match_table_t tokens = { | 39 | enum proc_param { |
34 | {Opt_hidepid, "hidepid=%u"}, | 40 | Opt_gid, |
35 | {Opt_gid, "gid=%u"}, | 41 | Opt_hidepid, |
36 | {Opt_err, NULL}, | ||
37 | }; | 42 | }; |
38 | 43 | ||
39 | int proc_parse_options(char *options, struct pid_namespace *pid) | 44 | static const struct fs_parameter_spec proc_param_specs[] = { |
45 | fsparam_u32("gid", Opt_gid), | ||
46 | fsparam_u32("hidepid", Opt_hidepid), | ||
47 | {} | ||
48 | }; | ||
49 | |||
50 | static const struct fs_parameter_description proc_fs_parameters = { | ||
51 | .name = "proc", | ||
52 | .specs = proc_param_specs, | ||
53 | }; | ||
54 | |||
55 | static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) | ||
40 | { | 56 | { |
41 | char *p; | 57 | struct proc_fs_context *ctx = fc->fs_private; |
42 | substring_t args[MAX_OPT_ARGS]; | 58 | struct fs_parse_result result; |
43 | int option; | 59 | int opt; |
44 | 60 | ||
45 | if (!options) | 61 | opt = fs_parse(fc, &proc_fs_parameters, param, &result); |
46 | return 1; | 62 | if (opt < 0) |
47 | 63 | return opt; | |
48 | while ((p = strsep(&options, ",")) != NULL) { | 64 | |
49 | int token; | 65 | switch (opt) { |
50 | if (!*p) | 66 | case Opt_gid: |
51 | continue; | 67 | ctx->gid = result.uint_32; |
52 | 68 | break; | |
53 | args[0].to = args[0].from = NULL; | 69 | |
54 | token = match_token(p, tokens, args); | 70 | case Opt_hidepid: |
55 | switch (token) { | 71 | ctx->hidepid = result.uint_32; |
56 | case Opt_gid: | 72 | if (ctx->hidepid < HIDEPID_OFF || |
57 | if (match_int(&args[0], &option)) | 73 | ctx->hidepid > HIDEPID_INVISIBLE) |
58 | return 0; | 74 | return invalf(fc, "proc: hidepid value must be between 0 and 2.\n"); |
59 | pid->pid_gid = make_kgid(current_user_ns(), option); | 75 | break; |
60 | break; | 76 | |
61 | case Opt_hidepid: | 77 | default: |
62 | if (match_int(&args[0], &option)) | 78 | return -EINVAL; |
63 | return 0; | ||
64 | if (option < HIDEPID_OFF || | ||
65 | option > HIDEPID_INVISIBLE) { | ||
66 | pr_err("proc: hidepid value must be between 0 and 2.\n"); | ||
67 | return 0; | ||
68 | } | ||
69 | pid->hide_pid = option; | ||
70 | break; | ||
71 | default: | ||
72 | pr_err("proc: unrecognized mount option \"%s\" " | ||
73 | "or missing value\n", p); | ||
74 | return 0; | ||
75 | } | ||
76 | } | 79 | } |
77 | 80 | ||
78 | return 1; | 81 | ctx->mask |= 1 << opt; |
82 | return 0; | ||
79 | } | 83 | } |
80 | 84 | ||
81 | int proc_remount(struct super_block *sb, int *flags, char *data) | 85 | static void proc_apply_options(struct super_block *s, |
86 | struct fs_context *fc, | ||
87 | struct pid_namespace *pid_ns, | ||
88 | struct user_namespace *user_ns) | ||
82 | { | 89 | { |
90 | struct proc_fs_context *ctx = fc->fs_private; | ||
91 | |||
92 | if (ctx->mask & (1 << Opt_gid)) | ||
93 | pid_ns->pid_gid = make_kgid(user_ns, ctx->gid); | ||
94 | if (ctx->mask & (1 << Opt_hidepid)) | ||
95 | pid_ns->hide_pid = ctx->hidepid; | ||
96 | } | ||
97 | |||
98 | static int proc_fill_super(struct super_block *s, struct fs_context *fc) | ||
99 | { | ||
100 | struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info); | ||
101 | struct inode *root_inode; | ||
102 | int ret; | ||
103 | |||
104 | proc_apply_options(s, fc, pid_ns, current_user_ns()); | ||
105 | |||
106 | /* User space would break if executables or devices appear on proc */ | ||
107 | s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; | ||
108 | s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; | ||
109 | s->s_blocksize = 1024; | ||
110 | s->s_blocksize_bits = 10; | ||
111 | s->s_magic = PROC_SUPER_MAGIC; | ||
112 | s->s_op = &proc_sops; | ||
113 | s->s_time_gran = 1; | ||
114 | |||
115 | /* | ||
116 | * procfs isn't actually a stacking filesystem; however, there is | ||
117 | * too much magic going on inside it to permit stacking things on | ||
118 | * top of it | ||
119 | */ | ||
120 | s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; | ||
121 | |||
122 | /* procfs dentries and inodes don't require IO to create */ | ||
123 | s->s_shrink.seeks = 0; | ||
124 | |||
125 | pde_get(&proc_root); | ||
126 | root_inode = proc_get_inode(s, &proc_root); | ||
127 | if (!root_inode) { | ||
128 | pr_err("proc_fill_super: get root inode failed\n"); | ||
129 | return -ENOMEM; | ||
130 | } | ||
131 | |||
132 | s->s_root = d_make_root(root_inode); | ||
133 | if (!s->s_root) { | ||
134 | pr_err("proc_fill_super: allocate dentry failed\n"); | ||
135 | return -ENOMEM; | ||
136 | } | ||
137 | |||
138 | ret = proc_setup_self(s); | ||
139 | if (ret) { | ||
140 | return ret; | ||
141 | } | ||
142 | return proc_setup_thread_self(s); | ||
143 | } | ||
144 | |||
145 | static int proc_reconfigure(struct fs_context *fc) | ||
146 | { | ||
147 | struct super_block *sb = fc->root->d_sb; | ||
83 | struct pid_namespace *pid = sb->s_fs_info; | 148 | struct pid_namespace *pid = sb->s_fs_info; |
84 | 149 | ||
85 | sync_filesystem(sb); | 150 | sync_filesystem(sb); |
86 | return !proc_parse_options(data, pid); | 151 | |
152 | proc_apply_options(sb, fc, pid, current_user_ns()); | ||
153 | return 0; | ||
87 | } | 154 | } |
88 | 155 | ||
89 | static struct dentry *proc_mount(struct file_system_type *fs_type, | 156 | static int proc_get_tree(struct fs_context *fc) |
90 | int flags, const char *dev_name, void *data) | ||
91 | { | 157 | { |
92 | struct pid_namespace *ns; | 158 | struct proc_fs_context *ctx = fc->fs_private; |
93 | 159 | ||
94 | if (flags & SB_KERNMOUNT) { | 160 | put_user_ns(fc->user_ns); |
95 | ns = data; | 161 | fc->user_ns = get_user_ns(ctx->pid_ns->user_ns); |
96 | data = NULL; | 162 | fc->s_fs_info = ctx->pid_ns; |
97 | } else { | 163 | return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super); |
98 | ns = task_active_pid_ns(current); | 164 | } |
99 | } | ||
100 | 165 | ||
101 | return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super); | 166 | static void proc_fs_context_free(struct fs_context *fc) |
167 | { | ||
168 | struct proc_fs_context *ctx = fc->fs_private; | ||
169 | |||
170 | if (ctx->pid_ns) | ||
171 | put_pid_ns(ctx->pid_ns); | ||
172 | kfree(ctx); | ||
173 | } | ||
174 | |||
175 | static const struct fs_context_operations proc_fs_context_ops = { | ||
176 | .free = proc_fs_context_free, | ||
177 | .parse_param = proc_parse_param, | ||
178 | .get_tree = proc_get_tree, | ||
179 | .reconfigure = proc_reconfigure, | ||
180 | }; | ||
181 | |||
182 | static int proc_init_fs_context(struct fs_context *fc) | ||
183 | { | ||
184 | struct proc_fs_context *ctx; | ||
185 | |||
186 | ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL); | ||
187 | if (!ctx) | ||
188 | return -ENOMEM; | ||
189 | |||
190 | ctx->pid_ns = get_pid_ns(task_active_pid_ns(current)); | ||
191 | fc->fs_private = ctx; | ||
192 | fc->ops = &proc_fs_context_ops; | ||
193 | return 0; | ||
102 | } | 194 | } |
103 | 195 | ||
104 | static void proc_kill_sb(struct super_block *sb) | 196 | static void proc_kill_sb(struct super_block *sb) |
@@ -115,10 +207,11 @@ static void proc_kill_sb(struct super_block *sb) | |||
115 | } | 207 | } |
116 | 208 | ||
117 | static struct file_system_type proc_fs_type = { | 209 | static struct file_system_type proc_fs_type = { |
118 | .name = "proc", | 210 | .name = "proc", |
119 | .mount = proc_mount, | 211 | .init_fs_context = proc_init_fs_context, |
120 | .kill_sb = proc_kill_sb, | 212 | .parameters = &proc_fs_parameters, |
121 | .fs_flags = FS_USERNS_MOUNT, | 213 | .kill_sb = proc_kill_sb, |
214 | .fs_flags = FS_USERNS_MOUNT, | ||
122 | }; | 215 | }; |
123 | 216 | ||
124 | void __init proc_root_init(void) | 217 | void __init proc_root_init(void) |
@@ -156,7 +249,7 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr | |||
156 | { | 249 | { |
157 | if (!proc_pid_lookup(dentry, flags)) | 250 | if (!proc_pid_lookup(dentry, flags)) |
158 | return NULL; | 251 | return NULL; |
159 | 252 | ||
160 | return proc_lookup(dir, dentry, flags); | 253 | return proc_lookup(dir, dentry, flags); |
161 | } | 254 | } |
162 | 255 | ||
@@ -209,9 +302,28 @@ struct proc_dir_entry proc_root = { | |||
209 | 302 | ||
210 | int pid_ns_prepare_proc(struct pid_namespace *ns) | 303 | int pid_ns_prepare_proc(struct pid_namespace *ns) |
211 | { | 304 | { |
305 | struct proc_fs_context *ctx; | ||
306 | struct fs_context *fc; | ||
212 | struct vfsmount *mnt; | 307 | struct vfsmount *mnt; |
213 | 308 | ||
214 | mnt = kern_mount_data(&proc_fs_type, ns); | 309 | fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT); |
310 | if (IS_ERR(fc)) | ||
311 | return PTR_ERR(fc); | ||
312 | |||
313 | if (fc->user_ns != ns->user_ns) { | ||
314 | put_user_ns(fc->user_ns); | ||
315 | fc->user_ns = get_user_ns(ns->user_ns); | ||
316 | } | ||
317 | |||
318 | ctx = fc->fs_private; | ||
319 | if (ctx->pid_ns != ns) { | ||
320 | put_pid_ns(ctx->pid_ns); | ||
321 | get_pid_ns(ns); | ||
322 | ctx->pid_ns = ns; | ||
323 | } | ||
324 | |||
325 | mnt = fc_mount(fc); | ||
326 | put_fs_context(fc); | ||
215 | if (IS_ERR(mnt)) | 327 | if (IS_ERR(mnt)) |
216 | return PTR_ERR(mnt); | 328 | return PTR_ERR(mnt); |
217 | 329 | ||