aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-12 17:08:19 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-12 17:08:19 -0400
commit7b47a9e7c8f672b6fb0b77fca11a63a8a77f5a91 (patch)
treecf05645120ba2323c36acefdea6e62addf320f8c
parentdbc2fba3fc46084f502aec53183995a632998dcd (diff)
parentc99c2171fc61476afac0dfb59fb2c447a01fb1e0 (diff)
Merge branch 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs mount infrastructure updates from Al Viro: "The rest of core infrastructure; no new syscalls in that pile, but the old parts are switched to new infrastructure. At that point conversions of individual filesystems can happen independently; some are done here (afs, cgroup, procfs, etc.), there's also a large series outside of that pile dealing with NFS (quite a bit of option-parsing stuff is getting used there - it's one of the most convoluted filesystems in terms of mount-related logics), but NFS bits are the next cycle fodder. It got seriously simplified since the last cycle; documentation is probably the weakest bit at the moment - I considered dropping the commit introducing Documentation/filesystems/mount_api.txt (cutting the size increase by quarter ;-), but decided that it would be better to fix it up after -rc1 instead. That pile allows to do followup work in independent branches, which should make life much easier for the next cycle. fs/super.c size increase is unpleasant; there's a followup series that allows to shrink it considerably, but I decided to leave that until the next cycle" * 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (41 commits) afs: Use fs_context to pass parameters over automount afs: Add fs_context support vfs: Add some logging to the core users of the fs_context log vfs: Implement logging through fs_context vfs: Provide documentation for new mount API vfs: Remove kern_mount_data() hugetlbfs: Convert to fs_context cpuset: Use fs_context kernfs, sysfs, cgroup, intel_rdt: Support fs_context cgroup: store a reference to cgroup_ns into cgroup_fs_context cgroup1_get_tree(): separate "get cgroup_root to use" into a separate helper cgroup_do_mount(): massage calling conventions cgroup: stash cgroup_root reference into cgroup_fs_context cgroup2: switch to option-by-option parsing cgroup1: switch to option-by-option parsing cgroup: take options parsing into ->parse_monolithic() cgroup: fold cgroup1_mount() into 
cgroup1_get_tree() cgroup: start switching to fs_context ipc: Convert mqueue fs to fs_context proc: Add fs_context support to procfs ...
-rw-r--r--Documentation/filesystems/mount_api.txt709
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h16
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c185
-rw-r--r--fs/Kconfig7
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/internal.h9
-rw-r--r--fs/afs/mntpt.c149
-rw-r--r--fs/afs/super.c430
-rw-r--r--fs/afs/volume.c4
-rw-r--r--fs/filesystems.c4
-rw-r--r--fs/fs_context.c642
-rw-r--r--fs/fs_parser.c447
-rw-r--r--fs/hugetlbfs/inode.c358
-rw-r--r--fs/internal.h13
-rw-r--r--fs/kernfs/kernfs-internal.h1
-rw-r--r--fs/kernfs/mount.c119
-rw-r--r--fs/mount.h5
-rw-r--r--fs/namei.c4
-rw-r--r--fs/namespace.c395
-rw-r--r--fs/pnode.c5
-rw-r--r--fs/pnode.h3
-rw-r--r--fs/proc/inode.c52
-rw-r--r--fs/proc/internal.h5
-rw-r--r--fs/proc/root.c236
-rw-r--r--fs/super.c344
-rw-r--r--fs/sysfs/mount.c73
-rw-r--r--include/linux/errno.h1
-rw-r--r--include/linux/fs.h14
-rw-r--r--include/linux/fs_context.h188
-rw-r--r--include/linux/fs_parser.h151
-rw-r--r--include/linux/kernfs.h39
-rw-r--r--include/linux/lsm_hooks.h21
-rw-r--r--include/linux/mount.h3
-rw-r--r--include/linux/security.h18
-rw-r--r--ipc/mqueue.c94
-rw-r--r--ipc/namespace.c2
-rw-r--r--kernel/cgroup/cgroup-internal.h49
-rw-r--r--kernel/cgroup/cgroup-v1.c394
-rw-r--r--kernel/cgroup/cgroup.c223
-rw-r--r--kernel/cgroup/cpuset.c56
-rw-r--r--security/security.c10
-rw-r--r--security/selinux/hooks.c88
-rw-r--r--security/selinux/include/security.h10
-rw-r--r--security/smack/smack.h19
-rw-r--r--security/smack/smack_lsm.c92
45 files changed, 4357 insertions, 1332 deletions
diff --git a/Documentation/filesystems/mount_api.txt b/Documentation/filesystems/mount_api.txt
new file mode 100644
index 000000000000..944d1965e917
--- /dev/null
+++ b/Documentation/filesystems/mount_api.txt
@@ -0,0 +1,709 @@
1 ====================
2 FILESYSTEM MOUNT API
3 ====================
4
5CONTENTS
6
7 (1) Overview.
8
9 (2) The filesystem context.
10
11 (3) The filesystem context operations.
12
13 (4) Filesystem context security.
14
15 (5) VFS filesystem context operations.
16
17 (6) Parameter description.
18
19 (7) Parameter helper functions.
20
21
22========
23OVERVIEW
24========
25
26The creation of new mounts is now to be done in a multistep process:
27
28 (1) Create a filesystem context.
29
30 (2) Parse the parameters and attach them to the context. Parameters are
31 expected to be passed individually from userspace, though legacy binary
32 parameters can also be handled.
33
34 (3) Validate and pre-process the context.
35
36 (4) Get or create a superblock and mountable root.
37
38 (5) Perform the mount.
39
40 (6) Return an error message attached to the context.
41
42 (7) Destroy the context.
43
44To support this, the file_system_type struct gains a new field:
45
46 int (*init_fs_context)(struct fs_context *fc);
47
48which is invoked to set up the filesystem-specific parts of a filesystem
49context, including the additional space.
50
51Note that security initialisation is done *after* the filesystem is called so
52that the namespaces may be adjusted first.
53
54
55======================
56THE FILESYSTEM CONTEXT
57======================
58
59The creation and reconfiguration of a superblock is governed by a filesystem
60context. This is represented by the fs_context structure:
61
62 struct fs_context {
63 const struct fs_context_operations *ops;
64 struct file_system_type *fs_type;
65 void *fs_private;
66 struct dentry *root;
67 struct user_namespace *user_ns;
68 struct net *net_ns;
69 const struct cred *cred;
70 char *source;
71 char *subtype;
72 void *security;
73 void *s_fs_info;
74 unsigned int sb_flags;
75 unsigned int sb_flags_mask;
76 enum fs_context_purpose purpose:8;
77 bool sloppy:1;
78 bool silent:1;
79 ...
80 };
81
82The fs_context fields are as follows:
83
84 (*) const struct fs_context_operations *ops
85
86 These are operations that can be done on a filesystem context (see
87 below). This must be set by the ->init_fs_context() file_system_type
88 operation.
89
90 (*) struct file_system_type *fs_type
91
92 A pointer to the file_system_type of the filesystem that is being
93 constructed or reconfigured. This retains a reference on the type owner.
94
95 (*) void *fs_private
96
97 A pointer to the file system's private data. This is where the filesystem
98 will need to store any options it parses.
99
100 (*) struct dentry *root
101
102 A pointer to the root of the mountable tree (and indirectly, the
103 superblock thereof). This is filled in by the ->get_tree() op. If this
104 is set, an active reference on root->d_sb must also be held.
105
106 (*) struct user_namespace *user_ns
107 (*) struct net *net_ns
108
109 These are a subset of the namespaces in use by the invoking process. They
110 retain references on each namespace. The subscribed namespaces may be
111 replaced by the filesystem to reflect other sources, such as the parent
112 mount superblock on an automount.
113
114 (*) const struct cred *cred
115
116 The mounter's credentials. This retains a reference on the credentials.
117
118 (*) char *source
119
120 This specifies the source. It may be a block device (e.g. /dev/sda1) or
121 something more exotic, such as the "host:/path" that NFS desires.
122
123 (*) char *subtype
124
125 This is a string to be added to the type displayed in /proc/mounts to
126 qualify it (used by FUSE). This is available for the filesystem to set if
127 desired.
128
129 (*) void *security
130
131 A place for the LSMs to hang their security data for the superblock. The
132 relevant security operations are described below.
133
134 (*) void *s_fs_info
135
136 The proposed s_fs_info for a new superblock, set in the superblock by
137 sget_fc(). This can be used to distinguish superblocks.
138
139 (*) unsigned int sb_flags
140 (*) unsigned int sb_flags_mask
141
142 Which SB_* flag bits are to be set/cleared in super_block::s_flags.
143
144 (*) enum fs_context_purpose
145
146 This indicates the purpose for which the context is intended. The
147 available values are:
148
149 FS_CONTEXT_FOR_MOUNT -- New superblock for explicit mount
150 FS_CONTEXT_FOR_SUBMOUNT -- New automatic submount of extant mount
151 FS_CONTEXT_FOR_RECONFIGURE -- Change an existing mount
152
153 (*) bool sloppy
154 (*) bool silent
155
156 These are set if the sloppy or silent mount options are given.
157
158 [NOTE] sloppy is probably unnecessary when userspace passes over one
159 option at a time since the error can just be ignored if userspace deems it
160 to be unimportant.
161
162 [NOTE] silent is probably redundant with sb_flags & SB_SILENT.
163
164The mount context is created by calling vfs_new_fs_context() or
165vfs_dup_fs_context() and is destroyed with put_fs_context(). Note that the
166structure is not refcounted.
167
168VFS, security and filesystem mount options are set individually with
169vfs_parse_mount_option(). Options provided by the old mount(2) system call as
170a page of data can be parsed with generic_parse_monolithic().
171
172When mounting, the filesystem is allowed to take data from any of the pointers
173and attach it to the superblock (or whatever), provided it clears the pointer
174in the mount context.
175
176The filesystem is also allowed to allocate resources and pin them with the
177mount context. For instance, NFS might pin the appropriate protocol version
178module.
179
180
181=================================
182THE FILESYSTEM CONTEXT OPERATIONS
183=================================
184
185The filesystem context points to a table of operations:
186
187 struct fs_context_operations {
188 void (*free)(struct fs_context *fc);
189 int (*dup)(struct fs_context *fc, struct fs_context *src_fc);
190 int (*parse_param)(struct fs_context *fc,
191 struct fs_parameter *param);
192 int (*parse_monolithic)(struct fs_context *fc, void *data);
193 int (*get_tree)(struct fs_context *fc);
194 int (*reconfigure)(struct fs_context *fc);
195 };
196
197These operations are invoked by the various stages of the mount procedure to
198manage the filesystem context. They are as follows:
199
200 (*) void (*free)(struct fs_context *fc);
201
202 Called to clean up the filesystem-specific part of the filesystem context
203 when the context is destroyed. It should be aware that parts of the
204 context may have been removed and NULL'd out by ->get_tree().
205
206 (*) int (*dup)(struct fs_context *fc, struct fs_context *src_fc);
207
208 Called when a filesystem context has been duplicated to duplicate the
209 filesystem-private data. An error may be returned to indicate failure to
210 do this.
211
212 [!] Note that even if this fails, put_fs_context() will be called
213 immediately thereafter, so ->dup() *must* make the
214 filesystem-private data safe for ->free().
215
216 (*) int (*parse_param)(struct fs_context *fc,
217 struct fs_parameter *param);
218
219 Called when a parameter is being added to the filesystem context. param
220 points to the key name and maybe a value object. VFS-specific options
221 will have been weeded out and fc->sb_flags updated in the context.
222 Security options will also have been weeded out and fc->security updated.
223
224 The parameter can be parsed with fs_parse() and fs_lookup_param(). Note
225 that the source(s) are presented as parameters named "source".
226
227 If successful, 0 should be returned or a negative error code otherwise.
228
229 (*) int (*parse_monolithic)(struct fs_context *fc, void *data);
230
231 Called when the mount(2) system call is invoked to pass the entire data
232 page in one go. If this is expected to be just a list of "key[=val]"
233 items separated by commas, then this may be set to NULL.
234
235 The return value is as for ->parse_param().
236
237 If the filesystem (e.g. NFS) needs to examine the data first and then
238 finds it's the standard key-val list then it may pass it off to
239 generic_parse_monolithic().
240
241 (*) int (*get_tree)(struct fs_context *fc);
242
243 Called to get or create the mountable root and superblock, using the
244 information stored in the filesystem context (reconfiguration goes via a
245 different vector). It may detach any resources it desires from the
246 filesystem context and transfer them to the superblock it creates.
247
248 On success it should set fc->root to the mountable root and return 0. In
249 the case of an error, it should return a negative error code.
250
251 The phase on a userspace-driven context will be set to only allow this to
252 be called once on any particular context.
253
254 (*) int (*reconfigure)(struct fs_context *fc);
255
256 Called to effect reconfiguration of a superblock using information stored
257 in the filesystem context. It may detach any resources it desires from
258 the filesystem context and transfer them to the superblock. The
259 superblock can be found from fc->root->d_sb.
260
261 On success it should return 0. In the case of an error, it should return
262 a negative error code.
263
264 [NOTE] reconfigure is intended as a replacement for remount_fs.
265
266
267===========================
268FILESYSTEM CONTEXT SECURITY
269===========================
270
271The filesystem context contains a security pointer that the LSMs can use for
272building up a security context for the superblock to be mounted. There are a
273number of operations used by the new mount code for this purpose:
274
275 (*) int security_fs_context_alloc(struct fs_context *fc,
276 struct dentry *reference);
277
278 Called to initialise fc->security (which is preset to NULL) and allocate
279 any resources needed. It should return 0 on success or a negative error
280 code on failure.
281
282 reference will be non-NULL if the context is being created for superblock
283 reconfiguration (FS_CONTEXT_FOR_RECONFIGURE) in which case it indicates
284 the root dentry of the superblock to be reconfigured. It will also be
285 non-NULL in the case of a submount (FS_CONTEXT_FOR_SUBMOUNT) in which case
286 it indicates the automount point.
287
288 (*) int security_fs_context_dup(struct fs_context *fc,
289 struct fs_context *src_fc);
290
291 Called to initialise fc->security (which is preset to NULL) and allocate
292 any resources needed. The original filesystem context is pointed to by
293 src_fc and may be used for reference. It should return 0 on success or a
294 negative error code on failure.
295
296 (*) void security_fs_context_free(struct fs_context *fc);
297
298 Called to clean up anything attached to fc->security. Note that the
299 contents may have been transferred to a superblock and the pointer cleared
300 during get_tree.
301
302 (*) int security_fs_context_parse_param(struct fs_context *fc,
303 struct fs_parameter *param);
304
305 Called for each mount parameter, including the source. The arguments are
306 as for the ->parse_param() method. It should return 0 to indicate that
307 the parameter should be passed on to the filesystem, 1 to indicate that
308 the parameter should be discarded or an error to indicate that the
309 parameter should be rejected.
310
311 The value pointed to by param may be modified (if a string) or stolen
312 (provided the value pointer is NULL'd out). If it is stolen, 1 must be
313 returned to prevent it being passed to the filesystem.
314
315 (*) int security_fs_context_validate(struct fs_context *fc);
316
317 Called after all the options have been parsed to validate the collection
318 as a whole and to do any necessary allocation so that
319 security_sb_get_tree() and security_sb_reconfigure() are less likely to
320 fail. It should return 0 or a negative error code.
321
322 In the case of reconfiguration, the target superblock will be accessible
323 via fc->root.
324
325 (*) int security_sb_get_tree(struct fs_context *fc);
326
327 Called during the mount procedure to verify that the specified superblock
328 is allowed to be mounted and to transfer the security data there. It
329 should return 0 or a negative error code.
330
331 (*) void security_sb_reconfigure(struct fs_context *fc);
332
333 Called to apply any reconfiguration to an LSM's context. It must not
334 fail. Error checking and resource allocation must be done in advance by
335 the parameter parsing and validation hooks.
336
337 (*) int security_sb_mountpoint(struct fs_context *fc, struct path *mountpoint,
338 unsigned int mnt_flags);
339
340 Called during the mount procedure to verify that the root dentry attached
341 to the context is permitted to be attached to the specified mountpoint.
342 It should return 0 on success or a negative error code on failure.
343
344
345=================================
346VFS FILESYSTEM CONTEXT OPERATIONS
347=================================
348
349There are four operations for creating a filesystem context and
350one for destroying a context:
351
352 (*) struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
353 struct dentry *reference,
354 unsigned int sb_flags,
355 unsigned int sb_flags_mask,
356 enum fs_context_purpose purpose);
357
358 Create a filesystem context for a given filesystem type and purpose. This
359 allocates the filesystem context, sets the superblock flags, initialises
360 the security and calls fs_type->init_fs_context() to initialise the
361 filesystem private data.
362
363 reference can be NULL or it may indicate the root dentry of a superblock
364 that is going to be reconfigured (FS_CONTEXT_FOR_RECONFIGURE) or
365 the automount point that triggered a submount (FS_CONTEXT_FOR_SUBMOUNT).
366 This is provided as a source of namespace information.
367
368 (*) struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc);
369
370 Duplicate a filesystem context, copying any options noted and duplicating
371 or additionally referencing any resources held therein. This is available
372 for use where a filesystem has to get a mount within a mount, such as NFS4
373 does by internally mounting the root of the target server and then doing a
374 private pathwalk to the target directory.
375
376 The purpose in the new context is inherited from the old one.
377
378 (*) void put_fs_context(struct fs_context *fc);
379
380 Destroy a filesystem context, releasing any resources it holds. This
381 calls the ->free() operation. This is intended to be called by anyone who
382 created a filesystem context.
383
384 [!] filesystem contexts are not refcounted, so this causes unconditional
385 destruction.
386
387In all the above operations, apart from the put op, the return is a mount
388context pointer or a negative error code.
389
390For the remaining operations, if an error occurs, a negative error code will be
391returned.
392
393 (*) int vfs_get_tree(struct fs_context *fc);
394
395 Get or create the mountable root and superblock, using the parameters in
396 the filesystem context to select/configure the superblock. This invokes
397 the ->validate() op and then the ->get_tree() op.
398
399 [NOTE] ->validate() could perhaps be rolled into ->get_tree() and
400 ->reconfigure().
401
402 (*) struct vfsmount *vfs_create_mount(struct fs_context *fc);
403
404 Create a mount given the parameters in the specified filesystem context.
405 Note that this does not attach the mount to anything.
406
407 (*) int vfs_parse_fs_param(struct fs_context *fc,
408 struct fs_parameter *param);
409
410 Supply a single mount parameter to the filesystem context. This includes
411 the specification of the source/device which is specified as the "source"
412 parameter (which may be specified multiple times if the filesystem
413 supports that).
414
415 param specifies the parameter key name and the value. The parameter is
416 first checked to see if it corresponds to a standard mount flag (in which
417 case it is used to set an SB_xxx flag and consumed) or a security option
418 (in which case the LSM consumes it) before it is passed on to the
419 filesystem.
420
421 The parameter value is typed and can be one of:
422
423 fs_value_is_flag, Parameter not given a value.
424 fs_value_is_string, Value is a string
425 fs_value_is_blob, Value is a binary blob
426 fs_value_is_filename, Value is a filename* + dirfd
427 fs_value_is_filename_empty, Value is a filename* + dirfd + AT_EMPTY_PATH
428 fs_value_is_file, Value is an open file (file*)
429
430 If there is a value, that value is stored in a union in the struct in one
431 of param->{string,blob,name,file}. Note that the function may steal and
432 clear the pointer, but then becomes responsible for disposing of the
433 object.
434
435 (*) int vfs_parse_fs_string(struct fs_context *fc, char *key,
436 const char *value, size_t v_size);
437
438 A wrapper around vfs_parse_fs_param() that just passes a constant string.
439
440 (*) int generic_parse_monolithic(struct fs_context *fc, void *data);
441
442 Parse a sys_mount() data page, assuming the form to be a text list
443 consisting of key[=val] options separated by commas. Each item in the
444 list is passed to vfs_mount_option(). This is the default when the
445 ->parse_monolithic() operation is NULL.
446
447
448=====================
449PARAMETER DESCRIPTION
450=====================
451
452Parameters are described using structures defined in linux/fs_parser.h.
453There's a core description struct that links everything together:
454
455 struct fs_parameter_description {
456 const char name[16];
457 u8 nr_params;
458 u8 nr_alt_keys;
459 u8 nr_enums;
460 bool ignore_unknown;
461 bool no_source;
462 const char *const *keys;
463 const struct constant_table *alt_keys;
464 const struct fs_parameter_spec *specs;
465 const struct fs_parameter_enum *enums;
466 };
467
468For example:
469
470 enum afs_param {
471 Opt_autocell,
472 Opt_bar,
473 Opt_dyn,
474 Opt_foo,
475 Opt_source,
476 nr__afs_params
477 };
478
479 static const struct fs_parameter_description afs_fs_parameters = {
480 .name = "kAFS",
481 .nr_params = nr__afs_params,
482 .nr_alt_keys = ARRAY_SIZE(afs_param_alt_keys),
483 .nr_enums = ARRAY_SIZE(afs_param_enums),
484 .keys = afs_param_keys,
485 .alt_keys = afs_param_alt_keys,
486 .specs = afs_param_specs,
487 .enums = afs_param_enums,
488 };
489
490The members are as follows:
491
492 (1) const char name[16];
493
494 The name to be used in error messages generated by the parse helper
495 functions.
496
497 (2) u8 nr_params;
498
499 The number of discrete parameter identifiers. This indicates the number
500 of elements in the ->specs[] array and also limits the values that may be
501 used in the values that the ->keys[] array maps to.
502
503 It is expected that, for example, two parameters that are related, say
504 "acl" and "noacl" will have the same ID, but will be flagged to indicate
505 that one is the inverse of the other. The value can then be picked out
506 from the parse result.
507
508 (3) const struct fs_parameter_spec *specs;
509
510 Table of parameter specifications, where the entries are of type:
511
512 struct fs_parameter_spec {
513 enum fs_parameter_type type:8;
514 u8 flags;
515 };
516
517 and the parameter identifier is the index to the array. 'type' indicates
518 the desired value type and must be one of:
519
520 TYPE NAME EXPECTED VALUE RESULT IN
521 ======================= ======================= =====================
522 fs_param_is_flag No value n/a
523 fs_param_is_bool Boolean value result->boolean
524 fs_param_is_u32 32-bit unsigned int result->uint_32
525 fs_param_is_u32_octal 32-bit octal int result->uint_32
526 fs_param_is_u32_hex 32-bit hex int result->uint_32
527 fs_param_is_s32 32-bit signed int result->int_32
528 fs_param_is_enum Enum value name result->uint_32
529 fs_param_is_string Arbitrary string param->string
530 fs_param_is_blob Binary blob param->blob
531 fs_param_is_blockdev Blockdev path * Needs lookup
532 fs_param_is_path Path * Needs lookup
533 fs_param_is_fd File descriptor param->file
534
535 And each parameter can be qualified with 'flags':
536
537 fs_param_v_optional The value is optional
538 fs_param_neg_with_no If key name is prefixed with "no", it is false
539 fs_param_neg_with_empty If value is "", it is false
540 fs_param_deprecated The parameter is deprecated.
541
542 For example:
543
544 static const struct fs_parameter_spec afs_param_specs[nr__afs_params] = {
545 [Opt_autocell] = { fs_param_is_flag },
546 [Opt_bar] = { fs_param_is_enum },
547 [Opt_dyn] = { fs_param_is_flag },
548 [Opt_foo] = { fs_param_is_bool, fs_param_neg_with_no },
549 [Opt_source] = { fs_param_is_string },
550 };
551
552 Note that if the value is of fs_param_is_bool type, fs_parse() will try
553 to match any string value against "0", "1", "no", "yes", "false", "true".
554
555 [!] NOTE that the table must be sorted according to primary key name so
556 that ->keys[] is also sorted.
557
558 (4) const char *const *keys;
559
560 Table of primary key names for the parameters. There must be one entry
561 per defined parameter. The table is optional if ->nr_params is 0. The
562 table is just an array of names e.g.:
563
564 static const char *const afs_param_keys[nr__afs_params] = {
565 [Opt_autocell] = "autocell",
566 [Opt_bar] = "bar",
567 [Opt_dyn] = "dyn",
568 [Opt_foo] = "foo",
569 [Opt_source] = "source",
570 };
571
572 [!] NOTE that the table must be sorted such that the table can be searched
573 with bsearch() using strcmp(). This means that the Opt_* values must
574 correspond to the entries in this table.
575
576 (5) const struct constant_table *alt_keys;
577 u8 nr_alt_keys;
578
579 Table of additional key names and their mappings to parameter ID plus the
580 number of elements in the table. This is optional. The table is just an
581 array of { name, integer } pairs, e.g.:
582
583 static const struct constant_table afs_param_alt_keys[] = {
584 { "baz", Opt_bar },
585 { "dynamic", Opt_dyn },
586 };
587
588 [!] NOTE that the table must be sorted such that strcmp() can be used with
589 bsearch() to search the entries.
590
591 The parameter ID can also be fs_param_key_removed to indicate that a
592 deprecated parameter has been removed and that an error will be given.
593 This differs from fs_param_deprecated where the parameter may still have
594 an effect.
595
596 Further, the behaviour of the parameter may differ when an alternate name
597 is used (for instance with NFS, "v3", "v4.2", etc. are alternate names).
598
599 (6) const struct fs_parameter_enum *enums;
600 u8 nr_enums;
601
602 Table of enum value names to integer mappings and the number of elements
603 stored therein. This is of type:
604
605 struct fs_parameter_enum {
606 u8 param_id;
607 char name[14];
608 u8 value;
609 };
610
611 Where the array is an unsorted list of { parameter ID, name }-keyed
612 elements that indicate the value to map to, e.g.:
613
614 static const struct fs_parameter_enum afs_param_enums[] = {
615 { Opt_bar, "x", 1},
616 { Opt_bar, "y", 23},
617 { Opt_bar, "z", 42},
618 };
619
620 If a parameter of type fs_param_is_enum is encountered, fs_parse() will
621 try to look the value up in the enum table and the result will be stored
622 in the parse result.
623
624 (7) bool no_source;
625
626 If this is set, fs_parse() will ignore any "source" parameter and not
627 pass it to the filesystem.
628
629The parser should be pointed to by the parser pointer in the file_system_type
630struct as this will provide validation on registration (if
631CONFIG_VALIDATE_FS_PARSER=y) and will allow the description to be queried from
632userspace using the fsinfo() syscall.
633
634
635==========================
636PARAMETER HELPER FUNCTIONS
637==========================
638
639A number of helper functions are provided to help a filesystem or an LSM
640process the parameters it is given.
641
642 (*) int lookup_constant(const struct constant_table tbl[],
643 const char *name, int not_found);
644
645 Look up a constant by name in a table of name -> integer mappings. The
646 table is an array of elements of the following type:
647
648 struct constant_table {
649 const char *name;
650 int value;
651 };
652
653 and it must be sorted such that it can be searched using bsearch() using
654 strcmp(). If a match is found, the corresponding value is returned. If a
655 match isn't found, the not_found value is returned instead.
656
657 (*) bool validate_constant_table(const struct constant_table *tbl,
658 size_t tbl_size,
659 int low, int high, int special);
660
661 Validate a constant table. Checks that all the elements are appropriately
662 ordered, that there are no duplicates and that the values are between low
663 and high inclusive, though provision is made for one allowable special
664 value outside of that range. If no special value is required, special
665 should just be set to lie inside the low-to-high range.
666
667 If all is good, true is returned. If the table is invalid, errors are
668 logged to dmesg, the stack is dumped and false is returned.
669
670 (*) int fs_parse(struct fs_context *fc,
671 const struct fs_param_parser *parser,
672 struct fs_parameter *param,
673 struct fs_param_parse_result *result);
674
675 This is the main interpreter of parameters. It uses the parameter
676 description (parser) to look up the name of the parameter to use and to
677 convert that to a parameter ID (stored in result->key).
678
679 If successful, and if the parameter type indicates the result is a
680 boolean, integer or enum type, the value is converted by this function and
681 the result stored in result->{boolean,int_32,uint_32}.
682
683 If a match isn't initially made, the key is prefixed with "no" and no
684 value is present then an attempt will be made to look up the key with the
685 prefix removed. If this matches a parameter for which the type has flag
686 fs_param_neg_with_no set, then a match will be made and the value will be
687 set to false/0/NULL.
688
689 If the parameter is successfully matched and, optionally, parsed
690 correctly, 1 is returned. If the parameter isn't matched and
691 parser->ignore_unknown is set, then 0 is returned. Otherwise -EINVAL is
692 returned.
693
694 (*) bool fs_validate_description(const struct fs_parameter_description *desc);
695
696 This validates the parameter description. It returns true if the
697 description is good and false if it is not.
698
699 (*) int fs_lookup_param(struct fs_context *fc,
700 struct fs_parameter *value,
701 bool want_bdev,
702 struct path *_path);
703
704 This takes a parameter that carries a string or filename type and attempts
705 to do a path lookup on it. If the parameter expects a blockdev, a check
706 is made that the inode actually represents one.
707
708 Returns 0 if successful and *_path will be set; returns a negative error
709 code if not.
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 822b7db634ee..e49b77283924 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -4,6 +4,7 @@
4 4
5#include <linux/sched.h> 5#include <linux/sched.h>
6#include <linux/kernfs.h> 6#include <linux/kernfs.h>
7#include <linux/fs_context.h>
7#include <linux/jump_label.h> 8#include <linux/jump_label.h>
8 9
9#define MSR_IA32_L3_QOS_CFG 0xc81 10#define MSR_IA32_L3_QOS_CFG 0xc81
@@ -40,6 +41,21 @@
40#define RMID_VAL_ERROR BIT_ULL(63) 41#define RMID_VAL_ERROR BIT_ULL(63)
41#define RMID_VAL_UNAVAIL BIT_ULL(62) 42#define RMID_VAL_UNAVAIL BIT_ULL(62)
42 43
44
45struct rdt_fs_context {
46 struct kernfs_fs_context kfc;
47 bool enable_cdpl2;
48 bool enable_cdpl3;
49 bool enable_mba_mbps;
50};
51
52static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
53{
54 struct kernfs_fs_context *kfc = fc->fs_private;
55
56 return container_of(kfc, struct rdt_fs_context, kfc);
57}
58
43DECLARE_STATIC_KEY_FALSE(rdt_enable_key); 59DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
44 60
45/** 61/**
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 8388adf241b2..399601eda8e4 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -24,6 +24,7 @@
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/debugfs.h> 25#include <linux/debugfs.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/fs_parser.h>
27#include <linux/sysfs.h> 28#include <linux/sysfs.h>
28#include <linux/kernfs.h> 29#include <linux/kernfs.h>
29#include <linux/seq_buf.h> 30#include <linux/seq_buf.h>
@@ -32,6 +33,7 @@
32#include <linux/sched/task.h> 33#include <linux/sched/task.h>
33#include <linux/slab.h> 34#include <linux/slab.h>
34#include <linux/task_work.h> 35#include <linux/task_work.h>
36#include <linux/user_namespace.h>
35 37
36#include <uapi/linux/magic.h> 38#include <uapi/linux/magic.h>
37 39
@@ -1858,46 +1860,6 @@ static void cdp_disable_all(void)
1858 cdpl2_disable(); 1860 cdpl2_disable();
1859} 1861}
1860 1862
1861static int parse_rdtgroupfs_options(char *data)
1862{
1863 char *token, *o = data;
1864 int ret = 0;
1865
1866 while ((token = strsep(&o, ",")) != NULL) {
1867 if (!*token) {
1868 ret = -EINVAL;
1869 goto out;
1870 }
1871
1872 if (!strcmp(token, "cdp")) {
1873 ret = cdpl3_enable();
1874 if (ret)
1875 goto out;
1876 } else if (!strcmp(token, "cdpl2")) {
1877 ret = cdpl2_enable();
1878 if (ret)
1879 goto out;
1880 } else if (!strcmp(token, "mba_MBps")) {
1881 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1882 ret = set_mba_sc(true);
1883 else
1884 ret = -EINVAL;
1885 if (ret)
1886 goto out;
1887 } else {
1888 ret = -EINVAL;
1889 goto out;
1890 }
1891 }
1892
1893 return 0;
1894
1895out:
1896 pr_err("Invalid mount option \"%s\"\n", token);
1897
1898 return ret;
1899}
1900
1901/* 1863/*
1902 * We don't allow rdtgroup directories to be created anywhere 1864 * We don't allow rdtgroup directories to be created anywhere
1903 * except the root directory. Thus when looking for the rdtgroup 1865 * except the root directory. Thus when looking for the rdtgroup
@@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
1969 struct rdtgroup *prgrp, 1931 struct rdtgroup *prgrp,
1970 struct kernfs_node **mon_data_kn); 1932 struct kernfs_node **mon_data_kn);
1971 1933
1972static struct dentry *rdt_mount(struct file_system_type *fs_type, 1934static int rdt_enable_ctx(struct rdt_fs_context *ctx)
1973 int flags, const char *unused_dev_name, 1935{
1974 void *data) 1936 int ret = 0;
1937
1938 if (ctx->enable_cdpl2)
1939 ret = cdpl2_enable();
1940
1941 if (!ret && ctx->enable_cdpl3)
1942 ret = cdpl3_enable();
1943
1944 if (!ret && ctx->enable_mba_mbps)
1945 ret = set_mba_sc(true);
1946
1947 return ret;
1948}
1949
1950static int rdt_get_tree(struct fs_context *fc)
1975{ 1951{
1952 struct rdt_fs_context *ctx = rdt_fc2context(fc);
1976 struct rdt_domain *dom; 1953 struct rdt_domain *dom;
1977 struct rdt_resource *r; 1954 struct rdt_resource *r;
1978 struct dentry *dentry;
1979 int ret; 1955 int ret;
1980 1956
1981 cpus_read_lock(); 1957 cpus_read_lock();
@@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
1984 * resctrl file system can only be mounted once. 1960 * resctrl file system can only be mounted once.
1985 */ 1961 */
1986 if (static_branch_unlikely(&rdt_enable_key)) { 1962 if (static_branch_unlikely(&rdt_enable_key)) {
1987 dentry = ERR_PTR(-EBUSY); 1963 ret = -EBUSY;
1988 goto out; 1964 goto out;
1989 } 1965 }
1990 1966
1991 ret = parse_rdtgroupfs_options(data); 1967 ret = rdt_enable_ctx(ctx);
1992 if (ret) { 1968 if (ret < 0)
1993 dentry = ERR_PTR(ret);
1994 goto out_cdp; 1969 goto out_cdp;
1995 }
1996 1970
1997 closid_init(); 1971 closid_init();
1998 1972
1999 ret = rdtgroup_create_info_dir(rdtgroup_default.kn); 1973 ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
2000 if (ret) { 1974 if (ret < 0)
2001 dentry = ERR_PTR(ret); 1975 goto out_mba;
2002 goto out_cdp;
2003 }
2004 1976
2005 if (rdt_mon_capable) { 1977 if (rdt_mon_capable) {
2006 ret = mongroup_create_dir(rdtgroup_default.kn, 1978 ret = mongroup_create_dir(rdtgroup_default.kn,
2007 NULL, "mon_groups", 1979 NULL, "mon_groups",
2008 &kn_mongrp); 1980 &kn_mongrp);
2009 if (ret) { 1981 if (ret < 0)
2010 dentry = ERR_PTR(ret);
2011 goto out_info; 1982 goto out_info;
2012 }
2013 kernfs_get(kn_mongrp); 1983 kernfs_get(kn_mongrp);
2014 1984
2015 ret = mkdir_mondata_all(rdtgroup_default.kn, 1985 ret = mkdir_mondata_all(rdtgroup_default.kn,
2016 &rdtgroup_default, &kn_mondata); 1986 &rdtgroup_default, &kn_mondata);
2017 if (ret) { 1987 if (ret < 0)
2018 dentry = ERR_PTR(ret);
2019 goto out_mongrp; 1988 goto out_mongrp;
2020 }
2021 kernfs_get(kn_mondata); 1989 kernfs_get(kn_mondata);
2022 rdtgroup_default.mon.mon_data_kn = kn_mondata; 1990 rdtgroup_default.mon.mon_data_kn = kn_mondata;
2023 } 1991 }
2024 1992
2025 ret = rdt_pseudo_lock_init(); 1993 ret = rdt_pseudo_lock_init();
2026 if (ret) { 1994 if (ret)
2027 dentry = ERR_PTR(ret);
2028 goto out_mondata; 1995 goto out_mondata;
2029 }
2030 1996
2031 dentry = kernfs_mount(fs_type, flags, rdt_root, 1997 ret = kernfs_get_tree(fc);
2032 RDTGROUP_SUPER_MAGIC, NULL); 1998 if (ret < 0)
2033 if (IS_ERR(dentry))
2034 goto out_psl; 1999 goto out_psl;
2035 2000
2036 if (rdt_alloc_capable) 2001 if (rdt_alloc_capable)
@@ -2059,14 +2024,95 @@ out_mongrp:
2059 kernfs_remove(kn_mongrp); 2024 kernfs_remove(kn_mongrp);
2060out_info: 2025out_info:
2061 kernfs_remove(kn_info); 2026 kernfs_remove(kn_info);
2027out_mba:
2028 if (ctx->enable_mba_mbps)
2029 set_mba_sc(false);
2062out_cdp: 2030out_cdp:
2063 cdp_disable_all(); 2031 cdp_disable_all();
2064out: 2032out:
2065 rdt_last_cmd_clear(); 2033 rdt_last_cmd_clear();
2066 mutex_unlock(&rdtgroup_mutex); 2034 mutex_unlock(&rdtgroup_mutex);
2067 cpus_read_unlock(); 2035 cpus_read_unlock();
2036 return ret;
2037}
2038
2039enum rdt_param {
2040 Opt_cdp,
2041 Opt_cdpl2,
2042 Opt_mba_mpbs,
2043 nr__rdt_params
2044};
2045
2046static const struct fs_parameter_spec rdt_param_specs[] = {
2047 fsparam_flag("cdp", Opt_cdp),
2048 fsparam_flag("cdpl2", Opt_cdpl2),
2049 fsparam_flag("mba_MBps", Opt_mba_mpbs),
2050 {}
2051};
2052
2053static const struct fs_parameter_description rdt_fs_parameters = {
2054 .name = "rdt",
2055 .specs = rdt_param_specs,
2056};
2057
2058static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2059{
2060 struct rdt_fs_context *ctx = rdt_fc2context(fc);
2061 struct fs_parse_result result;
2062 int opt;
2063
2064 opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
2065 if (opt < 0)
2066 return opt;
2068 2067
2069 return dentry; 2068 switch (opt) {
2069 case Opt_cdp:
2070 ctx->enable_cdpl3 = true;
2071 return 0;
2072 case Opt_cdpl2:
2073 ctx->enable_cdpl2 = true;
2074 return 0;
2075 case Opt_mba_mpbs:
2076 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
2077 return -EINVAL;
2078 ctx->enable_mba_mbps = true;
2079 return 0;
2080 }
2081
2082 return -EINVAL;
2083}
2084
2085static void rdt_fs_context_free(struct fs_context *fc)
2086{
2087 struct rdt_fs_context *ctx = rdt_fc2context(fc);
2088
2089 kernfs_free_fs_context(fc);
2090 kfree(ctx);
2091}
2092
2093static const struct fs_context_operations rdt_fs_context_ops = {
2094 .free = rdt_fs_context_free,
2095 .parse_param = rdt_parse_param,
2096 .get_tree = rdt_get_tree,
2097};
2098
2099static int rdt_init_fs_context(struct fs_context *fc)
2100{
2101 struct rdt_fs_context *ctx;
2102
2103 ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2104 if (!ctx)
2105 return -ENOMEM;
2106
2107 ctx->kfc.root = rdt_root;
2108 ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2109 fc->fs_private = &ctx->kfc;
2110 fc->ops = &rdt_fs_context_ops;
2111 if (fc->user_ns)
2112 put_user_ns(fc->user_ns);
2113 fc->user_ns = get_user_ns(&init_user_ns);
2114 fc->global = true;
2115 return 0;
2070} 2116}
2071 2117
2072static int reset_all_ctrls(struct rdt_resource *r) 2118static int reset_all_ctrls(struct rdt_resource *r)
@@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb)
2239} 2285}
2240 2286
2241static struct file_system_type rdt_fs_type = { 2287static struct file_system_type rdt_fs_type = {
2242 .name = "resctrl", 2288 .name = "resctrl",
2243 .mount = rdt_mount, 2289 .init_fs_context = rdt_init_fs_context,
2244 .kill_sb = rdt_kill_sb, 2290 .parameters = &rdt_fs_parameters,
2291 .kill_sb = rdt_kill_sb,
2245}; 2292};
2246 2293
2247static int mon_addfile(struct kernfs_node *parent_kn, const char *name, 2294static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
diff --git a/fs/Kconfig b/fs/Kconfig
index 2557506051a3..3e6d3101f3ff 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -8,6 +8,13 @@ menu "File systems"
8config DCACHE_WORD_ACCESS 8config DCACHE_WORD_ACCESS
9 bool 9 bool
10 10
11config VALIDATE_FS_PARSER
12 bool "Validate filesystem parameter description"
13 default y
14 help
15 Enable this to perform validation of the parameter description for a
16 filesystem when it is registered.
17
11if BLOCK 18if BLOCK
12 19
13config FS_IOMAP 20config FS_IOMAP
diff --git a/fs/Makefile b/fs/Makefile
index 7bff9abecfa4..427fec226fae 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \
13 seq_file.o xattr.o libfs.o fs-writeback.o \ 13 seq_file.o xattr.o libfs.o fs-writeback.o \
14 pnode.o splice.o sync.o utimes.o d_path.o \ 14 pnode.o splice.o sync.o utimes.o d_path.o \
15 stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ 15 stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
16 fs_types.o 16 fs_types.o fs_context.o fs_parser.o
17 17
18ifeq ($(CONFIG_BLOCK),y) 18ifeq ($(CONFIG_BLOCK),y)
19obj-y += buffer.o block_dev.o direct-io.o mpage.o 19obj-y += buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 8871b9e8645f..bb1f244b2b3a 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -36,15 +36,14 @@
36struct pagevec; 36struct pagevec;
37struct afs_call; 37struct afs_call;
38 38
39struct afs_mount_params { 39struct afs_fs_context {
40 bool rwpath; /* T if the parent should be considered R/W */
41 bool force; /* T to force cell type */ 40 bool force; /* T to force cell type */
42 bool autocell; /* T if set auto mount operation */ 41 bool autocell; /* T if set auto mount operation */
43 bool dyn_root; /* T if dynamic root */ 42 bool dyn_root; /* T if dynamic root */
43 bool no_cell; /* T if the source is "none" (for dynroot) */
44 afs_voltype_t type; /* type of volume requested */ 44 afs_voltype_t type; /* type of volume requested */
45 int volnamesz; /* size of volume name */ 45 unsigned int volnamesz; /* size of volume name */
46 const char *volname; /* name of volume to mount */ 46 const char *volname; /* name of volume to mount */
47 struct net *net_ns; /* Network namespace in effect */
48 struct afs_net *net; /* the AFS net namespace stuff */ 47 struct afs_net *net; /* the AFS net namespace stuff */
49 struct afs_cell *cell; /* cell in which to find volume */ 48 struct afs_cell *cell; /* cell in which to find volume */
50 struct afs_volume *volume; /* volume record */ 49 struct afs_volume *volume; /* volume record */
@@ -1274,7 +1273,7 @@ static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume)
1274 return volume; 1273 return volume;
1275} 1274}
1276 1275
1277extern struct afs_volume *afs_create_volume(struct afs_mount_params *); 1276extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
1278extern void afs_activate_volume(struct afs_volume *); 1277extern void afs_activate_volume(struct afs_volume *);
1279extern void afs_deactivate_volume(struct afs_volume *); 1278extern void afs_deactivate_volume(struct afs_volume *);
1280extern void afs_put_volume(struct afs_cell *, struct afs_volume *); 1279extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2e51c6994148..eecd8b699186 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -17,6 +17,7 @@
17#include <linux/mount.h> 17#include <linux/mount.h>
18#include <linux/namei.h> 18#include <linux/namei.h>
19#include <linux/gfp.h> 19#include <linux/gfp.h>
20#include <linux/fs_context.h>
20#include "internal.h" 21#include "internal.h"
21 22
22 23
@@ -47,6 +48,8 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
47 48
48static unsigned long afs_mntpt_expiry_timeout = 10 * 60; 49static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
49 50
51static const char afs_root_volume[] = "root.cell";
52
50/* 53/*
51 * no valid lookup procedure on this sort of dir 54 * no valid lookup procedure on this sort of dir
52 */ 55 */
@@ -68,108 +71,112 @@ static int afs_mntpt_open(struct inode *inode, struct file *file)
68} 71}
69 72
70/* 73/*
71 * create a vfsmount to be automounted 74 * Set the parameters for the proposed superblock.
72 */ 75 */
73static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) 76static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
74{ 77{
75 struct afs_super_info *as; 78 struct afs_fs_context *ctx = fc->fs_private;
76 struct vfsmount *mnt; 79 struct afs_super_info *src_as = AFS_FS_S(mntpt->d_sb);
77 struct afs_vnode *vnode; 80 struct afs_vnode *vnode = AFS_FS_I(d_inode(mntpt));
78 struct page *page; 81 struct afs_cell *cell;
79 char *devname, *options; 82 const char *p;
80 bool rwpath = false;
81 int ret; 83 int ret;
82 84
83 _enter("{%pd}", mntpt); 85 if (fc->net_ns != src_as->net_ns) {
84 86 put_net(fc->net_ns);
85 BUG_ON(!d_inode(mntpt)); 87 fc->net_ns = get_net(src_as->net_ns);
86 88 }
87 ret = -ENOMEM;
88 devname = (char *) get_zeroed_page(GFP_KERNEL);
89 if (!devname)
90 goto error_no_devname;
91
92 options = (char *) get_zeroed_page(GFP_KERNEL);
93 if (!options)
94 goto error_no_options;
95 89
96 vnode = AFS_FS_I(d_inode(mntpt)); 90 if (src_as->volume && src_as->volume->type == AFSVL_RWVOL) {
91 ctx->type = AFSVL_RWVOL;
92 ctx->force = true;
93 }
94 if (ctx->cell) {
95 afs_put_cell(ctx->net, ctx->cell);
96 ctx->cell = NULL;
97 }
97 if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { 98 if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
98 /* if the directory is a pseudo directory, use the d_name */ 99 /* if the directory is a pseudo directory, use the d_name */
99 static const char afs_root_cell[] = ":root.cell.";
100 unsigned size = mntpt->d_name.len; 100 unsigned size = mntpt->d_name.len;
101 101
102 ret = -ENOENT; 102 if (size < 2)
103 if (size < 2 || size > AFS_MAXCELLNAME) 103 return -ENOENT;
104 goto error_no_page;
105 104
105 p = mntpt->d_name.name;
106 if (mntpt->d_name.name[0] == '.') { 106 if (mntpt->d_name.name[0] == '.') {
107 devname[0] = '%'; 107 size--;
108 memcpy(devname + 1, mntpt->d_name.name + 1, size - 1); 108 p++;
109 memcpy(devname + size, afs_root_cell, 109 ctx->type = AFSVL_RWVOL;
110 sizeof(afs_root_cell)); 110 ctx->force = true;
111 rwpath = true;
112 } else {
113 devname[0] = '#';
114 memcpy(devname + 1, mntpt->d_name.name, size);
115 memcpy(devname + size + 1, afs_root_cell,
116 sizeof(afs_root_cell));
117 } 111 }
112 if (size > AFS_MAXCELLNAME)
113 return -ENAMETOOLONG;
114
115 cell = afs_lookup_cell(ctx->net, p, size, NULL, false);
116 if (IS_ERR(cell)) {
117 pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt);
118 return PTR_ERR(cell);
119 }
120 ctx->cell = cell;
121
122 ctx->volname = afs_root_volume;
123 ctx->volnamesz = sizeof(afs_root_volume) - 1;
118 } else { 124 } else {
119 /* read the contents of the AFS special symlink */ 125 /* read the contents of the AFS special symlink */
126 struct page *page;
120 loff_t size = i_size_read(d_inode(mntpt)); 127 loff_t size = i_size_read(d_inode(mntpt));
121 char *buf; 128 char *buf;
122 129
123 ret = -EINVAL; 130 if (src_as->cell)
131 ctx->cell = afs_get_cell(src_as->cell);
132
124 if (size > PAGE_SIZE - 1) 133 if (size > PAGE_SIZE - 1)
125 goto error_no_page; 134 return -EINVAL;
126 135
127 page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL); 136 page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
128 if (IS_ERR(page)) { 137 if (IS_ERR(page))
129 ret = PTR_ERR(page); 138 return PTR_ERR(page);
130 goto error_no_page;
131 }
132 139
133 if (PageError(page)) { 140 if (PageError(page)) {
134 ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt); 141 ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt);
135 goto error; 142 put_page(page);
143 return ret;
136 } 144 }
137 145
138 buf = kmap_atomic(page); 146 buf = kmap(page);
139 memcpy(devname, buf, size); 147 ret = vfs_parse_fs_string(fc, "source", buf, size);
140 kunmap_atomic(buf); 148 kunmap(page);
141 put_page(page); 149 put_page(page);
142 page = NULL; 150 if (ret < 0)
151 return ret;
143 } 152 }
144 153
145 /* work out what options we want */ 154 return 0;
146 as = AFS_FS_S(mntpt->d_sb); 155}
147 if (as->cell) {
148 memcpy(options, "cell=", 5);
149 strcpy(options + 5, as->cell->name);
150 if ((as->volume && as->volume->type == AFSVL_RWVOL) || rwpath)
151 strcat(options, ",rwpath");
152 }
153 156
154 /* try and do the mount */ 157/*
155 _debug("--- attempting mount %s -o %s ---", devname, options); 158 * create a vfsmount to be automounted
156 mnt = vfs_submount(mntpt, &afs_fs_type, devname, options); 159 */
157 _debug("--- mount result %p ---", mnt); 160static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
161{
162 struct fs_context *fc;
163 struct vfsmount *mnt;
164 int ret;
158 165
159 free_page((unsigned long) devname); 166 BUG_ON(!d_inode(mntpt));
160 free_page((unsigned long) options);
161 _leave(" = %p", mnt);
162 return mnt;
163 167
164error: 168 fc = fs_context_for_submount(&afs_fs_type, mntpt);
165 put_page(page); 169 if (IS_ERR(fc))
166error_no_page: 170 return ERR_CAST(fc);
167 free_page((unsigned long) options); 171
168error_no_options: 172 ret = afs_mntpt_set_params(fc, mntpt);
169 free_page((unsigned long) devname); 173 if (!ret)
170error_no_devname: 174 mnt = fc_mount(fc);
171 _leave(" = %d", ret); 175 else
172 return ERR_PTR(ret); 176 mnt = ERR_PTR(ret);
177
178 put_fs_context(fc);
179 return mnt;
173} 180}
174 181
175/* 182/*
diff --git a/fs/afs/super.c b/fs/afs/super.c
index e684f6769b15..5adf012b8e27 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -1,6 +1,6 @@
1/* AFS superblock handling 1/* AFS superblock handling
2 * 2 *
3 * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved. 3 * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This software may be freely redistributed under the terms of the 5 * This software may be freely redistributed under the terms of the
6 * GNU General Public License. 6 * GNU General Public License.
@@ -21,7 +21,7 @@
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/parser.h> 24#include <linux/fs_parser.h>
25#include <linux/statfs.h> 25#include <linux/statfs.h>
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/nsproxy.h> 27#include <linux/nsproxy.h>
@@ -30,21 +30,22 @@
30#include "internal.h" 30#include "internal.h"
31 31
32static void afs_i_init_once(void *foo); 32static void afs_i_init_once(void *foo);
33static struct dentry *afs_mount(struct file_system_type *fs_type,
34 int flags, const char *dev_name, void *data);
35static void afs_kill_super(struct super_block *sb); 33static void afs_kill_super(struct super_block *sb);
36static struct inode *afs_alloc_inode(struct super_block *sb); 34static struct inode *afs_alloc_inode(struct super_block *sb);
37static void afs_destroy_inode(struct inode *inode); 35static void afs_destroy_inode(struct inode *inode);
38static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); 36static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
39static int afs_show_devname(struct seq_file *m, struct dentry *root); 37static int afs_show_devname(struct seq_file *m, struct dentry *root);
40static int afs_show_options(struct seq_file *m, struct dentry *root); 38static int afs_show_options(struct seq_file *m, struct dentry *root);
39static int afs_init_fs_context(struct fs_context *fc);
40static const struct fs_parameter_description afs_fs_parameters;
41 41
42struct file_system_type afs_fs_type = { 42struct file_system_type afs_fs_type = {
43 .owner = THIS_MODULE, 43 .owner = THIS_MODULE,
44 .name = "afs", 44 .name = "afs",
45 .mount = afs_mount, 45 .init_fs_context = afs_init_fs_context,
46 .kill_sb = afs_kill_super, 46 .parameters = &afs_fs_parameters,
47 .fs_flags = 0, 47 .kill_sb = afs_kill_super,
48 .fs_flags = 0,
48}; 49};
49MODULE_ALIAS_FS("afs"); 50MODULE_ALIAS_FS("afs");
50 51
@@ -63,22 +64,22 @@ static const struct super_operations afs_super_ops = {
63static struct kmem_cache *afs_inode_cachep; 64static struct kmem_cache *afs_inode_cachep;
64static atomic_t afs_count_active_inodes; 65static atomic_t afs_count_active_inodes;
65 66
66enum { 67enum afs_param {
67 afs_no_opt, 68 Opt_autocell,
68 afs_opt_cell, 69 Opt_dyn,
69 afs_opt_dyn, 70 Opt_source,
70 afs_opt_rwpath,
71 afs_opt_vol,
72 afs_opt_autocell,
73}; 71};
74 72
75static const match_table_t afs_options_list = { 73static const struct fs_parameter_spec afs_param_specs[] = {
76 { afs_opt_cell, "cell=%s" }, 74 fsparam_flag ("autocell", Opt_autocell),
77 { afs_opt_dyn, "dyn" }, 75 fsparam_flag ("dyn", Opt_dyn),
78 { afs_opt_rwpath, "rwpath" }, 76 fsparam_string("source", Opt_source),
79 { afs_opt_vol, "vol=%s" }, 77 {}
80 { afs_opt_autocell, "autocell" }, 78};
81 { afs_no_opt, NULL }, 79
80static const struct fs_parameter_description afs_fs_parameters = {
81 .name = "kAFS",
82 .specs = afs_param_specs,
82}; 83};
83 84
84/* 85/*
@@ -190,84 +191,23 @@ static int afs_show_options(struct seq_file *m, struct dentry *root)
190} 191}
191 192
192/* 193/*
193 * parse the mount options 194 * Parse the source name to get cell name, volume name, volume type and R/W
194 * - this function has been shamelessly adapted from the ext3 fs which 195 * selector.
195 * shamelessly adapted it from the msdos fs 196 *
196 */ 197 * This can be one of the following:
197static int afs_parse_options(struct afs_mount_params *params,
198 char *options, const char **devname)
199{
200 struct afs_cell *cell;
201 substring_t args[MAX_OPT_ARGS];
202 char *p;
203 int token;
204
205 _enter("%s", options);
206
207 options[PAGE_SIZE - 1] = 0;
208
209 while ((p = strsep(&options, ","))) {
210 if (!*p)
211 continue;
212
213 token = match_token(p, afs_options_list, args);
214 switch (token) {
215 case afs_opt_cell:
216 rcu_read_lock();
217 cell = afs_lookup_cell_rcu(params->net,
218 args[0].from,
219 args[0].to - args[0].from);
220 rcu_read_unlock();
221 if (IS_ERR(cell))
222 return PTR_ERR(cell);
223 afs_put_cell(params->net, params->cell);
224 params->cell = cell;
225 break;
226
227 case afs_opt_rwpath:
228 params->rwpath = true;
229 break;
230
231 case afs_opt_vol:
232 *devname = args[0].from;
233 break;
234
235 case afs_opt_autocell:
236 params->autocell = true;
237 break;
238
239 case afs_opt_dyn:
240 params->dyn_root = true;
241 break;
242
243 default:
244 printk(KERN_ERR "kAFS:"
245 " Unknown or invalid mount option: '%s'\n", p);
246 return -EINVAL;
247 }
248 }
249
250 _leave(" = 0");
251 return 0;
252}
253
254/*
255 * parse a device name to get cell name, volume name, volume type and R/W
256 * selector
257 * - this can be one of the following:
258 * "%[cell:]volume[.]" R/W volume 198 * "%[cell:]volume[.]" R/W volume
259 * "#[cell:]volume[.]" R/O or R/W volume (rwpath=0), 199 * "#[cell:]volume[.]" R/O or R/W volume (R/O parent),
260 * or R/W (rwpath=1) volume 200 * or R/W (R/W parent) volume
261 * "%[cell:]volume.readonly" R/O volume 201 * "%[cell:]volume.readonly" R/O volume
262 * "#[cell:]volume.readonly" R/O volume 202 * "#[cell:]volume.readonly" R/O volume
263 * "%[cell:]volume.backup" Backup volume 203 * "%[cell:]volume.backup" Backup volume
264 * "#[cell:]volume.backup" Backup volume 204 * "#[cell:]volume.backup" Backup volume
265 */ 205 */
266static int afs_parse_device_name(struct afs_mount_params *params, 206static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
267 const char *name)
268{ 207{
208 struct afs_fs_context *ctx = fc->fs_private;
269 struct afs_cell *cell; 209 struct afs_cell *cell;
270 const char *cellname, *suffix; 210 const char *cellname, *suffix, *name = param->string;
271 int cellnamesz; 211 int cellnamesz;
272 212
273 _enter(",%s", name); 213 _enter(",%s", name);
@@ -278,69 +218,149 @@ static int afs_parse_device_name(struct afs_mount_params *params,
278 } 218 }
279 219
280 if ((name[0] != '%' && name[0] != '#') || !name[1]) { 220 if ((name[0] != '%' && name[0] != '#') || !name[1]) {
221 /* To use dynroot, we don't want to have to provide a source */
222 if (strcmp(name, "none") == 0) {
223 ctx->no_cell = true;
224 return 0;
225 }
281 printk(KERN_ERR "kAFS: unparsable volume name\n"); 226 printk(KERN_ERR "kAFS: unparsable volume name\n");
282 return -EINVAL; 227 return -EINVAL;
283 } 228 }
284 229
285 /* determine the type of volume we're looking for */ 230 /* determine the type of volume we're looking for */
286 params->type = AFSVL_ROVOL; 231 if (name[0] == '%') {
287 params->force = false; 232 ctx->type = AFSVL_RWVOL;
288 if (params->rwpath || name[0] == '%') { 233 ctx->force = true;
289 params->type = AFSVL_RWVOL;
290 params->force = true;
291 } 234 }
292 name++; 235 name++;
293 236
294 /* split the cell name out if there is one */ 237 /* split the cell name out if there is one */
295 params->volname = strchr(name, ':'); 238 ctx->volname = strchr(name, ':');
296 if (params->volname) { 239 if (ctx->volname) {
297 cellname = name; 240 cellname = name;
298 cellnamesz = params->volname - name; 241 cellnamesz = ctx->volname - name;
299 params->volname++; 242 ctx->volname++;
300 } else { 243 } else {
301 params->volname = name; 244 ctx->volname = name;
302 cellname = NULL; 245 cellname = NULL;
303 cellnamesz = 0; 246 cellnamesz = 0;
304 } 247 }
305 248
306 /* the volume type is further affected by a possible suffix */ 249 /* the volume type is further affected by a possible suffix */
307 suffix = strrchr(params->volname, '.'); 250 suffix = strrchr(ctx->volname, '.');
308 if (suffix) { 251 if (suffix) {
309 if (strcmp(suffix, ".readonly") == 0) { 252 if (strcmp(suffix, ".readonly") == 0) {
310 params->type = AFSVL_ROVOL; 253 ctx->type = AFSVL_ROVOL;
311 params->force = true; 254 ctx->force = true;
312 } else if (strcmp(suffix, ".backup") == 0) { 255 } else if (strcmp(suffix, ".backup") == 0) {
313 params->type = AFSVL_BACKVOL; 256 ctx->type = AFSVL_BACKVOL;
314 params->force = true; 257 ctx->force = true;
315 } else if (suffix[1] == 0) { 258 } else if (suffix[1] == 0) {
316 } else { 259 } else {
317 suffix = NULL; 260 suffix = NULL;
318 } 261 }
319 } 262 }
320 263
321 params->volnamesz = suffix ? 264 ctx->volnamesz = suffix ?
322 suffix - params->volname : strlen(params->volname); 265 suffix - ctx->volname : strlen(ctx->volname);
323 266
324 _debug("cell %*.*s [%p]", 267 _debug("cell %*.*s [%p]",
325 cellnamesz, cellnamesz, cellname ?: "", params->cell); 268 cellnamesz, cellnamesz, cellname ?: "", ctx->cell);
326 269
327 /* lookup the cell record */ 270 /* lookup the cell record */
328 if (cellname || !params->cell) { 271 if (cellname) {
329 cell = afs_lookup_cell(params->net, cellname, cellnamesz, 272 cell = afs_lookup_cell(ctx->net, cellname, cellnamesz,
330 NULL, false); 273 NULL, false);
331 if (IS_ERR(cell)) { 274 if (IS_ERR(cell)) {
332 printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", 275 pr_err("kAFS: unable to lookup cell '%*.*s'\n",
333 cellnamesz, cellnamesz, cellname ?: ""); 276 cellnamesz, cellnamesz, cellname ?: "");
334 return PTR_ERR(cell); 277 return PTR_ERR(cell);
335 } 278 }
336 afs_put_cell(params->net, params->cell); 279 afs_put_cell(ctx->net, ctx->cell);
337 params->cell = cell; 280 ctx->cell = cell;
338 } 281 }
339 282
340 _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", 283 _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
341 params->cell->name, params->cell, 284 ctx->cell->name, ctx->cell,
342 params->volnamesz, params->volnamesz, params->volname, 285 ctx->volnamesz, ctx->volnamesz, ctx->volname,
343 suffix ?: "-", params->type, params->force ? " FORCE" : ""); 286 suffix ?: "-", ctx->type, ctx->force ? " FORCE" : "");
287
288 fc->source = param->string;
289 param->string = NULL;
290 return 0;
291}
292
293/*
294 * Parse a single mount parameter.
295 */
296static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
297{
298 struct fs_parse_result result;
299 struct afs_fs_context *ctx = fc->fs_private;
300 int opt;
301
302 opt = fs_parse(fc, &afs_fs_parameters, param, &result);
303 if (opt < 0)
304 return opt;
305
306 switch (opt) {
307 case Opt_source:
308 return afs_parse_source(fc, param);
309
310 case Opt_autocell:
311 ctx->autocell = true;
312 break;
313
314 case Opt_dyn:
315 ctx->dyn_root = true;
316 break;
317
318 default:
319 return -EINVAL;
320 }
321
322 _leave(" = 0");
323 return 0;
324}
325
326/*
327 * Validate the options, get the cell key and look up the volume.
328 */
329static int afs_validate_fc(struct fs_context *fc)
330{
331 struct afs_fs_context *ctx = fc->fs_private;
332 struct afs_volume *volume;
333 struct key *key;
334
335 if (!ctx->dyn_root) {
336 if (ctx->no_cell) {
337 pr_warn("kAFS: Can only specify source 'none' with -o dyn\n");
338 return -EINVAL;
339 }
340
341 if (!ctx->cell) {
342 pr_warn("kAFS: No cell specified\n");
343 return -EDESTADDRREQ;
344 }
345
346 /* We try to do the mount securely. */
347 key = afs_request_key(ctx->cell);
348 if (IS_ERR(key))
349 return PTR_ERR(key);
350
351 ctx->key = key;
352
353 if (ctx->volume) {
354 afs_put_volume(ctx->cell, ctx->volume);
355 ctx->volume = NULL;
356 }
357
358 volume = afs_create_volume(ctx);
359 if (IS_ERR(volume))
360 return PTR_ERR(volume);
361
362 ctx->volume = volume;
363 }
344 364
345 return 0; 365 return 0;
346} 366}
@@ -348,39 +368,34 @@ static int afs_parse_device_name(struct afs_mount_params *params,
348/* 368/*
349 * check a superblock to see if it's the one we're looking for 369 * check a superblock to see if it's the one we're looking for
350 */ 370 */
351static int afs_test_super(struct super_block *sb, void *data) 371static int afs_test_super(struct super_block *sb, struct fs_context *fc)
352{ 372{
353 struct afs_super_info *as1 = data; 373 struct afs_fs_context *ctx = fc->fs_private;
354 struct afs_super_info *as = AFS_FS_S(sb); 374 struct afs_super_info *as = AFS_FS_S(sb);
355 375
356 return (as->net_ns == as1->net_ns && 376 return (as->net_ns == fc->net_ns &&
357 as->volume && 377 as->volume &&
358 as->volume->vid == as1->volume->vid && 378 as->volume->vid == ctx->volume->vid &&
359 !as->dyn_root); 379 !as->dyn_root);
360} 380}
361 381
362static int afs_dynroot_test_super(struct super_block *sb, void *data) 382static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc)
363{ 383{
364 struct afs_super_info *as1 = data;
365 struct afs_super_info *as = AFS_FS_S(sb); 384 struct afs_super_info *as = AFS_FS_S(sb);
366 385
367 return (as->net_ns == as1->net_ns && 386 return (as->net_ns == fc->net_ns &&
368 as->dyn_root); 387 as->dyn_root);
369} 388}
370 389
371static int afs_set_super(struct super_block *sb, void *data) 390static int afs_set_super(struct super_block *sb, struct fs_context *fc)
372{ 391{
373 struct afs_super_info *as = data;
374
375 sb->s_fs_info = as;
376 return set_anon_super(sb, NULL); 392 return set_anon_super(sb, NULL);
377} 393}
378 394
379/* 395/*
380 * fill in the superblock 396 * fill in the superblock
381 */ 397 */
382static int afs_fill_super(struct super_block *sb, 398static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
383 struct afs_mount_params *params)
384{ 399{
385 struct afs_super_info *as = AFS_FS_S(sb); 400 struct afs_super_info *as = AFS_FS_S(sb);
386 struct afs_fid fid; 401 struct afs_fid fid;
@@ -412,13 +427,13 @@ static int afs_fill_super(struct super_block *sb,
412 fid.vnode = 1; 427 fid.vnode = 1;
413 fid.vnode_hi = 0; 428 fid.vnode_hi = 0;
414 fid.unique = 1; 429 fid.unique = 1;
415 inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); 430 inode = afs_iget(sb, ctx->key, &fid, NULL, NULL, NULL);
416 } 431 }
417 432
418 if (IS_ERR(inode)) 433 if (IS_ERR(inode))
419 return PTR_ERR(inode); 434 return PTR_ERR(inode);
420 435
421 if (params->autocell || params->dyn_root) 436 if (ctx->autocell || as->dyn_root)
422 set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); 437 set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
423 438
424 ret = -ENOMEM; 439 ret = -ENOMEM;
@@ -443,17 +458,20 @@ error:
443 return ret; 458 return ret;
444} 459}
445 460
446static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) 461static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
447{ 462{
463 struct afs_fs_context *ctx = fc->fs_private;
448 struct afs_super_info *as; 464 struct afs_super_info *as;
449 465
450 as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); 466 as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
451 if (as) { 467 if (as) {
452 as->net_ns = get_net(params->net_ns); 468 as->net_ns = get_net(fc->net_ns);
453 if (params->dyn_root) 469 if (ctx->dyn_root) {
454 as->dyn_root = true; 470 as->dyn_root = true;
455 else 471 } else {
456 as->cell = afs_get_cell(params->cell); 472 as->cell = afs_get_cell(ctx->cell);
473 as->volume = __afs_get_volume(ctx->volume);
474 }
457 } 475 }
458 return as; 476 return as;
459} 477}
@@ -475,7 +493,7 @@ static void afs_kill_super(struct super_block *sb)
475 493
476 if (as->dyn_root) 494 if (as->dyn_root)
477 afs_dynroot_depopulate(sb); 495 afs_dynroot_depopulate(sb);
478 496
479 /* Clear the callback interests (which will do ilookup5) before 497 /* Clear the callback interests (which will do ilookup5) before
480 * deactivating the superblock. 498 * deactivating the superblock.
481 */ 499 */
@@ -488,111 +506,103 @@ static void afs_kill_super(struct super_block *sb)
488} 506}
489 507
490/* 508/*
491 * get an AFS superblock 509 * Get an AFS superblock and root directory.
492 */ 510 */
493static struct dentry *afs_mount(struct file_system_type *fs_type, 511static int afs_get_tree(struct fs_context *fc)
494 int flags, const char *dev_name, void *options)
495{ 512{
496 struct afs_mount_params params; 513 struct afs_fs_context *ctx = fc->fs_private;
497 struct super_block *sb; 514 struct super_block *sb;
498 struct afs_volume *candidate;
499 struct key *key;
500 struct afs_super_info *as; 515 struct afs_super_info *as;
501 int ret; 516 int ret;
502 517
503 _enter(",,%s,%p", dev_name, options); 518 ret = afs_validate_fc(fc);
504 519 if (ret)
505 memset(&params, 0, sizeof(params));
506
507 ret = -EINVAL;
508 if (current->nsproxy->net_ns != &init_net)
509 goto error; 520 goto error;
510 params.net_ns = current->nsproxy->net_ns;
511 params.net = afs_net(params.net_ns);
512
513 /* parse the options and device name */
514 if (options) {
515 ret = afs_parse_options(&params, options, &dev_name);
516 if (ret < 0)
517 goto error;
518 }
519
520 if (!params.dyn_root) {
521 ret = afs_parse_device_name(&params, dev_name);
522 if (ret < 0)
523 goto error;
524 521
525 /* try and do the mount securely */ 522 _enter("");
526 key = afs_request_key(params.cell);
527 if (IS_ERR(key)) {
528 _leave(" = %ld [key]", PTR_ERR(key));
529 ret = PTR_ERR(key);
530 goto error;
531 }
532 params.key = key;
533 }
534 523
535 /* allocate a superblock info record */ 524 /* allocate a superblock info record */
536 ret = -ENOMEM; 525 ret = -ENOMEM;
537 as = afs_alloc_sbi(&params); 526 as = afs_alloc_sbi(fc);
538 if (!as) 527 if (!as)
539 goto error_key; 528 goto error;
540 529 fc->s_fs_info = as;
541 if (!params.dyn_root) {
542 /* Assume we're going to need a volume record; at the very
543 * least we can use it to update the volume record if we have
544 * one already. This checks that the volume exists within the
545 * cell.
546 */
547 candidate = afs_create_volume(&params);
548 if (IS_ERR(candidate)) {
549 ret = PTR_ERR(candidate);
550 goto error_as;
551 }
552
553 as->volume = candidate;
554 }
555 530
556 /* allocate a deviceless superblock */ 531 /* allocate a deviceless superblock */
557 sb = sget(fs_type, 532 sb = sget_fc(fc,
558 as->dyn_root ? afs_dynroot_test_super : afs_test_super, 533 as->dyn_root ? afs_dynroot_test_super : afs_test_super,
559 afs_set_super, flags, as); 534 afs_set_super);
560 if (IS_ERR(sb)) { 535 if (IS_ERR(sb)) {
561 ret = PTR_ERR(sb); 536 ret = PTR_ERR(sb);
562 goto error_as; 537 goto error;
563 } 538 }
564 539
565 if (!sb->s_root) { 540 if (!sb->s_root) {
566 /* initial superblock/root creation */ 541 /* initial superblock/root creation */
567 _debug("create"); 542 _debug("create");
568 ret = afs_fill_super(sb, &params); 543 ret = afs_fill_super(sb, ctx);
569 if (ret < 0) 544 if (ret < 0)
570 goto error_sb; 545 goto error_sb;
571 as = NULL;
572 sb->s_flags |= SB_ACTIVE; 546 sb->s_flags |= SB_ACTIVE;
573 } else { 547 } else {
574 _debug("reuse"); 548 _debug("reuse");
575 ASSERTCMP(sb->s_flags, &, SB_ACTIVE); 549 ASSERTCMP(sb->s_flags, &, SB_ACTIVE);
576 afs_destroy_sbi(as);
577 as = NULL;
578 } 550 }
579 551
580 afs_put_cell(params.net, params.cell); 552 fc->root = dget(sb->s_root);
581 key_put(params.key);
582 _leave(" = 0 [%p]", sb); 553 _leave(" = 0 [%p]", sb);
583 return dget(sb->s_root); 554 return 0;
584 555
585error_sb: 556error_sb:
586 deactivate_locked_super(sb); 557 deactivate_locked_super(sb);
587 goto error_key;
588error_as:
589 afs_destroy_sbi(as);
590error_key:
591 key_put(params.key);
592error: 558error:
593 afs_put_cell(params.net, params.cell);
594 _leave(" = %d", ret); 559 _leave(" = %d", ret);
595 return ERR_PTR(ret); 560 return ret;
561}
562
563static void afs_free_fc(struct fs_context *fc)
564{
565 struct afs_fs_context *ctx = fc->fs_private;
566
567 afs_destroy_sbi(fc->s_fs_info);
568 afs_put_volume(ctx->cell, ctx->volume);
569 afs_put_cell(ctx->net, ctx->cell);
570 key_put(ctx->key);
571 kfree(ctx);
572}
573
574static const struct fs_context_operations afs_context_ops = {
575 .free = afs_free_fc,
576 .parse_param = afs_parse_param,
577 .get_tree = afs_get_tree,
578};
579
580/*
581 * Set up the filesystem mount context.
582 */
583static int afs_init_fs_context(struct fs_context *fc)
584{
585 struct afs_fs_context *ctx;
586 struct afs_cell *cell;
587
588 ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL);
589 if (!ctx)
590 return -ENOMEM;
591
592 ctx->type = AFSVL_ROVOL;
593 ctx->net = afs_net(fc->net_ns);
594
595 /* Default to the workstation cell. */
596 rcu_read_lock();
597 cell = afs_lookup_cell_rcu(ctx->net, NULL, 0);
598 rcu_read_unlock();
599 if (IS_ERR(cell))
600 cell = NULL;
601 ctx->cell = cell;
602
603 fc->fs_private = ctx;
604 fc->ops = &afs_context_ops;
605 return 0;
596} 606}
597 607
598/* 608/*
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 00975ed3640f..f6eba2def0a1 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -21,7 +21,7 @@ static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
21/* 21/*
22 * Allocate a volume record and load it up from a vldb record. 22 * Allocate a volume record and load it up from a vldb record.
23 */ 23 */
24static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, 24static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
25 struct afs_vldb_entry *vldb, 25 struct afs_vldb_entry *vldb,
26 unsigned long type_mask) 26 unsigned long type_mask)
27{ 27{
@@ -113,7 +113,7 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
113 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless 113 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
114 * explicitly told otherwise 114 * explicitly told otherwise
115 */ 115 */
116struct afs_volume *afs_create_volume(struct afs_mount_params *params) 116struct afs_volume *afs_create_volume(struct afs_fs_context *params)
117{ 117{
118 struct afs_vldb_entry *vldb; 118 struct afs_vldb_entry *vldb;
119 struct afs_volume *volume; 119 struct afs_volume *volume;
diff --git a/fs/filesystems.c b/fs/filesystems.c
index b03f57b1105b..9135646e41ac 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -16,6 +16,7 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/uaccess.h> 18#include <linux/uaccess.h>
19#include <linux/fs_parser.h>
19 20
20/* 21/*
21 * Handling of filesystem drivers list. 22 * Handling of filesystem drivers list.
@@ -73,6 +74,9 @@ int register_filesystem(struct file_system_type * fs)
73 int res = 0; 74 int res = 0;
74 struct file_system_type ** p; 75 struct file_system_type ** p;
75 76
77 if (fs->parameters && !fs_validate_description(fs->parameters))
78 return -EINVAL;
79
76 BUG_ON(strchr(fs->name, '.')); 80 BUG_ON(strchr(fs->name, '.'));
77 if (fs->next) 81 if (fs->next)
78 return -EBUSY; 82 return -EBUSY;
diff --git a/fs/fs_context.c b/fs/fs_context.c
new file mode 100644
index 000000000000..87e3546b9a52
--- /dev/null
+++ b/fs/fs_context.c
@@ -0,0 +1,642 @@
1/* Provide a way to create a superblock configuration context within the kernel
2 * that allows a superblock to be set up prior to mounting.
3 *
4 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public Licence
9 * as published by the Free Software Foundation; either version
10 * 2 of the Licence, or (at your option) any later version.
11 */
12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14#include <linux/fs_context.h>
15#include <linux/fs_parser.h>
16#include <linux/fs.h>
17#include <linux/mount.h>
18#include <linux/nsproxy.h>
19#include <linux/slab.h>
20#include <linux/magic.h>
21#include <linux/security.h>
22#include <linux/mnt_namespace.h>
23#include <linux/pid_namespace.h>
24#include <linux/user_namespace.h>
25#include <net/net_namespace.h>
26#include "mount.h"
27#include "internal.h"
28
29enum legacy_fs_param {
30 LEGACY_FS_UNSET_PARAMS,
31 LEGACY_FS_MONOLITHIC_PARAMS,
32 LEGACY_FS_INDIVIDUAL_PARAMS,
33};
34
35struct legacy_fs_context {
36 char *legacy_data; /* Data page for legacy filesystems */
37 size_t data_size;
38 enum legacy_fs_param param_type;
39};
40
41static int legacy_init_fs_context(struct fs_context *fc);
42
43static const struct constant_table common_set_sb_flag[] = {
44 { "dirsync", SB_DIRSYNC },
45 { "lazytime", SB_LAZYTIME },
46 { "mand", SB_MANDLOCK },
47 { "posixacl", SB_POSIXACL },
48 { "ro", SB_RDONLY },
49 { "sync", SB_SYNCHRONOUS },
50};
51
52static const struct constant_table common_clear_sb_flag[] = {
53 { "async", SB_SYNCHRONOUS },
54 { "nolazytime", SB_LAZYTIME },
55 { "nomand", SB_MANDLOCK },
56 { "rw", SB_RDONLY },
57 { "silent", SB_SILENT },
58};
59
60static const char *const forbidden_sb_flag[] = {
61 "bind",
62 "dev",
63 "exec",
64 "move",
65 "noatime",
66 "nodev",
67 "nodiratime",
68 "noexec",
69 "norelatime",
70 "nostrictatime",
71 "nosuid",
72 "private",
73 "rec",
74 "relatime",
75 "remount",
76 "shared",
77 "slave",
78 "strictatime",
79 "suid",
80 "unbindable",
81};
82
83/*
84 * Check for a common mount option that manipulates s_flags.
85 */
86static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
87{
88 unsigned int token;
89 unsigned int i;
90
91 for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++)
92 if (strcmp(key, forbidden_sb_flag[i]) == 0)
93 return -EINVAL;
94
95 token = lookup_constant(common_set_sb_flag, key, 0);
96 if (token) {
97 fc->sb_flags |= token;
98 fc->sb_flags_mask |= token;
99 return 0;
100 }
101
102 token = lookup_constant(common_clear_sb_flag, key, 0);
103 if (token) {
104 fc->sb_flags &= ~token;
105 fc->sb_flags_mask |= token;
106 return 0;
107 }
108
109 return -ENOPARAM;
110}
111
112/**
113 * vfs_parse_fs_param - Add a single parameter to a superblock config
114 * @fc: The filesystem context to modify
115 * @param: The parameter
116 *
117 * A single mount option in string form is applied to the filesystem context
118 * being set up. Certain standard options (for example "ro") are translated
119 * into flag bits without going to the filesystem. The active security module
120 * is allowed to observe and poach options. Any other options are passed over
121 * to the filesystem to parse.
122 *
123 * This may be called multiple times for a context.
124 *
125 * Returns 0 on success and a negative error code on failure. In the event of
126 * failure, supplementary error information may have been set.
127 */
128int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param)
129{
130 int ret;
131
132 if (!param->key)
133 return invalf(fc, "Unnamed parameter\n");
134
135 ret = vfs_parse_sb_flag(fc, param->key);
136 if (ret != -ENOPARAM)
137 return ret;
138
139 ret = security_fs_context_parse_param(fc, param);
140 if (ret != -ENOPARAM)
141 /* Param belongs to the LSM or is disallowed by the LSM; so
142 * don't pass to the FS.
143 */
144 return ret;
145
146 if (fc->ops->parse_param) {
147 ret = fc->ops->parse_param(fc, param);
148 if (ret != -ENOPARAM)
149 return ret;
150 }
151
152 /* If the filesystem doesn't take any arguments, give it the
153 * default handling of source.
154 */
155 if (strcmp(param->key, "source") == 0) {
156 if (param->type != fs_value_is_string)
157 return invalf(fc, "VFS: Non-string source");
158 if (fc->source)
159 return invalf(fc, "VFS: Multiple sources");
160 fc->source = param->string;
161 param->string = NULL;
162 return 0;
163 }
164
165 return invalf(fc, "%s: Unknown parameter '%s'",
166 fc->fs_type->name, param->key);
167}
168EXPORT_SYMBOL(vfs_parse_fs_param);
169
170/**
171 * vfs_parse_fs_string - Convenience function to just parse a string.
172 */
173int vfs_parse_fs_string(struct fs_context *fc, const char *key,
174 const char *value, size_t v_size)
175{
176 int ret;
177
178 struct fs_parameter param = {
179 .key = key,
180 .type = fs_value_is_string,
181 .size = v_size,
182 };
183
184 if (v_size > 0) {
185 param.string = kmemdup_nul(value, v_size, GFP_KERNEL);
186 if (!param.string)
187 return -ENOMEM;
188 }
189
190 ret = vfs_parse_fs_param(fc, &param);
191 kfree(param.string);
192 return ret;
193}
194EXPORT_SYMBOL(vfs_parse_fs_string);
195
196/**
197 * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
198 * @ctx: The superblock configuration to fill in.
199 * @data: The data to parse
200 *
201 * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be
202 * called from the ->monolithic_mount_data() fs_context operation.
203 *
204 * Returns 0 on success or the error returned by the ->parse_option() fs_context
205 * operation on failure.
206 */
207int generic_parse_monolithic(struct fs_context *fc, void *data)
208{
209 char *options = data, *key;
210 int ret = 0;
211
212 if (!options)
213 return 0;
214
215 ret = security_sb_eat_lsm_opts(options, &fc->security);
216 if (ret)
217 return ret;
218
219 while ((key = strsep(&options, ",")) != NULL) {
220 if (*key) {
221 size_t v_len = 0;
222 char *value = strchr(key, '=');
223
224 if (value) {
225 if (value == key)
226 continue;
227 *value++ = 0;
228 v_len = strlen(value);
229 }
230 ret = vfs_parse_fs_string(fc, key, value, v_len);
231 if (ret < 0)
232 break;
233 }
234 }
235
236 return ret;
237}
238EXPORT_SYMBOL(generic_parse_monolithic);
239
240/**
241 * alloc_fs_context - Create a filesystem context.
242 * @fs_type: The filesystem type.
243 * @reference: The dentry from which this one derives (or NULL)
244 * @sb_flags: Filesystem/superblock flags (SB_*)
245 * @sb_flags_mask: Applicable members of @sb_flags
246 * @purpose: The purpose that this configuration shall be used for.
247 *
248 * Open a filesystem and create a mount context. The mount context is
249 * initialised with the supplied flags and, if a submount/automount from
250 * another superblock (referred to by @reference) is supplied, may have
251 * parameters such as namespaces copied across from that superblock.
252 */
253static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
254 struct dentry *reference,
255 unsigned int sb_flags,
256 unsigned int sb_flags_mask,
257 enum fs_context_purpose purpose)
258{
259 int (*init_fs_context)(struct fs_context *);
260 struct fs_context *fc;
261 int ret = -ENOMEM;
262
263 fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
264 if (!fc)
265 return ERR_PTR(-ENOMEM);
266
267 fc->purpose = purpose;
268 fc->sb_flags = sb_flags;
269 fc->sb_flags_mask = sb_flags_mask;
270 fc->fs_type = get_filesystem(fs_type);
271 fc->cred = get_current_cred();
272 fc->net_ns = get_net(current->nsproxy->net_ns);
273
274 switch (purpose) {
275 case FS_CONTEXT_FOR_MOUNT:
276 fc->user_ns = get_user_ns(fc->cred->user_ns);
277 break;
278 case FS_CONTEXT_FOR_SUBMOUNT:
279 fc->user_ns = get_user_ns(reference->d_sb->s_user_ns);
280 break;
281 case FS_CONTEXT_FOR_RECONFIGURE:
282 /* We don't pin any namespaces as the superblock's
283 * subscriptions cannot be changed at this point.
284 */
285 atomic_inc(&reference->d_sb->s_active);
286 fc->root = dget(reference);
287 break;
288 }
289
290 /* TODO: Make all filesystems support this unconditionally */
291 init_fs_context = fc->fs_type->init_fs_context;
292 if (!init_fs_context)
293 init_fs_context = legacy_init_fs_context;
294
295 ret = init_fs_context(fc);
296 if (ret < 0)
297 goto err_fc;
298 fc->need_free = true;
299 return fc;
300
301err_fc:
302 put_fs_context(fc);
303 return ERR_PTR(ret);
304}
305
306struct fs_context *fs_context_for_mount(struct file_system_type *fs_type,
307 unsigned int sb_flags)
308{
309 return alloc_fs_context(fs_type, NULL, sb_flags, 0,
310 FS_CONTEXT_FOR_MOUNT);
311}
312EXPORT_SYMBOL(fs_context_for_mount);
313
314struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
315 unsigned int sb_flags,
316 unsigned int sb_flags_mask)
317{
318 return alloc_fs_context(dentry->d_sb->s_type, dentry, sb_flags,
319 sb_flags_mask, FS_CONTEXT_FOR_RECONFIGURE);
320}
321EXPORT_SYMBOL(fs_context_for_reconfigure);
322
323struct fs_context *fs_context_for_submount(struct file_system_type *type,
324 struct dentry *reference)
325{
326 return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT);
327}
328EXPORT_SYMBOL(fs_context_for_submount);
329
330void fc_drop_locked(struct fs_context *fc)
331{
332 struct super_block *sb = fc->root->d_sb;
333 dput(fc->root);
334 fc->root = NULL;
335 deactivate_locked_super(sb);
336}
337
338static void legacy_fs_context_free(struct fs_context *fc);
339
340/**
341 * vfs_dup_fc_config: Duplicate a filesystem context.
342 * @src_fc: The context to copy.
343 */
344struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
345{
346 struct fs_context *fc;
347 int ret;
348
349 if (!src_fc->ops->dup)
350 return ERR_PTR(-EOPNOTSUPP);
351
352 fc = kmemdup(src_fc, sizeof(struct fs_context), GFP_KERNEL);
353 if (!fc)
354 return ERR_PTR(-ENOMEM);
355
356 fc->fs_private = NULL;
357 fc->s_fs_info = NULL;
358 fc->source = NULL;
359 fc->security = NULL;
360 get_filesystem(fc->fs_type);
361 get_net(fc->net_ns);
362 get_user_ns(fc->user_ns);
363 get_cred(fc->cred);
364
365 /* Can't call put until we've called ->dup */
366 ret = fc->ops->dup(fc, src_fc);
367 if (ret < 0)
368 goto err_fc;
369
370 ret = security_fs_context_dup(fc, src_fc);
371 if (ret < 0)
372 goto err_fc;
373 return fc;
374
375err_fc:
376 put_fs_context(fc);
377 return ERR_PTR(ret);
378}
379EXPORT_SYMBOL(vfs_dup_fs_context);
380
#ifdef CONFIG_PRINTK
/**
 * logfc - Log a message to a filesystem context
 * @fc: The filesystem context to log to.
 * @fmt: The format of the buffer.
 *
 * The first character of @fmt picks the log level: 'w' gives a warning, 'e'
 * an error and anything else a notice.  The whole of @fmt, that first
 * character included, is then passed to vprintk_emit() and a newline is
 * appended with pr_cont().  @fc itself is not consulted here.
 */
void logfc(struct fs_context *fc, const char *fmt, ...)
{
	va_list args;
	int level;

	if (fmt[0] == 'w')
		level = LOGLEVEL_WARNING;
	else if (fmt[0] == 'e')
		level = LOGLEVEL_ERR;
	else
		level = LOGLEVEL_NOTICE;

	va_start(args, fmt);
	vprintk_emit(0, level, NULL, 0, fmt, args);
	pr_cont("\n");
	va_end(args);
}
EXPORT_SYMBOL(logfc);
#endif
410
411/**
412 * put_fs_context - Dispose of a superblock configuration context.
413 * @fc: The context to dispose of.
414 */
415void put_fs_context(struct fs_context *fc)
416{
417 struct super_block *sb;
418
419 if (fc->root) {
420 sb = fc->root->d_sb;
421 dput(fc->root);
422 fc->root = NULL;
423 deactivate_super(sb);
424 }
425
426 if (fc->need_free && fc->ops && fc->ops->free)
427 fc->ops->free(fc);
428
429 security_free_mnt_opts(&fc->security);
430 put_net(fc->net_ns);
431 put_user_ns(fc->user_ns);
432 put_cred(fc->cred);
433 kfree(fc->subtype);
434 put_filesystem(fc->fs_type);
435 kfree(fc->source);
436 kfree(fc);
437}
438EXPORT_SYMBOL(put_fs_context);
439
440/*
441 * Free the config for a filesystem that doesn't support fs_context.
442 */
443static void legacy_fs_context_free(struct fs_context *fc)
444{
445 struct legacy_fs_context *ctx = fc->fs_private;
446
447 if (ctx) {
448 if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS)
449 kfree(ctx->legacy_data);
450 kfree(ctx);
451 }
452}
453
454/*
455 * Duplicate a legacy config.
456 */
457static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
458{
459 struct legacy_fs_context *ctx;
460 struct legacy_fs_context *src_ctx = src_fc->fs_private;
461
462 ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL);
463 if (!ctx)
464 return -ENOMEM;
465
466 if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) {
467 ctx->legacy_data = kmemdup(src_ctx->legacy_data,
468 src_ctx->data_size, GFP_KERNEL);
469 if (!ctx->legacy_data) {
470 kfree(ctx);
471 return -ENOMEM;
472 }
473 }
474
475 fc->fs_private = ctx;
476 return 0;
477}
478
479/*
480 * Add a parameter to a legacy config. We build up a comma-separated list of
481 * options.
482 */
483static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
484{
485 struct legacy_fs_context *ctx = fc->fs_private;
486 unsigned int size = ctx->data_size;
487 size_t len = 0;
488
489 if (strcmp(param->key, "source") == 0) {
490 if (param->type != fs_value_is_string)
491 return invalf(fc, "VFS: Legacy: Non-string source");
492 if (fc->source)
493 return invalf(fc, "VFS: Legacy: Multiple sources");
494 fc->source = param->string;
495 param->string = NULL;
496 return 0;
497 }
498
499 if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) &&
500 strcmp(param->key, "subtype") == 0) {
501 if (param->type != fs_value_is_string)
502 return invalf(fc, "VFS: Legacy: Non-string subtype");
503 if (fc->subtype)
504 return invalf(fc, "VFS: Legacy: Multiple subtype");
505 fc->subtype = param->string;
506 param->string = NULL;
507 return 0;
508 }
509
510 if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
511 return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
512
513 switch (param->type) {
514 case fs_value_is_string:
515 len = 1 + param->size;
516 /* Fall through */
517 case fs_value_is_flag:
518 len += strlen(param->key);
519 break;
520 default:
521 return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported",
522 param->key);
523 }
524
525 if (len > PAGE_SIZE - 2 - size)
526 return invalf(fc, "VFS: Legacy: Cumulative options too large");
527 if (strchr(param->key, ',') ||
528 (param->type == fs_value_is_string &&
529 memchr(param->string, ',', param->size)))
530 return invalf(fc, "VFS: Legacy: Option '%s' contained comma",
531 param->key);
532 if (!ctx->legacy_data) {
533 ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
534 if (!ctx->legacy_data)
535 return -ENOMEM;
536 }
537
538 ctx->legacy_data[size++] = ',';
539 len = strlen(param->key);
540 memcpy(ctx->legacy_data + size, param->key, len);
541 size += len;
542 if (param->type == fs_value_is_string) {
543 ctx->legacy_data[size++] = '=';
544 memcpy(ctx->legacy_data + size, param->string, param->size);
545 size += param->size;
546 }
547 ctx->legacy_data[size] = '\0';
548 ctx->data_size = size;
549 ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS;
550 return 0;
551}
552
553/*
554 * Add monolithic mount data.
555 */
556static int legacy_parse_monolithic(struct fs_context *fc, void *data)
557{
558 struct legacy_fs_context *ctx = fc->fs_private;
559
560 if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) {
561 pr_warn("VFS: Can't mix monolithic and individual options\n");
562 return -EINVAL;
563 }
564
565 ctx->legacy_data = data;
566 ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS;
567 if (!ctx->legacy_data)
568 return 0;
569
570 if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
571 return 0;
572 return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security);
573}
574
575/*
576 * Get a mountable root with the legacy mount command.
577 */
578static int legacy_get_tree(struct fs_context *fc)
579{
580 struct legacy_fs_context *ctx = fc->fs_private;
581 struct super_block *sb;
582 struct dentry *root;
583
584 root = fc->fs_type->mount(fc->fs_type, fc->sb_flags,
585 fc->source, ctx->legacy_data);
586 if (IS_ERR(root))
587 return PTR_ERR(root);
588
589 sb = root->d_sb;
590 BUG_ON(!sb);
591
592 fc->root = root;
593 return 0;
594}
595
596/*
597 * Handle remount.
598 */
599static int legacy_reconfigure(struct fs_context *fc)
600{
601 struct legacy_fs_context *ctx = fc->fs_private;
602 struct super_block *sb = fc->root->d_sb;
603
604 if (!sb->s_op->remount_fs)
605 return 0;
606
607 return sb->s_op->remount_fs(sb, &fc->sb_flags,
608 ctx ? ctx->legacy_data : NULL);
609}
610
611const struct fs_context_operations legacy_fs_context_ops = {
612 .free = legacy_fs_context_free,
613 .dup = legacy_fs_context_dup,
614 .parse_param = legacy_parse_param,
615 .parse_monolithic = legacy_parse_monolithic,
616 .get_tree = legacy_get_tree,
617 .reconfigure = legacy_reconfigure,
618};
619
620/*
621 * Initialise a legacy context for a filesystem that doesn't support
622 * fs_context.
623 */
624static int legacy_init_fs_context(struct fs_context *fc)
625{
626 fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
627 if (!fc->fs_private)
628 return -ENOMEM;
629 fc->ops = &legacy_fs_context_ops;
630 return 0;
631}
632
633int parse_monolithic_mount_data(struct fs_context *fc, void *data)
634{
635 int (*monolithic_mount_data)(struct fs_context *, void *);
636
637 monolithic_mount_data = fc->ops->parse_monolithic;
638 if (!monolithic_mount_data)
639 monolithic_mount_data = generic_parse_monolithic;
640
641 return monolithic_mount_data(fc, data);
642}
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
new file mode 100644
index 000000000000..842e8f749db6
--- /dev/null
+++ b/fs/fs_parser.c
@@ -0,0 +1,447 @@
1/* Filesystem parameter parser.
2 *
3 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/export.h>
13#include <linux/fs_context.h>
14#include <linux/fs_parser.h>
15#include <linux/slab.h>
16#include <linux/security.h>
17#include <linux/namei.h>
18#include "internal.h"
19
20static const struct constant_table bool_names[] = {
21 { "0", false },
22 { "1", true },
23 { "false", false },
24 { "no", false },
25 { "true", true },
26 { "yes", true },
27};
28
29/**
30 * lookup_constant - Look up a constant by name in an ordered table
31 * @tbl: The table of constants to search.
32 * @tbl_size: The size of the table.
33 * @name: The name to look up.
34 * @not_found: The value to return if the name is not found.
35 */
36int __lookup_constant(const struct constant_table *tbl, size_t tbl_size,
37 const char *name, int not_found)
38{
39 unsigned int i;
40
41 for (i = 0; i < tbl_size; i++)
42 if (strcmp(name, tbl[i].name) == 0)
43 return tbl[i].value;
44
45 return not_found;
46}
47EXPORT_SYMBOL(__lookup_constant);
48
49static const struct fs_parameter_spec *fs_lookup_key(
50 const struct fs_parameter_description *desc,
51 const char *name)
52{
53 const struct fs_parameter_spec *p;
54
55 if (!desc->specs)
56 return NULL;
57
58 for (p = desc->specs; p->name; p++)
59 if (strcmp(p->name, name) == 0)
60 return p;
61
62 return NULL;
63}
64
65/*
66 * fs_parse - Parse a filesystem configuration parameter
67 * @fc: The filesystem context to log errors through.
68 * @desc: The parameter description to use.
69 * @param: The parameter.
70 * @result: Where to place the result of the parse
71 *
72 * Parse a filesystem configuration parameter and attempt a conversion for a
73 * simple parameter for which this is requested. If successful, the determined
74 * parameter ID is placed into @result->key, the desired type is indicated in
75 * @result->t and any converted value is placed into an appropriate member of
76 * the union in @result.
77 *
78 * The function returns the parameter number if the parameter was matched,
79 * -ENOPARAM if it wasn't matched and @desc->ignore_unknown indicated that
80 * unknown parameters are okay and -EINVAL if there was a conversion issue or
81 * the parameter wasn't recognised and unknowns aren't okay.
82 */
83int fs_parse(struct fs_context *fc,
84 const struct fs_parameter_description *desc,
85 struct fs_parameter *param,
86 struct fs_parse_result *result)
87{
88 const struct fs_parameter_spec *p;
89 const struct fs_parameter_enum *e;
90 int ret = -ENOPARAM, b;
91
92 result->has_value = !!param->string;
93 result->negated = false;
94 result->uint_64 = 0;
95
96 p = fs_lookup_key(desc, param->key);
97 if (!p) {
98 /* If we didn't find something that looks like "noxxx", see if
99 * "xxx" takes the "no"-form negative - but only if there
100 * wasn't an value.
101 */
102 if (result->has_value)
103 goto unknown_parameter;
104 if (param->key[0] != 'n' || param->key[1] != 'o' || !param->key[2])
105 goto unknown_parameter;
106
107 p = fs_lookup_key(desc, param->key + 2);
108 if (!p)
109 goto unknown_parameter;
110 if (!(p->flags & fs_param_neg_with_no))
111 goto unknown_parameter;
112 result->boolean = false;
113 result->negated = true;
114 }
115
116 if (p->flags & fs_param_deprecated)
117 warnf(fc, "%s: Deprecated parameter '%s'",
118 desc->name, param->key);
119
120 if (result->negated)
121 goto okay;
122
123 /* Certain parameter types only take a string and convert it. */
124 switch (p->type) {
125 case __fs_param_wasnt_defined:
126 return -EINVAL;
127 case fs_param_is_u32:
128 case fs_param_is_u32_octal:
129 case fs_param_is_u32_hex:
130 case fs_param_is_s32:
131 case fs_param_is_u64:
132 case fs_param_is_enum:
133 case fs_param_is_string:
134 if (param->type != fs_value_is_string)
135 goto bad_value;
136 if (!result->has_value) {
137 if (p->flags & fs_param_v_optional)
138 goto okay;
139 goto bad_value;
140 }
141 /* Fall through */
142 default:
143 break;
144 }
145
146 /* Try to turn the type we were given into the type desired by the
147 * parameter and give an error if we can't.
148 */
149 switch (p->type) {
150 case fs_param_is_flag:
151 if (param->type != fs_value_is_flag &&
152 (param->type != fs_value_is_string || result->has_value))
153 return invalf(fc, "%s: Unexpected value for '%s'",
154 desc->name, param->key);
155 result->boolean = true;
156 goto okay;
157
158 case fs_param_is_bool:
159 switch (param->type) {
160 case fs_value_is_flag:
161 result->boolean = true;
162 goto okay;
163 case fs_value_is_string:
164 if (param->size == 0) {
165 result->boolean = true;
166 goto okay;
167 }
168 b = lookup_constant(bool_names, param->string, -1);
169 if (b == -1)
170 goto bad_value;
171 result->boolean = b;
172 goto okay;
173 default:
174 goto bad_value;
175 }
176
177 case fs_param_is_u32:
178 ret = kstrtouint(param->string, 0, &result->uint_32);
179 goto maybe_okay;
180 case fs_param_is_u32_octal:
181 ret = kstrtouint(param->string, 8, &result->uint_32);
182 goto maybe_okay;
183 case fs_param_is_u32_hex:
184 ret = kstrtouint(param->string, 16, &result->uint_32);
185 goto maybe_okay;
186 case fs_param_is_s32:
187 ret = kstrtoint(param->string, 0, &result->int_32);
188 goto maybe_okay;
189 case fs_param_is_u64:
190 ret = kstrtoull(param->string, 0, &result->uint_64);
191 goto maybe_okay;
192
193 case fs_param_is_enum:
194 for (e = desc->enums; e->name[0]; e++) {
195 if (e->opt == p->opt &&
196 strcmp(e->name, param->string) == 0) {
197 result->uint_32 = e->value;
198 goto okay;
199 }
200 }
201 goto bad_value;
202
203 case fs_param_is_string:
204 goto okay;
205 case fs_param_is_blob:
206 if (param->type != fs_value_is_blob)
207 goto bad_value;
208 goto okay;
209
210 case fs_param_is_fd: {
211 if (param->type != fs_value_is_file)
212 goto bad_value;
213 goto okay;
214 }
215
216 case fs_param_is_blockdev:
217 case fs_param_is_path:
218 goto okay;
219 default:
220 BUG();
221 }
222
223maybe_okay:
224 if (ret < 0)
225 goto bad_value;
226okay:
227 return p->opt;
228
229bad_value:
230 return invalf(fc, "%s: Bad value for '%s'", desc->name, param->key);
231unknown_parameter:
232 return -ENOPARAM;
233}
234EXPORT_SYMBOL(fs_parse);
235
236/**
237 * fs_lookup_param - Look up a path referred to by a parameter
238 * @fc: The filesystem context to log errors through.
239 * @param: The parameter.
240 * @want_bdev: T if want a blockdev
241 * @_path: The result of the lookup
242 */
243int fs_lookup_param(struct fs_context *fc,
244 struct fs_parameter *param,
245 bool want_bdev,
246 struct path *_path)
247{
248 struct filename *f;
249 unsigned int flags = 0;
250 bool put_f;
251 int ret;
252
253 switch (param->type) {
254 case fs_value_is_string:
255 f = getname_kernel(param->string);
256 if (IS_ERR(f))
257 return PTR_ERR(f);
258 put_f = true;
259 break;
260 case fs_value_is_filename_empty:
261 flags = LOOKUP_EMPTY;
262 /* Fall through */
263 case fs_value_is_filename:
264 f = param->name;
265 put_f = false;
266 break;
267 default:
268 return invalf(fc, "%s: not usable as path", param->key);
269 }
270
271 ret = filename_lookup(param->dirfd, f, flags, _path, NULL);
272 if (ret < 0) {
273 errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name);
274 goto out;
275 }
276
277 if (want_bdev &&
278 !S_ISBLK(d_backing_inode(_path->dentry)->i_mode)) {
279 path_put(_path);
280 _path->dentry = NULL;
281 _path->mnt = NULL;
282 errorf(fc, "%s: Non-blockdev passed as '%s'",
283 param->key, f->name);
284 ret = -ENOTBLK;
285 }
286
287out:
288 if (put_f)
289 putname(f);
290 return ret;
291}
292EXPORT_SYMBOL(fs_lookup_param);
293
294#ifdef CONFIG_VALIDATE_FS_PARSER
295/**
296 * validate_constant_table - Validate a constant table
297 * @name: Name to use in reporting
298 * @tbl: The constant table to validate.
299 * @tbl_size: The size of the table.
300 * @low: The lowest permissible value.
301 * @high: The highest permissible value.
302 * @special: One special permissible value outside of the range.
303 */
304bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
305 int low, int high, int special)
306{
307 size_t i;
308 bool good = true;
309
310 if (tbl_size == 0) {
311 pr_warn("VALIDATE C-TBL: Empty\n");
312 return true;
313 }
314
315 for (i = 0; i < tbl_size; i++) {
316 if (!tbl[i].name) {
317 pr_err("VALIDATE C-TBL[%zu]: Null\n", i);
318 good = false;
319 } else if (i > 0 && tbl[i - 1].name) {
320 int c = strcmp(tbl[i-1].name, tbl[i].name);
321
322 if (c == 0) {
323 pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n",
324 i, tbl[i].name);
325 good = false;
326 }
327 if (c > 0) {
328 pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n",
329 i, tbl[i-1].name, tbl[i].name);
330 good = false;
331 }
332 }
333
334 if (tbl[i].value != special &&
335 (tbl[i].value < low || tbl[i].value > high)) {
336 pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n",
337 i, tbl[i].name, tbl[i].value, low, high);
338 good = false;
339 }
340 }
341
342 return good;
343}
344
345/**
346 * fs_validate_description - Validate a parameter description
347 * @desc: The parameter description to validate.
348 */
349bool fs_validate_description(const struct fs_parameter_description *desc)
350{
351 const struct fs_parameter_spec *param, *p2;
352 const struct fs_parameter_enum *e;
353 const char *name = desc->name;
354 unsigned int nr_params = 0;
355 bool good = true, enums = false;
356
357 pr_notice("*** VALIDATE %s ***\n", name);
358
359 if (!name[0]) {
360 pr_err("VALIDATE Parser: No name\n");
361 name = "Unknown";
362 good = false;
363 }
364
365 if (desc->specs) {
366 for (param = desc->specs; param->name; param++) {
367 enum fs_parameter_type t = param->type;
368
369 /* Check that the type is in range */
370 if (t == __fs_param_wasnt_defined ||
371 t >= nr__fs_parameter_type) {
372 pr_err("VALIDATE %s: PARAM[%s] Bad type %u\n",
373 name, param->name, t);
374 good = false;
375 } else if (t == fs_param_is_enum) {
376 enums = true;
377 }
378
379 /* Check for duplicate parameter names */
380 for (p2 = desc->specs; p2 < param; p2++) {
381 if (strcmp(param->name, p2->name) == 0) {
382 pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n",
383 name, param->name);
384 good = false;
385 }
386 }
387 }
388
389 nr_params = param - desc->specs;
390 }
391
392 if (desc->enums) {
393 if (!nr_params) {
394 pr_err("VALIDATE %s: Enum table but no parameters\n",
395 name);
396 good = false;
397 goto no_enums;
398 }
399 if (!enums) {
400 pr_err("VALIDATE %s: Enum table but no enum-type values\n",
401 name);
402 good = false;
403 goto no_enums;
404 }
405
406 for (e = desc->enums; e->name[0]; e++) {
407 /* Check that all entries in the enum table have at
408 * least one parameter that uses them.
409 */
410 for (param = desc->specs; param->name; param++) {
411 if (param->opt == e->opt &&
412 param->type != fs_param_is_enum) {
413 pr_err("VALIDATE %s: e[%lu] enum val for %s\n",
414 name, e - desc->enums, param->name);
415 good = false;
416 }
417 }
418 }
419
420 /* Check that all enum-type parameters have at least one enum
421 * value in the enum table.
422 */
423 for (param = desc->specs; param->name; param++) {
424 if (param->type != fs_param_is_enum)
425 continue;
426 for (e = desc->enums; e->name[0]; e++)
427 if (e->opt == param->opt)
428 break;
429 if (!e->name[0]) {
430 pr_err("VALIDATE %s: PARAM[%s] enum with no values\n",
431 name, param->name);
432 good = false;
433 }
434 }
435 } else {
436 if (enums) {
437 pr_err("VALIDATE %s: enum-type values, but no enum table\n",
438 name);
439 good = false;
440 goto no_enums;
441 }
442 }
443
444no_enums:
445 return good;
446}
447#endif /* CONFIG_VALIDATE_FS_PARSER */
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b0eef008de67..ec32fece5e1e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -27,7 +27,7 @@
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/hugetlb.h> 28#include <linux/hugetlb.h>
29#include <linux/pagevec.h> 29#include <linux/pagevec.h>
30#include <linux/parser.h> 30#include <linux/fs_parser.h>
31#include <linux/mman.h> 31#include <linux/mman.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/dnotify.h> 33#include <linux/dnotify.h>
@@ -45,11 +45,17 @@ const struct file_operations hugetlbfs_file_operations;
45static const struct inode_operations hugetlbfs_dir_inode_operations; 45static const struct inode_operations hugetlbfs_dir_inode_operations;
46static const struct inode_operations hugetlbfs_inode_operations; 46static const struct inode_operations hugetlbfs_inode_operations;
47 47
48struct hugetlbfs_config { 48enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
49
50struct hugetlbfs_fs_context {
49 struct hstate *hstate; 51 struct hstate *hstate;
52 unsigned long long max_size_opt;
53 unsigned long long min_size_opt;
50 long max_hpages; 54 long max_hpages;
51 long nr_inodes; 55 long nr_inodes;
52 long min_hpages; 56 long min_hpages;
57 enum hugetlbfs_size_type max_val_type;
58 enum hugetlbfs_size_type min_val_type;
53 kuid_t uid; 59 kuid_t uid;
54 kgid_t gid; 60 kgid_t gid;
55 umode_t mode; 61 umode_t mode;
@@ -57,22 +63,30 @@ struct hugetlbfs_config {
57 63
58int sysctl_hugetlb_shm_group; 64int sysctl_hugetlb_shm_group;
59 65
60enum { 66enum hugetlb_param {
61 Opt_size, Opt_nr_inodes, 67 Opt_gid,
62 Opt_mode, Opt_uid, Opt_gid, 68 Opt_min_size,
63 Opt_pagesize, Opt_min_size, 69 Opt_mode,
64 Opt_err, 70 Opt_nr_inodes,
71 Opt_pagesize,
72 Opt_size,
73 Opt_uid,
65}; 74};
66 75
67static const match_table_t tokens = { 76static const struct fs_parameter_spec hugetlb_param_specs[] = {
68 {Opt_size, "size=%s"}, 77 fsparam_u32 ("gid", Opt_gid),
69 {Opt_nr_inodes, "nr_inodes=%s"}, 78 fsparam_string("min_size", Opt_min_size),
70 {Opt_mode, "mode=%o"}, 79 fsparam_u32 ("mode", Opt_mode),
71 {Opt_uid, "uid=%u"}, 80 fsparam_string("nr_inodes", Opt_nr_inodes),
72 {Opt_gid, "gid=%u"}, 81 fsparam_string("pagesize", Opt_pagesize),
73 {Opt_pagesize, "pagesize=%s"}, 82 fsparam_string("size", Opt_size),
74 {Opt_min_size, "min_size=%s"}, 83 fsparam_u32 ("uid", Opt_uid),
75 {Opt_err, NULL}, 84 {}
85};
86
87static const struct fs_parameter_description hugetlb_fs_parameters = {
88 .name = "hugetlbfs",
89 .specs = hugetlb_param_specs,
76}; 90};
77 91
78#ifdef CONFIG_NUMA 92#ifdef CONFIG_NUMA
@@ -708,16 +722,16 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
708} 722}
709 723
710static struct inode *hugetlbfs_get_root(struct super_block *sb, 724static struct inode *hugetlbfs_get_root(struct super_block *sb,
711 struct hugetlbfs_config *config) 725 struct hugetlbfs_fs_context *ctx)
712{ 726{
713 struct inode *inode; 727 struct inode *inode;
714 728
715 inode = new_inode(sb); 729 inode = new_inode(sb);
716 if (inode) { 730 if (inode) {
717 inode->i_ino = get_next_ino(); 731 inode->i_ino = get_next_ino();
718 inode->i_mode = S_IFDIR | config->mode; 732 inode->i_mode = S_IFDIR | ctx->mode;
719 inode->i_uid = config->uid; 733 inode->i_uid = ctx->uid;
720 inode->i_gid = config->gid; 734 inode->i_gid = ctx->gid;
721 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 735 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
722 inode->i_op = &hugetlbfs_dir_inode_operations; 736 inode->i_op = &hugetlbfs_dir_inode_operations;
723 inode->i_fop = &simple_dir_operations; 737 inode->i_fop = &simple_dir_operations;
@@ -1093,8 +1107,6 @@ static const struct super_operations hugetlbfs_ops = {
1093 .show_options = hugetlbfs_show_options, 1107 .show_options = hugetlbfs_show_options,
1094}; 1108};
1095 1109
1096enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
1097
1098/* 1110/*
1099 * Convert size option passed from command line to number of huge pages 1111 * Convert size option passed from command line to number of huge pages
1100 * in the pool specified by hstate. Size option could be in bytes 1112 * in the pool specified by hstate. Size option could be in bytes
@@ -1117,170 +1129,151 @@ hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
1117 return size_opt; 1129 return size_opt;
1118} 1130}
1119 1131
1120static int 1132/*
1121hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) 1133 * Parse one mount parameter.
1134 */
1135static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
1122{ 1136{
1123 char *p, *rest; 1137 struct hugetlbfs_fs_context *ctx = fc->fs_private;
1124 substring_t args[MAX_OPT_ARGS]; 1138 struct fs_parse_result result;
1125 int option; 1139 char *rest;
1126 unsigned long long max_size_opt = 0, min_size_opt = 0; 1140 unsigned long ps;
1127 enum hugetlbfs_size_type max_val_type = NO_SIZE, min_val_type = NO_SIZE; 1141 int opt;
1128 1142
1129 if (!options) 1143 opt = fs_parse(fc, &hugetlb_fs_parameters, param, &result);
1144 if (opt < 0)
1145 return opt;
1146
1147 switch (opt) {
1148 case Opt_uid:
1149 ctx->uid = make_kuid(current_user_ns(), result.uint_32);
1150 if (!uid_valid(ctx->uid))
1151 goto bad_val;
1130 return 0; 1152 return 0;
1131 1153
1132 while ((p = strsep(&options, ",")) != NULL) { 1154 case Opt_gid:
1133 int token; 1155 ctx->gid = make_kgid(current_user_ns(), result.uint_32);
1134 if (!*p) 1156 if (!gid_valid(ctx->gid))
1135 continue; 1157 goto bad_val;
1158 return 0;
1136 1159
1137 token = match_token(p, tokens, args); 1160 case Opt_mode:
1138 switch (token) { 1161 ctx->mode = result.uint_32 & 01777U;
1139 case Opt_uid: 1162 return 0;
1140 if (match_int(&args[0], &option))
1141 goto bad_val;
1142 pconfig->uid = make_kuid(current_user_ns(), option);
1143 if (!uid_valid(pconfig->uid))
1144 goto bad_val;
1145 break;
1146 1163
1147 case Opt_gid: 1164 case Opt_size:
1148 if (match_int(&args[0], &option)) 1165 /* memparse() will accept a K/M/G without a digit */
1149 goto bad_val; 1166 if (!isdigit(param->string[0]))
1150 pconfig->gid = make_kgid(current_user_ns(), option); 1167 goto bad_val;
1151 if (!gid_valid(pconfig->gid)) 1168 ctx->max_size_opt = memparse(param->string, &rest);
1152 goto bad_val; 1169 ctx->max_val_type = SIZE_STD;
1153 break; 1170 if (*rest == '%')
1171 ctx->max_val_type = SIZE_PERCENT;
1172 return 0;
1154 1173
1155 case Opt_mode: 1174 case Opt_nr_inodes:
1156 if (match_octal(&args[0], &option)) 1175 /* memparse() will accept a K/M/G without a digit */
1157 goto bad_val; 1176 if (!isdigit(param->string[0]))
1158 pconfig->mode = option & 01777U; 1177 goto bad_val;
1159 break; 1178 ctx->nr_inodes = memparse(param->string, &rest);
1179 return 0;
1160 1180
1161 case Opt_size: { 1181 case Opt_pagesize:
1162 /* memparse() will accept a K/M/G without a digit */ 1182 ps = memparse(param->string, &rest);
1163 if (!isdigit(*args[0].from)) 1183 ctx->hstate = size_to_hstate(ps);
1164 goto bad_val; 1184 if (!ctx->hstate) {
1165 max_size_opt = memparse(args[0].from, &rest); 1185 pr_err("Unsupported page size %lu MB\n", ps >> 20);
1166 max_val_type = SIZE_STD; 1186 return -EINVAL;
1167 if (*rest == '%')
1168 max_val_type = SIZE_PERCENT;
1169 break;
1170 } 1187 }
1188 return 0;
1171 1189
1172 case Opt_nr_inodes: 1190 case Opt_min_size:
1173 /* memparse() will accept a K/M/G without a digit */ 1191 /* memparse() will accept a K/M/G without a digit */
1174 if (!isdigit(*args[0].from)) 1192 if (!isdigit(param->string[0]))
1175 goto bad_val; 1193 goto bad_val;
1176 pconfig->nr_inodes = memparse(args[0].from, &rest); 1194 ctx->min_size_opt = memparse(param->string, &rest);
1177 break; 1195 ctx->min_val_type = SIZE_STD;
1196 if (*rest == '%')
1197 ctx->min_val_type = SIZE_PERCENT;
1198 return 0;
1178 1199
1179 case Opt_pagesize: { 1200 default:
1180 unsigned long ps; 1201 return -EINVAL;
1181 ps = memparse(args[0].from, &rest); 1202 }
1182 pconfig->hstate = size_to_hstate(ps);
1183 if (!pconfig->hstate) {
1184 pr_err("Unsupported page size %lu MB\n",
1185 ps >> 20);
1186 return -EINVAL;
1187 }
1188 break;
1189 }
1190 1203
1191 case Opt_min_size: { 1204bad_val:
1192 /* memparse() will accept a K/M/G without a digit */ 1205 return invalf(fc, "hugetlbfs: Bad value '%s' for mount option '%s'\n",
1193 if (!isdigit(*args[0].from)) 1206 param->string, param->key);
1194 goto bad_val; 1207}
1195 min_size_opt = memparse(args[0].from, &rest);
1196 min_val_type = SIZE_STD;
1197 if (*rest == '%')
1198 min_val_type = SIZE_PERCENT;
1199 break;
1200 }
1201 1208
1202 default: 1209/*
1203 pr_err("Bad mount option: \"%s\"\n", p); 1210 * Validate the parsed options.
1204 return -EINVAL; 1211 */
1205 break; 1212static int hugetlbfs_validate(struct fs_context *fc)
1206 } 1213{
1207 } 1214 struct hugetlbfs_fs_context *ctx = fc->fs_private;
1208 1215
1209 /* 1216 /*
1210 * Use huge page pool size (in hstate) to convert the size 1217 * Use huge page pool size (in hstate) to convert the size
1211 * options to number of huge pages. If NO_SIZE, -1 is returned. 1218 * options to number of huge pages. If NO_SIZE, -1 is returned.
1212 */ 1219 */
1213 pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, 1220 ctx->max_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
1214 max_size_opt, max_val_type); 1221 ctx->max_size_opt,
1215 pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, 1222 ctx->max_val_type);
1216 min_size_opt, min_val_type); 1223 ctx->min_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
1224 ctx->min_size_opt,
1225 ctx->min_val_type);
1217 1226
1218 /* 1227 /*
1219 * If max_size was specified, then min_size must be smaller 1228 * If max_size was specified, then min_size must be smaller
1220 */ 1229 */
1221 if (max_val_type > NO_SIZE && 1230 if (ctx->max_val_type > NO_SIZE &&
1222 pconfig->min_hpages > pconfig->max_hpages) { 1231 ctx->min_hpages > ctx->max_hpages) {
1223 pr_err("minimum size can not be greater than maximum size\n"); 1232 pr_err("Minimum size can not be greater than maximum size\n");
1224 return -EINVAL; 1233 return -EINVAL;
1225 } 1234 }
1226 1235
1227 return 0; 1236 return 0;
1228
1229bad_val:
1230 pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p);
1231 return -EINVAL;
1232} 1237}
1233 1238
1234static int 1239static int
1235hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) 1240hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
1236{ 1241{
1237 int ret; 1242 struct hugetlbfs_fs_context *ctx = fc->fs_private;
1238 struct hugetlbfs_config config;
1239 struct hugetlbfs_sb_info *sbinfo; 1243 struct hugetlbfs_sb_info *sbinfo;
1240 1244
1241 config.max_hpages = -1; /* No limit on size by default */
1242 config.nr_inodes = -1; /* No limit on number of inodes by default */
1243 config.uid = current_fsuid();
1244 config.gid = current_fsgid();
1245 config.mode = 0755;
1246 config.hstate = &default_hstate;
1247 config.min_hpages = -1; /* No default minimum size */
1248 ret = hugetlbfs_parse_options(data, &config);
1249 if (ret)
1250 return ret;
1251
1252 sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); 1245 sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
1253 if (!sbinfo) 1246 if (!sbinfo)
1254 return -ENOMEM; 1247 return -ENOMEM;
1255 sb->s_fs_info = sbinfo; 1248 sb->s_fs_info = sbinfo;
1256 sbinfo->hstate = config.hstate;
1257 spin_lock_init(&sbinfo->stat_lock); 1249 spin_lock_init(&sbinfo->stat_lock);
1258 sbinfo->max_inodes = config.nr_inodes; 1250 sbinfo->hstate = ctx->hstate;
1259 sbinfo->free_inodes = config.nr_inodes; 1251 sbinfo->max_inodes = ctx->nr_inodes;
1260 sbinfo->spool = NULL; 1252 sbinfo->free_inodes = ctx->nr_inodes;
1261 sbinfo->uid = config.uid; 1253 sbinfo->spool = NULL;
1262 sbinfo->gid = config.gid; 1254 sbinfo->uid = ctx->uid;
1263 sbinfo->mode = config.mode; 1255 sbinfo->gid = ctx->gid;
1256 sbinfo->mode = ctx->mode;
1264 1257
1265 /* 1258 /*
1266 * Allocate and initialize subpool if maximum or minimum size is 1259 * Allocate and initialize subpool if maximum or minimum size is
1267 * specified. Any needed reservations (for minimim size) are taken 1260 * specified. Any needed reservations (for minimim size) are taken
1268 * taken when the subpool is created. 1261 * taken when the subpool is created.
1269 */ 1262 */
1270 if (config.max_hpages != -1 || config.min_hpages != -1) { 1263 if (ctx->max_hpages != -1 || ctx->min_hpages != -1) {
1271 sbinfo->spool = hugepage_new_subpool(config.hstate, 1264 sbinfo->spool = hugepage_new_subpool(ctx->hstate,
1272 config.max_hpages, 1265 ctx->max_hpages,
1273 config.min_hpages); 1266 ctx->min_hpages);
1274 if (!sbinfo->spool) 1267 if (!sbinfo->spool)
1275 goto out_free; 1268 goto out_free;
1276 } 1269 }
1277 sb->s_maxbytes = MAX_LFS_FILESIZE; 1270 sb->s_maxbytes = MAX_LFS_FILESIZE;
1278 sb->s_blocksize = huge_page_size(config.hstate); 1271 sb->s_blocksize = huge_page_size(ctx->hstate);
1279 sb->s_blocksize_bits = huge_page_shift(config.hstate); 1272 sb->s_blocksize_bits = huge_page_shift(ctx->hstate);
1280 sb->s_magic = HUGETLBFS_MAGIC; 1273 sb->s_magic = HUGETLBFS_MAGIC;
1281 sb->s_op = &hugetlbfs_ops; 1274 sb->s_op = &hugetlbfs_ops;
1282 sb->s_time_gran = 1; 1275 sb->s_time_gran = 1;
1283 sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config)); 1276 sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx));
1284 if (!sb->s_root) 1277 if (!sb->s_root)
1285 goto out_free; 1278 goto out_free;
1286 return 0; 1279 return 0;
@@ -1290,16 +1283,52 @@ out_free:
1290 return -ENOMEM; 1283 return -ENOMEM;
1291} 1284}
1292 1285
1293static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, 1286static int hugetlbfs_get_tree(struct fs_context *fc)
1294 int flags, const char *dev_name, void *data) 1287{
1288 int err = hugetlbfs_validate(fc);
1289 if (err)
1290 return err;
1291 return vfs_get_super(fc, vfs_get_independent_super, hugetlbfs_fill_super);
1292}
1293
1294static void hugetlbfs_fs_context_free(struct fs_context *fc)
1295{
1296 kfree(fc->fs_private);
1297}
1298
1299static const struct fs_context_operations hugetlbfs_fs_context_ops = {
1300 .free = hugetlbfs_fs_context_free,
1301 .parse_param = hugetlbfs_parse_param,
1302 .get_tree = hugetlbfs_get_tree,
1303};
1304
1305static int hugetlbfs_init_fs_context(struct fs_context *fc)
1295{ 1306{
1296 return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super); 1307 struct hugetlbfs_fs_context *ctx;
1308
1309 ctx = kzalloc(sizeof(struct hugetlbfs_fs_context), GFP_KERNEL);
1310 if (!ctx)
1311 return -ENOMEM;
1312
1313 ctx->max_hpages = -1; /* No limit on size by default */
1314 ctx->nr_inodes = -1; /* No limit on number of inodes by default */
1315 ctx->uid = current_fsuid();
1316 ctx->gid = current_fsgid();
1317 ctx->mode = 0755;
1318 ctx->hstate = &default_hstate;
1319 ctx->min_hpages = -1; /* No default minimum size */
1320 ctx->max_val_type = NO_SIZE;
1321 ctx->min_val_type = NO_SIZE;
1322 fc->fs_private = ctx;
1323 fc->ops = &hugetlbfs_fs_context_ops;
1324 return 0;
1297} 1325}
1298 1326
1299static struct file_system_type hugetlbfs_fs_type = { 1327static struct file_system_type hugetlbfs_fs_type = {
1300 .name = "hugetlbfs", 1328 .name = "hugetlbfs",
1301 .mount = hugetlbfs_mount, 1329 .init_fs_context = hugetlbfs_init_fs_context,
1302 .kill_sb = kill_litter_super, 1330 .parameters = &hugetlb_fs_parameters,
1331 .kill_sb = kill_litter_super,
1303}; 1332};
1304 1333
1305static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; 1334static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
@@ -1384,8 +1413,29 @@ out:
1384 return file; 1413 return file;
1385} 1414}
1386 1415
1416static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
1417{
1418 struct fs_context *fc;
1419 struct vfsmount *mnt;
1420
1421 fc = fs_context_for_mount(&hugetlbfs_fs_type, SB_KERNMOUNT);
1422 if (IS_ERR(fc)) {
1423 mnt = ERR_CAST(fc);
1424 } else {
1425 struct hugetlbfs_fs_context *ctx = fc->fs_private;
1426 ctx->hstate = h;
1427 mnt = fc_mount(fc);
1428 put_fs_context(fc);
1429 }
1430 if (IS_ERR(mnt))
1431 pr_err("Cannot mount internal hugetlbfs for page size %uK",
1432 1U << (h->order + PAGE_SHIFT - 10));
1433 return mnt;
1434}
1435
1387static int __init init_hugetlbfs_fs(void) 1436static int __init init_hugetlbfs_fs(void)
1388{ 1437{
1438 struct vfsmount *mnt;
1389 struct hstate *h; 1439 struct hstate *h;
1390 int error; 1440 int error;
1391 int i; 1441 int i;
@@ -1408,24 +1458,16 @@ static int __init init_hugetlbfs_fs(void)
1408 1458
1409 i = 0; 1459 i = 0;
1410 for_each_hstate(h) { 1460 for_each_hstate(h) {
1411 char buf[50]; 1461 mnt = mount_one_hugetlbfs(h);
1412 unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10); 1462 if (IS_ERR(mnt) && i == 0) {
1413 1463 error = PTR_ERR(mnt);
1414 snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb); 1464 goto out;
1415 hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type,
1416 buf);
1417
1418 if (IS_ERR(hugetlbfs_vfsmount[i])) {
1419 pr_err("Cannot mount internal hugetlbfs for "
1420 "page size %uK", ps_kb);
1421 error = PTR_ERR(hugetlbfs_vfsmount[i]);
1422 hugetlbfs_vfsmount[i] = NULL;
1423 } 1465 }
1466 hugetlbfs_vfsmount[i] = mnt;
1424 i++; 1467 i++;
1425 } 1468 }
1426 /* Non default hstates are optional */ 1469
1427 if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx])) 1470 return 0;
1428 return 0;
1429 1471
1430 out: 1472 out:
1431 kmem_cache_destroy(hugetlbfs_inode_cachep); 1473 kmem_cache_destroy(hugetlbfs_inode_cachep);
diff --git a/fs/internal.h b/fs/internal.h
index d410186bc369..6a8b71643af4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -17,6 +17,7 @@ struct linux_binprm;
17struct path; 17struct path;
18struct mount; 18struct mount;
19struct shrink_control; 19struct shrink_control;
20struct fs_context;
20 21
21/* 22/*
22 * block_dev.c 23 * block_dev.c
@@ -52,8 +53,16 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
52extern void __init chrdev_init(void); 53extern void __init chrdev_init(void);
53 54
54/* 55/*
56 * fs_context.c
57 */
58extern int parse_monolithic_mount_data(struct fs_context *, void *);
59extern void fc_drop_locked(struct fs_context *);
60
61/*
55 * namei.c 62 * namei.c
56 */ 63 */
64extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
65 struct path *path, struct path *root);
57extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); 66extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
58extern int vfs_path_lookup(struct dentry *, struct vfsmount *, 67extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
59 const char *, unsigned int, struct path *); 68 const char *, unsigned int, struct path *);
@@ -99,10 +108,8 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
99/* 108/*
100 * super.c 109 * super.c
101 */ 110 */
102extern int do_remount_sb(struct super_block *, int, void *, int); 111extern int reconfigure_super(struct fs_context *);
103extern bool trylock_super(struct super_block *sb); 112extern bool trylock_super(struct super_block *sb);
104extern struct dentry *mount_fs(struct file_system_type *,
105 int, const char *, void *);
106extern struct super_block *user_get_super(dev_t); 113extern struct super_block *user_get_super(dev_t);
107 114
108/* 115/*
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index dba810cd83b1..0b7d197a904c 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -17,6 +17,7 @@
17#include <linux/xattr.h> 17#include <linux/xattr.h>
18 18
19#include <linux/kernfs.h> 19#include <linux/kernfs.h>
20#include <linux/fs_context.h>
20 21
21struct kernfs_iattrs { 22struct kernfs_iattrs {
22 struct iattr ia_iattr; 23 struct iattr ia_iattr;
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index f3ac352699cf..9a4646eecb71 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -22,16 +22,6 @@
22 22
23struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache; 23struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
24 24
25static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data)
26{
27 struct kernfs_root *root = kernfs_info(sb)->root;
28 struct kernfs_syscall_ops *scops = root->syscall_ops;
29
30 if (scops && scops->remount_fs)
31 return scops->remount_fs(root, flags, data);
32 return 0;
33}
34
35static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) 25static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
36{ 26{
37 struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry)); 27 struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry));
@@ -60,7 +50,6 @@ const struct super_operations kernfs_sops = {
60 .drop_inode = generic_delete_inode, 50 .drop_inode = generic_delete_inode,
61 .evict_inode = kernfs_evict_inode, 51 .evict_inode = kernfs_evict_inode,
62 52
63 .remount_fs = kernfs_sop_remount_fs,
64 .show_options = kernfs_sop_show_options, 53 .show_options = kernfs_sop_show_options,
65 .show_path = kernfs_sop_show_path, 54 .show_path = kernfs_sop_show_path,
66}; 55};
@@ -222,7 +211,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
222 } while (true); 211 } while (true);
223} 212}
224 213
225static int kernfs_fill_super(struct super_block *sb, unsigned long magic) 214static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc)
226{ 215{
227 struct kernfs_super_info *info = kernfs_info(sb); 216 struct kernfs_super_info *info = kernfs_info(sb);
228 struct inode *inode; 217 struct inode *inode;
@@ -233,7 +222,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
233 sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; 222 sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
234 sb->s_blocksize = PAGE_SIZE; 223 sb->s_blocksize = PAGE_SIZE;
235 sb->s_blocksize_bits = PAGE_SHIFT; 224 sb->s_blocksize_bits = PAGE_SHIFT;
236 sb->s_magic = magic; 225 sb->s_magic = kfc->magic;
237 sb->s_op = &kernfs_sops; 226 sb->s_op = &kernfs_sops;
238 sb->s_xattr = kernfs_xattr_handlers; 227 sb->s_xattr = kernfs_xattr_handlers;
239 if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) 228 if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP)
@@ -263,21 +252,20 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
263 return 0; 252 return 0;
264} 253}
265 254
266static int kernfs_test_super(struct super_block *sb, void *data) 255static int kernfs_test_super(struct super_block *sb, struct fs_context *fc)
267{ 256{
268 struct kernfs_super_info *sb_info = kernfs_info(sb); 257 struct kernfs_super_info *sb_info = kernfs_info(sb);
269 struct kernfs_super_info *info = data; 258 struct kernfs_super_info *info = fc->s_fs_info;
270 259
271 return sb_info->root == info->root && sb_info->ns == info->ns; 260 return sb_info->root == info->root && sb_info->ns == info->ns;
272} 261}
273 262
274static int kernfs_set_super(struct super_block *sb, void *data) 263static int kernfs_set_super(struct super_block *sb, struct fs_context *fc)
275{ 264{
276 int error; 265 struct kernfs_fs_context *kfc = fc->fs_private;
277 error = set_anon_super(sb, data); 266
278 if (!error) 267 kfc->ns_tag = NULL;
279 sb->s_fs_info = data; 268 return set_anon_super_fc(sb, fc);
280 return error;
281} 269}
282 270
283/** 271/**
@@ -294,63 +282,60 @@ const void *kernfs_super_ns(struct super_block *sb)
294} 282}
295 283
296/** 284/**
297 * kernfs_mount_ns - kernfs mount helper 285 * kernfs_get_tree - kernfs filesystem access/retrieval helper
298 * @fs_type: file_system_type of the fs being mounted 286 * @fc: The filesystem context.
299 * @flags: mount flags specified for the mount
300 * @root: kernfs_root of the hierarchy being mounted
301 * @magic: file system specific magic number
302 * @new_sb_created: tell the caller if we allocated a new superblock
303 * @ns: optional namespace tag of the mount
304 * 287 *
305 * This is to be called from each kernfs user's file_system_type->mount() 288 * This is to be called from each kernfs user's fs_context->ops->get_tree()
306 * implementation, which should pass through the specified @fs_type and 289 * implementation, which should set the specified ->@fs_type and ->@flags, and
307 * @flags, and specify the hierarchy and namespace tag to mount via @root 290 * specify the hierarchy and namespace tag to mount via ->@root and ->@ns,
308 * and @ns, respectively. 291 * respectively.
309 *
310 * The return value can be passed to the vfs layer verbatim.
311 */ 292 */
312struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, 293int kernfs_get_tree(struct fs_context *fc)
313 struct kernfs_root *root, unsigned long magic,
314 bool *new_sb_created, const void *ns)
315{ 294{
295 struct kernfs_fs_context *kfc = fc->fs_private;
316 struct super_block *sb; 296 struct super_block *sb;
317 struct kernfs_super_info *info; 297 struct kernfs_super_info *info;
318 int error; 298 int error;
319 299
320 info = kzalloc(sizeof(*info), GFP_KERNEL); 300 info = kzalloc(sizeof(*info), GFP_KERNEL);
321 if (!info) 301 if (!info)
322 return ERR_PTR(-ENOMEM); 302 return -ENOMEM;
323 303
324 info->root = root; 304 info->root = kfc->root;
325 info->ns = ns; 305 info->ns = kfc->ns_tag;
326 INIT_LIST_HEAD(&info->node); 306 INIT_LIST_HEAD(&info->node);
327 307
328 sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags, 308 fc->s_fs_info = info;
329 &init_user_ns, info); 309 sb = sget_fc(fc, kernfs_test_super, kernfs_set_super);
330 if (IS_ERR(sb) || sb->s_fs_info != info)
331 kfree(info);
332 if (IS_ERR(sb)) 310 if (IS_ERR(sb))
333 return ERR_CAST(sb); 311 return PTR_ERR(sb);
334
335 if (new_sb_created)
336 *new_sb_created = !sb->s_root;
337 312
338 if (!sb->s_root) { 313 if (!sb->s_root) {
339 struct kernfs_super_info *info = kernfs_info(sb); 314 struct kernfs_super_info *info = kernfs_info(sb);
340 315
341 error = kernfs_fill_super(sb, magic); 316 kfc->new_sb_created = true;
317
318 error = kernfs_fill_super(sb, kfc);
342 if (error) { 319 if (error) {
343 deactivate_locked_super(sb); 320 deactivate_locked_super(sb);
344 return ERR_PTR(error); 321 return error;
345 } 322 }
346 sb->s_flags |= SB_ACTIVE; 323 sb->s_flags |= SB_ACTIVE;
347 324
348 mutex_lock(&kernfs_mutex); 325 mutex_lock(&kernfs_mutex);
349 list_add(&info->node, &root->supers); 326 list_add(&info->node, &info->root->supers);
350 mutex_unlock(&kernfs_mutex); 327 mutex_unlock(&kernfs_mutex);
351 } 328 }
352 329
353 return dget(sb->s_root); 330 fc->root = dget(sb->s_root);
331 return 0;
332}
333
334void kernfs_free_fs_context(struct fs_context *fc)
335{
336 /* Note that we don't deal with kfc->ns_tag here. */
337 kfree(fc->s_fs_info);
338 fc->s_fs_info = NULL;
354} 339}
355 340
356/** 341/**
@@ -377,36 +362,6 @@ void kernfs_kill_sb(struct super_block *sb)
377 kfree(info); 362 kfree(info);
378} 363}
379 364
380/**
381 * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root
382 * @kernfs_root: the kernfs_root in question
383 * @ns: the namespace tag
384 *
385 * Pin the superblock so the superblock won't be destroyed in subsequent
386 * operations. This can be used to block ->kill_sb() which may be useful
387 * for kernfs users which dynamically manage superblocks.
388 *
389 * Returns NULL if there's no superblock associated to this kernfs_root, or
390 * -EINVAL if the superblock is being freed.
391 */
392struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
393{
394 struct kernfs_super_info *info;
395 struct super_block *sb = NULL;
396
397 mutex_lock(&kernfs_mutex);
398 list_for_each_entry(info, &root->supers, node) {
399 if (info->ns == ns) {
400 sb = info->sb;
401 if (!atomic_inc_not_zero(&info->sb->s_active))
402 sb = ERR_PTR(-EINVAL);
403 break;
404 }
405 }
406 mutex_unlock(&kernfs_mutex);
407 return sb;
408}
409
410void __init kernfs_init(void) 365void __init kernfs_init(void)
411{ 366{
412 367
diff --git a/fs/mount.h b/fs/mount.h
index f39bc9da4d73..6250de544760 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -146,3 +146,8 @@ static inline bool is_local_mountpoint(struct dentry *dentry)
146 146
147 return __is_local_mountpoint(dentry); 147 return __is_local_mountpoint(dentry);
148} 148}
149
150static inline bool is_anon_ns(struct mnt_namespace *ns)
151{
152 return ns->seq == 0;
153}
diff --git a/fs/namei.c b/fs/namei.c
index 3662a09830be..dede0147b3f6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2331,8 +2331,8 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
2331 return err; 2331 return err;
2332} 2332}
2333 2333
2334static int filename_lookup(int dfd, struct filename *name, unsigned flags, 2334int filename_lookup(int dfd, struct filename *name, unsigned flags,
2335 struct path *path, struct path *root) 2335 struct path *path, struct path *root)
2336{ 2336{
2337 int retval; 2337 int retval;
2338 struct nameidata nd; 2338 struct nameidata nd;
diff --git a/fs/namespace.c b/fs/namespace.c
index 98a8c182af4f..c9cab307fa77 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,7 @@
27#include <linux/task_work.h> 27#include <linux/task_work.h>
28#include <linux/sched/task.h> 28#include <linux/sched/task.h>
29#include <uapi/linux/mount.h> 29#include <uapi/linux/mount.h>
30#include <linux/fs_context.h>
30 31
31#include "pnode.h" 32#include "pnode.h"
32#include "internal.h" 33#include "internal.h"
@@ -940,38 +941,81 @@ static struct mount *skip_mnt_tree(struct mount *p)
940 return p; 941 return p;
941} 942}
942 943
943struct vfsmount * 944/**
944vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) 945 * vfs_create_mount - Create a mount for a configured superblock
946 * @fc: The configuration context with the superblock attached
947 *
948 * Create a mount to an already configured superblock. If necessary, the
949 * caller should invoke vfs_get_tree() before calling this.
950 *
951 * Note that this does not attach the mount to anything.
952 */
953struct vfsmount *vfs_create_mount(struct fs_context *fc)
945{ 954{
946 struct mount *mnt; 955 struct mount *mnt;
947 struct dentry *root;
948 956
949 if (!type) 957 if (!fc->root)
950 return ERR_PTR(-ENODEV); 958 return ERR_PTR(-EINVAL);
951 959
952 mnt = alloc_vfsmnt(name); 960 mnt = alloc_vfsmnt(fc->source ?: "none");
953 if (!mnt) 961 if (!mnt)
954 return ERR_PTR(-ENOMEM); 962 return ERR_PTR(-ENOMEM);
955 963
956 if (flags & SB_KERNMOUNT) 964 if (fc->sb_flags & SB_KERNMOUNT)
957 mnt->mnt.mnt_flags = MNT_INTERNAL; 965 mnt->mnt.mnt_flags = MNT_INTERNAL;
958 966
959 root = mount_fs(type, flags, name, data); 967 atomic_inc(&fc->root->d_sb->s_active);
960 if (IS_ERR(root)) { 968 mnt->mnt.mnt_sb = fc->root->d_sb;
961 mnt_free_id(mnt); 969 mnt->mnt.mnt_root = dget(fc->root);
962 free_vfsmnt(mnt); 970 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
963 return ERR_CAST(root); 971 mnt->mnt_parent = mnt;
964 }
965 972
966 mnt->mnt.mnt_root = root;
967 mnt->mnt.mnt_sb = root->d_sb;
968 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
969 mnt->mnt_parent = mnt;
970 lock_mount_hash(); 973 lock_mount_hash();
971 list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); 974 list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
972 unlock_mount_hash(); 975 unlock_mount_hash();
973 return &mnt->mnt; 976 return &mnt->mnt;
974} 977}
978EXPORT_SYMBOL(vfs_create_mount);
979
980struct vfsmount *fc_mount(struct fs_context *fc)
981{
982 int err = vfs_get_tree(fc);
983 if (!err) {
984 up_write(&fc->root->d_sb->s_umount);
985 return vfs_create_mount(fc);
986 }
987 return ERR_PTR(err);
988}
989EXPORT_SYMBOL(fc_mount);
990
991struct vfsmount *vfs_kern_mount(struct file_system_type *type,
992 int flags, const char *name,
993 void *data)
994{
995 struct fs_context *fc;
996 struct vfsmount *mnt;
997 int ret = 0;
998
999 if (!type)
1000 return ERR_PTR(-EINVAL);
1001
1002 fc = fs_context_for_mount(type, flags);
1003 if (IS_ERR(fc))
1004 return ERR_CAST(fc);
1005
1006 if (name)
1007 ret = vfs_parse_fs_string(fc, "source",
1008 name, strlen(name));
1009 if (!ret)
1010 ret = parse_monolithic_mount_data(fc, data);
1011 if (!ret)
1012 mnt = fc_mount(fc);
1013 else
1014 mnt = ERR_PTR(ret);
1015
1016 put_fs_context(fc);
1017 return mnt;
1018}
975EXPORT_SYMBOL_GPL(vfs_kern_mount); 1019EXPORT_SYMBOL_GPL(vfs_kern_mount);
976 1020
977struct vfsmount * 1021struct vfsmount *
@@ -1013,27 +1057,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
1013 1057
1014 mnt->mnt.mnt_flags = old->mnt.mnt_flags; 1058 mnt->mnt.mnt_flags = old->mnt.mnt_flags;
1015 mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); 1059 mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
1016 /* Don't allow unprivileged users to change mount flags */
1017 if (flag & CL_UNPRIVILEGED) {
1018 mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
1019
1020 if (mnt->mnt.mnt_flags & MNT_READONLY)
1021 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
1022
1023 if (mnt->mnt.mnt_flags & MNT_NODEV)
1024 mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
1025
1026 if (mnt->mnt.mnt_flags & MNT_NOSUID)
1027 mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
1028
1029 if (mnt->mnt.mnt_flags & MNT_NOEXEC)
1030 mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
1031 }
1032
1033 /* Don't allow unprivileged users to reveal what is under a mount */
1034 if ((flag & CL_UNPRIVILEGED) &&
1035 (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
1036 mnt->mnt.mnt_flags |= MNT_LOCKED;
1037 1060
1038 atomic_inc(&sb->s_active); 1061 atomic_inc(&sb->s_active);
1039 mnt->mnt.mnt_sb = sb; 1062 mnt->mnt.mnt_sb = sb;
@@ -1464,6 +1487,29 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1464 1487
1465static void shrink_submounts(struct mount *mnt); 1488static void shrink_submounts(struct mount *mnt);
1466 1489
1490static int do_umount_root(struct super_block *sb)
1491{
1492 int ret = 0;
1493
1494 down_write(&sb->s_umount);
1495 if (!sb_rdonly(sb)) {
1496 struct fs_context *fc;
1497
1498 fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
1499 SB_RDONLY);
1500 if (IS_ERR(fc)) {
1501 ret = PTR_ERR(fc);
1502 } else {
1503 ret = parse_monolithic_mount_data(fc, NULL);
1504 if (!ret)
1505 ret = reconfigure_super(fc);
1506 put_fs_context(fc);
1507 }
1508 }
1509 up_write(&sb->s_umount);
1510 return ret;
1511}
1512
1467static int do_umount(struct mount *mnt, int flags) 1513static int do_umount(struct mount *mnt, int flags)
1468{ 1514{
1469 struct super_block *sb = mnt->mnt.mnt_sb; 1515 struct super_block *sb = mnt->mnt.mnt_sb;
@@ -1529,11 +1575,7 @@ static int do_umount(struct mount *mnt, int flags)
1529 */ 1575 */
1530 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) 1576 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
1531 return -EPERM; 1577 return -EPERM;
1532 down_write(&sb->s_umount); 1578 return do_umount_root(sb);
1533 if (!sb_rdonly(sb))
1534 retval = do_remount_sb(sb, SB_RDONLY, NULL, 0);
1535 up_write(&sb->s_umount);
1536 return retval;
1537 } 1579 }
1538 1580
1539 namespace_lock(); 1581 namespace_lock();
@@ -1839,6 +1881,33 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1839 return 0; 1881 return 0;
1840} 1882}
1841 1883
1884static void lock_mnt_tree(struct mount *mnt)
1885{
1886 struct mount *p;
1887
1888 for (p = mnt; p; p = next_mnt(p, mnt)) {
1889 int flags = p->mnt.mnt_flags;
1890 /* Don't allow unprivileged users to change mount flags */
1891 flags |= MNT_LOCK_ATIME;
1892
1893 if (flags & MNT_READONLY)
1894 flags |= MNT_LOCK_READONLY;
1895
1896 if (flags & MNT_NODEV)
1897 flags |= MNT_LOCK_NODEV;
1898
1899 if (flags & MNT_NOSUID)
1900 flags |= MNT_LOCK_NOSUID;
1901
1902 if (flags & MNT_NOEXEC)
1903 flags |= MNT_LOCK_NOEXEC;
1904 /* Don't allow unprivileged users to reveal what is under a mount */
1905 if (list_empty(&p->mnt_expire))
1906 flags |= MNT_LOCKED;
1907 p->mnt.mnt_flags = flags;
1908 }
1909}
1910
1842static void cleanup_group_ids(struct mount *mnt, struct mount *end) 1911static void cleanup_group_ids(struct mount *mnt, struct mount *end)
1843{ 1912{
1844 struct mount *p; 1913 struct mount *p;
@@ -1956,6 +2025,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1956 struct mountpoint *dest_mp, 2025 struct mountpoint *dest_mp,
1957 struct path *parent_path) 2026 struct path *parent_path)
1958{ 2027{
2028 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
1959 HLIST_HEAD(tree_list); 2029 HLIST_HEAD(tree_list);
1960 struct mnt_namespace *ns = dest_mnt->mnt_ns; 2030 struct mnt_namespace *ns = dest_mnt->mnt_ns;
1961 struct mountpoint *smp; 2031 struct mountpoint *smp;
@@ -2006,6 +2076,9 @@ static int attach_recursive_mnt(struct mount *source_mnt,
2006 child->mnt_mountpoint); 2076 child->mnt_mountpoint);
2007 if (q) 2077 if (q)
2008 mnt_change_mountpoint(child, smp, q); 2078 mnt_change_mountpoint(child, smp, q);
2079 /* Notice when we are propagating across user namespaces */
2080 if (child->mnt_parent->mnt_ns->user_ns != user_ns)
2081 lock_mnt_tree(child);
2009 commit_tree(child); 2082 commit_tree(child);
2010 } 2083 }
2011 put_mountpoint(smp); 2084 put_mountpoint(smp);
@@ -2313,7 +2386,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
2313 int err; 2386 int err;
2314 struct super_block *sb = path->mnt->mnt_sb; 2387 struct super_block *sb = path->mnt->mnt_sb;
2315 struct mount *mnt = real_mount(path->mnt); 2388 struct mount *mnt = real_mount(path->mnt);
2316 void *sec_opts = NULL; 2389 struct fs_context *fc;
2317 2390
2318 if (!check_mnt(mnt)) 2391 if (!check_mnt(mnt))
2319 return -EINVAL; 2392 return -EINVAL;
@@ -2324,24 +2397,22 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
2324 if (!can_change_locked_flags(mnt, mnt_flags)) 2397 if (!can_change_locked_flags(mnt, mnt_flags))
2325 return -EPERM; 2398 return -EPERM;
2326 2399
2327 if (data && !(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)) { 2400 fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
2328 err = security_sb_eat_lsm_opts(data, &sec_opts); 2401 if (IS_ERR(fc))
2329 if (err) 2402 return PTR_ERR(fc);
2330 return err;
2331 }
2332 err = security_sb_remount(sb, sec_opts);
2333 security_free_mnt_opts(&sec_opts);
2334 if (err)
2335 return err;
2336 2403
2337 down_write(&sb->s_umount); 2404 err = parse_monolithic_mount_data(fc, data);
2338 err = -EPERM; 2405 if (!err) {
2339 if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { 2406 down_write(&sb->s_umount);
2340 err = do_remount_sb(sb, sb_flags, data, 0); 2407 err = -EPERM;
2341 if (!err) 2408 if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
2342 set_mount_attributes(mnt, mnt_flags); 2409 err = reconfigure_super(fc);
2410 if (!err)
2411 set_mount_attributes(mnt, mnt_flags);
2412 }
2413 up_write(&sb->s_umount);
2343 } 2414 }
2344 up_write(&sb->s_umount); 2415 put_fs_context(fc);
2345 return err; 2416 return err;
2346} 2417}
2347 2418
@@ -2425,29 +2496,6 @@ out:
2425 return err; 2496 return err;
2426} 2497}
2427 2498
2428static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
2429{
2430 int err;
2431 const char *subtype = strchr(fstype, '.');
2432 if (subtype) {
2433 subtype++;
2434 err = -EINVAL;
2435 if (!subtype[0])
2436 goto err;
2437 } else
2438 subtype = "";
2439
2440 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
2441 err = -ENOMEM;
2442 if (!mnt->mnt_sb->s_subtype)
2443 goto err;
2444 return mnt;
2445
2446 err:
2447 mntput(mnt);
2448 return ERR_PTR(err);
2449}
2450
2451/* 2499/*
2452 * add a mount into a namespace's mount tree 2500 * add a mount into a namespace's mount tree
2453 */ 2501 */
@@ -2492,7 +2540,39 @@ unlock:
2492 return err; 2540 return err;
2493} 2541}
2494 2542
2495static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags); 2543static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
2544
2545/*
2546 * Create a new mount using a superblock configuration and request it
2547 * be added to the namespace tree.
2548 */
2549static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
2550 unsigned int mnt_flags)
2551{
2552 struct vfsmount *mnt;
2553 struct super_block *sb = fc->root->d_sb;
2554 int error;
2555
2556 error = security_sb_kern_mount(sb);
2557 if (!error && mount_too_revealing(sb, &mnt_flags))
2558 error = -EPERM;
2559
2560 if (unlikely(error)) {
2561 fc_drop_locked(fc);
2562 return error;
2563 }
2564
2565 up_write(&sb->s_umount);
2566
2567 mnt = vfs_create_mount(fc);
2568 if (IS_ERR(mnt))
2569 return PTR_ERR(mnt);
2570
2571 error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
2572 if (error < 0)
2573 mntput(mnt);
2574 return error;
2575}
2496 2576
2497/* 2577/*
2498 * create a new mount for userspace and request it to be added into the 2578 * create a new mount for userspace and request it to be added into the
@@ -2502,8 +2582,9 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
2502 int mnt_flags, const char *name, void *data) 2582 int mnt_flags, const char *name, void *data)
2503{ 2583{
2504 struct file_system_type *type; 2584 struct file_system_type *type;
2505 struct vfsmount *mnt; 2585 struct fs_context *fc;
2506 int err; 2586 const char *subtype = NULL;
2587 int err = 0;
2507 2588
2508 if (!fstype) 2589 if (!fstype)
2509 return -EINVAL; 2590 return -EINVAL;
@@ -2512,23 +2593,37 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
2512 if (!type) 2593 if (!type)
2513 return -ENODEV; 2594 return -ENODEV;
2514 2595
2515 mnt = vfs_kern_mount(type, sb_flags, name, data); 2596 if (type->fs_flags & FS_HAS_SUBTYPE) {
2516 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && 2597 subtype = strchr(fstype, '.');
2517 !mnt->mnt_sb->s_subtype) 2598 if (subtype) {
2518 mnt = fs_set_subtype(mnt, fstype); 2599 subtype++;
2600 if (!*subtype) {
2601 put_filesystem(type);
2602 return -EINVAL;
2603 }
2604 } else {
2605 subtype = "";
2606 }
2607 }
2519 2608
2609 fc = fs_context_for_mount(type, sb_flags);
2520 put_filesystem(type); 2610 put_filesystem(type);
2521 if (IS_ERR(mnt)) 2611 if (IS_ERR(fc))
2522 return PTR_ERR(mnt); 2612 return PTR_ERR(fc);
2523 2613
2524 if (mount_too_revealing(mnt, &mnt_flags)) { 2614 if (subtype)
2525 mntput(mnt); 2615 err = vfs_parse_fs_string(fc, "subtype",
2526 return -EPERM; 2616 subtype, strlen(subtype));
2527 } 2617 if (!err && name)
2618 err = vfs_parse_fs_string(fc, "source", name, strlen(name));
2619 if (!err)
2620 err = parse_monolithic_mount_data(fc, data);
2621 if (!err)
2622 err = vfs_get_tree(fc);
2623 if (!err)
2624 err = do_new_mount_fc(fc, path, mnt_flags);
2528 2625
2529 err = do_add_mount(real_mount(mnt), path, mnt_flags); 2626 put_fs_context(fc);
2530 if (err)
2531 mntput(mnt);
2532 return err; 2627 return err;
2533} 2628}
2534 2629
@@ -2863,7 +2958,8 @@ static void dec_mnt_namespaces(struct ucounts *ucounts)
2863 2958
2864static void free_mnt_ns(struct mnt_namespace *ns) 2959static void free_mnt_ns(struct mnt_namespace *ns)
2865{ 2960{
2866 ns_free_inum(&ns->ns); 2961 if (!is_anon_ns(ns))
2962 ns_free_inum(&ns->ns);
2867 dec_mnt_namespaces(ns->ucounts); 2963 dec_mnt_namespaces(ns->ucounts);
2868 put_user_ns(ns->user_ns); 2964 put_user_ns(ns->user_ns);
2869 kfree(ns); 2965 kfree(ns);
@@ -2878,7 +2974,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
2878 */ 2974 */
2879static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); 2975static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2880 2976
2881static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) 2977static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
2882{ 2978{
2883 struct mnt_namespace *new_ns; 2979 struct mnt_namespace *new_ns;
2884 struct ucounts *ucounts; 2980 struct ucounts *ucounts;
@@ -2888,28 +2984,27 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2888 if (!ucounts) 2984 if (!ucounts)
2889 return ERR_PTR(-ENOSPC); 2985 return ERR_PTR(-ENOSPC);
2890 2986
2891 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 2987 new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2892 if (!new_ns) { 2988 if (!new_ns) {
2893 dec_mnt_namespaces(ucounts); 2989 dec_mnt_namespaces(ucounts);
2894 return ERR_PTR(-ENOMEM); 2990 return ERR_PTR(-ENOMEM);
2895 } 2991 }
2896 ret = ns_alloc_inum(&new_ns->ns); 2992 if (!anon) {
2897 if (ret) { 2993 ret = ns_alloc_inum(&new_ns->ns);
2898 kfree(new_ns); 2994 if (ret) {
2899 dec_mnt_namespaces(ucounts); 2995 kfree(new_ns);
2900 return ERR_PTR(ret); 2996 dec_mnt_namespaces(ucounts);
2997 return ERR_PTR(ret);
2998 }
2901 } 2999 }
2902 new_ns->ns.ops = &mntns_operations; 3000 new_ns->ns.ops = &mntns_operations;
2903 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); 3001 if (!anon)
3002 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2904 atomic_set(&new_ns->count, 1); 3003 atomic_set(&new_ns->count, 1);
2905 new_ns->root = NULL;
2906 INIT_LIST_HEAD(&new_ns->list); 3004 INIT_LIST_HEAD(&new_ns->list);
2907 init_waitqueue_head(&new_ns->poll); 3005 init_waitqueue_head(&new_ns->poll);
2908 new_ns->event = 0;
2909 new_ns->user_ns = get_user_ns(user_ns); 3006 new_ns->user_ns = get_user_ns(user_ns);
2910 new_ns->ucounts = ucounts; 3007 new_ns->ucounts = ucounts;
2911 new_ns->mounts = 0;
2912 new_ns->pending_mounts = 0;
2913 return new_ns; 3008 return new_ns;
2914} 3009}
2915 3010
@@ -2933,7 +3028,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2933 3028
2934 old = ns->root; 3029 old = ns->root;
2935 3030
2936 new_ns = alloc_mnt_ns(user_ns); 3031 new_ns = alloc_mnt_ns(user_ns, false);
2937 if (IS_ERR(new_ns)) 3032 if (IS_ERR(new_ns))
2938 return new_ns; 3033 return new_ns;
2939 3034
@@ -2941,13 +3036,18 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2941 /* First pass: copy the tree topology */ 3036 /* First pass: copy the tree topology */
2942 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; 3037 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
2943 if (user_ns != ns->user_ns) 3038 if (user_ns != ns->user_ns)
2944 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; 3039 copy_flags |= CL_SHARED_TO_SLAVE;
2945 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 3040 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2946 if (IS_ERR(new)) { 3041 if (IS_ERR(new)) {
2947 namespace_unlock(); 3042 namespace_unlock();
2948 free_mnt_ns(new_ns); 3043 free_mnt_ns(new_ns);
2949 return ERR_CAST(new); 3044 return ERR_CAST(new);
2950 } 3045 }
3046 if (user_ns != ns->user_ns) {
3047 lock_mount_hash();
3048 lock_mnt_tree(new);
3049 unlock_mount_hash();
3050 }
2951 new_ns->root = new; 3051 new_ns->root = new;
2952 list_add_tail(&new_ns->list, &new->mnt_list); 3052 list_add_tail(&new_ns->list, &new->mnt_list);
2953 3053
@@ -2988,37 +3088,25 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2988 return new_ns; 3088 return new_ns;
2989} 3089}
2990 3090
2991/** 3091struct dentry *mount_subtree(struct vfsmount *m, const char *name)
2992 * create_mnt_ns - creates a private namespace and adds a root filesystem
2993 * @mnt: pointer to the new root filesystem mountpoint
2994 */
2995static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2996{
2997 struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
2998 if (!IS_ERR(new_ns)) {
2999 struct mount *mnt = real_mount(m);
3000 mnt->mnt_ns = new_ns;
3001 new_ns->root = mnt;
3002 new_ns->mounts++;
3003 list_add(&mnt->mnt_list, &new_ns->list);
3004 } else {
3005 mntput(m);
3006 }
3007 return new_ns;
3008}
3009
3010struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
3011{ 3092{
3093 struct mount *mnt = real_mount(m);
3012 struct mnt_namespace *ns; 3094 struct mnt_namespace *ns;
3013 struct super_block *s; 3095 struct super_block *s;
3014 struct path path; 3096 struct path path;
3015 int err; 3097 int err;
3016 3098
3017 ns = create_mnt_ns(mnt); 3099 ns = alloc_mnt_ns(&init_user_ns, true);
3018 if (IS_ERR(ns)) 3100 if (IS_ERR(ns)) {
3101 mntput(m);
3019 return ERR_CAST(ns); 3102 return ERR_CAST(ns);
3103 }
3104 mnt->mnt_ns = ns;
3105 ns->root = mnt;
3106 ns->mounts++;
3107 list_add(&mnt->mnt_list, &ns->list);
3020 3108
3021 err = vfs_path_lookup(mnt->mnt_root, mnt, 3109 err = vfs_path_lookup(m->mnt_root, m,
3022 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); 3110 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
3023 3111
3024 put_mnt_ns(ns); 3112 put_mnt_ns(ns);
@@ -3228,6 +3316,7 @@ out0:
3228static void __init init_mount_tree(void) 3316static void __init init_mount_tree(void)
3229{ 3317{
3230 struct vfsmount *mnt; 3318 struct vfsmount *mnt;
3319 struct mount *m;
3231 struct mnt_namespace *ns; 3320 struct mnt_namespace *ns;
3232 struct path root; 3321 struct path root;
3233 struct file_system_type *type; 3322 struct file_system_type *type;
@@ -3240,10 +3329,14 @@ static void __init init_mount_tree(void)
3240 if (IS_ERR(mnt)) 3329 if (IS_ERR(mnt))
3241 panic("Can't create rootfs"); 3330 panic("Can't create rootfs");
3242 3331
3243 ns = create_mnt_ns(mnt); 3332 ns = alloc_mnt_ns(&init_user_ns, false);
3244 if (IS_ERR(ns)) 3333 if (IS_ERR(ns))
3245 panic("Can't allocate initial namespace"); 3334 panic("Can't allocate initial namespace");
3246 3335 m = real_mount(mnt);
3336 m->mnt_ns = ns;
3337 ns->root = m;
3338 ns->mounts = 1;
3339 list_add(&m->mnt_list, &ns->list);
3247 init_task.nsproxy->mnt_ns = ns; 3340 init_task.nsproxy->mnt_ns = ns;
3248 get_mnt_ns(ns); 3341 get_mnt_ns(ns);
3249 3342
@@ -3297,10 +3390,10 @@ void put_mnt_ns(struct mnt_namespace *ns)
3297 free_mnt_ns(ns); 3390 free_mnt_ns(ns);
3298} 3391}
3299 3392
3300struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) 3393struct vfsmount *kern_mount(struct file_system_type *type)
3301{ 3394{
3302 struct vfsmount *mnt; 3395 struct vfsmount *mnt;
3303 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data); 3396 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
3304 if (!IS_ERR(mnt)) { 3397 if (!IS_ERR(mnt)) {
3305 /* 3398 /*
3306 * it is a longterm mount, don't release mnt until 3399 * it is a longterm mount, don't release mnt until
@@ -3310,7 +3403,7 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
3310 } 3403 }
3311 return mnt; 3404 return mnt;
3312} 3405}
3313EXPORT_SYMBOL_GPL(kern_mount_data); 3406EXPORT_SYMBOL_GPL(kern_mount);
3314 3407
3315void kern_unmount(struct vfsmount *mnt) 3408void kern_unmount(struct vfsmount *mnt)
3316{ 3409{
@@ -3352,7 +3445,8 @@ bool current_chrooted(void)
3352 return chrooted; 3445 return chrooted;
3353} 3446}
3354 3447
3355static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new, 3448static bool mnt_already_visible(struct mnt_namespace *ns,
3449 const struct super_block *sb,
3356 int *new_mnt_flags) 3450 int *new_mnt_flags)
3357{ 3451{
3358 int new_flags = *new_mnt_flags; 3452 int new_flags = *new_mnt_flags;
@@ -3364,7 +3458,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
3364 struct mount *child; 3458 struct mount *child;
3365 int mnt_flags; 3459 int mnt_flags;
3366 3460
3367 if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type) 3461 if (mnt->mnt.mnt_sb->s_type != sb->s_type)
3368 continue; 3462 continue;
3369 3463
3370 /* This mount is not fully visible if it's root directory 3464 /* This mount is not fully visible if it's root directory
@@ -3415,7 +3509,7 @@ found:
3415 return visible; 3509 return visible;
3416} 3510}
3417 3511
3418static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) 3512static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
3419{ 3513{
3420 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV; 3514 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
3421 struct mnt_namespace *ns = current->nsproxy->mnt_ns; 3515 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
@@ -3425,7 +3519,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
3425 return false; 3519 return false;
3426 3520
3427 /* Can this filesystem be too revealing? */ 3521 /* Can this filesystem be too revealing? */
3428 s_iflags = mnt->mnt_sb->s_iflags; 3522 s_iflags = sb->s_iflags;
3429 if (!(s_iflags & SB_I_USERNS_VISIBLE)) 3523 if (!(s_iflags & SB_I_USERNS_VISIBLE))
3430 return false; 3524 return false;
3431 3525
@@ -3435,7 +3529,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
3435 return true; 3529 return true;
3436 } 3530 }
3437 3531
3438 return !mnt_already_visible(ns, mnt, new_mnt_flags); 3532 return !mnt_already_visible(ns, sb, new_mnt_flags);
3439} 3533}
3440 3534
3441bool mnt_may_suid(struct vfsmount *mnt) 3535bool mnt_may_suid(struct vfsmount *mnt)
@@ -3484,6 +3578,9 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
3484 !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 3578 !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
3485 return -EPERM; 3579 return -EPERM;
3486 3580
3581 if (is_anon_ns(mnt_ns))
3582 return -EINVAL;
3583
3487 if (fs->users != 1) 3584 if (fs->users != 1)
3488 return -EINVAL; 3585 return -EINVAL;
3489 3586
diff --git a/fs/pnode.c b/fs/pnode.c
index 1100e810d855..7ea6cfb65077 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -214,7 +214,6 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
214} 214}
215 215
216/* all accesses are serialized by namespace_sem */ 216/* all accesses are serialized by namespace_sem */
217static struct user_namespace *user_ns;
218static struct mount *last_dest, *first_source, *last_source, *dest_master; 217static struct mount *last_dest, *first_source, *last_source, *dest_master;
219static struct mountpoint *mp; 218static struct mountpoint *mp;
220static struct hlist_head *list; 219static struct hlist_head *list;
@@ -260,9 +259,6 @@ static int propagate_one(struct mount *m)
260 type |= CL_MAKE_SHARED; 259 type |= CL_MAKE_SHARED;
261 } 260 }
262 261
263 /* Notice when we are propagating across user namespaces */
264 if (m->mnt_ns->user_ns != user_ns)
265 type |= CL_UNPRIVILEGED;
266 child = copy_tree(last_source, last_source->mnt.mnt_root, type); 262 child = copy_tree(last_source, last_source->mnt.mnt_root, type);
267 if (IS_ERR(child)) 263 if (IS_ERR(child))
268 return PTR_ERR(child); 264 return PTR_ERR(child);
@@ -303,7 +299,6 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
303 * propagate_one(); everything is serialized by namespace_sem, 299 * propagate_one(); everything is serialized by namespace_sem,
304 * so globals will do just fine. 300 * so globals will do just fine.
305 */ 301 */
306 user_ns = current->nsproxy->mnt_ns->user_ns;
307 last_dest = dest_mnt; 302 last_dest = dest_mnt;
308 first_source = source_mnt; 303 first_source = source_mnt;
309 last_source = source_mnt; 304 last_source = source_mnt;
diff --git a/fs/pnode.h b/fs/pnode.h
index dc87e65becd2..3960a83666cf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -27,8 +27,7 @@
27#define CL_MAKE_SHARED 0x08 27#define CL_MAKE_SHARED 0x08
28#define CL_PRIVATE 0x10 28#define CL_PRIVATE 0x10
29#define CL_SHARED_TO_SLAVE 0x20 29#define CL_SHARED_TO_SLAVE 0x20
30#define CL_UNPRIVILEGED 0x40 30#define CL_COPY_MNT_NS_FILE 0x40
31#define CL_COPY_MNT_NS_FILE 0x80
32 31
33#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) 32#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)
34 33
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index da649ccd6804..fc7e38def174 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -24,7 +24,6 @@
24#include <linux/seq_file.h> 24#include <linux/seq_file.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/mount.h> 26#include <linux/mount.h>
27#include <linux/magic.h>
28 27
29#include <linux/uaccess.h> 28#include <linux/uaccess.h>
30 29
@@ -122,13 +121,12 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
122 return 0; 121 return 0;
123} 122}
124 123
125static const struct super_operations proc_sops = { 124const struct super_operations proc_sops = {
126 .alloc_inode = proc_alloc_inode, 125 .alloc_inode = proc_alloc_inode,
127 .destroy_inode = proc_destroy_inode, 126 .destroy_inode = proc_destroy_inode,
128 .drop_inode = generic_delete_inode, 127 .drop_inode = generic_delete_inode,
129 .evict_inode = proc_evict_inode, 128 .evict_inode = proc_evict_inode,
130 .statfs = simple_statfs, 129 .statfs = simple_statfs,
131 .remount_fs = proc_remount,
132 .show_options = proc_show_options, 130 .show_options = proc_show_options,
133}; 131};
134 132
@@ -488,51 +486,3 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
488 pde_put(de); 486 pde_put(de);
489 return inode; 487 return inode;
490} 488}
491
492int proc_fill_super(struct super_block *s, void *data, int silent)
493{
494 struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
495 struct inode *root_inode;
496 int ret;
497
498 if (!proc_parse_options(data, ns))
499 return -EINVAL;
500
501 /* User space would break if executables or devices appear on proc */
502 s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
503 s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
504 s->s_blocksize = 1024;
505 s->s_blocksize_bits = 10;
506 s->s_magic = PROC_SUPER_MAGIC;
507 s->s_op = &proc_sops;
508 s->s_time_gran = 1;
509
510 /*
511 * procfs isn't actually a stacking filesystem; however, there is
512 * too much magic going on inside it to permit stacking things on
513 * top of it
514 */
515 s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
516
517 /* procfs dentries and inodes don't require IO to create */
518 s->s_shrink.seeks = 0;
519
520 pde_get(&proc_root);
521 root_inode = proc_get_inode(s, &proc_root);
522 if (!root_inode) {
523 pr_err("proc_fill_super: get root inode failed\n");
524 return -ENOMEM;
525 }
526
527 s->s_root = d_make_root(root_inode);
528 if (!s->s_root) {
529 pr_err("proc_fill_super: allocate dentry failed\n");
530 return -ENOMEM;
531 }
532
533 ret = proc_setup_self(s);
534 if (ret) {
535 return ret;
536 }
537 return proc_setup_thread_self(s);
538}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index ea575375f210..d1671e97f7fe 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -207,13 +207,12 @@ struct pde_opener {
207 struct completion *c; 207 struct completion *c;
208} __randomize_layout; 208} __randomize_layout;
209extern const struct inode_operations proc_link_inode_operations; 209extern const struct inode_operations proc_link_inode_operations;
210
211extern const struct inode_operations proc_pid_link_inode_operations; 210extern const struct inode_operations proc_pid_link_inode_operations;
211extern const struct super_operations proc_sops;
212 212
213void proc_init_kmemcache(void); 213void proc_init_kmemcache(void);
214void set_proc_pid_nlink(void); 214void set_proc_pid_nlink(void);
215extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); 215extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
216extern int proc_fill_super(struct super_block *, void *data, int flags);
217extern void proc_entry_rundown(struct proc_dir_entry *); 216extern void proc_entry_rundown(struct proc_dir_entry *);
218 217
219/* 218/*
@@ -271,10 +270,8 @@ static inline void proc_tty_init(void) {}
271 * root.c 270 * root.c
272 */ 271 */
273extern struct proc_dir_entry proc_root; 272extern struct proc_dir_entry proc_root;
274extern int proc_parse_options(char *options, struct pid_namespace *pid);
275 273
276extern void proc_self_init(void); 274extern void proc_self_init(void);
277extern int proc_remount(struct super_block *, int *, char *);
278 275
279/* 276/*
280 * task_[no]mmu.c 277 * task_[no]mmu.c
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 621e6ec322ca..8b145e7b9661 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -19,86 +19,178 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/user_namespace.h> 21#include <linux/user_namespace.h>
22#include <linux/fs_context.h>
22#include <linux/mount.h> 23#include <linux/mount.h>
23#include <linux/pid_namespace.h> 24#include <linux/pid_namespace.h>
24#include <linux/parser.h> 25#include <linux/fs_parser.h>
25#include <linux/cred.h> 26#include <linux/cred.h>
27#include <linux/magic.h>
28#include <linux/slab.h>
26 29
27#include "internal.h" 30#include "internal.h"
28 31
29enum { 32struct proc_fs_context {
30 Opt_gid, Opt_hidepid, Opt_err, 33 struct pid_namespace *pid_ns;
34 unsigned int mask;
35 int hidepid;
36 int gid;
31}; 37};
32 38
33static const match_table_t tokens = { 39enum proc_param {
34 {Opt_hidepid, "hidepid=%u"}, 40 Opt_gid,
35 {Opt_gid, "gid=%u"}, 41 Opt_hidepid,
36 {Opt_err, NULL},
37}; 42};
38 43
39int proc_parse_options(char *options, struct pid_namespace *pid) 44static const struct fs_parameter_spec proc_param_specs[] = {
45 fsparam_u32("gid", Opt_gid),
46 fsparam_u32("hidepid", Opt_hidepid),
47 {}
48};
49
50static const struct fs_parameter_description proc_fs_parameters = {
51 .name = "proc",
52 .specs = proc_param_specs,
53};
54
55static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
40{ 56{
41 char *p; 57 struct proc_fs_context *ctx = fc->fs_private;
42 substring_t args[MAX_OPT_ARGS]; 58 struct fs_parse_result result;
43 int option; 59 int opt;
44 60
45 if (!options) 61 opt = fs_parse(fc, &proc_fs_parameters, param, &result);
46 return 1; 62 if (opt < 0)
47 63 return opt;
48 while ((p = strsep(&options, ",")) != NULL) { 64
49 int token; 65 switch (opt) {
50 if (!*p) 66 case Opt_gid:
51 continue; 67 ctx->gid = result.uint_32;
52 68 break;
53 args[0].to = args[0].from = NULL; 69
54 token = match_token(p, tokens, args); 70 case Opt_hidepid:
55 switch (token) { 71 ctx->hidepid = result.uint_32;
56 case Opt_gid: 72 if (ctx->hidepid < HIDEPID_OFF ||
57 if (match_int(&args[0], &option)) 73 ctx->hidepid > HIDEPID_INVISIBLE)
58 return 0; 74 return invalf(fc, "proc: hidepid value must be between 0 and 2.\n");
59 pid->pid_gid = make_kgid(current_user_ns(), option); 75 break;
60 break; 76
61 case Opt_hidepid: 77 default:
62 if (match_int(&args[0], &option)) 78 return -EINVAL;
63 return 0;
64 if (option < HIDEPID_OFF ||
65 option > HIDEPID_INVISIBLE) {
66 pr_err("proc: hidepid value must be between 0 and 2.\n");
67 return 0;
68 }
69 pid->hide_pid = option;
70 break;
71 default:
72 pr_err("proc: unrecognized mount option \"%s\" "
73 "or missing value\n", p);
74 return 0;
75 }
76 } 79 }
77 80
78 return 1; 81 ctx->mask |= 1 << opt;
82 return 0;
79} 83}
80 84
81int proc_remount(struct super_block *sb, int *flags, char *data) 85static void proc_apply_options(struct super_block *s,
86 struct fs_context *fc,
87 struct pid_namespace *pid_ns,
88 struct user_namespace *user_ns)
82{ 89{
90 struct proc_fs_context *ctx = fc->fs_private;
91
92 if (ctx->mask & (1 << Opt_gid))
93 pid_ns->pid_gid = make_kgid(user_ns, ctx->gid);
94 if (ctx->mask & (1 << Opt_hidepid))
95 pid_ns->hide_pid = ctx->hidepid;
96}
97
98static int proc_fill_super(struct super_block *s, struct fs_context *fc)
99{
100 struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info);
101 struct inode *root_inode;
102 int ret;
103
104 proc_apply_options(s, fc, pid_ns, current_user_ns());
105
106 /* User space would break if executables or devices appear on proc */
107 s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
108 s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
109 s->s_blocksize = 1024;
110 s->s_blocksize_bits = 10;
111 s->s_magic = PROC_SUPER_MAGIC;
112 s->s_op = &proc_sops;
113 s->s_time_gran = 1;
114
115 /*
116 * procfs isn't actually a stacking filesystem; however, there is
117 * too much magic going on inside it to permit stacking things on
118 * top of it
119 */
120 s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
121
122 /* procfs dentries and inodes don't require IO to create */
123 s->s_shrink.seeks = 0;
124
125 pde_get(&proc_root);
126 root_inode = proc_get_inode(s, &proc_root);
127 if (!root_inode) {
128 pr_err("proc_fill_super: get root inode failed\n");
129 return -ENOMEM;
130 }
131
132 s->s_root = d_make_root(root_inode);
133 if (!s->s_root) {
134 pr_err("proc_fill_super: allocate dentry failed\n");
135 return -ENOMEM;
136 }
137
138 ret = proc_setup_self(s);
139 if (ret) {
140 return ret;
141 }
142 return proc_setup_thread_self(s);
143}
144
145static int proc_reconfigure(struct fs_context *fc)
146{
147 struct super_block *sb = fc->root->d_sb;
83 struct pid_namespace *pid = sb->s_fs_info; 148 struct pid_namespace *pid = sb->s_fs_info;
84 149
85 sync_filesystem(sb); 150 sync_filesystem(sb);
86 return !proc_parse_options(data, pid); 151
152 proc_apply_options(sb, fc, pid, current_user_ns());
153 return 0;
87} 154}
88 155
89static struct dentry *proc_mount(struct file_system_type *fs_type, 156static int proc_get_tree(struct fs_context *fc)
90 int flags, const char *dev_name, void *data)
91{ 157{
92 struct pid_namespace *ns; 158 struct proc_fs_context *ctx = fc->fs_private;
93 159
94 if (flags & SB_KERNMOUNT) { 160 put_user_ns(fc->user_ns);
95 ns = data; 161 fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
96 data = NULL; 162 fc->s_fs_info = ctx->pid_ns;
97 } else { 163 return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super);
98 ns = task_active_pid_ns(current); 164}
99 }
100 165
101 return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super); 166static void proc_fs_context_free(struct fs_context *fc)
167{
168 struct proc_fs_context *ctx = fc->fs_private;
169
170 if (ctx->pid_ns)
171 put_pid_ns(ctx->pid_ns);
172 kfree(ctx);
173}
174
175static const struct fs_context_operations proc_fs_context_ops = {
176 .free = proc_fs_context_free,
177 .parse_param = proc_parse_param,
178 .get_tree = proc_get_tree,
179 .reconfigure = proc_reconfigure,
180};
181
182static int proc_init_fs_context(struct fs_context *fc)
183{
184 struct proc_fs_context *ctx;
185
186 ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL);
187 if (!ctx)
188 return -ENOMEM;
189
190 ctx->pid_ns = get_pid_ns(task_active_pid_ns(current));
191 fc->fs_private = ctx;
192 fc->ops = &proc_fs_context_ops;
193 return 0;
102} 194}
103 195
104static void proc_kill_sb(struct super_block *sb) 196static void proc_kill_sb(struct super_block *sb)
@@ -115,10 +207,11 @@ static void proc_kill_sb(struct super_block *sb)
115} 207}
116 208
117static struct file_system_type proc_fs_type = { 209static struct file_system_type proc_fs_type = {
118 .name = "proc", 210 .name = "proc",
119 .mount = proc_mount, 211 .init_fs_context = proc_init_fs_context,
120 .kill_sb = proc_kill_sb, 212 .parameters = &proc_fs_parameters,
121 .fs_flags = FS_USERNS_MOUNT, 213 .kill_sb = proc_kill_sb,
214 .fs_flags = FS_USERNS_MOUNT,
122}; 215};
123 216
124void __init proc_root_init(void) 217void __init proc_root_init(void)
@@ -156,7 +249,7 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
156{ 249{
157 if (!proc_pid_lookup(dentry, flags)) 250 if (!proc_pid_lookup(dentry, flags))
158 return NULL; 251 return NULL;
159 252
160 return proc_lookup(dir, dentry, flags); 253 return proc_lookup(dir, dentry, flags);
161} 254}
162 255
@@ -209,9 +302,28 @@ struct proc_dir_entry proc_root = {
209 302
210int pid_ns_prepare_proc(struct pid_namespace *ns) 303int pid_ns_prepare_proc(struct pid_namespace *ns)
211{ 304{
305 struct proc_fs_context *ctx;
306 struct fs_context *fc;
212 struct vfsmount *mnt; 307 struct vfsmount *mnt;
213 308
214 mnt = kern_mount_data(&proc_fs_type, ns); 309 fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT);
310 if (IS_ERR(fc))
311 return PTR_ERR(fc);
312
313 if (fc->user_ns != ns->user_ns) {
314 put_user_ns(fc->user_ns);
315 fc->user_ns = get_user_ns(ns->user_ns);
316 }
317
318 ctx = fc->fs_private;
319 if (ctx->pid_ns != ns) {
320 put_pid_ns(ctx->pid_ns);
321 get_pid_ns(ns);
322 ctx->pid_ns = ns;
323 }
324
325 mnt = fc_mount(fc);
326 put_fs_context(fc);
215 if (IS_ERR(mnt)) 327 if (IS_ERR(mnt))
216 return PTR_ERR(mnt); 328 return PTR_ERR(mnt);
217 329
diff --git a/fs/super.c b/fs/super.c
index 48e25eba8465..583a0124bc39 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -35,6 +35,7 @@
35#include <linux/fsnotify.h> 35#include <linux/fsnotify.h>
36#include <linux/lockdep.h> 36#include <linux/lockdep.h>
37#include <linux/user_namespace.h> 37#include <linux/user_namespace.h>
38#include <linux/fs_context.h>
38#include <uapi/linux/mount.h> 39#include <uapi/linux/mount.h>
39#include "internal.h" 40#include "internal.h"
40 41
@@ -476,6 +477,94 @@ void generic_shutdown_super(struct super_block *sb)
476EXPORT_SYMBOL(generic_shutdown_super); 477EXPORT_SYMBOL(generic_shutdown_super);
477 478
478/** 479/**
480 * sget_fc - Find or create a superblock
481 * @fc: Filesystem context.
482 * @test: Comparison callback
483 * @set: Setup callback
484 *
485 * Find or create a superblock using the parameters stored in the filesystem
486 * context and the two callback functions.
487 *
488 * If an extant superblock is matched, then that will be returned with an
489 * elevated reference count that the caller must transfer or discard.
490 *
491 * If no match is made, a new superblock will be allocated and basic
492 * initialisation will be performed (s_type, s_fs_info and s_id will be set and
493 * the set() callback will be invoked), the superblock will be published and it
494 * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE
495 * as yet unset.
496 */
497struct super_block *sget_fc(struct fs_context *fc,
498 int (*test)(struct super_block *, struct fs_context *),
499 int (*set)(struct super_block *, struct fs_context *))
500{
501 struct super_block *s = NULL;
502 struct super_block *old;
503 struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns;
504 int err;
505
506 if (!(fc->sb_flags & SB_KERNMOUNT) &&
507 fc->purpose != FS_CONTEXT_FOR_SUBMOUNT) {
508 /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
509 * over the namespace.
510 */
511 if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) {
512 if (!capable(CAP_SYS_ADMIN))
513 return ERR_PTR(-EPERM);
514 } else {
515 if (!ns_capable(fc->user_ns, CAP_SYS_ADMIN))
516 return ERR_PTR(-EPERM);
517 }
518 }
519
520retry:
521 spin_lock(&sb_lock);
522 if (test) {
523 hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
524 if (test(old, fc))
525 goto share_extant_sb;
526 }
527 }
528 if (!s) {
529 spin_unlock(&sb_lock);
530 s = alloc_super(fc->fs_type, fc->sb_flags, user_ns);
531 if (!s)
532 return ERR_PTR(-ENOMEM);
533 goto retry;
534 }
535
536 s->s_fs_info = fc->s_fs_info;
537 err = set(s, fc);
538 if (err) {
539 s->s_fs_info = NULL;
540 spin_unlock(&sb_lock);
541 destroy_unused_super(s);
542 return ERR_PTR(err);
543 }
544 fc->s_fs_info = NULL;
545 s->s_type = fc->fs_type;
546 strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id));
547 list_add_tail(&s->s_list, &super_blocks);
548 hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
549 spin_unlock(&sb_lock);
550 get_filesystem(s->s_type);
551 register_shrinker_prepared(&s->s_shrink);
552 return s;
553
554share_extant_sb:
555 if (user_ns != old->s_user_ns) {
556 spin_unlock(&sb_lock);
557 destroy_unused_super(s);
558 return ERR_PTR(-EBUSY);
559 }
560 if (!grab_super(old))
561 goto retry;
562 destroy_unused_super(s);
563 return old;
564}
565EXPORT_SYMBOL(sget_fc);
566
567/**
479 * sget_userns - find or create a superblock 568 * sget_userns - find or create a superblock
480 * @type: filesystem type superblock should belong to 569 * @type: filesystem type superblock should belong to
481 * @test: comparison callback 570 * @test: comparison callback
@@ -835,28 +924,35 @@ rescan:
835} 924}
836 925
837/** 926/**
838 * do_remount_sb - asks filesystem to change mount options. 927 * reconfigure_super - asks filesystem to change superblock parameters
839 * @sb: superblock in question 928 * @fc: The superblock and configuration
840 * @sb_flags: revised superblock flags
841 * @data: the rest of options
842 * @force: whether or not to force the change
843 * 929 *
844 * Alters the mount options of a mounted file system. 930 * Alters the configuration parameters of a live superblock.
845 */ 931 */
846int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) 932int reconfigure_super(struct fs_context *fc)
847{ 933{
934 struct super_block *sb = fc->root->d_sb;
848 int retval; 935 int retval;
849 int remount_ro; 936 bool remount_ro = false;
937 bool force = fc->sb_flags & SB_FORCE;
850 938
939 if (fc->sb_flags_mask & ~MS_RMT_MASK)
940 return -EINVAL;
851 if (sb->s_writers.frozen != SB_UNFROZEN) 941 if (sb->s_writers.frozen != SB_UNFROZEN)
852 return -EBUSY; 942 return -EBUSY;
853 943
944 retval = security_sb_remount(sb, fc->security);
945 if (retval)
946 return retval;
947
948 if (fc->sb_flags_mask & SB_RDONLY) {
854#ifdef CONFIG_BLOCK 949#ifdef CONFIG_BLOCK
855 if (!(sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) 950 if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev))
856 return -EACCES; 951 return -EACCES;
857#endif 952#endif
858 953
859 remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); 954 remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
955 }
860 956
861 if (remount_ro) { 957 if (remount_ro) {
862 if (!hlist_empty(&sb->s_pins)) { 958 if (!hlist_empty(&sb->s_pins)) {
@@ -867,13 +963,14 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force)
867 return 0; 963 return 0;
868 if (sb->s_writers.frozen != SB_UNFROZEN) 964 if (sb->s_writers.frozen != SB_UNFROZEN)
869 return -EBUSY; 965 return -EBUSY;
870 remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); 966 remount_ro = !sb_rdonly(sb);
871 } 967 }
872 } 968 }
873 shrink_dcache_sb(sb); 969 shrink_dcache_sb(sb);
874 970
875 /* If we are remounting RDONLY and current sb is read/write, 971 /* If we are reconfiguring to RDONLY and current sb is read/write,
876 make sure there are no rw files opened */ 972 * make sure there are no files open for writing.
973 */
877 if (remount_ro) { 974 if (remount_ro) {
878 if (force) { 975 if (force) {
879 sb->s_readonly_remount = 1; 976 sb->s_readonly_remount = 1;
@@ -885,8 +982,8 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force)
885 } 982 }
886 } 983 }
887 984
888 if (sb->s_op->remount_fs) { 985 if (fc->ops->reconfigure) {
889 retval = sb->s_op->remount_fs(sb, &sb_flags, data); 986 retval = fc->ops->reconfigure(fc);
890 if (retval) { 987 if (retval) {
891 if (!force) 988 if (!force)
892 goto cancel_readonly; 989 goto cancel_readonly;
@@ -895,7 +992,9 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force)
895 sb->s_type->name, retval); 992 sb->s_type->name, retval);
896 } 993 }
897 } 994 }
898 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (sb_flags & MS_RMT_MASK); 995
996 WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
997 (fc->sb_flags & fc->sb_flags_mask)));
899 /* Needs to be ordered wrt mnt_is_readonly() */ 998 /* Needs to be ordered wrt mnt_is_readonly() */
900 smp_wmb(); 999 smp_wmb();
901 sb->s_readonly_remount = 0; 1000 sb->s_readonly_remount = 0;
@@ -922,10 +1021,15 @@ static void do_emergency_remount_callback(struct super_block *sb)
922 down_write(&sb->s_umount); 1021 down_write(&sb->s_umount);
923 if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && 1022 if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
924 !sb_rdonly(sb)) { 1023 !sb_rdonly(sb)) {
925 /* 1024 struct fs_context *fc;
926 * What lock protects sb->s_flags?? 1025
927 */ 1026 fc = fs_context_for_reconfigure(sb->s_root,
928 do_remount_sb(sb, SB_RDONLY, NULL, 1); 1027 SB_RDONLY | SB_FORCE, SB_RDONLY);
1028 if (!IS_ERR(fc)) {
1029 if (parse_monolithic_mount_data(fc, NULL) == 0)
1030 (void)reconfigure_super(fc);
1031 put_fs_context(fc);
1032 }
929 } 1033 }
930 up_write(&sb->s_umount); 1034 up_write(&sb->s_umount);
931} 1035}
@@ -1087,6 +1191,89 @@ struct dentry *mount_ns(struct file_system_type *fs_type,
1087 1191
1088EXPORT_SYMBOL(mount_ns); 1192EXPORT_SYMBOL(mount_ns);
1089 1193
1194int set_anon_super_fc(struct super_block *sb, struct fs_context *fc)
1195{
1196 return set_anon_super(sb, NULL);
1197}
1198EXPORT_SYMBOL(set_anon_super_fc);
1199
1200static int test_keyed_super(struct super_block *sb, struct fs_context *fc)
1201{
1202 return sb->s_fs_info == fc->s_fs_info;
1203}
1204
1205static int test_single_super(struct super_block *s, struct fs_context *fc)
1206{
1207 return 1;
1208}
1209
1210/**
1211 * vfs_get_super - Get a superblock with a search key set in s_fs_info.
1212 * @fc: The filesystem context holding the parameters
1213 * @keying: How to distinguish superblocks
1214 * @fill_super: Helper to initialise a new superblock
1215 *
1216 * Search for a superblock and create a new one if not found. The search
1217 * criterion is controlled by @keying. If the search fails, a new superblock
1218 * is created and @fill_super() is called to initialise it.
1219 *
1220 * @keying can take one of a number of values:
1221 *
1222 * (1) vfs_get_single_super - Only one superblock of this type may exist on the
1223 * system. This is typically used for special system filesystems.
1224 *
1225 * (2) vfs_get_keyed_super - Multiple superblocks may exist, but they must have
1226 * distinct keys (where the key is in s_fs_info). Searching for the same
1227 * key again will turn up the superblock for that key.
1228 *
1229 * (3) vfs_get_independent_super - Multiple superblocks may exist and are
1230 * unkeyed. Each call will get a new superblock.
1231 *
1232 * A permissions check is made by sget_fc() unless we're getting a superblock
1233 * for a kernel-internal mount or a submount.
1234 */
1235int vfs_get_super(struct fs_context *fc,
1236 enum vfs_get_super_keying keying,
1237 int (*fill_super)(struct super_block *sb,
1238 struct fs_context *fc))
1239{
1240 int (*test)(struct super_block *, struct fs_context *);
1241 struct super_block *sb;
1242
1243 switch (keying) {
1244 case vfs_get_single_super:
1245 test = test_single_super;
1246 break;
1247 case vfs_get_keyed_super:
1248 test = test_keyed_super;
1249 break;
1250 case vfs_get_independent_super:
1251 test = NULL;
1252 break;
1253 default:
1254 BUG();
1255 }
1256
1257 sb = sget_fc(fc, test, set_anon_super_fc);
1258 if (IS_ERR(sb))
1259 return PTR_ERR(sb);
1260
1261 if (!sb->s_root) {
1262 int err = fill_super(sb, fc);
1263 if (err) {
1264 deactivate_locked_super(sb);
1265 return err;
1266 }
1267
1268 sb->s_flags |= SB_ACTIVE;
1269 }
1270
1271 BUG_ON(fc->root);
1272 fc->root = dget(sb->s_root);
1273 return 0;
1274}
1275EXPORT_SYMBOL(vfs_get_super);
1276
1090#ifdef CONFIG_BLOCK 1277#ifdef CONFIG_BLOCK
1091static int set_bdev_super(struct super_block *s, void *data) 1278static int set_bdev_super(struct super_block *s, void *data)
1092{ 1279{
@@ -1212,6 +1399,31 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
1212} 1399}
1213EXPORT_SYMBOL(mount_nodev); 1400EXPORT_SYMBOL(mount_nodev);
1214 1401
1402static int reconfigure_single(struct super_block *s,
1403 int flags, void *data)
1404{
1405 struct fs_context *fc;
1406 int ret;
1407
1408 /* The caller really need to be passing fc down into mount_single(),
1409 * then a chunk of this can be removed. [Bollocks -- AV]
1410 * Better yet, reconfiguration shouldn't happen, but rather the second
1411 * mount should be rejected if the parameters are not compatible.
1412 */
1413 fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK);
1414 if (IS_ERR(fc))
1415 return PTR_ERR(fc);
1416
1417 ret = parse_monolithic_mount_data(fc, data);
1418 if (ret < 0)
1419 goto out;
1420
1421 ret = reconfigure_super(fc);
1422out:
1423 put_fs_context(fc);
1424 return ret;
1425}
1426
1215static int compare_single(struct super_block *s, void *p) 1427static int compare_single(struct super_block *s, void *p)
1216{ 1428{
1217 return 1; 1429 return 1;
@@ -1229,41 +1441,64 @@ struct dentry *mount_single(struct file_system_type *fs_type,
1229 return ERR_CAST(s); 1441 return ERR_CAST(s);
1230 if (!s->s_root) { 1442 if (!s->s_root) {
1231 error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); 1443 error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
1232 if (error) { 1444 if (!error)
1233 deactivate_locked_super(s); 1445 s->s_flags |= SB_ACTIVE;
1234 return ERR_PTR(error);
1235 }
1236 s->s_flags |= SB_ACTIVE;
1237 } else { 1446 } else {
1238 do_remount_sb(s, flags, data, 0); 1447 error = reconfigure_single(s, flags, data);
1448 }
1449 if (unlikely(error)) {
1450 deactivate_locked_super(s);
1451 return ERR_PTR(error);
1239 } 1452 }
1240 return dget(s->s_root); 1453 return dget(s->s_root);
1241} 1454}
1242EXPORT_SYMBOL(mount_single); 1455EXPORT_SYMBOL(mount_single);
1243 1456
1244struct dentry * 1457/**
1245mount_fs(struct file_system_type *type, int flags, const char *name, void *data) 1458 * vfs_get_tree - Get the mountable root
1459 * @fc: The superblock configuration context.
1460 *
1461 * The filesystem is invoked to get or create a superblock which can then later
1462 * be used for mounting. The filesystem places a pointer to the root to be
1463 * used for mounting in @fc->root.
1464 */
1465int vfs_get_tree(struct fs_context *fc)
1246{ 1466{
1247 struct dentry *root;
1248 struct super_block *sb; 1467 struct super_block *sb;
1249 int error = -ENOMEM; 1468 int error;
1250 void *sec_opts = NULL;
1251 1469
1252 if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { 1470 if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) {
1253 error = security_sb_eat_lsm_opts(data, &sec_opts); 1471 errorf(fc, "Filesystem requires source device");
1254 if (error) 1472 return -ENOENT;
1255 return ERR_PTR(error);
1256 } 1473 }
1257 1474
1258 root = type->mount(type, flags, name, data); 1475 if (fc->root)
1259 if (IS_ERR(root)) { 1476 return -EBUSY;
1260 error = PTR_ERR(root); 1477
1261 goto out_free_secdata; 1478 /* Get the mountable root in fc->root, with a ref on the root and a ref
1479 * on the superblock.
1480 */
1481 error = fc->ops->get_tree(fc);
1482 if (error < 0)
1483 return error;
1484
1485 if (!fc->root) {
1486 pr_err("Filesystem %s get_tree() didn't set fc->root\n",
1487 fc->fs_type->name);
1488 /* We don't know what the locking state of the superblock is -
1489 * if there is a superblock.
1490 */
1491 BUG();
1262 } 1492 }
1263 sb = root->d_sb; 1493
1264 BUG_ON(!sb); 1494 sb = fc->root->d_sb;
1265 WARN_ON(!sb->s_bdi); 1495 WARN_ON(!sb->s_bdi);
1266 1496
1497 if (fc->subtype && !sb->s_subtype) {
1498 sb->s_subtype = fc->subtype;
1499 fc->subtype = NULL;
1500 }
1501
1267 /* 1502 /*
1268 * Write barrier is for super_cache_count(). We place it before setting 1503 * Write barrier is for super_cache_count(). We place it before setting
1269 * SB_BORN as the data dependency between the two functions is the 1504 * SB_BORN as the data dependency between the two functions is the
@@ -1273,14 +1508,10 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
1273 smp_wmb(); 1508 smp_wmb();
1274 sb->s_flags |= SB_BORN; 1509 sb->s_flags |= SB_BORN;
1275 1510
1276 error = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL); 1511 error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL);
1277 if (error) 1512 if (unlikely(error)) {
1278 goto out_sb; 1513 fc_drop_locked(fc);
1279 1514 return error;
1280 if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT))) {
1281 error = security_sb_kern_mount(sb);
1282 if (error)
1283 goto out_sb;
1284 } 1515 }
1285 1516
1286 /* 1517 /*
@@ -1290,18 +1521,11 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
1290 * violate this rule. 1521 * violate this rule.
1291 */ 1522 */
1292 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " 1523 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
1293 "negative value (%lld)\n", type->name, sb->s_maxbytes); 1524 "negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes);
1294 1525
1295 up_write(&sb->s_umount); 1526 return 0;
1296 security_free_mnt_opts(&sec_opts);
1297 return root;
1298out_sb:
1299 dput(root);
1300 deactivate_locked_super(sb);
1301out_free_secdata:
1302 security_free_mnt_opts(&sec_opts);
1303 return ERR_PTR(error);
1304} 1527}
1528EXPORT_SYMBOL(vfs_get_tree);
1305 1529
1306/* 1530/*
1307 * Setup private BDI for given superblock. It gets automatically cleaned up 1531 * Setup private BDI for given superblock. It gets automatically cleaned up
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 92682fcc41f6..4cb21b558a85 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -13,34 +13,69 @@
13#include <linux/magic.h> 13#include <linux/magic.h>
14#include <linux/mount.h> 14#include <linux/mount.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/slab.h>
16#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
18#include <linux/fs_context.h>
19#include <net/net_namespace.h>
17 20
18#include "sysfs.h" 21#include "sysfs.h"
19 22
20static struct kernfs_root *sysfs_root; 23static struct kernfs_root *sysfs_root;
21struct kernfs_node *sysfs_root_kn; 24struct kernfs_node *sysfs_root_kn;
22 25
23static struct dentry *sysfs_mount(struct file_system_type *fs_type, 26static int sysfs_get_tree(struct fs_context *fc)
24 int flags, const char *dev_name, void *data)
25{ 27{
26 struct dentry *root; 28 struct kernfs_fs_context *kfc = fc->fs_private;
27 void *ns; 29 int ret;
28 bool new_sb = false;
29 30
30 if (!(flags & SB_KERNMOUNT)) { 31 ret = kernfs_get_tree(fc);
32 if (ret)
33 return ret;
34
35 if (kfc->new_sb_created)
36 fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
37 return 0;
38}
39
40static void sysfs_fs_context_free(struct fs_context *fc)
41{
42 struct kernfs_fs_context *kfc = fc->fs_private;
43
44 if (kfc->ns_tag)
45 kobj_ns_drop(KOBJ_NS_TYPE_NET, kfc->ns_tag);
46 kernfs_free_fs_context(fc);
47 kfree(kfc);
48}
49
50static const struct fs_context_operations sysfs_fs_context_ops = {
51 .free = sysfs_fs_context_free,
52 .get_tree = sysfs_get_tree,
53};
54
55static int sysfs_init_fs_context(struct fs_context *fc)
56{
57 struct kernfs_fs_context *kfc;
58 struct net *netns;
59
60 if (!(fc->sb_flags & SB_KERNMOUNT)) {
31 if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) 61 if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
32 return ERR_PTR(-EPERM); 62 return -EPERM;
33 } 63 }
34 64
35 ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); 65 kfc = kzalloc(sizeof(struct kernfs_fs_context), GFP_KERNEL);
36 root = kernfs_mount_ns(fs_type, flags, sysfs_root, 66 if (!kfc)
37 SYSFS_MAGIC, &new_sb, ns); 67 return -ENOMEM;
38 if (!new_sb)
39 kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
40 else if (!IS_ERR(root))
41 root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
42 68
43 return root; 69 kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
70 kfc->root = sysfs_root;
71 kfc->magic = SYSFS_MAGIC;
72 fc->fs_private = kfc;
73 fc->ops = &sysfs_fs_context_ops;
74 if (fc->user_ns)
75 put_user_ns(fc->user_ns);
76 fc->user_ns = get_user_ns(netns->user_ns);
77 fc->global = true;
78 return 0;
44} 79}
45 80
46static void sysfs_kill_sb(struct super_block *sb) 81static void sysfs_kill_sb(struct super_block *sb)
@@ -52,10 +87,10 @@ static void sysfs_kill_sb(struct super_block *sb)
52} 87}
53 88
54static struct file_system_type sysfs_fs_type = { 89static struct file_system_type sysfs_fs_type = {
55 .name = "sysfs", 90 .name = "sysfs",
56 .mount = sysfs_mount, 91 .init_fs_context = sysfs_init_fs_context,
57 .kill_sb = sysfs_kill_sb, 92 .kill_sb = sysfs_kill_sb,
58 .fs_flags = FS_USERNS_MOUNT, 93 .fs_flags = FS_USERNS_MOUNT,
59}; 94};
60 95
61int __init sysfs_init(void) 96int __init sysfs_init(void)
diff --git a/include/linux/errno.h b/include/linux/errno.h
index 3cba627577d6..d73f597a2484 100644
--- a/include/linux/errno.h
+++ b/include/linux/errno.h
@@ -18,6 +18,7 @@
18#define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ 18#define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
19#define EPROBE_DEFER 517 /* Driver requests probe retry */ 19#define EPROBE_DEFER 517 /* Driver requests probe retry */
20#define EOPENSTALE 518 /* open found a stale dentry */ 20#define EOPENSTALE 518 /* open found a stale dentry */
21#define ENOPARAM 519 /* Parameter not supported */
21 22
22/* Defined for the NFSv3 protocol */ 23/* Defined for the NFSv3 protocol */
23#define EBADHANDLE 521 /* Illegal NFS file handle */ 24#define EBADHANDLE 521 /* Illegal NFS file handle */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 80c6a4093b46..8b42df09b04c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -64,6 +64,8 @@ struct workqueue_struct;
64struct iov_iter; 64struct iov_iter;
65struct fscrypt_info; 65struct fscrypt_info;
66struct fscrypt_operations; 66struct fscrypt_operations;
67struct fs_context;
68struct fs_parameter_description;
67 69
68extern void __init inode_init(void); 70extern void __init inode_init(void);
69extern void __init inode_init_early(void); 71extern void __init inode_init_early(void);
@@ -1349,6 +1351,7 @@ extern int send_sigurg(struct fown_struct *fown);
1349 1351
1350/* These sb flags are internal to the kernel */ 1352/* These sb flags are internal to the kernel */
1351#define SB_SUBMOUNT (1<<26) 1353#define SB_SUBMOUNT (1<<26)
1354#define SB_FORCE (1<<27)
1352#define SB_NOSEC (1<<28) 1355#define SB_NOSEC (1<<28)
1353#define SB_BORN (1<<29) 1356#define SB_BORN (1<<29)
1354#define SB_ACTIVE (1<<30) 1357#define SB_ACTIVE (1<<30)
@@ -1459,7 +1462,7 @@ struct super_block {
1459 * Filesystem subtype. If non-empty the filesystem type field 1462 * Filesystem subtype. If non-empty the filesystem type field
1460 * in /proc/mounts will be "type.subtype" 1463 * in /proc/mounts will be "type.subtype"
1461 */ 1464 */
1462 char *s_subtype; 1465 const char *s_subtype;
1463 1466
1464 const struct dentry_operations *s_d_op; /* default d_op for dentries */ 1467 const struct dentry_operations *s_d_op; /* default d_op for dentries */
1465 1468
@@ -2170,6 +2173,8 @@ struct file_system_type {
2170#define FS_HAS_SUBTYPE 4 2173#define FS_HAS_SUBTYPE 4
2171#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ 2174#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
2172#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ 2175#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
2176 int (*init_fs_context)(struct fs_context *);
2177 const struct fs_parameter_description *parameters;
2173 struct dentry *(*mount) (struct file_system_type *, int, 2178 struct dentry *(*mount) (struct file_system_type *, int,
2174 const char *, void *); 2179 const char *, void *);
2175 void (*kill_sb) (struct super_block *); 2180 void (*kill_sb) (struct super_block *);
@@ -2225,8 +2230,12 @@ void kill_litter_super(struct super_block *sb);
2225void deactivate_super(struct super_block *sb); 2230void deactivate_super(struct super_block *sb);
2226void deactivate_locked_super(struct super_block *sb); 2231void deactivate_locked_super(struct super_block *sb);
2227int set_anon_super(struct super_block *s, void *data); 2232int set_anon_super(struct super_block *s, void *data);
2233int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
2228int get_anon_bdev(dev_t *); 2234int get_anon_bdev(dev_t *);
2229void free_anon_bdev(dev_t); 2235void free_anon_bdev(dev_t);
2236struct super_block *sget_fc(struct fs_context *fc,
2237 int (*test)(struct super_block *, struct fs_context *),
2238 int (*set)(struct super_block *, struct fs_context *));
2230struct super_block *sget_userns(struct file_system_type *type, 2239struct super_block *sget_userns(struct file_system_type *type,
2231 int (*test)(struct super_block *,void *), 2240 int (*test)(struct super_block *,void *),
2232 int (*set)(struct super_block *,void *), 2241 int (*set)(struct super_block *,void *),
@@ -2269,8 +2278,7 @@ mount_pseudo(struct file_system_type *fs_type, char *name,
2269 2278
2270extern int register_filesystem(struct file_system_type *); 2279extern int register_filesystem(struct file_system_type *);
2271extern int unregister_filesystem(struct file_system_type *); 2280extern int unregister_filesystem(struct file_system_type *);
2272extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); 2281extern struct vfsmount *kern_mount(struct file_system_type *);
2273#define kern_mount(type) kern_mount_data(type, NULL)
2274extern void kern_unmount(struct vfsmount *mnt); 2282extern void kern_unmount(struct vfsmount *mnt);
2275extern int may_umount_tree(struct vfsmount *); 2283extern int may_umount_tree(struct vfsmount *);
2276extern int may_umount(struct vfsmount *); 2284extern int may_umount(struct vfsmount *);
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
new file mode 100644
index 000000000000..eaca452088fa
--- /dev/null
+++ b/include/linux/fs_context.h
@@ -0,0 +1,188 @@
1/* Filesystem superblock creation and reconfiguration context.
2 *
3 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_FS_CONTEXT_H
13#define _LINUX_FS_CONTEXT_H
14
15#include <linux/kernel.h>
16#include <linux/errno.h>
17#include <linux/security.h>
18
19struct cred;
20struct dentry;
21struct file_operations;
22struct file_system_type;
23struct mnt_namespace;
24struct net;
25struct pid_namespace;
26struct super_block;
27struct user_namespace;
28struct vfsmount;
29struct path;
30
31enum fs_context_purpose {
32 FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */
33 FS_CONTEXT_FOR_SUBMOUNT, /* New superblock for automatic submount */
34 FS_CONTEXT_FOR_RECONFIGURE, /* Superblock reconfiguration (remount) */
35};
36
37/*
38 * Type of parameter value.
39 */
40enum fs_value_type {
41 fs_value_is_undefined,
42 fs_value_is_flag, /* Parameter was not given a value */
43 fs_value_is_string, /* Value is a string */
44 fs_value_is_blob, /* Value is a binary blob */
45 fs_value_is_filename, /* Value is a filename* + dirfd */
46 fs_value_is_filename_empty, /* Value is a filename* + dirfd + AT_EMPTY_PATH */
47 fs_value_is_file, /* Value is a file* */
48};
49
50/*
51 * Configuration parameter.
52 */
53struct fs_parameter {
54 const char *key; /* Parameter name */
55 enum fs_value_type type:8; /* The type of value here */
56 union {
57 char *string;
58 void *blob;
59 struct filename *name;
60 struct file *file;
61 };
62 size_t size;
63 int dirfd;
64};
65
66/*
67 * Filesystem context for holding the parameters used in the creation or
68 * reconfiguration of a superblock.
69 *
70 * Superblock creation fills in ->root whereas reconfiguration begins with this
71 * already set.
72 *
73 * See Documentation/filesystems/mount_api.txt
74 */
75struct fs_context {
76 const struct fs_context_operations *ops;
77 struct file_system_type *fs_type;
78 void *fs_private; /* The filesystem's context */
79 struct dentry *root; /* The root and superblock */
80 struct user_namespace *user_ns; /* The user namespace for this mount */
81 struct net *net_ns; /* The network namespace for this mount */
82 const struct cred *cred; /* The mounter's credentials */
83 const char *source; /* The source name (eg. dev path) */
84 const char *subtype; /* The subtype to set on the superblock */
85 void *security; /* Linux S&M options */
86 void *s_fs_info; /* Proposed s_fs_info */
87 unsigned int sb_flags; /* Proposed superblock flags (SB_*) */
88 unsigned int sb_flags_mask; /* Superblock flags that were changed */
89 unsigned int lsm_flags; /* Information flags from the fs to the LSM */
90 enum fs_context_purpose purpose:8;
91 bool need_free:1; /* Need to call ops->free() */
92 bool global:1; /* Goes into &init_user_ns */
93};
94
95struct fs_context_operations {
96 void (*free)(struct fs_context *fc);
97 int (*dup)(struct fs_context *fc, struct fs_context *src_fc);
98 int (*parse_param)(struct fs_context *fc, struct fs_parameter *param);
99 int (*parse_monolithic)(struct fs_context *fc, void *data);
100 int (*get_tree)(struct fs_context *fc);
101 int (*reconfigure)(struct fs_context *fc);
102};
103
104/*
105 * fs_context manipulation functions.
106 */
107extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type,
108 unsigned int sb_flags);
109extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
110 unsigned int sb_flags,
111 unsigned int sb_flags_mask);
112extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_type,
113 struct dentry *reference);
114
115extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc);
116extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param);
117extern int vfs_parse_fs_string(struct fs_context *fc, const char *key,
118 const char *value, size_t v_size);
119extern int generic_parse_monolithic(struct fs_context *fc, void *data);
120extern int vfs_get_tree(struct fs_context *fc);
121extern void put_fs_context(struct fs_context *fc);
122
123/*
124 * sget() wrapper to be called from the ->get_tree() op.
125 */
126enum vfs_get_super_keying {
127 vfs_get_single_super, /* Only one such superblock may exist */
128 vfs_get_keyed_super, /* Superblocks with different s_fs_info keys may exist */
129 vfs_get_independent_super, /* Multiple independent superblocks may exist */
130};
131extern int vfs_get_super(struct fs_context *fc,
132 enum vfs_get_super_keying keying,
133 int (*fill_super)(struct super_block *sb,
134 struct fs_context *fc));
135
136extern const struct file_operations fscontext_fops;
137
138#ifdef CONFIG_PRINTK
139extern __attribute__((format(printf, 2, 3)))
140void logfc(struct fs_context *fc, const char *fmt, ...);
141#else
142static inline __attribute__((format(printf, 2, 3)))
143void logfc(struct fs_context *fc, const char *fmt, ...)
144{
145}
146#endif
147
148/**
149 * infof - Store supplementary informational message
150 * @fc: The context in which to log the informational message
151 * @fmt: The format string
152 *
153 * Store the supplementary informational message for the process if the process
154 * has enabled the facility.
155 */
156#define infof(fc, fmt, ...) ({ logfc(fc, "i "fmt, ## __VA_ARGS__); })
157
158/**
159 * warnf - Store supplementary warning message
160 * @fc: The context in which to log the warning message
161 * @fmt: The format string
162 *
163 * Store the supplementary warning message for the process if the process has
164 * enabled the facility.
165 */
166#define warnf(fc, fmt, ...) ({ logfc(fc, "w "fmt, ## __VA_ARGS__); })
167
168/**
169 * errorf - Store supplementary error message
170 * @fc: The context in which to log the error message
171 * @fmt: The format string
172 *
173 * Store the supplementary error message for the process if the process has
174 * enabled the facility.
175 */
176#define errorf(fc, fmt, ...) ({ logfc(fc, "e "fmt, ## __VA_ARGS__); })
177
178/**
179 * invalf - Store supplementary invalid argument error message
180 * @fc: The context in which to log the error message
181 * @fmt: The format string
182 *
183 * Store the supplementary error message for the process if the process has
184 * enabled the facility and return -EINVAL.
185 */
186#define invalf(fc, fmt, ...) ({ errorf(fc, fmt, ## __VA_ARGS__); -EINVAL; })
187
188#endif /* _LINUX_FS_CONTEXT_H */
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
new file mode 100644
index 000000000000..d966f96ffe62
--- /dev/null
+++ b/include/linux/fs_parser.h
@@ -0,0 +1,151 @@
1/* Filesystem parameter description and parser
2 *
3 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_FS_PARSER_H
13#define _LINUX_FS_PARSER_H
14
15#include <linux/fs_context.h>
16
17struct path;
18
19struct constant_table {
20 const char *name;
21 int value;
22};
23
24/*
25 * The type of parameter expected.
26 */
27enum fs_parameter_type {
28 __fs_param_wasnt_defined,
29 fs_param_is_flag,
30 fs_param_is_bool,
31 fs_param_is_u32,
32 fs_param_is_u32_octal,
33 fs_param_is_u32_hex,
34 fs_param_is_s32,
35 fs_param_is_u64,
36 fs_param_is_enum,
37 fs_param_is_string,
38 fs_param_is_blob,
39 fs_param_is_blockdev,
40 fs_param_is_path,
41 fs_param_is_fd,
42 nr__fs_parameter_type,
43};
44
45/*
46 * Specification of the type of value a parameter wants.
47 *
48 * Note that the fsparam_flag(), fsparam_string(), fsparam_u32(), ... macros
49 * should be used to generate elements of this type.
50 */
51struct fs_parameter_spec {
52 const char *name;
53 u8 opt; /* Option number (returned by fs_parse()) */
54 enum fs_parameter_type type:8; /* The desired parameter type */
55 unsigned short flags;
56#define fs_param_v_optional 0x0001 /* The value is optional */
57#define fs_param_neg_with_no 0x0002 /* "noxxx" is negative param */
58#define fs_param_neg_with_empty 0x0004 /* "xxx=" is negative param */
59#define fs_param_deprecated 0x0008 /* The param is deprecated */
60};
61
62struct fs_parameter_enum {
63 u8 opt; /* Option number (as fs_parameter_spec::opt) */
64 char name[14];
65 u8 value;
66};
67
68struct fs_parameter_description {
69 const char name[16]; /* Name for logging purposes */
70 const struct fs_parameter_spec *specs; /* List of param specifications */
71 const struct fs_parameter_enum *enums; /* Enum values */
72};
73
74/*
75 * Result of parse.
76 */
77struct fs_parse_result {
78 bool negated; /* T if param was "noxxx" */
79 bool has_value; /* T if value supplied to param */
80 union {
81 bool boolean; /* For spec_bool */
82 int int_32; /* For spec_s32/spec_enum */
83 unsigned int uint_32; /* For spec_u32{,_octal,_hex}/spec_enum */
84 u64 uint_64; /* For spec_u64 */
85 };
86};
87
88extern int fs_parse(struct fs_context *fc,
89 const struct fs_parameter_description *desc,
90 struct fs_parameter *value,
91 struct fs_parse_result *result);
92extern int fs_lookup_param(struct fs_context *fc,
93 struct fs_parameter *param,
94 bool want_bdev,
95 struct path *_path);
96
97extern int __lookup_constant(const struct constant_table tbl[], size_t tbl_size,
98 const char *name, int not_found);
99#define lookup_constant(t, n, nf) __lookup_constant(t, ARRAY_SIZE(t), (n), (nf))
100
101#ifdef CONFIG_VALIDATE_FS_PARSER
102extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
103 int low, int high, int special);
104extern bool fs_validate_description(const struct fs_parameter_description *desc);
105#else
106static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
107 int low, int high, int special)
108{ return true; }
109static inline bool fs_validate_description(const struct fs_parameter_description *desc)
110{ return true; }
111#endif
112
113/*
114 * Parameter type, name, index and flags element constructors. Use as:
115 *
116 * fsparam_xxxx("foo", Opt_foo)
117 *
118 * If existing helpers are not enough, direct use of __fsparam() would
119 * work, but any such case is probably a sign that new helper is needed.
120 * Helpers will remain stable; low-level implementation may change.
121 */
122#define __fsparam(TYPE, NAME, OPT, FLAGS) \
123 { \
124 .name = NAME, \
125 .opt = OPT, \
126 .type = TYPE, \
127 .flags = FLAGS \
128 }
129
130#define fsparam_flag(NAME, OPT) __fsparam(fs_param_is_flag, NAME, OPT, 0)
131#define fsparam_flag_no(NAME, OPT) \
132 __fsparam(fs_param_is_flag, NAME, OPT, \
133 fs_param_neg_with_no)
134#define fsparam_bool(NAME, OPT) __fsparam(fs_param_is_bool, NAME, OPT, 0)
135#define fsparam_u32(NAME, OPT) __fsparam(fs_param_is_u32, NAME, OPT, 0)
136#define fsparam_u32oct(NAME, OPT) \
137 __fsparam(fs_param_is_u32_octal, NAME, OPT, 0)
138#define fsparam_u32hex(NAME, OPT) \
139 __fsparam(fs_param_is_u32_hex, NAME, OPT, 0)
140#define fsparam_s32(NAME, OPT) __fsparam(fs_param_is_s32, NAME, OPT, 0)
141#define fsparam_u64(NAME, OPT) __fsparam(fs_param_is_u64, NAME, OPT, 0)
142#define fsparam_enum(NAME, OPT) __fsparam(fs_param_is_enum, NAME, OPT, 0)
143#define fsparam_string(NAME, OPT) \
144 __fsparam(fs_param_is_string, NAME, OPT, 0)
145#define fsparam_blob(NAME, OPT) __fsparam(fs_param_is_blob, NAME, OPT, 0)
146#define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0)
147#define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0)
148#define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0)
149
150
151#endif /* _LINUX_FS_PARSER_H */
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 0cac1207bb00..c8893f663470 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -26,7 +26,9 @@ struct vm_area_struct;
26struct super_block; 26struct super_block;
27struct file_system_type; 27struct file_system_type;
28struct poll_table_struct; 28struct poll_table_struct;
29struct fs_context;
29 30
31struct kernfs_fs_context;
30struct kernfs_open_node; 32struct kernfs_open_node;
31struct kernfs_iattrs; 33struct kernfs_iattrs;
32 34
@@ -168,7 +170,6 @@ struct kernfs_node {
168 * kernfs_node parameter. 170 * kernfs_node parameter.
169 */ 171 */
170struct kernfs_syscall_ops { 172struct kernfs_syscall_ops {
171 int (*remount_fs)(struct kernfs_root *root, int *flags, char *data);
172 int (*show_options)(struct seq_file *sf, struct kernfs_root *root); 173 int (*show_options)(struct seq_file *sf, struct kernfs_root *root);
173 174
174 int (*mkdir)(struct kernfs_node *parent, const char *name, 175 int (*mkdir)(struct kernfs_node *parent, const char *name,
@@ -272,6 +273,18 @@ struct kernfs_ops {
272#endif 273#endif
273}; 274};
274 275
276/*
277 * The kernfs superblock creation/mount parameter context.
278 */
279struct kernfs_fs_context {
280 struct kernfs_root *root; /* Root of the hierarchy being mounted */
281 void *ns_tag; /* Namespace tag of the mount (or NULL) */
282 unsigned long magic; /* File system specific magic number */
283
284 /* The following are set/used by kernfs_get_tree() */
285 bool new_sb_created; /* Set to T if we allocated a new sb */
286};
287
275#ifdef CONFIG_KERNFS 288#ifdef CONFIG_KERNFS
276 289
277static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) 290static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
@@ -359,11 +372,9 @@ __poll_t kernfs_generic_poll(struct kernfs_open_file *of,
359void kernfs_notify(struct kernfs_node *kn); 372void kernfs_notify(struct kernfs_node *kn);
360 373
361const void *kernfs_super_ns(struct super_block *sb); 374const void *kernfs_super_ns(struct super_block *sb);
362struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, 375int kernfs_get_tree(struct fs_context *fc);
363 struct kernfs_root *root, unsigned long magic, 376void kernfs_free_fs_context(struct fs_context *fc);
364 bool *new_sb_created, const void *ns);
365void kernfs_kill_sb(struct super_block *sb); 377void kernfs_kill_sb(struct super_block *sb);
366struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns);
367 378
368void kernfs_init(void); 379void kernfs_init(void);
369 380
@@ -465,11 +476,10 @@ static inline void kernfs_notify(struct kernfs_node *kn) { }
465static inline const void *kernfs_super_ns(struct super_block *sb) 476static inline const void *kernfs_super_ns(struct super_block *sb)
466{ return NULL; } 477{ return NULL; }
467 478
468static inline struct dentry * 479static inline int kernfs_get_tree(struct fs_context *fc)
469kernfs_mount_ns(struct file_system_type *fs_type, int flags, 480{ return -ENOSYS; }
470 struct kernfs_root *root, unsigned long magic, 481
471 bool *new_sb_created, const void *ns) 482static inline void kernfs_free_fs_context(struct fs_context *fc) { }
472{ return ERR_PTR(-ENOSYS); }
473 483
474static inline void kernfs_kill_sb(struct super_block *sb) { } 484static inline void kernfs_kill_sb(struct super_block *sb) { }
475 485
@@ -552,13 +562,4 @@ static inline int kernfs_rename(struct kernfs_node *kn,
552 return kernfs_rename_ns(kn, new_parent, new_name, NULL); 562 return kernfs_rename_ns(kn, new_parent, new_name, NULL);
553} 563}
554 564
555static inline struct dentry *
556kernfs_mount(struct file_system_type *fs_type, int flags,
557 struct kernfs_root *root, unsigned long magic,
558 bool *new_sb_created)
559{
560 return kernfs_mount_ns(fs_type, flags, root,
561 magic, new_sb_created, NULL);
562}
563
564#endif /* __LINUX_KERNFS_H */ 565#endif /* __LINUX_KERNFS_H */
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 85a301632cf1..a9b8ff578b6b 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -76,6 +76,22 @@
76 * changes on the process such as clearing out non-inheritable signal 76 * changes on the process such as clearing out non-inheritable signal
77 * state. This is called immediately after commit_creds(). 77 * state. This is called immediately after commit_creds().
78 * 78 *
79 * Security hooks for mount using fs_context.
80 * [See also Documentation/filesystems/mount_api.txt]
81 *
82 * @fs_context_dup:
83 * Allocate and attach a security structure to fc->security. This pointer
84 * is initialised to NULL by the caller.
85 * @fc indicates the new filesystem context.
86 * @src_fc indicates the original filesystem context.
87 * @fs_context_parse_param:
88 * Userspace provided a parameter to configure a superblock. The LSM may
89 * reject it with an error and may use it for itself, in which case it
90 * should return 0; otherwise it should return -ENOPARAM to pass it on to
91 * the filesystem.
92 * @fc indicates the filesystem context.
93 * @param indicates the parameter being supplied.
94 *
79 * Security hooks for filesystem operations. 95 * Security hooks for filesystem operations.
80 * 96 *
81 * @sb_alloc_security: 97 * @sb_alloc_security:
@@ -1460,6 +1476,9 @@ union security_list_options {
1460 void (*bprm_committing_creds)(struct linux_binprm *bprm); 1476 void (*bprm_committing_creds)(struct linux_binprm *bprm);
1461 void (*bprm_committed_creds)(struct linux_binprm *bprm); 1477 void (*bprm_committed_creds)(struct linux_binprm *bprm);
1462 1478
1479 int (*fs_context_dup)(struct fs_context *fc, struct fs_context *src_sc);
1480 int (*fs_context_parse_param)(struct fs_context *fc, struct fs_parameter *param);
1481
1463 int (*sb_alloc_security)(struct super_block *sb); 1482 int (*sb_alloc_security)(struct super_block *sb);
1464 void (*sb_free_security)(struct super_block *sb); 1483 void (*sb_free_security)(struct super_block *sb);
1465 void (*sb_free_mnt_opts)(void *mnt_opts); 1484 void (*sb_free_mnt_opts)(void *mnt_opts);
@@ -1800,6 +1819,8 @@ struct security_hook_heads {
1800 struct hlist_head bprm_check_security; 1819 struct hlist_head bprm_check_security;
1801 struct hlist_head bprm_committing_creds; 1820 struct hlist_head bprm_committing_creds;
1802 struct hlist_head bprm_committed_creds; 1821 struct hlist_head bprm_committed_creds;
1822 struct hlist_head fs_context_dup;
1823 struct hlist_head fs_context_parse_param;
1803 struct hlist_head sb_alloc_security; 1824 struct hlist_head sb_alloc_security;
1804 struct hlist_head sb_free_security; 1825 struct hlist_head sb_free_security;
1805 struct hlist_head sb_free_mnt_opts; 1826 struct hlist_head sb_free_mnt_opts;
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 037eed52164b..9197ddbf35fb 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -21,6 +21,7 @@ struct super_block;
21struct vfsmount; 21struct vfsmount;
22struct dentry; 22struct dentry;
23struct mnt_namespace; 23struct mnt_namespace;
24struct fs_context;
24 25
25#define MNT_NOSUID 0x01 26#define MNT_NOSUID 0x01
26#define MNT_NODEV 0x02 27#define MNT_NODEV 0x02
@@ -88,6 +89,8 @@ struct path;
88extern struct vfsmount *clone_private_mount(const struct path *path); 89extern struct vfsmount *clone_private_mount(const struct path *path);
89 90
90struct file_system_type; 91struct file_system_type;
92extern struct vfsmount *fc_mount(struct fs_context *fc);
93extern struct vfsmount *vfs_create_mount(struct fs_context *fc);
91extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, 94extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
92 int flags, const char *name, 95 int flags, const char *name,
93 void *data); 96 void *data);
diff --git a/include/linux/security.h b/include/linux/security.h
index 2b35a43d11d6..49f2685324b0 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,6 +53,9 @@ struct msg_msg;
53struct xattr; 53struct xattr;
54struct xfrm_sec_ctx; 54struct xfrm_sec_ctx;
55struct mm_struct; 55struct mm_struct;
56struct fs_context;
57struct fs_parameter;
58enum fs_value_type;
56 59
57/* Default (no) options for the capable function */ 60/* Default (no) options for the capable function */
58#define CAP_OPT_NONE 0x0 61#define CAP_OPT_NONE 0x0
@@ -61,7 +64,7 @@ struct mm_struct;
61/* If capable is being called by a setid function */ 64/* If capable is being called by a setid function */
62#define CAP_OPT_INSETID BIT(2) 65#define CAP_OPT_INSETID BIT(2)
63 66
64/* LSM Agnostic defines for sb_set_mnt_opts */ 67/* LSM Agnostic defines for fs_context::lsm_flags */
65#define SECURITY_LSM_NATIVE_LABELS 1 68#define SECURITY_LSM_NATIVE_LABELS 1
66 69
67struct ctl_table; 70struct ctl_table;
@@ -223,6 +226,8 @@ int security_bprm_set_creds(struct linux_binprm *bprm);
223int security_bprm_check(struct linux_binprm *bprm); 226int security_bprm_check(struct linux_binprm *bprm);
224void security_bprm_committing_creds(struct linux_binprm *bprm); 227void security_bprm_committing_creds(struct linux_binprm *bprm);
225void security_bprm_committed_creds(struct linux_binprm *bprm); 228void security_bprm_committed_creds(struct linux_binprm *bprm);
229int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc);
230int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param);
226int security_sb_alloc(struct super_block *sb); 231int security_sb_alloc(struct super_block *sb);
227void security_sb_free(struct super_block *sb); 232void security_sb_free(struct super_block *sb);
228void security_free_mnt_opts(void **mnt_opts); 233void security_free_mnt_opts(void **mnt_opts);
@@ -519,6 +524,17 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm)
519{ 524{
520} 525}
521 526
527static inline int security_fs_context_dup(struct fs_context *fc,
528 struct fs_context *src_fc)
529{
530 return 0;
531}
532static inline int security_fs_context_parse_param(struct fs_context *fc,
533 struct fs_parameter *param)
534{
535 return -ENOPARAM;
536}
537
522static inline int security_sb_alloc(struct super_block *sb) 538static inline int security_sb_alloc(struct super_block *sb)
523{ 539{
524 return 0; 540 return 0;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c839bf83231d..aea30530c472 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -18,6 +18,7 @@
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/file.h> 19#include <linux/file.h>
20#include <linux/mount.h> 20#include <linux/mount.h>
21#include <linux/fs_context.h>
21#include <linux/namei.h> 22#include <linux/namei.h>
22#include <linux/sysctl.h> 23#include <linux/sysctl.h>
23#include <linux/poll.h> 24#include <linux/poll.h>
@@ -42,6 +43,10 @@
42#include <net/sock.h> 43#include <net/sock.h>
43#include "util.h" 44#include "util.h"
44 45
46struct mqueue_fs_context {
47 struct ipc_namespace *ipc_ns;
48};
49
45#define MQUEUE_MAGIC 0x19800202 50#define MQUEUE_MAGIC 0x19800202
46#define DIRENT_SIZE 20 51#define DIRENT_SIZE 20
47#define FILENT_SIZE 80 52#define FILENT_SIZE 80
@@ -87,9 +92,11 @@ struct mqueue_inode_info {
87 unsigned long qsize; /* size of queue in memory (sum of all msgs) */ 92 unsigned long qsize; /* size of queue in memory (sum of all msgs) */
88}; 93};
89 94
95static struct file_system_type mqueue_fs_type;
90static const struct inode_operations mqueue_dir_inode_operations; 96static const struct inode_operations mqueue_dir_inode_operations;
91static const struct file_operations mqueue_file_operations; 97static const struct file_operations mqueue_file_operations;
92static const struct super_operations mqueue_super_ops; 98static const struct super_operations mqueue_super_ops;
99static const struct fs_context_operations mqueue_fs_context_ops;
93static void remove_notification(struct mqueue_inode_info *info); 100static void remove_notification(struct mqueue_inode_info *info);
94 101
95static struct kmem_cache *mqueue_inode_cachep; 102static struct kmem_cache *mqueue_inode_cachep;
@@ -322,7 +329,7 @@ err:
322 return ERR_PTR(ret); 329 return ERR_PTR(ret);
323} 330}
324 331
325static int mqueue_fill_super(struct super_block *sb, void *data, int silent) 332static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc)
326{ 333{
327 struct inode *inode; 334 struct inode *inode;
328 struct ipc_namespace *ns = sb->s_fs_info; 335 struct ipc_namespace *ns = sb->s_fs_info;
@@ -343,18 +350,56 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
343 return 0; 350 return 0;
344} 351}
345 352
346static struct dentry *mqueue_mount(struct file_system_type *fs_type, 353static int mqueue_get_tree(struct fs_context *fc)
347 int flags, const char *dev_name,
348 void *data)
349{ 354{
350 struct ipc_namespace *ns; 355 struct mqueue_fs_context *ctx = fc->fs_private;
351 if (flags & SB_KERNMOUNT) { 356
352 ns = data; 357 put_user_ns(fc->user_ns);
353 data = NULL; 358 fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
354 } else { 359 fc->s_fs_info = ctx->ipc_ns;
355 ns = current->nsproxy->ipc_ns; 360 return vfs_get_super(fc, vfs_get_keyed_super, mqueue_fill_super);
356 } 361}
357 return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super); 362
363static void mqueue_fs_context_free(struct fs_context *fc)
364{
365 struct mqueue_fs_context *ctx = fc->fs_private;
366
367 if (ctx->ipc_ns)
368 put_ipc_ns(ctx->ipc_ns);
369 kfree(ctx);
370}
371
372static int mqueue_init_fs_context(struct fs_context *fc)
373{
374 struct mqueue_fs_context *ctx;
375
376 ctx = kzalloc(sizeof(struct mqueue_fs_context), GFP_KERNEL);
377 if (!ctx)
378 return -ENOMEM;
379
380 ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
381 fc->fs_private = ctx;
382 fc->ops = &mqueue_fs_context_ops;
383 return 0;
384}
385
386static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
387{
388 struct mqueue_fs_context *ctx;
389 struct fs_context *fc;
390 struct vfsmount *mnt;
391
392 fc = fs_context_for_mount(&mqueue_fs_type, SB_KERNMOUNT);
393 if (IS_ERR(fc))
394 return ERR_CAST(fc);
395
396 ctx = fc->fs_private;
397 put_ipc_ns(ctx->ipc_ns);
398 ctx->ipc_ns = get_ipc_ns(ns);
399
400 mnt = fc_mount(fc);
401 put_fs_context(fc);
402 return mnt;
358} 403}
359 404
360static void init_once(void *foo) 405static void init_once(void *foo)
@@ -1522,15 +1567,22 @@ static const struct super_operations mqueue_super_ops = {
1522 .statfs = simple_statfs, 1567 .statfs = simple_statfs,
1523}; 1568};
1524 1569
1570static const struct fs_context_operations mqueue_fs_context_ops = {
1571 .free = mqueue_fs_context_free,
1572 .get_tree = mqueue_get_tree,
1573};
1574
1525static struct file_system_type mqueue_fs_type = { 1575static struct file_system_type mqueue_fs_type = {
1526 .name = "mqueue", 1576 .name = "mqueue",
1527 .mount = mqueue_mount, 1577 .init_fs_context = mqueue_init_fs_context,
1528 .kill_sb = kill_litter_super, 1578 .kill_sb = kill_litter_super,
1529 .fs_flags = FS_USERNS_MOUNT, 1579 .fs_flags = FS_USERNS_MOUNT,
1530}; 1580};
1531 1581
1532int mq_init_ns(struct ipc_namespace *ns) 1582int mq_init_ns(struct ipc_namespace *ns)
1533{ 1583{
1584 struct vfsmount *m;
1585
1534 ns->mq_queues_count = 0; 1586 ns->mq_queues_count = 0;
1535 ns->mq_queues_max = DFLT_QUEUESMAX; 1587 ns->mq_queues_max = DFLT_QUEUESMAX;
1536 ns->mq_msg_max = DFLT_MSGMAX; 1588 ns->mq_msg_max = DFLT_MSGMAX;
@@ -1538,12 +1590,10 @@ int mq_init_ns(struct ipc_namespace *ns)
1538 ns->mq_msg_default = DFLT_MSG; 1590 ns->mq_msg_default = DFLT_MSG;
1539 ns->mq_msgsize_default = DFLT_MSGSIZE; 1591 ns->mq_msgsize_default = DFLT_MSGSIZE;
1540 1592
1541 ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); 1593 m = mq_create_mount(ns);
1542 if (IS_ERR(ns->mq_mnt)) { 1594 if (IS_ERR(m))
1543 int err = PTR_ERR(ns->mq_mnt); 1595 return PTR_ERR(m);
1544 ns->mq_mnt = NULL; 1596 ns->mq_mnt = m;
1545 return err;
1546 }
1547 return 0; 1597 return 0;
1548} 1598}
1549 1599
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 21607791d62c..b3ca1476ca51 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -42,7 +42,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
42 goto fail; 42 goto fail;
43 43
44 err = -ENOMEM; 44 err = -ENOMEM;
45 ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); 45 ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
46 if (ns == NULL) 46 if (ns == NULL)
47 goto fail_dec; 47 goto fail_dec;
48 48
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index c9a35f09e4b9..30e39f3932ad 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -7,6 +7,7 @@
7#include <linux/workqueue.h> 7#include <linux/workqueue.h>
8#include <linux/list.h> 8#include <linux/list.h>
9#include <linux/refcount.h> 9#include <linux/refcount.h>
10#include <linux/fs_context.h>
10 11
11#define TRACE_CGROUP_PATH_LEN 1024 12#define TRACE_CGROUP_PATH_LEN 1024
12extern spinlock_t trace_cgroup_path_lock; 13extern spinlock_t trace_cgroup_path_lock;
@@ -37,6 +38,31 @@ extern void __init enable_debug_cgroup(void);
37 } while (0) 38 } while (0)
38 39
39/* 40/*
41 * The cgroup filesystem superblock creation/mount context.
42 */
43struct cgroup_fs_context {
44 struct kernfs_fs_context kfc;
45 struct cgroup_root *root;
46 struct cgroup_namespace *ns;
47 unsigned int flags; /* CGRP_ROOT_* flags */
48
49 /* cgroup1 bits */
50 bool cpuset_clone_children;
51 bool none; /* User explicitly requested empty subsystem */
52 bool all_ss; /* Seen 'all' option */
53 u16 subsys_mask; /* Selected subsystems */
54 char *name; /* Hierarchy name */
55 char *release_agent; /* Path for release notifications */
56};
57
58static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc)
59{
60 struct kernfs_fs_context *kfc = fc->fs_private;
61
62 return container_of(kfc, struct cgroup_fs_context, kfc);
63}
64
65/*
40 * A cgroup can be associated with multiple css_sets as different tasks may 66 * A cgroup can be associated with multiple css_sets as different tasks may
41 * belong to different cgroups on different hierarchies. In the other 67 * belong to different cgroups on different hierarchies. In the other
42 * direction, a css_set is naturally associated with multiple cgroups. 68 * direction, a css_set is naturally associated with multiple cgroups.
@@ -117,16 +143,6 @@ struct cgroup_mgctx {
117#define DEFINE_CGROUP_MGCTX(name) \ 143#define DEFINE_CGROUP_MGCTX(name) \
118 struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) 144 struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
119 145
120struct cgroup_sb_opts {
121 u16 subsys_mask;
122 unsigned int flags;
123 char *release_agent;
124 bool cpuset_clone_children;
125 char *name;
126 /* User explicitly requested empty subsystem */
127 bool none;
128};
129
130extern struct mutex cgroup_mutex; 146extern struct mutex cgroup_mutex;
131extern spinlock_t css_set_lock; 147extern spinlock_t css_set_lock;
132extern struct cgroup_subsys *cgroup_subsys[]; 148extern struct cgroup_subsys *cgroup_subsys[];
@@ -197,12 +213,10 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
197 struct cgroup_namespace *ns); 213 struct cgroup_namespace *ns);
198 214
199void cgroup_free_root(struct cgroup_root *root); 215void cgroup_free_root(struct cgroup_root *root);
200void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); 216void init_cgroup_root(struct cgroup_fs_context *ctx);
201int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); 217int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
202int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); 218int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
203struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, 219int cgroup_do_get_tree(struct fs_context *fc);
204 struct cgroup_root *root, unsigned long magic,
205 struct cgroup_namespace *ns);
206 220
207int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); 221int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp);
208void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); 222void cgroup_migrate_finish(struct cgroup_mgctx *mgctx);
@@ -246,14 +260,15 @@ extern const struct proc_ns_operations cgroupns_operations;
246 */ 260 */
247extern struct cftype cgroup1_base_files[]; 261extern struct cftype cgroup1_base_files[];
248extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; 262extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops;
263extern const struct fs_parameter_description cgroup1_fs_parameters;
249 264
250int proc_cgroupstats_show(struct seq_file *m, void *v); 265int proc_cgroupstats_show(struct seq_file *m, void *v);
251bool cgroup1_ssid_disabled(int ssid); 266bool cgroup1_ssid_disabled(int ssid);
252void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); 267void cgroup1_pidlist_destroy_all(struct cgroup *cgrp);
253void cgroup1_release_agent(struct work_struct *work); 268void cgroup1_release_agent(struct work_struct *work);
254void cgroup1_check_for_release(struct cgroup *cgrp); 269void cgroup1_check_for_release(struct cgroup *cgrp);
255struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, 270int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param);
256 void *data, unsigned long magic, 271int cgroup1_get_tree(struct fs_context *fc);
257 struct cgroup_namespace *ns); 272int cgroup1_reconfigure(struct fs_context *ctx);
258 273
259#endif /* __CGROUP_INTERNAL_H */ 274#endif /* __CGROUP_INTERNAL_H */
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index f94a7229974e..c126b34fd4ff 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -13,9 +13,12 @@
13#include <linux/delayacct.h> 13#include <linux/delayacct.h>
14#include <linux/pid_namespace.h> 14#include <linux/pid_namespace.h>
15#include <linux/cgroupstats.h> 15#include <linux/cgroupstats.h>
16#include <linux/fs_parser.h>
16 17
17#include <trace/events/cgroup.h> 18#include <trace/events/cgroup.h>
18 19
20#define cg_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__)
21
19/* 22/*
20 * pidlists linger the following amount before being destroyed. The goal 23 * pidlists linger the following amount before being destroyed. The goal
21 * is avoiding frequent destruction in the middle of consecutive read calls 24 * is avoiding frequent destruction in the middle of consecutive read calls
@@ -906,172 +909,195 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
906 return 0; 909 return 0;
907} 910}
908 911
909static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) 912enum cgroup1_param {
910{ 913 Opt_all,
911 char *token, *o = data; 914 Opt_clone_children,
912 bool all_ss = false, one_ss = false; 915 Opt_cpuset_v2_mode,
913 u16 mask = U16_MAX; 916 Opt_name,
914 struct cgroup_subsys *ss; 917 Opt_none,
915 int nr_opts = 0; 918 Opt_noprefix,
916 int i; 919 Opt_release_agent,
917 920 Opt_xattr,
918#ifdef CONFIG_CPUSETS 921};
919 mask = ~((u16)1 << cpuset_cgrp_id);
920#endif
921
922 memset(opts, 0, sizeof(*opts));
923 922
924 while ((token = strsep(&o, ",")) != NULL) { 923static const struct fs_parameter_spec cgroup1_param_specs[] = {
925 nr_opts++; 924 fsparam_flag ("all", Opt_all),
925 fsparam_flag ("clone_children", Opt_clone_children),
926 fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode),
927 fsparam_string("name", Opt_name),
928 fsparam_flag ("none", Opt_none),
929 fsparam_flag ("noprefix", Opt_noprefix),
930 fsparam_string("release_agent", Opt_release_agent),
931 fsparam_flag ("xattr", Opt_xattr),
932 {}
933};
926 934
927 if (!*token) 935const struct fs_parameter_description cgroup1_fs_parameters = {
928 return -EINVAL; 936 .name = "cgroup1",
929 if (!strcmp(token, "none")) { 937 .specs = cgroup1_param_specs,
930 /* Explicitly have no subsystems */ 938};
931 opts->none = true;
932 continue;
933 }
934 if (!strcmp(token, "all")) {
935 /* Mutually exclusive option 'all' + subsystem name */
936 if (one_ss)
937 return -EINVAL;
938 all_ss = true;
939 continue;
940 }
941 if (!strcmp(token, "noprefix")) {
942 opts->flags |= CGRP_ROOT_NOPREFIX;
943 continue;
944 }
945 if (!strcmp(token, "clone_children")) {
946 opts->cpuset_clone_children = true;
947 continue;
948 }
949 if (!strcmp(token, "cpuset_v2_mode")) {
950 opts->flags |= CGRP_ROOT_CPUSET_V2_MODE;
951 continue;
952 }
953 if (!strcmp(token, "xattr")) {
954 opts->flags |= CGRP_ROOT_XATTR;
955 continue;
956 }
957 if (!strncmp(token, "release_agent=", 14)) {
958 /* Specifying two release agents is forbidden */
959 if (opts->release_agent)
960 return -EINVAL;
961 opts->release_agent =
962 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
963 if (!opts->release_agent)
964 return -ENOMEM;
965 continue;
966 }
967 if (!strncmp(token, "name=", 5)) {
968 const char *name = token + 5;
969
970 /* blocked by boot param? */
971 if (cgroup_no_v1_named)
972 return -ENOENT;
973 /* Can't specify an empty name */
974 if (!strlen(name))
975 return -EINVAL;
976 /* Must match [\w.-]+ */
977 for (i = 0; i < strlen(name); i++) {
978 char c = name[i];
979 if (isalnum(c))
980 continue;
981 if ((c == '.') || (c == '-') || (c == '_'))
982 continue;
983 return -EINVAL;
984 }
985 /* Specifying two names is forbidden */
986 if (opts->name)
987 return -EINVAL;
988 opts->name = kstrndup(name,
989 MAX_CGROUP_ROOT_NAMELEN - 1,
990 GFP_KERNEL);
991 if (!opts->name)
992 return -ENOMEM;
993 939
994 continue; 940int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
941{
942 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
943 struct cgroup_subsys *ss;
944 struct fs_parse_result result;
945 int opt, i;
946
947 opt = fs_parse(fc, &cgroup1_fs_parameters, param, &result);
948 if (opt == -ENOPARAM) {
949 if (strcmp(param->key, "source") == 0) {
950 fc->source = param->string;
951 param->string = NULL;
952 return 0;
995 } 953 }
996
997 for_each_subsys(ss, i) { 954 for_each_subsys(ss, i) {
998 if (strcmp(token, ss->legacy_name)) 955 if (strcmp(param->key, ss->legacy_name))
999 continue; 956 continue;
1000 if (!cgroup_ssid_enabled(i)) 957 ctx->subsys_mask |= (1 << i);
958 return 0;
959 }
960 return cg_invalf(fc, "cgroup1: Unknown subsys name '%s'", param->key);
961 }
962 if (opt < 0)
963 return opt;
964
965 switch (opt) {
966 case Opt_none:
967 /* Explicitly have no subsystems */
968 ctx->none = true;
969 break;
970 case Opt_all:
971 ctx->all_ss = true;
972 break;
973 case Opt_noprefix:
974 ctx->flags |= CGRP_ROOT_NOPREFIX;
975 break;
976 case Opt_clone_children:
977 ctx->cpuset_clone_children = true;
978 break;
979 case Opt_cpuset_v2_mode:
980 ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE;
981 break;
982 case Opt_xattr:
983 ctx->flags |= CGRP_ROOT_XATTR;
984 break;
985 case Opt_release_agent:
986 /* Specifying two release agents is forbidden */
987 if (ctx->release_agent)
988 return cg_invalf(fc, "cgroup1: release_agent respecified");
989 ctx->release_agent = param->string;
990 param->string = NULL;
991 break;
992 case Opt_name:
993 /* blocked by boot param? */
994 if (cgroup_no_v1_named)
995 return -ENOENT;
996 /* Can't specify an empty name */
997 if (!param->size)
998 return cg_invalf(fc, "cgroup1: Empty name");
999 if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1)
1000 return cg_invalf(fc, "cgroup1: Name too long");
1001 /* Must match [\w.-]+ */
1002 for (i = 0; i < param->size; i++) {
1003 char c = param->string[i];
1004 if (isalnum(c))
1001 continue; 1005 continue;
1002 if (cgroup1_ssid_disabled(i)) 1006 if ((c == '.') || (c == '-') || (c == '_'))
1003 continue; 1007 continue;
1004 1008 return cg_invalf(fc, "cgroup1: Invalid name");
1005 /* Mutually exclusive option 'all' + subsystem name */
1006 if (all_ss)
1007 return -EINVAL;
1008 opts->subsys_mask |= (1 << i);
1009 one_ss = true;
1010
1011 break;
1012 } 1009 }
1013 if (i == CGROUP_SUBSYS_COUNT) 1010 /* Specifying two names is forbidden */
1014 return -ENOENT; 1011 if (ctx->name)
1012 return cg_invalf(fc, "cgroup1: name respecified");
1013 ctx->name = param->string;
1014 param->string = NULL;
1015 break;
1015 } 1016 }
1017 return 0;
1018}
1019
1020static int check_cgroupfs_options(struct fs_context *fc)
1021{
1022 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1023 u16 mask = U16_MAX;
1024 u16 enabled = 0;
1025 struct cgroup_subsys *ss;
1026 int i;
1027
1028#ifdef CONFIG_CPUSETS
1029 mask = ~((u16)1 << cpuset_cgrp_id);
1030#endif
1031 for_each_subsys(ss, i)
1032 if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i))
1033 enabled |= 1 << i;
1034
1035 ctx->subsys_mask &= enabled;
1016 1036
1017 /* 1037 /*
1018 * If the 'all' option was specified select all the subsystems, 1038 * In absence of 'none', 'name=' or subsystem name options,
1019 * otherwise if 'none', 'name=' and a subsystem name options were 1039 * let's default to 'all'.
1020 * not specified, let's default to 'all'
1021 */ 1040 */
1022 if (all_ss || (!one_ss && !opts->none && !opts->name)) 1041 if (!ctx->subsys_mask && !ctx->none && !ctx->name)
1023 for_each_subsys(ss, i) 1042 ctx->all_ss = true;
1024 if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i)) 1043
1025 opts->subsys_mask |= (1 << i); 1044 if (ctx->all_ss) {
1045 /* Mutually exclusive option 'all' + subsystem name */
1046 if (ctx->subsys_mask)
1047 return cg_invalf(fc, "cgroup1: subsys name conflicts with all");
1048 /* 'all' => select all the subsystems */
1049 ctx->subsys_mask = enabled;
1050 }
1026 1051
1027 /* 1052 /*
1028 * We either have to specify by name or by subsystems. (So all 1053 * We either have to specify by name or by subsystems. (So all
1029 * empty hierarchies must have a name). 1054 * empty hierarchies must have a name).
1030 */ 1055 */
1031 if (!opts->subsys_mask && !opts->name) 1056 if (!ctx->subsys_mask && !ctx->name)
1032 return -EINVAL; 1057 return cg_invalf(fc, "cgroup1: Need name or subsystem set");
1033 1058
1034 /* 1059 /*
1035 * Option noprefix was introduced just for backward compatibility 1060 * Option noprefix was introduced just for backward compatibility
1036 * with the old cpuset, so we allow noprefix only if mounting just 1061 * with the old cpuset, so we allow noprefix only if mounting just
1037 * the cpuset subsystem. 1062 * the cpuset subsystem.
1038 */ 1063 */
1039 if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask)) 1064 if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask))
1040 return -EINVAL; 1065 return cg_invalf(fc, "cgroup1: noprefix used incorrectly");
1041 1066
1042 /* Can't specify "none" and some subsystems */ 1067 /* Can't specify "none" and some subsystems */
1043 if (opts->subsys_mask && opts->none) 1068 if (ctx->subsys_mask && ctx->none)
1044 return -EINVAL; 1069 return cg_invalf(fc, "cgroup1: none used incorrectly");
1045 1070
1046 return 0; 1071 return 0;
1047} 1072}
1048 1073
1049static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) 1074int cgroup1_reconfigure(struct fs_context *fc)
1050{ 1075{
1051 int ret = 0; 1076 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1077 struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb);
1052 struct cgroup_root *root = cgroup_root_from_kf(kf_root); 1078 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
1053 struct cgroup_sb_opts opts; 1079 int ret = 0;
1054 u16 added_mask, removed_mask; 1080 u16 added_mask, removed_mask;
1055 1081
1056 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); 1082 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1057 1083
1058 /* See what subsystems are wanted */ 1084 /* See what subsystems are wanted */
1059 ret = parse_cgroupfs_options(data, &opts); 1085 ret = check_cgroupfs_options(fc);
1060 if (ret) 1086 if (ret)
1061 goto out_unlock; 1087 goto out_unlock;
1062 1088
1063 if (opts.subsys_mask != root->subsys_mask || opts.release_agent) 1089 if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent)
1064 pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n", 1090 pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
1065 task_tgid_nr(current), current->comm); 1091 task_tgid_nr(current), current->comm);
1066 1092
1067 added_mask = opts.subsys_mask & ~root->subsys_mask; 1093 added_mask = ctx->subsys_mask & ~root->subsys_mask;
1068 removed_mask = root->subsys_mask & ~opts.subsys_mask; 1094 removed_mask = root->subsys_mask & ~ctx->subsys_mask;
1069 1095
1070 /* Don't allow flags or name to change at remount */ 1096 /* Don't allow flags or name to change at remount */
1071 if ((opts.flags ^ root->flags) || 1097 if ((ctx->flags ^ root->flags) ||
1072 (opts.name && strcmp(opts.name, root->name))) { 1098 (ctx->name && strcmp(ctx->name, root->name))) {
1073 pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", 1099 cg_invalf(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"",
1074 opts.flags, opts.name ?: "", root->flags, root->name); 1100 ctx->flags, ctx->name ?: "", root->flags, root->name);
1075 ret = -EINVAL; 1101 ret = -EINVAL;
1076 goto out_unlock; 1102 goto out_unlock;
1077 } 1103 }
@@ -1088,17 +1114,15 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data)
1088 1114
1089 WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask)); 1115 WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
1090 1116
1091 if (opts.release_agent) { 1117 if (ctx->release_agent) {
1092 spin_lock(&release_agent_path_lock); 1118 spin_lock(&release_agent_path_lock);
1093 strcpy(root->release_agent_path, opts.release_agent); 1119 strcpy(root->release_agent_path, ctx->release_agent);
1094 spin_unlock(&release_agent_path_lock); 1120 spin_unlock(&release_agent_path_lock);
1095 } 1121 }
1096 1122
1097 trace_cgroup_remount(root); 1123 trace_cgroup_remount(root);
1098 1124
1099 out_unlock: 1125 out_unlock:
1100 kfree(opts.release_agent);
1101 kfree(opts.name);
1102 mutex_unlock(&cgroup_mutex); 1126 mutex_unlock(&cgroup_mutex);
1103 return ret; 1127 return ret;
1104} 1128}
@@ -1106,28 +1130,30 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data)
1106struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { 1130struct kernfs_syscall_ops cgroup1_kf_syscall_ops = {
1107 .rename = cgroup1_rename, 1131 .rename = cgroup1_rename,
1108 .show_options = cgroup1_show_options, 1132 .show_options = cgroup1_show_options,
1109 .remount_fs = cgroup1_remount,
1110 .mkdir = cgroup_mkdir, 1133 .mkdir = cgroup_mkdir,
1111 .rmdir = cgroup_rmdir, 1134 .rmdir = cgroup_rmdir,
1112 .show_path = cgroup_show_path, 1135 .show_path = cgroup_show_path,
1113}; 1136};
1114 1137
1115struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, 1138/*
1116 void *data, unsigned long magic, 1139 * The guts of cgroup1 mount - find or create cgroup_root to use.
1117 struct cgroup_namespace *ns) 1140 * Called with cgroup_mutex held; returns 0 on success, -E... on
1141 * error and positive - in case when the candidate is busy dying.
1142 * On success it stashes a reference to cgroup_root into given
1143 * cgroup_fs_context; that reference is *NOT* counting towards the
1144 * cgroup_root refcount.
1145 */
1146static int cgroup1_root_to_use(struct fs_context *fc)
1118{ 1147{
1119 struct cgroup_sb_opts opts; 1148 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1120 struct cgroup_root *root; 1149 struct cgroup_root *root;
1121 struct cgroup_subsys *ss; 1150 struct cgroup_subsys *ss;
1122 struct dentry *dentry;
1123 int i, ret; 1151 int i, ret;
1124 1152
1125 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1126
1127 /* First find the desired set of subsystems */ 1153 /* First find the desired set of subsystems */
1128 ret = parse_cgroupfs_options(data, &opts); 1154 ret = check_cgroupfs_options(fc);
1129 if (ret) 1155 if (ret)
1130 goto out_unlock; 1156 return ret;
1131 1157
1132 /* 1158 /*
1133 * Destruction of cgroup root is asynchronous, so subsystems may 1159 * Destruction of cgroup root is asynchronous, so subsystems may
@@ -1137,16 +1163,12 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
1137 * starting. Testing ref liveliness is good enough. 1163 * starting. Testing ref liveliness is good enough.
1138 */ 1164 */
1139 for_each_subsys(ss, i) { 1165 for_each_subsys(ss, i) {
1140 if (!(opts.subsys_mask & (1 << i)) || 1166 if (!(ctx->subsys_mask & (1 << i)) ||
1141 ss->root == &cgrp_dfl_root) 1167 ss->root == &cgrp_dfl_root)
1142 continue; 1168 continue;
1143 1169
1144 if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) { 1170 if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt))
1145 mutex_unlock(&cgroup_mutex); 1171 return 1; /* restart */
1146 msleep(10);
1147 ret = restart_syscall();
1148 goto out_free;
1149 }
1150 cgroup_put(&ss->root->cgrp); 1172 cgroup_put(&ss->root->cgrp);
1151 } 1173 }
1152 1174
@@ -1161,8 +1183,8 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
1161 * name matches but subsys_mask doesn't, we should fail. 1183 * name matches but subsys_mask doesn't, we should fail.
1162 * Remember whether name matched. 1184 * Remember whether name matched.
1163 */ 1185 */
1164 if (opts.name) { 1186 if (ctx->name) {
1165 if (strcmp(opts.name, root->name)) 1187 if (strcmp(ctx->name, root->name))
1166 continue; 1188 continue;
1167 name_match = true; 1189 name_match = true;
1168 } 1190 }
@@ -1171,19 +1193,18 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
1171 * If we asked for subsystems (or explicitly for no 1193 * If we asked for subsystems (or explicitly for no
1172 * subsystems) then they must match. 1194 * subsystems) then they must match.
1173 */ 1195 */
1174 if ((opts.subsys_mask || opts.none) && 1196 if ((ctx->subsys_mask || ctx->none) &&
1175 (opts.subsys_mask != root->subsys_mask)) { 1197 (ctx->subsys_mask != root->subsys_mask)) {
1176 if (!name_match) 1198 if (!name_match)
1177 continue; 1199 continue;
1178 ret = -EBUSY; 1200 return -EBUSY;
1179 goto out_unlock;
1180 } 1201 }
1181 1202
1182 if (root->flags ^ opts.flags) 1203 if (root->flags ^ ctx->flags)
1183 pr_warn("new mount options do not match the existing superblock, will be ignored\n"); 1204 pr_warn("new mount options do not match the existing superblock, will be ignored\n");
1184 1205
1185 ret = 0; 1206 ctx->root = root;
1186 goto out_unlock; 1207 return 0;
1187 } 1208 }
1188 1209
1189 /* 1210 /*
@@ -1191,55 +1212,58 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
1191 * specification is allowed for already existing hierarchies but we 1212 * specification is allowed for already existing hierarchies but we
1192 * can't create new one without subsys specification. 1213 * can't create new one without subsys specification.
1193 */ 1214 */
1194 if (!opts.subsys_mask && !opts.none) { 1215 if (!ctx->subsys_mask && !ctx->none)
1195 ret = -EINVAL; 1216 return cg_invalf(fc, "cgroup1: No subsys list or none specified");
1196 goto out_unlock;
1197 }
1198 1217
1199 /* Hierarchies may only be created in the initial cgroup namespace. */ 1218 /* Hierarchies may only be created in the initial cgroup namespace. */
1200 if (ns != &init_cgroup_ns) { 1219 if (ctx->ns != &init_cgroup_ns)
1201 ret = -EPERM; 1220 return -EPERM;
1202 goto out_unlock;
1203 }
1204 1221
1205 root = kzalloc(sizeof(*root), GFP_KERNEL); 1222 root = kzalloc(sizeof(*root), GFP_KERNEL);
1206 if (!root) { 1223 if (!root)
1207 ret = -ENOMEM; 1224 return -ENOMEM;
1208 goto out_unlock;
1209 }
1210 1225
1211 init_cgroup_root(root, &opts); 1226 ctx->root = root;
1227 init_cgroup_root(ctx);
1212 1228
1213 ret = cgroup_setup_root(root, opts.subsys_mask); 1229 ret = cgroup_setup_root(root, ctx->subsys_mask);
1214 if (ret) 1230 if (ret)
1215 cgroup_free_root(root); 1231 cgroup_free_root(root);
1232 return ret;
1233}
1216 1234
1217out_unlock: 1235int cgroup1_get_tree(struct fs_context *fc)
1218 if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { 1236{
1219 mutex_unlock(&cgroup_mutex); 1237 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1220 msleep(10); 1238 int ret;
1221 ret = restart_syscall();
1222 goto out_free;
1223 }
1224 mutex_unlock(&cgroup_mutex);
1225out_free:
1226 kfree(opts.release_agent);
1227 kfree(opts.name);
1228 1239
1229 if (ret) 1240 /* Check if the caller has permission to mount. */
1230 return ERR_PTR(ret); 1241 if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN))
1242 return -EPERM;
1243
1244 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1245
1246 ret = cgroup1_root_to_use(fc);
1247 if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt))
1248 ret = 1; /* restart */
1231 1249
1232 dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, 1250 mutex_unlock(&cgroup_mutex);
1233 CGROUP_SUPER_MAGIC, ns);
1234 1251
1235 if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { 1252 if (!ret)
1236 struct super_block *sb = dentry->d_sb; 1253 ret = cgroup_do_get_tree(fc);
1237 dput(dentry); 1254
1255 if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
1256 struct super_block *sb = fc->root->d_sb;
1257 dput(fc->root);
1238 deactivate_locked_super(sb); 1258 deactivate_locked_super(sb);
1259 ret = 1;
1260 }
1261
1262 if (unlikely(ret > 0)) {
1239 msleep(10); 1263 msleep(10);
1240 dentry = ERR_PTR(restart_syscall()); 1264 return restart_syscall();
1241 } 1265 }
1242 return dentry; 1266 return ret;
1243} 1267}
1244 1268
1245static int __init cgroup1_wq_init(void) 1269static int __init cgroup1_wq_init(void)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index eef24a25bda7..3f2b4bde0f9c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -54,6 +54,7 @@
54#include <linux/proc_ns.h> 54#include <linux/proc_ns.h>
55#include <linux/nsproxy.h> 55#include <linux/nsproxy.h>
56#include <linux/file.h> 56#include <linux/file.h>
57#include <linux/fs_parser.h>
57#include <linux/sched/cputime.h> 58#include <linux/sched/cputime.h>
58#include <linux/psi.h> 59#include <linux/psi.h>
59#include <net/sock.h> 60#include <net/sock.h>
@@ -1772,26 +1773,37 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
1772 return len; 1773 return len;
1773} 1774}
1774 1775
1775static int parse_cgroup_root_flags(char *data, unsigned int *root_flags) 1776enum cgroup2_param {
1776{ 1777 Opt_nsdelegate,
1777 char *token; 1778 nr__cgroup2_params
1779};
1778 1780
1779 *root_flags = 0; 1781static const struct fs_parameter_spec cgroup2_param_specs[] = {
1782 fsparam_flag ("nsdelegate", Opt_nsdelegate),
1783 {}
1784};
1780 1785
1781 if (!data || *data == '\0') 1786static const struct fs_parameter_description cgroup2_fs_parameters = {
1782 return 0; 1787 .name = "cgroup2",
1788 .specs = cgroup2_param_specs,
1789};
1783 1790
1784 while ((token = strsep(&data, ",")) != NULL) { 1791static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
1785 if (!strcmp(token, "nsdelegate")) { 1792{
1786 *root_flags |= CGRP_ROOT_NS_DELEGATE; 1793 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1787 continue; 1794 struct fs_parse_result result;
1788 } 1795 int opt;
1789 1796
1790 pr_err("cgroup2: unknown option \"%s\"\n", token); 1797 opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result);
1791 return -EINVAL; 1798 if (opt < 0)
1792 } 1799 return opt;
1793 1800
1794 return 0; 1801 switch (opt) {
1802 case Opt_nsdelegate:
1803 ctx->flags |= CGRP_ROOT_NS_DELEGATE;
1804 return 0;
1805 }
1806 return -EINVAL;
1795} 1807}
1796 1808
1797static void apply_cgroup_root_flags(unsigned int root_flags) 1809static void apply_cgroup_root_flags(unsigned int root_flags)
@@ -1811,16 +1823,11 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root
1811 return 0; 1823 return 0;
1812} 1824}
1813 1825
1814static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) 1826static int cgroup_reconfigure(struct fs_context *fc)
1815{ 1827{
1816 unsigned int root_flags; 1828 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1817 int ret;
1818
1819 ret = parse_cgroup_root_flags(data, &root_flags);
1820 if (ret)
1821 return ret;
1822 1829
1823 apply_cgroup_root_flags(root_flags); 1830 apply_cgroup_root_flags(ctx->flags);
1824 return 0; 1831 return 0;
1825} 1832}
1826 1833
@@ -1908,8 +1915,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1908 INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); 1915 INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
1909} 1916}
1910 1917
1911void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) 1918void init_cgroup_root(struct cgroup_fs_context *ctx)
1912{ 1919{
1920 struct cgroup_root *root = ctx->root;
1913 struct cgroup *cgrp = &root->cgrp; 1921 struct cgroup *cgrp = &root->cgrp;
1914 1922
1915 INIT_LIST_HEAD(&root->root_list); 1923 INIT_LIST_HEAD(&root->root_list);
@@ -1918,12 +1926,12 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
1918 init_cgroup_housekeeping(cgrp); 1926 init_cgroup_housekeeping(cgrp);
1919 idr_init(&root->cgroup_idr); 1927 idr_init(&root->cgroup_idr);
1920 1928
1921 root->flags = opts->flags; 1929 root->flags = ctx->flags;
1922 if (opts->release_agent) 1930 if (ctx->release_agent)
1923 strscpy(root->release_agent_path, opts->release_agent, PATH_MAX); 1931 strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
1924 if (opts->name) 1932 if (ctx->name)
1925 strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); 1933 strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
1926 if (opts->cpuset_clone_children) 1934 if (ctx->cpuset_clone_children)
1927 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); 1935 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
1928} 1936}
1929 1937
@@ -2028,60 +2036,104 @@ out:
2028 return ret; 2036 return ret;
2029} 2037}
2030 2038
2031struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, 2039int cgroup_do_get_tree(struct fs_context *fc)
2032 struct cgroup_root *root, unsigned long magic,
2033 struct cgroup_namespace *ns)
2034{ 2040{
2035 struct dentry *dentry; 2041 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
2036 bool new_sb = false; 2042 int ret;
2037 2043
2038 dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb); 2044 ctx->kfc.root = ctx->root->kf_root;
2045 if (fc->fs_type == &cgroup2_fs_type)
2046 ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
2047 else
2048 ctx->kfc.magic = CGROUP_SUPER_MAGIC;
2049 ret = kernfs_get_tree(fc);
2039 2050
2040 /* 2051 /*
2041 * In non-init cgroup namespace, instead of root cgroup's dentry, 2052 * In non-init cgroup namespace, instead of root cgroup's dentry,
2042 * we return the dentry corresponding to the cgroupns->root_cgrp. 2053 * we return the dentry corresponding to the cgroupns->root_cgrp.
2043 */ 2054 */
2044 if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { 2055 if (!ret && ctx->ns != &init_cgroup_ns) {
2045 struct dentry *nsdentry; 2056 struct dentry *nsdentry;
2046 struct super_block *sb = dentry->d_sb; 2057 struct super_block *sb = fc->root->d_sb;
2047 struct cgroup *cgrp; 2058 struct cgroup *cgrp;
2048 2059
2049 mutex_lock(&cgroup_mutex); 2060 mutex_lock(&cgroup_mutex);
2050 spin_lock_irq(&css_set_lock); 2061 spin_lock_irq(&css_set_lock);
2051 2062
2052 cgrp = cset_cgroup_from_root(ns->root_cset, root); 2063 cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);
2053 2064
2054 spin_unlock_irq(&css_set_lock); 2065 spin_unlock_irq(&css_set_lock);
2055 mutex_unlock(&cgroup_mutex); 2066 mutex_unlock(&cgroup_mutex);
2056 2067
2057 nsdentry = kernfs_node_dentry(cgrp->kn, sb); 2068 nsdentry = kernfs_node_dentry(cgrp->kn, sb);
2058 dput(dentry); 2069 dput(fc->root);
2059 if (IS_ERR(nsdentry)) 2070 fc->root = nsdentry;
2071 if (IS_ERR(nsdentry)) {
2072 ret = PTR_ERR(nsdentry);
2060 deactivate_locked_super(sb); 2073 deactivate_locked_super(sb);
2061 dentry = nsdentry; 2074 }
2062 } 2075 }
2063 2076
2064 if (!new_sb) 2077 if (!ctx->kfc.new_sb_created)
2065 cgroup_put(&root->cgrp); 2078 cgroup_put(&ctx->root->cgrp);
2066 2079
2067 return dentry; 2080 return ret;
2068} 2081}
2069 2082
2070static struct dentry *cgroup_mount(struct file_system_type *fs_type, 2083/*
2071 int flags, const char *unused_dev_name, 2084 * Destroy a cgroup filesystem context.
2072 void *data) 2085 */
2086static void cgroup_fs_context_free(struct fs_context *fc)
2073{ 2087{
2074 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; 2088 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
2075 struct dentry *dentry; 2089
2090 kfree(ctx->name);
2091 kfree(ctx->release_agent);
2092 put_cgroup_ns(ctx->ns);
2093 kernfs_free_fs_context(fc);
2094 kfree(ctx);
2095}
2096
2097static int cgroup_get_tree(struct fs_context *fc)
2098{
2099 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
2076 int ret; 2100 int ret;
2077 2101
2078 get_cgroup_ns(ns); 2102 cgrp_dfl_visible = true;
2103 cgroup_get_live(&cgrp_dfl_root.cgrp);
2104 ctx->root = &cgrp_dfl_root;
2079 2105
2080 /* Check if the caller has permission to mount. */ 2106 ret = cgroup_do_get_tree(fc);
2081 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) { 2107 if (!ret)
2082 put_cgroup_ns(ns); 2108 apply_cgroup_root_flags(ctx->flags);
2083 return ERR_PTR(-EPERM); 2109 return ret;
2084 } 2110}
2111
2112static const struct fs_context_operations cgroup_fs_context_ops = {
2113 .free = cgroup_fs_context_free,
2114 .parse_param = cgroup2_parse_param,
2115 .get_tree = cgroup_get_tree,
2116 .reconfigure = cgroup_reconfigure,
2117};
2118
2119static const struct fs_context_operations cgroup1_fs_context_ops = {
2120 .free = cgroup_fs_context_free,
2121 .parse_param = cgroup1_parse_param,
2122 .get_tree = cgroup1_get_tree,
2123 .reconfigure = cgroup1_reconfigure,
2124};
2125
2126/*
2127 * Initialise the cgroup filesystem creation/reconfiguration context. Notably,
2128 * we select the namespace we're going to use.
2129 */
2130static int cgroup_init_fs_context(struct fs_context *fc)
2131{
2132 struct cgroup_fs_context *ctx;
2133
2134 ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL);
2135 if (!ctx)
2136 return -ENOMEM;
2085 2137
2086 /* 2138 /*
2087 * The first time anyone tries to mount a cgroup, enable the list 2139 * The first time anyone tries to mount a cgroup, enable the list
@@ -2090,29 +2142,18 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
2090 if (!use_task_css_set_links) 2142 if (!use_task_css_set_links)
2091 cgroup_enable_task_cg_lists(); 2143 cgroup_enable_task_cg_lists();
2092 2144
2093 if (fs_type == &cgroup2_fs_type) { 2145 ctx->ns = current->nsproxy->cgroup_ns;
2094 unsigned int root_flags; 2146 get_cgroup_ns(ctx->ns);
2095 2147 fc->fs_private = &ctx->kfc;
2096 ret = parse_cgroup_root_flags(data, &root_flags); 2148 if (fc->fs_type == &cgroup2_fs_type)
2097 if (ret) { 2149 fc->ops = &cgroup_fs_context_ops;
2098 put_cgroup_ns(ns); 2150 else
2099 return ERR_PTR(ret); 2151 fc->ops = &cgroup1_fs_context_ops;
2100 } 2152 if (fc->user_ns)
2101 2153 put_user_ns(fc->user_ns);
2102 cgrp_dfl_visible = true; 2154 fc->user_ns = get_user_ns(ctx->ns->user_ns);
2103 cgroup_get_live(&cgrp_dfl_root.cgrp); 2155 fc->global = true;
2104 2156 return 0;
2105 dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
2106 CGROUP2_SUPER_MAGIC, ns);
2107 if (!IS_ERR(dentry))
2108 apply_cgroup_root_flags(root_flags);
2109 } else {
2110 dentry = cgroup1_mount(&cgroup_fs_type, flags, data,
2111 CGROUP_SUPER_MAGIC, ns);
2112 }
2113
2114 put_cgroup_ns(ns);
2115 return dentry;
2116} 2157}
2117 2158
2118static void cgroup_kill_sb(struct super_block *sb) 2159static void cgroup_kill_sb(struct super_block *sb)
@@ -2135,17 +2176,19 @@ static void cgroup_kill_sb(struct super_block *sb)
2135} 2176}
2136 2177
2137struct file_system_type cgroup_fs_type = { 2178struct file_system_type cgroup_fs_type = {
2138 .name = "cgroup", 2179 .name = "cgroup",
2139 .mount = cgroup_mount, 2180 .init_fs_context = cgroup_init_fs_context,
2140 .kill_sb = cgroup_kill_sb, 2181 .parameters = &cgroup1_fs_parameters,
2141 .fs_flags = FS_USERNS_MOUNT, 2182 .kill_sb = cgroup_kill_sb,
2183 .fs_flags = FS_USERNS_MOUNT,
2142}; 2184};
2143 2185
2144static struct file_system_type cgroup2_fs_type = { 2186static struct file_system_type cgroup2_fs_type = {
2145 .name = "cgroup2", 2187 .name = "cgroup2",
2146 .mount = cgroup_mount, 2188 .init_fs_context = cgroup_init_fs_context,
2147 .kill_sb = cgroup_kill_sb, 2189 .parameters = &cgroup2_fs_parameters,
2148 .fs_flags = FS_USERNS_MOUNT, 2190 .kill_sb = cgroup_kill_sb,
2191 .fs_flags = FS_USERNS_MOUNT,
2149}; 2192};
2150 2193
2151int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, 2194int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
@@ -5280,7 +5323,6 @@ int cgroup_rmdir(struct kernfs_node *kn)
5280 5323
5281static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { 5324static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
5282 .show_options = cgroup_show_options, 5325 .show_options = cgroup_show_options,
5283 .remount_fs = cgroup_remount,
5284 .mkdir = cgroup_mkdir, 5326 .mkdir = cgroup_mkdir,
5285 .rmdir = cgroup_rmdir, 5327 .rmdir = cgroup_rmdir,
5286 .show_path = cgroup_show_path, 5328 .show_path = cgroup_show_path,
@@ -5347,11 +5389,12 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
5347 */ 5389 */
5348int __init cgroup_init_early(void) 5390int __init cgroup_init_early(void)
5349{ 5391{
5350 static struct cgroup_sb_opts __initdata opts; 5392 static struct cgroup_fs_context __initdata ctx;
5351 struct cgroup_subsys *ss; 5393 struct cgroup_subsys *ss;
5352 int i; 5394 int i;
5353 5395
5354 init_cgroup_root(&cgrp_dfl_root, &opts); 5396 ctx.root = &cgrp_dfl_root;
5397 init_cgroup_root(&ctx);
5355 cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; 5398 cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
5356 5399
5357 RCU_INIT_POINTER(init_task.cgroups, &init_css_set); 5400 RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 72afd55f70c6..4834c4214e9c 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -39,6 +39,7 @@
39#include <linux/memory.h> 39#include <linux/memory.h>
40#include <linux/export.h> 40#include <linux/export.h>
41#include <linux/mount.h> 41#include <linux/mount.h>
42#include <linux/fs_context.h>
42#include <linux/namei.h> 43#include <linux/namei.h>
43#include <linux/pagemap.h> 44#include <linux/pagemap.h>
44#include <linux/proc_fs.h> 45#include <linux/proc_fs.h>
@@ -359,25 +360,52 @@ static inline bool is_in_v2_mode(void)
359 * users. If someone tries to mount the "cpuset" filesystem, we 360 * users. If someone tries to mount the "cpuset" filesystem, we
360 * silently switch it to mount "cgroup" instead 361 * silently switch it to mount "cgroup" instead
361 */ 362 */
362static struct dentry *cpuset_mount(struct file_system_type *fs_type, 363static int cpuset_get_tree(struct fs_context *fc)
363 int flags, const char *unused_dev_name, void *data) 364{
364{ 365 struct file_system_type *cgroup_fs;
365 struct file_system_type *cgroup_fs = get_fs_type("cgroup"); 366 struct fs_context *new_fc;
366 struct dentry *ret = ERR_PTR(-ENODEV); 367 int ret;
367 if (cgroup_fs) { 368
368 char mountopts[] = 369 cgroup_fs = get_fs_type("cgroup");
369 "cpuset,noprefix," 370 if (!cgroup_fs)
370 "release_agent=/sbin/cpuset_release_agent"; 371 return -ENODEV;
371 ret = cgroup_fs->mount(cgroup_fs, flags, 372
372 unused_dev_name, mountopts); 373 new_fc = fs_context_for_mount(cgroup_fs, fc->sb_flags);
373 put_filesystem(cgroup_fs); 374 if (IS_ERR(new_fc)) {
375 ret = PTR_ERR(new_fc);
376 } else {
377 static const char agent_path[] = "/sbin/cpuset_release_agent";
378 ret = vfs_parse_fs_string(new_fc, "cpuset", NULL, 0);
379 if (!ret)
380 ret = vfs_parse_fs_string(new_fc, "noprefix", NULL, 0);
381 if (!ret)
382 ret = vfs_parse_fs_string(new_fc, "release_agent",
383 agent_path, sizeof(agent_path) - 1);
384 if (!ret)
385 ret = vfs_get_tree(new_fc);
386 if (!ret) { /* steal the result */
387 fc->root = new_fc->root;
388 new_fc->root = NULL;
389 }
390 put_fs_context(new_fc);
374 } 391 }
392 put_filesystem(cgroup_fs);
375 return ret; 393 return ret;
376} 394}
377 395
396static const struct fs_context_operations cpuset_fs_context_ops = {
397 .get_tree = cpuset_get_tree,
398};
399
400static int cpuset_init_fs_context(struct fs_context *fc)
401{
402 fc->ops = &cpuset_fs_context_ops;
403 return 0;
404}
405
378static struct file_system_type cpuset_fs_type = { 406static struct file_system_type cpuset_fs_type = {
379 .name = "cpuset", 407 .name = "cpuset",
380 .mount = cpuset_mount, 408 .init_fs_context = cpuset_init_fs_context,
381}; 409};
382 410
383/* 411/*
diff --git a/security/security.c b/security/security.c
index 301b141b9a32..23cbb1a295a3 100644
--- a/security/security.c
+++ b/security/security.c
@@ -764,6 +764,16 @@ void security_bprm_committed_creds(struct linux_binprm *bprm)
764 call_void_hook(bprm_committed_creds, bprm); 764 call_void_hook(bprm_committed_creds, bprm);
765} 765}
766 766
767int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
768{
769 return call_int_hook(fs_context_dup, 0, fc, src_fc);
770}
771
772int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param)
773{
774 return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param);
775}
776
767int security_sb_alloc(struct super_block *sb) 777int security_sb_alloc(struct super_block *sb)
768{ 778{
769 return call_int_hook(sb_alloc_security, 0, sb); 779 return call_int_hook(sb_alloc_security, 0, sb);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 333606b3a8ef..c5363f0c67ef 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -48,6 +48,8 @@
48#include <linux/fdtable.h> 48#include <linux/fdtable.h>
49#include <linux/namei.h> 49#include <linux/namei.h>
50#include <linux/mount.h> 50#include <linux/mount.h>
51#include <linux/fs_context.h>
52#include <linux/fs_parser.h>
51#include <linux/netfilter_ipv4.h> 53#include <linux/netfilter_ipv4.h>
52#include <linux/netfilter_ipv6.h> 54#include <linux/netfilter_ipv6.h>
53#include <linux/tty.h> 55#include <linux/tty.h>
@@ -410,11 +412,11 @@ static inline int inode_doinit(struct inode *inode)
410 412
411enum { 413enum {
412 Opt_error = -1, 414 Opt_error = -1,
413 Opt_context = 1, 415 Opt_context = 0,
416 Opt_defcontext = 1,
414 Opt_fscontext = 2, 417 Opt_fscontext = 2,
415 Opt_defcontext = 3, 418 Opt_rootcontext = 3,
416 Opt_rootcontext = 4, 419 Opt_seclabel = 4,
417 Opt_seclabel = 5,
418}; 420};
419 421
420#define A(s, has_arg) {#s, sizeof(#s) - 1, Opt_##s, has_arg} 422#define A(s, has_arg) {#s, sizeof(#s) - 1, Opt_##s, has_arg}
@@ -1067,6 +1069,7 @@ static int show_sid(struct seq_file *m, u32 sid)
1067 if (!rc) { 1069 if (!rc) {
1068 bool has_comma = context && strchr(context, ','); 1070 bool has_comma = context && strchr(context, ',');
1069 1071
1072 seq_putc(m, '=');
1070 if (has_comma) 1073 if (has_comma)
1071 seq_putc(m, '\"'); 1074 seq_putc(m, '\"');
1072 seq_escape(m, context, "\"\n\\"); 1075 seq_escape(m, context, "\"\n\\");
@@ -1120,7 +1123,7 @@ static int selinux_sb_show_options(struct seq_file *m, struct super_block *sb)
1120 } 1123 }
1121 if (sbsec->flags & SBLABEL_MNT) { 1124 if (sbsec->flags & SBLABEL_MNT) {
1122 seq_putc(m, ','); 1125 seq_putc(m, ',');
1123 seq_puts(m, LABELSUPP_STR); 1126 seq_puts(m, SECLABEL_STR);
1124 } 1127 }
1125 return 0; 1128 return 0;
1126} 1129}
@@ -2739,6 +2742,76 @@ static int selinux_umount(struct vfsmount *mnt, int flags)
2739 FILESYSTEM__UNMOUNT, NULL); 2742 FILESYSTEM__UNMOUNT, NULL);
2740} 2743}
2741 2744
2745static int selinux_fs_context_dup(struct fs_context *fc,
2746 struct fs_context *src_fc)
2747{
2748 const struct selinux_mnt_opts *src = src_fc->security;
2749 struct selinux_mnt_opts *opts;
2750
2751 if (!src)
2752 return 0;
2753
2754 fc->security = kzalloc(sizeof(struct selinux_mnt_opts), GFP_KERNEL);
2755 if (!fc->security)
2756 return -ENOMEM;
2757
2758 opts = fc->security;
2759
2760 if (src->fscontext) {
2761 opts->fscontext = kstrdup(src->fscontext, GFP_KERNEL);
2762 if (!opts->fscontext)
2763 return -ENOMEM;
2764 }
2765 if (src->context) {
2766 opts->context = kstrdup(src->context, GFP_KERNEL);
2767 if (!opts->context)
2768 return -ENOMEM;
2769 }
2770 if (src->rootcontext) {
2771 opts->rootcontext = kstrdup(src->rootcontext, GFP_KERNEL);
2772 if (!opts->rootcontext)
2773 return -ENOMEM;
2774 }
2775 if (src->defcontext) {
2776 opts->defcontext = kstrdup(src->defcontext, GFP_KERNEL);
2777 if (!opts->defcontext)
2778 return -ENOMEM;
2779 }
2780 return 0;
2781}
2782
2783static const struct fs_parameter_spec selinux_param_specs[] = {
2784 fsparam_string(CONTEXT_STR, Opt_context),
2785 fsparam_string(DEFCONTEXT_STR, Opt_defcontext),
2786 fsparam_string(FSCONTEXT_STR, Opt_fscontext),
2787 fsparam_string(ROOTCONTEXT_STR, Opt_rootcontext),
2788 fsparam_flag (SECLABEL_STR, Opt_seclabel),
2789 {}
2790};
2791
2792static const struct fs_parameter_description selinux_fs_parameters = {
2793 .name = "SELinux",
2794 .specs = selinux_param_specs,
2795};
2796
2797static int selinux_fs_context_parse_param(struct fs_context *fc,
2798 struct fs_parameter *param)
2799{
2800 struct fs_parse_result result;
2801 int opt, rc;
2802
2803 opt = fs_parse(fc, &selinux_fs_parameters, param, &result);
2804 if (opt < 0)
2805 return opt;
2806
2807 rc = selinux_add_opt(opt, param->string, &fc->security);
2808 if (!rc) {
2809 param->string = NULL;
2810 rc = 1;
2811 }
2812 return rc;
2813}
2814
2742/* inode security operations */ 2815/* inode security operations */
2743 2816
2744static int selinux_inode_alloc_security(struct inode *inode) 2817static int selinux_inode_alloc_security(struct inode *inode)
@@ -6592,6 +6665,9 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
6592 LSM_HOOK_INIT(bprm_committing_creds, selinux_bprm_committing_creds), 6665 LSM_HOOK_INIT(bprm_committing_creds, selinux_bprm_committing_creds),
6593 LSM_HOOK_INIT(bprm_committed_creds, selinux_bprm_committed_creds), 6666 LSM_HOOK_INIT(bprm_committed_creds, selinux_bprm_committed_creds),
6594 6667
6668 LSM_HOOK_INIT(fs_context_dup, selinux_fs_context_dup),
6669 LSM_HOOK_INIT(fs_context_parse_param, selinux_fs_context_parse_param),
6670
6595 LSM_HOOK_INIT(sb_alloc_security, selinux_sb_alloc_security), 6671 LSM_HOOK_INIT(sb_alloc_security, selinux_sb_alloc_security),
6596 LSM_HOOK_INIT(sb_free_security, selinux_sb_free_security), 6672 LSM_HOOK_INIT(sb_free_security, selinux_sb_free_security),
6597 LSM_HOOK_INIT(sb_eat_lsm_opts, selinux_sb_eat_lsm_opts), 6673 LSM_HOOK_INIT(sb_eat_lsm_opts, selinux_sb_eat_lsm_opts),
@@ -6837,6 +6913,8 @@ static __init int selinux_init(void)
6837 else 6913 else
6838 pr_debug("SELinux: Starting in permissive mode\n"); 6914 pr_debug("SELinux: Starting in permissive mode\n");
6839 6915
6916 fs_validate_description(&selinux_fs_parameters);
6917
6840 return 0; 6918 return 0;
6841} 6919}
6842 6920
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index f68fb25b5702..b5b7c5aade8c 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -59,11 +59,11 @@
59#define SE_SBPROC 0x0200 59#define SE_SBPROC 0x0200
60#define SE_SBGENFS 0x0400 60#define SE_SBGENFS 0x0400
61 61
62#define CONTEXT_STR "context=" 62#define CONTEXT_STR "context"
63#define FSCONTEXT_STR "fscontext=" 63#define FSCONTEXT_STR "fscontext"
64#define ROOTCONTEXT_STR "rootcontext=" 64#define ROOTCONTEXT_STR "rootcontext"
65#define DEFCONTEXT_STR "defcontext=" 65#define DEFCONTEXT_STR "defcontext"
66#define LABELSUPP_STR "seclabel" 66#define SECLABEL_STR "seclabel"
67 67
68struct netlbl_lsm_secattr; 68struct netlbl_lsm_secattr;
69 69
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 9c7c95a5c497..cf52af77d15e 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -196,22 +196,13 @@ struct smack_known_list_elem {
196 196
197enum { 197enum {
198 Opt_error = -1, 198 Opt_error = -1,
199 Opt_fsdefault = 1, 199 Opt_fsdefault = 0,
200 Opt_fsfloor = 2, 200 Opt_fsfloor = 1,
201 Opt_fshat = 3, 201 Opt_fshat = 2,
202 Opt_fsroot = 4, 202 Opt_fsroot = 3,
203 Opt_fstransmute = 5, 203 Opt_fstransmute = 4,
204}; 204};
205 205
206/*
207 * Mount options
208 */
209#define SMK_FSDEFAULT "smackfsdef="
210#define SMK_FSFLOOR "smackfsfloor="
211#define SMK_FSHAT "smackfshat="
212#define SMK_FSROOT "smackfsroot="
213#define SMK_FSTRANS "smackfstransmute="
214
215#define SMACK_DELETE_OPTION "-DELETE" 206#define SMACK_DELETE_OPTION "-DELETE"
216#define SMACK_CIPSO_OPTION "-CIPSO" 207#define SMACK_CIPSO_OPTION "-CIPSO"
217 208
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 424bce4ef21d..5c1613519d5a 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -43,6 +43,8 @@
43#include <linux/shm.h> 43#include <linux/shm.h>
44#include <linux/binfmts.h> 44#include <linux/binfmts.h>
45#include <linux/parser.h> 45#include <linux/parser.h>
46#include <linux/fs_context.h>
47#include <linux/fs_parser.h>
46#include "smack.h" 48#include "smack.h"
47 49
48#define TRANS_TRUE "TRUE" 50#define TRANS_TRUE "TRUE"
@@ -526,7 +528,6 @@ static int smack_syslog(int typefrom_file)
526 return rc; 528 return rc;
527} 529}
528 530
529
530/* 531/*
531 * Superblock Hooks. 532 * Superblock Hooks.
532 */ 533 */
@@ -631,6 +632,92 @@ out_opt_err:
631 return -EINVAL; 632 return -EINVAL;
632} 633}
633 634
635/**
636 * smack_fs_context_dup - Duplicate the security data on fs_context duplication
637 * @fc: The new filesystem context.
638 * @src_fc: The source filesystem context being duplicated.
639 *
640 * Returns 0 on success or -ENOMEM on error.
641 */
642static int smack_fs_context_dup(struct fs_context *fc,
643 struct fs_context *src_fc)
644{
645 struct smack_mnt_opts *dst, *src = src_fc->security;
646
647 if (!src)
648 return 0;
649
650 fc->security = kzalloc(sizeof(struct smack_mnt_opts), GFP_KERNEL);
651 if (!fc->security)
652 return -ENOMEM;
653 dst = fc->security;
654
655 if (src->fsdefault) {
656 dst->fsdefault = kstrdup(src->fsdefault, GFP_KERNEL);
657 if (!dst->fsdefault)
658 return -ENOMEM;
659 }
660 if (src->fsfloor) {
661 dst->fsfloor = kstrdup(src->fsfloor, GFP_KERNEL);
662 if (!dst->fsfloor)
663 return -ENOMEM;
664 }
665 if (src->fshat) {
666 dst->fshat = kstrdup(src->fshat, GFP_KERNEL);
667 if (!dst->fshat)
668 return -ENOMEM;
669 }
670 if (src->fsroot) {
671 dst->fsroot = kstrdup(src->fsroot, GFP_KERNEL);
672 if (!dst->fsroot)
673 return -ENOMEM;
674 }
675 if (src->fstransmute) {
676 dst->fstransmute = kstrdup(src->fstransmute, GFP_KERNEL);
677 if (!dst->fstransmute)
678 return -ENOMEM;
679 }
680 return 0;
681}
682
683static const struct fs_parameter_spec smack_param_specs[] = {
684 fsparam_string("fsdefault", Opt_fsdefault),
685 fsparam_string("fsfloor", Opt_fsfloor),
686 fsparam_string("fshat", Opt_fshat),
687 fsparam_string("fsroot", Opt_fsroot),
688 fsparam_string("fstransmute", Opt_fstransmute),
689 {}
690};
691
692static const struct fs_parameter_description smack_fs_parameters = {
693 .name = "smack",
694 .specs = smack_param_specs,
695};
696
697/**
698 * smack_fs_context_parse_param - Parse a single mount parameter
699 * @fc: The new filesystem context being constructed.
700 * @param: The parameter.
701 *
702 * Returns 0 on success, -ENOPARAM to pass the parameter on or anything else on
703 * error.
704 */
705static int smack_fs_context_parse_param(struct fs_context *fc,
706 struct fs_parameter *param)
707{
708 struct fs_parse_result result;
709 int opt, rc;
710
711 opt = fs_parse(fc, &smack_fs_parameters, param, &result);
712 if (opt < 0)
713 return opt;
714
715 rc = smack_add_opt(opt, param->string, &fc->security);
716 if (!rc)
717 param->string = NULL;
718 return rc;
719}
720
634static int smack_sb_eat_lsm_opts(char *options, void **mnt_opts) 721static int smack_sb_eat_lsm_opts(char *options, void **mnt_opts)
635{ 722{
636 char *from = options, *to = options; 723 char *from = options, *to = options;
@@ -4495,6 +4582,9 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
4495 LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme), 4582 LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme),
4496 LSM_HOOK_INIT(syslog, smack_syslog), 4583 LSM_HOOK_INIT(syslog, smack_syslog),
4497 4584
4585 LSM_HOOK_INIT(fs_context_dup, smack_fs_context_dup),
4586 LSM_HOOK_INIT(fs_context_parse_param, smack_fs_context_parse_param),
4587
4498 LSM_HOOK_INIT(sb_alloc_security, smack_sb_alloc_security), 4588 LSM_HOOK_INIT(sb_alloc_security, smack_sb_alloc_security),
4499 LSM_HOOK_INIT(sb_free_security, smack_sb_free_security), 4589 LSM_HOOK_INIT(sb_free_security, smack_sb_free_security),
4500 LSM_HOOK_INIT(sb_free_mnt_opts, smack_free_mnt_opts), 4590 LSM_HOOK_INIT(sb_free_mnt_opts, smack_free_mnt_opts),