diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-12 17:08:19 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-12 17:08:19 -0400 |
| commit | 7b47a9e7c8f672b6fb0b77fca11a63a8a77f5a91 (patch) | |
| tree | cf05645120ba2323c36acefdea6e62addf320f8c | |
| parent | dbc2fba3fc46084f502aec53183995a632998dcd (diff) | |
| parent | c99c2171fc61476afac0dfb59fb2c447a01fb1e0 (diff) | |
Merge branch 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs mount infrastructure updates from Al Viro:
"The rest of core infrastructure; no new syscalls in that pile, but the
old parts are switched to new infrastructure. At that point
conversions of individual filesystems can happen independently; some
are done here (afs, cgroup, procfs, etc.), there's also a large series
outside of that pile dealing with NFS (quite a bit of option-parsing
stuff is getting used there - it's one of the most convoluted
filesystems in terms of mount-related logics), but NFS bits are the
next cycle fodder.
It got seriously simplified since the last cycle; documentation is
probably the weakest bit at the moment - I considered dropping the
commit introducing Documentation/filesystems/mount_api.txt (cutting
the size increase by quarter ;-), but decided that it would be better
to fix it up after -rc1 instead.
That pile allows to do followup work in independent branches, which
should make life much easier for the next cycle. fs/super.c size
increase is unpleasant; there's a followup series that allows to
shrink it considerably, but I decided to leave that until the next
cycle"
* 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (41 commits)
afs: Use fs_context to pass parameters over automount
afs: Add fs_context support
vfs: Add some logging to the core users of the fs_context log
vfs: Implement logging through fs_context
vfs: Provide documentation for new mount API
vfs: Remove kern_mount_data()
hugetlbfs: Convert to fs_context
cpuset: Use fs_context
kernfs, sysfs, cgroup, intel_rdt: Support fs_context
cgroup: store a reference to cgroup_ns into cgroup_fs_context
cgroup1_get_tree(): separate "get cgroup_root to use" into a separate helper
cgroup_do_mount(): massage calling conventions
cgroup: stash cgroup_root reference into cgroup_fs_context
cgroup2: switch to option-by-option parsing
cgroup1: switch to option-by-option parsing
cgroup: take options parsing into ->parse_monolithic()
cgroup: fold cgroup1_mount() into cgroup1_get_tree()
cgroup: start switching to fs_context
ipc: Convert mqueue fs to fs_context
proc: Add fs_context support to procfs
...
45 files changed, 4357 insertions, 1332 deletions
diff --git a/Documentation/filesystems/mount_api.txt b/Documentation/filesystems/mount_api.txt new file mode 100644 index 000000000000..944d1965e917 --- /dev/null +++ b/Documentation/filesystems/mount_api.txt | |||
| @@ -0,0 +1,709 @@ | |||
| 1 | ==================== | ||
| 2 | FILESYSTEM MOUNT API | ||
| 3 | ==================== | ||
| 4 | |||
| 5 | CONTENTS | ||
| 6 | |||
| 7 | (1) Overview. | ||
| 8 | |||
| 9 | (2) The filesystem context. | ||
| 10 | |||
| 11 | (3) The filesystem context operations. | ||
| 12 | |||
| 13 | (4) Filesystem context security. | ||
| 14 | |||
| 15 | (5) VFS filesystem context operations. | ||
| 16 | |||
| 17 | (6) Parameter description. | ||
| 18 | |||
| 19 | (7) Parameter helper functions. | ||
| 20 | |||
| 21 | |||
| 22 | ======== | ||
| 23 | OVERVIEW | ||
| 24 | ======== | ||
| 25 | |||
| 26 | The creation of new mounts is now to be done in a multistep process: | ||
| 27 | |||
| 28 | (1) Create a filesystem context. | ||
| 29 | |||
| 30 | (2) Parse the parameters and attach them to the context. Parameters are | ||
| 31 | expected to be passed individually from userspace, though legacy binary | ||
| 32 | parameters can also be handled. | ||
| 33 | |||
| 34 | (3) Validate and pre-process the context. | ||
| 35 | |||
| 36 | (4) Get or create a superblock and mountable root. | ||
| 37 | |||
| 38 | (5) Perform the mount. | ||
| 39 | |||
| 40 | (6) Return an error message attached to the context. | ||
| 41 | |||
| 42 | (7) Destroy the context. | ||
| 43 | |||
| 44 | To support this, the file_system_type struct gains a new field: | ||
| 45 | |||
| 46 | int (*init_fs_context)(struct fs_context *fc); | ||
| 47 | |||
| 48 | which is invoked to set up the filesystem-specific parts of a filesystem | ||
| 49 | context, including the additional space. | ||
| 50 | |||
| 51 | Note that security initialisation is done *after* the filesystem is called so | ||
| 52 | that the namespaces may be adjusted first. | ||
| 53 | |||
| 54 | |||
| 55 | ====================== | ||
| 56 | THE FILESYSTEM CONTEXT | ||
| 57 | ====================== | ||
| 58 | |||
| 59 | The creation and reconfiguration of a superblock is governed by a filesystem | ||
| 60 | context. This is represented by the fs_context structure: | ||
| 61 | |||
| 62 | struct fs_context { | ||
| 63 | const struct fs_context_operations *ops; | ||
| 64 | struct file_system_type *fs_type; | ||
| 65 | void *fs_private; | ||
| 66 | struct dentry *root; | ||
| 67 | struct user_namespace *user_ns; | ||
| 68 | struct net *net_ns; | ||
| 69 | const struct cred *cred; | ||
| 70 | char *source; | ||
| 71 | char *subtype; | ||
| 72 | void *security; | ||
| 73 | void *s_fs_info; | ||
| 74 | unsigned int sb_flags; | ||
| 75 | unsigned int sb_flags_mask; | ||
| 76 | enum fs_context_purpose purpose:8; | ||
| 77 | bool sloppy:1; | ||
| 78 | bool silent:1; | ||
| 79 | ... | ||
| 80 | }; | ||
| 81 | |||
| 82 | The fs_context fields are as follows: | ||
| 83 | |||
| 84 | (*) const struct fs_context_operations *ops | ||
| 85 | |||
| 86 | These are operations that can be done on a filesystem context (see | ||
| 87 | below). This must be set by the ->init_fs_context() file_system_type | ||
| 88 | operation. | ||
| 89 | |||
| 90 | (*) struct file_system_type *fs_type | ||
| 91 | |||
| 92 | A pointer to the file_system_type of the filesystem that is being | ||
| 93 | constructed or reconfigured. This retains a reference on the type owner. | ||
| 94 | |||
| 95 | (*) void *fs_private | ||
| 96 | |||
| 97 | A pointer to the file system's private data. This is where the filesystem | ||
| 98 | will need to store any options it parses. | ||
| 99 | |||
| 100 | (*) struct dentry *root | ||
| 101 | |||
| 102 | A pointer to the root of the mountable tree (and indirectly, the | ||
| 103 | superblock thereof). This is filled in by the ->get_tree() op. If this | ||
| 104 | is set, an active reference on root->d_sb must also be held. | ||
| 105 | |||
| 106 | (*) struct user_namespace *user_ns | ||
| 107 | (*) struct net *net_ns | ||
| 108 | |||
| 109 | These are a subset of the namespaces in use by the invoking process. They | ||
| 110 | retain references on each namespace. The subscribed namespaces may be | ||
| 111 | replaced by the filesystem to reflect other sources, such as the parent | ||
| 112 | mount superblock on an automount. | ||
| 113 | |||
| 114 | (*) const struct cred *cred | ||
| 115 | |||
| 116 | The mounter's credentials. This retains a reference on the credentials. | ||
| 117 | |||
| 118 | (*) char *source | ||
| 119 | |||
| 120 | This specifies the source. It may be a block device (e.g. /dev/sda1) or | ||
| 121 | something more exotic, such as the "host:/path" that NFS desires. | ||
| 122 | |||
| 123 | (*) char *subtype | ||
| 124 | |||
| 125 | This is a string to be added to the type displayed in /proc/mounts to | ||
| 126 | qualify it (used by FUSE). This is available for the filesystem to set if | ||
| 127 | desired. | ||
| 128 | |||
| 129 | (*) void *security | ||
| 130 | |||
| 131 | A place for the LSMs to hang their security data for the superblock. The | ||
| 132 | relevant security operations are described below. | ||
| 133 | |||
| 134 | (*) void *s_fs_info | ||
| 135 | |||
| 136 | The proposed s_fs_info for a new superblock, set in the superblock by | ||
| 137 | sget_fc(). This can be used to distinguish superblocks. | ||
| 138 | |||
| 139 | (*) unsigned int sb_flags | ||
| 140 | (*) unsigned int sb_flags_mask | ||
| 141 | |||
| 142 | Which SB_* flag bits are to be set/cleared in super_block::s_flags. | ||
| 143 | |||
| 144 | (*) enum fs_context_purpose purpose | ||
| 145 | |||
| 146 | This indicates the purpose for which the context is intended. The | ||
| 147 | available values are: | ||
| 148 | |||
| 149 | FS_CONTEXT_FOR_MOUNT -- New superblock for explicit mount | ||
| 150 | FS_CONTEXT_FOR_SUBMOUNT -- New automatic submount of extant mount | ||
| 151 | FS_CONTEXT_FOR_RECONFIGURE -- Change an existing mount | ||
| 152 | |||
| 153 | (*) bool sloppy | ||
| 154 | (*) bool silent | ||
| 155 | |||
| 156 | These are set if the sloppy or silent mount options are given. | ||
| 157 | |||
| 158 | [NOTE] sloppy is probably unnecessary when userspace passes over one | ||
| 159 | option at a time since the error can just be ignored if userspace deems it | ||
| 160 | to be unimportant. | ||
| 161 | |||
| 162 | [NOTE] silent is probably redundant with sb_flags & SB_SILENT. | ||
| 163 | |||
| 164 | The mount context is created by calling vfs_new_fs_context() or | ||
| 165 | vfs_dup_fs_context() and is destroyed with put_fs_context(). Note that the | ||
| 166 | structure is not refcounted. | ||
| 167 | |||
| 168 | VFS, security and filesystem mount options are set individually with | ||
| 169 | vfs_parse_fs_param(). Options provided by the old mount(2) system call as | ||
| 170 | a page of data can be parsed with generic_parse_monolithic(). | ||
| 171 | |||
| 172 | When mounting, the filesystem is allowed to take data from any of the pointers | ||
| 173 | and attach it to the superblock (or whatever), provided it clears the pointer | ||
| 174 | in the mount context. | ||
| 175 | |||
| 176 | The filesystem is also allowed to allocate resources and pin them with the | ||
| 177 | mount context. For instance, NFS might pin the appropriate protocol version | ||
| 178 | module. | ||
| 179 | |||
| 180 | |||
| 181 | ================================= | ||
| 182 | THE FILESYSTEM CONTEXT OPERATIONS | ||
| 183 | ================================= | ||
| 184 | |||
| 185 | The filesystem context points to a table of operations: | ||
| 186 | |||
| 187 | struct fs_context_operations { | ||
| 188 | void (*free)(struct fs_context *fc); | ||
| 189 | int (*dup)(struct fs_context *fc, struct fs_context *src_fc); | ||
| 190 | int (*parse_param)(struct fs_context *fc, | ||
| 191 | struct fs_parameter *param); | ||
| 192 | int (*parse_monolithic)(struct fs_context *fc, void *data); | ||
| 193 | int (*get_tree)(struct fs_context *fc); | ||
| 194 | int (*reconfigure)(struct fs_context *fc); | ||
| 195 | }; | ||
| 196 | |||
| 197 | These operations are invoked by the various stages of the mount procedure to | ||
| 198 | manage the filesystem context. They are as follows: | ||
| 199 | |||
| 200 | (*) void (*free)(struct fs_context *fc); | ||
| 201 | |||
| 202 | Called to clean up the filesystem-specific part of the filesystem context | ||
| 203 | when the context is destroyed. It should be aware that parts of the | ||
| 204 | context may have been removed and NULL'd out by ->get_tree(). | ||
| 205 | |||
| 206 | (*) int (*dup)(struct fs_context *fc, struct fs_context *src_fc); | ||
| 207 | |||
| 208 | Called when a filesystem context has been duplicated to duplicate the | ||
| 209 | filesystem-private data. An error may be returned to indicate failure to | ||
| 210 | do this. | ||
| 211 | |||
| 212 | [!] Note that even if this fails, put_fs_context() will be called | ||
| 213 | immediately thereafter, so ->dup() *must* make the | ||
| 214 | filesystem-private data safe for ->free(). | ||
| 215 | |||
| 216 | (*) int (*parse_param)(struct fs_context *fc, | ||
| 217 | struct fs_parameter *param); | ||
| 218 | |||
| 219 | Called when a parameter is being added to the filesystem context. param | ||
| 220 | points to the key name and maybe a value object. VFS-specific options | ||
| 221 | will have been weeded out and fc->sb_flags updated in the context. | ||
| 222 | Security options will also have been weeded out and fc->security updated. | ||
| 223 | |||
| 224 | The parameter can be parsed with fs_parse() and fs_lookup_param(). Note | ||
| 225 | that the source(s) are presented as parameters named "source". | ||
| 226 | |||
| 227 | If successful, 0 should be returned or a negative error code otherwise. | ||
| 228 | |||
| 229 | (*) int (*parse_monolithic)(struct fs_context *fc, void *data); | ||
| 230 | |||
| 231 | Called when the mount(2) system call is invoked to pass the entire data | ||
| 232 | page in one go. If this is expected to be just a list of "key[=val]" | ||
| 233 | items separated by commas, then this may be set to NULL. | ||
| 234 | |||
| 235 | The return value is as for ->parse_param(). | ||
| 236 | |||
| 237 | If the filesystem (e.g. NFS) needs to examine the data first and then | ||
| 238 | finds it's the standard key-val list then it may pass it off to | ||
| 239 | generic_parse_monolithic(). | ||
| 240 | |||
| 241 | (*) int (*get_tree)(struct fs_context *fc); | ||
| 242 | |||
| 243 | Called to get or create the mountable root and superblock, using the | ||
| 244 | information stored in the filesystem context (reconfiguration goes via a | ||
| 245 | different vector). It may detach any resources it desires from the | ||
| 246 | filesystem context and transfer them to the superblock it creates. | ||
| 247 | |||
| 248 | On success it should set fc->root to the mountable root and return 0. In | ||
| 249 | the case of an error, it should return a negative error code. | ||
| 250 | |||
| 251 | The phase on a userspace-driven context will be set to only allow this to | ||
| 252 | be called once on any particular context. | ||
| 253 | |||
| 254 | (*) int (*reconfigure)(struct fs_context *fc); | ||
| 255 | |||
| 256 | Called to effect reconfiguration of a superblock using information stored | ||
| 257 | in the filesystem context. It may detach any resources it desires from | ||
| 258 | the filesystem context and transfer them to the superblock. The | ||
| 259 | superblock can be found from fc->root->d_sb. | ||
| 260 | |||
| 261 | On success it should return 0. In the case of an error, it should return | ||
| 262 | a negative error code. | ||
| 263 | |||
| 264 | [NOTE] reconfigure is intended as a replacement for remount_fs. | ||
| 265 | |||
| 266 | |||
| 267 | =========================== | ||
| 268 | FILESYSTEM CONTEXT SECURITY | ||
| 269 | =========================== | ||
| 270 | |||
| 271 | The filesystem context contains a security pointer that the LSMs can use for | ||
| 272 | building up a security context for the superblock to be mounted. There are a | ||
| 273 | number of operations used by the new mount code for this purpose: | ||
| 274 | |||
| 275 | (*) int security_fs_context_alloc(struct fs_context *fc, | ||
| 276 | struct dentry *reference); | ||
| 277 | |||
| 278 | Called to initialise fc->security (which is preset to NULL) and allocate | ||
| 279 | any resources needed. It should return 0 on success or a negative error | ||
| 280 | code on failure. | ||
| 281 | |||
| 282 | reference will be non-NULL if the context is being created for superblock | ||
| 283 | reconfiguration (FS_CONTEXT_FOR_RECONFIGURE) in which case it indicates | ||
| 284 | the root dentry of the superblock to be reconfigured. It will also be | ||
| 285 | non-NULL in the case of a submount (FS_CONTEXT_FOR_SUBMOUNT) in which case | ||
| 286 | it indicates the automount point. | ||
| 287 | |||
| 288 | (*) int security_fs_context_dup(struct fs_context *fc, | ||
| 289 | struct fs_context *src_fc); | ||
| 290 | |||
| 291 | Called to initialise fc->security (which is preset to NULL) and allocate | ||
| 292 | any resources needed. The original filesystem context is pointed to by | ||
| 293 | src_fc and may be used for reference. It should return 0 on success or a | ||
| 294 | negative error code on failure. | ||
| 295 | |||
| 296 | (*) void security_fs_context_free(struct fs_context *fc); | ||
| 297 | |||
| 298 | Called to clean up anything attached to fc->security. Note that the | ||
| 299 | contents may have been transferred to a superblock and the pointer cleared | ||
| 300 | during get_tree. | ||
| 301 | |||
| 302 | (*) int security_fs_context_parse_param(struct fs_context *fc, | ||
| 303 | struct fs_parameter *param); | ||
| 304 | |||
| 305 | Called for each mount parameter, including the source. The arguments are | ||
| 306 | as for the ->parse_param() method. It should return 0 to indicate that | ||
| 307 | the parameter should be passed on to the filesystem, 1 to indicate that | ||
| 308 | the parameter should be discarded or an error to indicate that the | ||
| 309 | parameter should be rejected. | ||
| 310 | |||
| 311 | The value pointed to by param may be modified (if a string) or stolen | ||
| 312 | (provided the value pointer is NULL'd out). If it is stolen, 1 must be | ||
| 313 | returned to prevent it being passed to the filesystem. | ||
| 314 | |||
| 315 | (*) int security_fs_context_validate(struct fs_context *fc); | ||
| 316 | |||
| 317 | Called after all the options have been parsed to validate the collection | ||
| 318 | as a whole and to do any necessary allocation so that | ||
| 319 | security_sb_get_tree() and security_sb_reconfigure() are less likely to | ||
| 320 | fail. It should return 0 or a negative error code. | ||
| 321 | |||
| 322 | In the case of reconfiguration, the target superblock will be accessible | ||
| 323 | via fc->root. | ||
| 324 | |||
| 325 | (*) int security_sb_get_tree(struct fs_context *fc); | ||
| 326 | |||
| 327 | Called during the mount procedure to verify that the specified superblock | ||
| 328 | is allowed to be mounted and to transfer the security data there. It | ||
| 329 | should return 0 or a negative error code. | ||
| 330 | |||
| 331 | (*) void security_sb_reconfigure(struct fs_context *fc); | ||
| 332 | |||
| 333 | Called to apply any reconfiguration to an LSM's context. It must not | ||
| 334 | fail. Error checking and resource allocation must be done in advance by | ||
| 335 | the parameter parsing and validation hooks. | ||
| 336 | |||
| 337 | (*) int security_sb_mountpoint(struct fs_context *fc, struct path *mountpoint, | ||
| 338 | unsigned int mnt_flags); | ||
| 339 | |||
| 340 | Called during the mount procedure to verify that the root dentry attached | ||
| 341 | to the context is permitted to be attached to the specified mountpoint. | ||
| 342 | It should return 0 on success or a negative error code on failure. | ||
| 343 | |||
| 344 | |||
| 345 | ================================= | ||
| 346 | VFS FILESYSTEM CONTEXT OPERATIONS | ||
| 347 | ================================= | ||
| 348 | |||
| 349 | There are four operations for creating a filesystem context and | ||
| 350 | one for destroying a context: | ||
| 351 | |||
| 352 | (*) struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type, | ||
| 353 | struct dentry *reference, | ||
| 354 | unsigned int sb_flags, | ||
| 355 | unsigned int sb_flags_mask, | ||
| 356 | enum fs_context_purpose purpose); | ||
| 357 | |||
| 358 | Create a filesystem context for a given filesystem type and purpose. This | ||
| 359 | allocates the filesystem context, sets the superblock flags, initialises | ||
| 360 | the security and calls fs_type->init_fs_context() to initialise the | ||
| 361 | filesystem private data. | ||
| 362 | |||
| 363 | reference can be NULL or it may indicate the root dentry of a superblock | ||
| 364 | that is going to be reconfigured (FS_CONTEXT_FOR_RECONFIGURE) or | ||
| 365 | the automount point that triggered a submount (FS_CONTEXT_FOR_SUBMOUNT). | ||
| 366 | This is provided as a source of namespace information. | ||
| 367 | |||
| 368 | (*) struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc); | ||
| 369 | |||
| 370 | Duplicate a filesystem context, copying any options noted and duplicating | ||
| 371 | or additionally referencing any resources held therein. This is available | ||
| 372 | for use where a filesystem has to get a mount within a mount, such as NFS4 | ||
| 373 | does by internally mounting the root of the target server and then doing a | ||
| 374 | private pathwalk to the target directory. | ||
| 375 | |||
| 376 | The purpose in the new context is inherited from the old one. | ||
| 377 | |||
| 378 | (*) void put_fs_context(struct fs_context *fc); | ||
| 379 | |||
| 380 | Destroy a filesystem context, releasing any resources it holds. This | ||
| 381 | calls the ->free() operation. This is intended to be called by anyone who | ||
| 382 | created a filesystem context. | ||
| 383 | |||
| 384 | [!] filesystem contexts are not refcounted, so this causes unconditional | ||
| 385 | destruction. | ||
| 386 | |||
| 387 | In all the above operations, apart from the put op, the return is a mount | ||
| 388 | context pointer or a negative error code. | ||
| 389 | |||
| 390 | For the remaining operations, if an error occurs, a negative error code will be | ||
| 391 | returned. | ||
| 392 | |||
| 393 | (*) int vfs_get_tree(struct fs_context *fc); | ||
| 394 | |||
| 395 | Get or create the mountable root and superblock, using the parameters in | ||
| 396 | the filesystem context to select/configure the superblock. This invokes | ||
| 397 | the ->validate() op and then the ->get_tree() op. | ||
| 398 | |||
| 399 | [NOTE] ->validate() could perhaps be rolled into ->get_tree() and | ||
| 400 | ->reconfigure(). | ||
| 401 | |||
| 402 | (*) struct vfsmount *vfs_create_mount(struct fs_context *fc); | ||
| 403 | |||
| 404 | Create a mount given the parameters in the specified filesystem context. | ||
| 405 | Note that this does not attach the mount to anything. | ||
| 406 | |||
| 407 | (*) int vfs_parse_fs_param(struct fs_context *fc, | ||
| 408 | struct fs_parameter *param); | ||
| 409 | |||
| 410 | Supply a single mount parameter to the filesystem context. This includes | ||
| 411 | the specification of the source/device which is specified as the "source" | ||
| 412 | parameter (which may be specified multiple times if the filesystem | ||
| 413 | supports that). | ||
| 414 | |||
| 415 | param specifies the parameter key name and the value. The parameter is | ||
| 416 | first checked to see if it corresponds to a standard mount flag (in which | ||
| 417 | case it is used to set an SB_xxx flag and consumed) or a security option | ||
| 418 | (in which case the LSM consumes it) before it is passed on to the | ||
| 419 | filesystem. | ||
| 420 | |||
| 421 | The parameter value is typed and can be one of: | ||
| 422 | |||
| 423 | fs_value_is_flag, Parameter not given a value. | ||
| 424 | fs_value_is_string, Value is a string | ||
| 425 | fs_value_is_blob, Value is a binary blob | ||
| 426 | fs_value_is_filename, Value is a filename* + dirfd | ||
| 427 | fs_value_is_filename_empty, Value is a filename* + dirfd + AT_EMPTY_PATH | ||
| 428 | fs_value_is_file, Value is an open file (file*) | ||
| 429 | |||
| 430 | If there is a value, that value is stored in a union in the struct in one | ||
| 431 | of param->{string,blob,name,file}. Note that the function may steal and | ||
| 432 | clear the pointer, but then becomes responsible for disposing of the | ||
| 433 | object. | ||
| 434 | |||
| 435 | (*) int vfs_parse_fs_string(struct fs_context *fc, char *key, | ||
| 436 | const char *value, size_t v_size); | ||
| 437 | |||
| 438 | A wrapper around vfs_parse_fs_param() that just passes a constant string. | ||
| 439 | |||
| 440 | (*) int generic_parse_monolithic(struct fs_context *fc, void *data); | ||
| 441 | |||
| 442 | Parse a sys_mount() data page, assuming the form to be a text list | ||
| 443 | consisting of key[=val] options separated by commas. Each item in the | ||
| 444 | list is passed to vfs_parse_fs_string(). This is the default when the | ||
| 445 | ->parse_monolithic() operation is NULL. | ||
| 446 | |||
| 447 | |||
| 448 | ===================== | ||
| 449 | PARAMETER DESCRIPTION | ||
| 450 | ===================== | ||
| 451 | |||
| 452 | Parameters are described using structures defined in linux/fs_parser.h. | ||
| 453 | There's a core description struct that links everything together: | ||
| 454 | |||
| 455 | struct fs_parameter_description { | ||
| 456 | const char name[16]; | ||
| 457 | u8 nr_params; | ||
| 458 | u8 nr_alt_keys; | ||
| 459 | u8 nr_enums; | ||
| 460 | bool ignore_unknown; | ||
| 461 | bool no_source; | ||
| 462 | const char *const *keys; | ||
| 463 | const struct constant_table *alt_keys; | ||
| 464 | const struct fs_parameter_spec *specs; | ||
| 465 | const struct fs_parameter_enum *enums; | ||
| 466 | }; | ||
| 467 | |||
| 468 | For example: | ||
| 469 | |||
| 470 | enum afs_param { | ||
| 471 | Opt_autocell, | ||
| 472 | Opt_bar, | ||
| 473 | Opt_dyn, | ||
| 474 | Opt_foo, | ||
| 475 | Opt_source, | ||
| 476 | nr__afs_params | ||
| 477 | }; | ||
| 478 | |||
| 479 | static const struct fs_parameter_description afs_fs_parameters = { | ||
| 480 | .name = "kAFS", | ||
| 481 | .nr_params = nr__afs_params, | ||
| 482 | .nr_alt_keys = ARRAY_SIZE(afs_param_alt_keys), | ||
| 483 | .nr_enums = ARRAY_SIZE(afs_param_enums), | ||
| 484 | .keys = afs_param_keys, | ||
| 485 | .alt_keys = afs_param_alt_keys, | ||
| 486 | .specs = afs_param_specs, | ||
| 487 | .enums = afs_param_enums, | ||
| 488 | }; | ||
| 489 | |||
| 490 | The members are as follows: | ||
| 491 | |||
| 492 | (1) const char name[16]; | ||
| 493 | |||
| 494 | The name to be used in error messages generated by the parse helper | ||
| 495 | functions. | ||
| 496 | |||
| 497 | (2) u8 nr_params; | ||
| 498 | |||
| 499 | The number of discrete parameter identifiers. This indicates the number | ||
| 500 | of elements in the ->specs[] array and also limits the values that may be | ||
| 501 | used in the values that the ->keys[] array maps to. | ||
| 502 | |||
| 503 | It is expected that, for example, two parameters that are related, say | ||
| 504 | "acl" and "noacl" will have the same ID, but will be flagged to indicate | ||
| 505 | that one is the inverse of the other. The value can then be picked out | ||
| 506 | from the parse result. | ||
| 507 | |||
| 508 | (3) const struct fs_parameter_spec *specs; | ||
| 509 | |||
| 510 | Table of parameter specifications, where the entries are of type: | ||
| 511 | |||
| 512 | struct fs_parameter_spec { | ||
| 513 | enum fs_parameter_type type:8; | ||
| 514 | u8 flags; | ||
| 515 | }; | ||
| 516 | |||
| 517 | and the parameter identifier is the index to the array. 'type' indicates | ||
| 518 | the desired value type and must be one of: | ||
| 519 | |||
| 520 | TYPE NAME EXPECTED VALUE RESULT IN | ||
| 521 | ======================= ======================= ===================== | ||
| 522 | fs_param_is_flag No value n/a | ||
| 523 | fs_param_is_bool Boolean value result->boolean | ||
| 524 | fs_param_is_u32 32-bit unsigned int result->uint_32 | ||
| 525 | fs_param_is_u32_octal 32-bit octal int result->uint_32 | ||
| 526 | fs_param_is_u32_hex 32-bit hex int result->uint_32 | ||
| 527 | fs_param_is_s32 32-bit signed int result->int_32 | ||
| 528 | fs_param_is_enum Enum value name result->uint_32 | ||
| 529 | fs_param_is_string Arbitrary string param->string | ||
| 530 | fs_param_is_blob Binary blob param->blob | ||
| 531 | fs_param_is_blockdev Blockdev path * Needs lookup | ||
| 532 | fs_param_is_path Path * Needs lookup | ||
| 533 | fs_param_is_fd File descriptor param->file | ||
| 534 | |||
| 535 | And each parameter can be qualified with 'flags': | ||
| 536 | |||
| 537 | fs_param_v_optional The value is optional | ||
| 538 | fs_param_neg_with_no If key name is prefixed with "no", it is false | ||
| 539 | fs_param_neg_with_empty If value is "", it is false | ||
| 540 | fs_param_deprecated The parameter is deprecated. | ||
| 541 | |||
| 542 | For example: | ||
| 543 | |||
| 544 | static const struct fs_parameter_spec afs_param_specs[nr__afs_params] = { | ||
| 545 | [Opt_autocell] = { fs_param_is_flag }, | ||
| 546 | [Opt_bar] = { fs_param_is_enum }, | ||
| 547 | [Opt_dyn] = { fs_param_is_flag }, | ||
| 548 | [Opt_foo] = { fs_param_is_bool, fs_param_neg_with_no }, | ||
| 549 | [Opt_source] = { fs_param_is_string }, | ||
| 550 | }; | ||
| 551 | |||
| 552 | Note that if the value is of fs_param_is_bool type, fs_parse() will try | ||
| 553 | to match any string value against "0", "1", "no", "yes", "false", "true". | ||
| 554 | |||
| 555 | [!] NOTE that the table must be sorted according to primary key name so | ||
| 556 | that ->keys[] is also sorted. | ||
| 557 | |||
| 558 | (4) const char *const *keys; | ||
| 559 | |||
| 560 | Table of primary key names for the parameters. There must be one entry | ||
| 561 | per defined parameter. The table is optional if ->nr_params is 0. The | ||
| 562 | table is just an array of names e.g.: | ||
| 563 | |||
| 564 | static const char *const afs_param_keys[nr__afs_params] = { | ||
| 565 | [Opt_autocell] = "autocell", | ||
| 566 | [Opt_bar] = "bar", | ||
| 567 | [Opt_dyn] = "dyn", | ||
| 568 | [Opt_foo] = "foo", | ||
| 569 | [Opt_source] = "source", | ||
| 570 | }; | ||
| 571 | |||
| 572 | [!] NOTE that the table must be sorted such that the table can be searched | ||
| 573 | with bsearch() using strcmp(). This means that the Opt_* values must | ||
| 574 | correspond to the entries in this table. | ||
| 575 | |||
| 576 | (5) const struct constant_table *alt_keys; | ||
| 577 | u8 nr_alt_keys; | ||
| 578 | |||
| 579 | Table of additional key names and their mappings to parameter ID plus the | ||
| 580 | number of elements in the table. This is optional. The table is just an | ||
| 581 | array of { name, integer } pairs, e.g.: | ||
| 582 | |||
| 583 | static const struct constant_table afs_param_alt_keys[] = { | ||
| 584 | { "baz", Opt_bar }, | ||
| 585 | { "dynamic", Opt_dyn }, | ||
| 586 | }; | ||
| 587 | |||
| 588 | [!] NOTE that the table must be sorted such that strcmp() can be used with | ||
| 589 | bsearch() to search the entries. | ||
| 590 | |||
| 591 | The parameter ID can also be fs_param_key_removed to indicate that a | ||
| 592 | deprecated parameter has been removed and that an error will be given. | ||
| 593 | This differs from fs_param_deprecated where the parameter may still have | ||
| 594 | an effect. | ||
| 595 | |||
| 596 | Further, the behaviour of the parameter may differ when an alternate name | ||
| 597 | is used (for instance with NFS, "v3", "v4.2", etc. are alternate names). | ||
| 598 | |||
| 599 | (6) const struct fs_parameter_enum *enums; | ||
| 600 | u8 nr_enums; | ||
| 601 | |||
| 602 | Table of enum value names to integer mappings and the number of elements | ||
| 603 | stored therein. This is of type: | ||
| 604 | |||
| 605 | struct fs_parameter_enum { | ||
| 606 | u8 param_id; | ||
| 607 | char name[14]; | ||
| 608 | u8 value; | ||
| 609 | }; | ||
| 610 | |||
| 611 | Where the array is an unsorted list of { parameter ID, name }-keyed | ||
| 612 | elements that indicate the value to map to, e.g.: | ||
| 613 | |||
| 614 | static const struct fs_parameter_enum afs_param_enums[] = { | ||
| 615 | { Opt_bar, "x", 1}, | ||
| 616 | { Opt_bar, "y", 23}, | ||
| 617 | { Opt_bar, "z", 42}, | ||
| 618 | }; | ||
| 619 | |||
| 620 | If a parameter of type fs_param_is_enum is encountered, fs_parse() will | ||
| 621 | try to look the value up in the enum table and the result will be stored | ||
| 622 | in the parse result. | ||
| 623 | |||
| 624 | (7) bool no_source; | ||
| 625 | |||
| 626 | If this is set, fs_parse() will ignore any "source" parameter and not | ||
| 627 | pass it to the filesystem. | ||
| 628 | |||
| 629 | The parser should be pointed to by the parser pointer in the file_system_type | ||
| 630 | struct as this will provide validation on registration (if | ||
| 631 | CONFIG_VALIDATE_FS_PARSER=y) and will allow the description to be queried from | ||
| 632 | userspace using the fsinfo() syscall. | ||
| 633 | |||
| 634 | |||
| 635 | ========================== | ||
| 636 | PARAMETER HELPER FUNCTIONS | ||
| 637 | ========================== | ||
| 638 | |||
| 639 | A number of helper functions are provided to help a filesystem or an LSM | ||
| 640 | process the parameters it is given. | ||
| 641 | |||
| 642 | (*) int lookup_constant(const struct constant_table tbl[], | ||
| 643 | const char *name, int not_found); | ||
| 644 | |||
| 645 | Look up a constant by name in a table of name -> integer mappings. The | ||
| 646 | table is an array of elements of the following type: | ||
| 647 | |||
| 648 | struct constant_table { | ||
| 649 | const char *name; | ||
| 650 | int value; | ||
| 651 | }; | ||
| 652 | |||
| 653 | and it must be sorted such that it can be searched with bsearch() using | ||
| 654 | strcmp(). If a match is found, the corresponding value is returned. If a | ||
| 655 | match isn't found, the not_found value is returned instead. | ||
| 656 | |||
| 657 | (*) bool validate_constant_table(const struct constant_table *tbl, | ||
| 658 | size_t tbl_size, | ||
| 659 | int low, int high, int special); | ||
| 660 | |||
| 661 | Validate a constant table. Checks that all the elements are appropriately | ||
| 662 | ordered, that there are no duplicates and that the values are between low | ||
| 663 | and high inclusive, though provision is made for one allowable special | ||
| 664 | value outside of that range. If no special value is required, special | ||
| 665 | should just be set to lie inside the low-to-high range. | ||
| 666 | |||
| 667 | If all is good, true is returned. If the table is invalid, errors are | ||
| 668 | logged to dmesg, the stack is dumped and false is returned. | ||
| 669 | |||
| 670 | (*) int fs_parse(struct fs_context *fc, | ||
| 671 | const struct fs_parameter_description *parser, | ||
| 672 | struct fs_parameter *param, | ||
| 673 | struct fs_parse_result *result); | ||
| 674 | |||
| 675 | This is the main interpreter of parameters. It uses the parameter | ||
| 676 | description (parser) to look up the name of the parameter to use and to | ||
| 677 | convert that to a parameter ID (stored in result->key). | ||
| 678 | |||
| 679 | If successful, and if the parameter type indicates the result is a | ||
| 680 | boolean, integer or enum type, the value is converted by this function and | ||
| 681 | the result stored in result->{boolean,int_32,uint_32}. | ||
| 682 | |||
| 683 | If a match isn't initially made, the key is prefixed with "no" and no | ||
| 684 | value is present, then an attempt will be made to look up the key with the | ||
| 685 | prefix removed. If this matches a parameter for which the type has flag | ||
| 686 | fs_param_neg_with_no set, then a match will be made and the value will be | ||
| 687 | set to false/0/NULL. | ||
| 688 | |||
| 689 | If the parameter is successfully matched and, optionally, parsed | ||
| 690 | correctly, 1 is returned. If the parameter isn't matched and | ||
| 691 | parser->ignore_unknown is set, then 0 is returned. Otherwise -EINVAL is | ||
| 692 | returned. | ||
| 693 | |||
| 694 | (*) bool fs_validate_description(const struct fs_parameter_description *desc); | ||
| 695 | |||
| 696 | This validates the parameter description. It returns true if the | ||
| 697 | description is good and false if it is not. | ||
| 698 | |||
| 699 | (*) int fs_lookup_param(struct fs_context *fc, | ||
| 700 | struct fs_parameter *value, | ||
| 701 | bool want_bdev, | ||
| 702 | struct path *_path); | ||
| 703 | |||
| 704 | This takes a parameter that carries a string or filename type and attempts | ||
| 705 | to do a path lookup on it. If the parameter expects a blockdev, a check | ||
| 706 | is made that the inode actually represents one. | ||
| 707 | |||
| 708 | Returns 0 if successful and *_path will be set; returns a negative error | ||
| 709 | code if not. | ||
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 822b7db634ee..e49b77283924 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <linux/sched.h> | 5 | #include <linux/sched.h> |
| 6 | #include <linux/kernfs.h> | 6 | #include <linux/kernfs.h> |
| 7 | #include <linux/fs_context.h> | ||
| 7 | #include <linux/jump_label.h> | 8 | #include <linux/jump_label.h> |
| 8 | 9 | ||
| 9 | #define MSR_IA32_L3_QOS_CFG 0xc81 | 10 | #define MSR_IA32_L3_QOS_CFG 0xc81 |
| @@ -40,6 +41,21 @@ | |||
| 40 | #define RMID_VAL_ERROR BIT_ULL(63) | 41 | #define RMID_VAL_ERROR BIT_ULL(63) |
| 41 | #define RMID_VAL_UNAVAIL BIT_ULL(62) | 42 | #define RMID_VAL_UNAVAIL BIT_ULL(62) |
| 42 | 43 | ||
| 44 | |||
| 45 | struct rdt_fs_context { | ||
| 46 | struct kernfs_fs_context kfc; | ||
| 47 | bool enable_cdpl2; | ||
| 48 | bool enable_cdpl3; | ||
| 49 | bool enable_mba_mbps; | ||
| 50 | }; | ||
| 51 | |||
| 52 | static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) | ||
| 53 | { | ||
| 54 | struct kernfs_fs_context *kfc = fc->fs_private; | ||
| 55 | |||
| 56 | return container_of(kfc, struct rdt_fs_context, kfc); | ||
| 57 | } | ||
| 58 | |||
| 43 | DECLARE_STATIC_KEY_FALSE(rdt_enable_key); | 59 | DECLARE_STATIC_KEY_FALSE(rdt_enable_key); |
| 44 | 60 | ||
| 45 | /** | 61 | /** |
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 8388adf241b2..399601eda8e4 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
| 25 | #include <linux/debugfs.h> | 25 | #include <linux/debugfs.h> |
| 26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
| 27 | #include <linux/fs_parser.h> | ||
| 27 | #include <linux/sysfs.h> | 28 | #include <linux/sysfs.h> |
| 28 | #include <linux/kernfs.h> | 29 | #include <linux/kernfs.h> |
| 29 | #include <linux/seq_buf.h> | 30 | #include <linux/seq_buf.h> |
| @@ -32,6 +33,7 @@ | |||
| 32 | #include <linux/sched/task.h> | 33 | #include <linux/sched/task.h> |
| 33 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
| 34 | #include <linux/task_work.h> | 35 | #include <linux/task_work.h> |
| 36 | #include <linux/user_namespace.h> | ||
| 35 | 37 | ||
| 36 | #include <uapi/linux/magic.h> | 38 | #include <uapi/linux/magic.h> |
| 37 | 39 | ||
| @@ -1858,46 +1860,6 @@ static void cdp_disable_all(void) | |||
| 1858 | cdpl2_disable(); | 1860 | cdpl2_disable(); |
| 1859 | } | 1861 | } |
| 1860 | 1862 | ||
| 1861 | static int parse_rdtgroupfs_options(char *data) | ||
| 1862 | { | ||
| 1863 | char *token, *o = data; | ||
| 1864 | int ret = 0; | ||
| 1865 | |||
| 1866 | while ((token = strsep(&o, ",")) != NULL) { | ||
| 1867 | if (!*token) { | ||
| 1868 | ret = -EINVAL; | ||
| 1869 | goto out; | ||
| 1870 | } | ||
| 1871 | |||
| 1872 | if (!strcmp(token, "cdp")) { | ||
| 1873 | ret = cdpl3_enable(); | ||
| 1874 | if (ret) | ||
| 1875 | goto out; | ||
| 1876 | } else if (!strcmp(token, "cdpl2")) { | ||
| 1877 | ret = cdpl2_enable(); | ||
| 1878 | if (ret) | ||
| 1879 | goto out; | ||
| 1880 | } else if (!strcmp(token, "mba_MBps")) { | ||
| 1881 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
| 1882 | ret = set_mba_sc(true); | ||
| 1883 | else | ||
| 1884 | ret = -EINVAL; | ||
| 1885 | if (ret) | ||
| 1886 | goto out; | ||
| 1887 | } else { | ||
| 1888 | ret = -EINVAL; | ||
| 1889 | goto out; | ||
| 1890 | } | ||
| 1891 | } | ||
| 1892 | |||
| 1893 | return 0; | ||
| 1894 | |||
| 1895 | out: | ||
| 1896 | pr_err("Invalid mount option \"%s\"\n", token); | ||
| 1897 | |||
| 1898 | return ret; | ||
| 1899 | } | ||
| 1900 | |||
| 1901 | /* | 1863 | /* |
| 1902 | * We don't allow rdtgroup directories to be created anywhere | 1864 | * We don't allow rdtgroup directories to be created anywhere |
| 1903 | * except the root directory. Thus when looking for the rdtgroup | 1865 | * except the root directory. Thus when looking for the rdtgroup |
| @@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, | |||
| 1969 | struct rdtgroup *prgrp, | 1931 | struct rdtgroup *prgrp, |
| 1970 | struct kernfs_node **mon_data_kn); | 1932 | struct kernfs_node **mon_data_kn); |
| 1971 | 1933 | ||
| 1972 | static struct dentry *rdt_mount(struct file_system_type *fs_type, | 1934 | static int rdt_enable_ctx(struct rdt_fs_context *ctx) |
| 1973 | int flags, const char *unused_dev_name, | 1935 | { |
| 1974 | void *data) | 1936 | int ret = 0; |
| 1937 | |||
| 1938 | if (ctx->enable_cdpl2) | ||
| 1939 | ret = cdpl2_enable(); | ||
| 1940 | |||
| 1941 | if (!ret && ctx->enable_cdpl3) | ||
| 1942 | ret = cdpl3_enable(); | ||
| 1943 | |||
| 1944 | if (!ret && ctx->enable_mba_mbps) | ||
| 1945 | ret = set_mba_sc(true); | ||
| 1946 | |||
| 1947 | return ret; | ||
| 1948 | } | ||
| 1949 | |||
| 1950 | static int rdt_get_tree(struct fs_context *fc) | ||
| 1975 | { | 1951 | { |
| 1952 | struct rdt_fs_context *ctx = rdt_fc2context(fc); | ||
| 1976 | struct rdt_domain *dom; | 1953 | struct rdt_domain *dom; |
| 1977 | struct rdt_resource *r; | 1954 | struct rdt_resource *r; |
| 1978 | struct dentry *dentry; | ||
| 1979 | int ret; | 1955 | int ret; |
| 1980 | 1956 | ||
| 1981 | cpus_read_lock(); | 1957 | cpus_read_lock(); |
| @@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, | |||
| 1984 | * resctrl file system can only be mounted once. | 1960 | * resctrl file system can only be mounted once. |
| 1985 | */ | 1961 | */ |
| 1986 | if (static_branch_unlikely(&rdt_enable_key)) { | 1962 | if (static_branch_unlikely(&rdt_enable_key)) { |
| 1987 | dentry = ERR_PTR(-EBUSY); | 1963 | ret = -EBUSY; |
| 1988 | goto out; | 1964 | goto out; |
| 1989 | } | 1965 | } |
| 1990 | 1966 | ||
| 1991 | ret = parse_rdtgroupfs_options(data); | 1967 | ret = rdt_enable_ctx(ctx); |
| 1992 | if (ret) { | 1968 | if (ret < 0) |
| 1993 | dentry = ERR_PTR(ret); | ||
| 1994 | goto out_cdp; | 1969 | goto out_cdp; |
| 1995 | } | ||
| 1996 | 1970 | ||
| 1997 | closid_init(); | 1971 | closid_init(); |
| 1998 | 1972 | ||
| 1999 | ret = rdtgroup_create_info_dir(rdtgroup_default.kn); | 1973 | ret = rdtgroup_create_info_dir(rdtgroup_default.kn); |
| 2000 | if (ret) { | 1974 | if (ret < 0) |
| 2001 | dentry = ERR_PTR(ret); | 1975 | goto out_mba; |
| 2002 | goto out_cdp; | ||
| 2003 | } | ||
| 2004 | 1976 | ||
| 2005 | if (rdt_mon_capable) { | 1977 | if (rdt_mon_capable) { |
| 2006 | ret = mongroup_create_dir(rdtgroup_default.kn, | 1978 | ret = mongroup_create_dir(rdtgroup_default.kn, |
| 2007 | NULL, "mon_groups", | 1979 | NULL, "mon_groups", |
| 2008 | &kn_mongrp); | 1980 | &kn_mongrp); |
| 2009 | if (ret) { | 1981 | if (ret < 0) |
| 2010 | dentry = ERR_PTR(ret); | ||
| 2011 | goto out_info; | 1982 | goto out_info; |
| 2012 | } | ||
| 2013 | kernfs_get(kn_mongrp); | 1983 | kernfs_get(kn_mongrp); |
| 2014 | 1984 | ||
| 2015 | ret = mkdir_mondata_all(rdtgroup_default.kn, | 1985 | ret = mkdir_mondata_all(rdtgroup_default.kn, |
| 2016 | &rdtgroup_default, &kn_mondata); | 1986 | &rdtgroup_default, &kn_mondata); |
| 2017 | if (ret) { | 1987 | if (ret < 0) |
| 2018 | dentry = ERR_PTR(ret); | ||
| 2019 | goto out_mongrp; | 1988 | goto out_mongrp; |
| 2020 | } | ||
| 2021 | kernfs_get(kn_mondata); | 1989 | kernfs_get(kn_mondata); |
| 2022 | rdtgroup_default.mon.mon_data_kn = kn_mondata; | 1990 | rdtgroup_default.mon.mon_data_kn = kn_mondata; |
| 2023 | } | 1991 | } |
| 2024 | 1992 | ||
| 2025 | ret = rdt_pseudo_lock_init(); | 1993 | ret = rdt_pseudo_lock_init(); |
| 2026 | if (ret) { | 1994 | if (ret) |
| 2027 | dentry = ERR_PTR(ret); | ||
| 2028 | goto out_mondata; | 1995 | goto out_mondata; |
| 2029 | } | ||
| 2030 | 1996 | ||
| 2031 | dentry = kernfs_mount(fs_type, flags, rdt_root, | 1997 | ret = kernfs_get_tree(fc); |
| 2032 | RDTGROUP_SUPER_MAGIC, NULL); | 1998 | if (ret < 0) |
| 2033 | if (IS_ERR(dentry)) | ||
| 2034 | goto out_psl; | 1999 | goto out_psl; |
| 2035 | 2000 | ||
| 2036 | if (rdt_alloc_capable) | 2001 | if (rdt_alloc_capable) |
| @@ -2059,14 +2024,95 @@ out_mongrp: | |||
| 2059 | kernfs_remove(kn_mongrp); | 2024 | kernfs_remove(kn_mongrp); |
| 2060 | out_info: | 2025 | out_info: |
| 2061 | kernfs_remove(kn_info); | 2026 | kernfs_remove(kn_info); |
| 2027 | out_mba: | ||
| 2028 | if (ctx->enable_mba_mbps) | ||
| 2029 | set_mba_sc(false); | ||
| 2062 | out_cdp: | 2030 | out_cdp: |
| 2063 | cdp_disable_all(); | 2031 | cdp_disable_all(); |
| 2064 | out: | 2032 | out: |
| 2065 | rdt_last_cmd_clear(); | 2033 | rdt_last_cmd_clear(); |
| 2066 | mutex_unlock(&rdtgroup_mutex); | 2034 | mutex_unlock(&rdtgroup_mutex); |
| 2067 | cpus_read_unlock(); | 2035 | cpus_read_unlock(); |
| 2036 | return ret; | ||
| 2037 | } | ||
| 2038 | |||
| 2039 | enum rdt_param { | ||
| 2040 | Opt_cdp, | ||
| 2041 | Opt_cdpl2, | ||
| 2042 | Opt_mba_mpbs, | ||
| 2043 | nr__rdt_params | ||
| 2044 | }; | ||
| 2045 | |||
| 2046 | static const struct fs_parameter_spec rdt_param_specs[] = { | ||
| 2047 | fsparam_flag("cdp", Opt_cdp), | ||
| 2048 | fsparam_flag("cdpl2", Opt_cdpl2), | ||
| 2049 | fsparam_flag("mba_MBps", Opt_mba_mpbs), /* user-visible name stays "mba_MBps", as matched by the old strcmp() parser */ | ||
| 2050 | {} | ||
| 2051 | }; | ||
| 2052 | |||
| 2053 | static const struct fs_parameter_description rdt_fs_parameters = { | ||
| 2054 | .name = "rdt", | ||
| 2055 | .specs = rdt_param_specs, | ||
| 2056 | }; | ||
| 2057 | |||
| 2058 | static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) | ||
| 2059 | { | ||
| 2060 | struct rdt_fs_context *ctx = rdt_fc2context(fc); | ||
| 2061 | struct fs_parse_result result; | ||
| 2062 | int opt; | ||
| 2063 | |||
| 2064 | opt = fs_parse(fc, &rdt_fs_parameters, param, &result); | ||
| 2065 | if (opt < 0) | ||
| 2066 | return opt; | ||
| 2068 | 2067 | ||
| 2069 | return dentry; | 2068 | switch (opt) { |
| 2069 | case Opt_cdp: | ||
| 2070 | ctx->enable_cdpl3 = true; | ||
| 2071 | return 0; | ||
| 2072 | case Opt_cdpl2: | ||
| 2073 | ctx->enable_cdpl2 = true; | ||
| 2074 | return 0; | ||
| 2075 | case Opt_mba_mpbs: | ||
| 2076 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
| 2077 | return -EINVAL; | ||
| 2078 | ctx->enable_mba_mbps = true; | ||
| 2079 | return 0; | ||
| 2080 | } | ||
| 2081 | |||
| 2082 | return -EINVAL; | ||
| 2083 | } | ||
| 2084 | |||
| 2085 | static void rdt_fs_context_free(struct fs_context *fc) | ||
| 2086 | { | ||
| 2087 | struct rdt_fs_context *ctx = rdt_fc2context(fc); | ||
| 2088 | |||
| 2089 | kernfs_free_fs_context(fc); | ||
| 2090 | kfree(ctx); | ||
| 2091 | } | ||
| 2092 | |||
| 2093 | static const struct fs_context_operations rdt_fs_context_ops = { | ||
| 2094 | .free = rdt_fs_context_free, | ||
| 2095 | .parse_param = rdt_parse_param, | ||
| 2096 | .get_tree = rdt_get_tree, | ||
| 2097 | }; | ||
| 2098 | |||
| 2099 | static int rdt_init_fs_context(struct fs_context *fc) | ||
| 2100 | { | ||
| 2101 | struct rdt_fs_context *ctx; | ||
| 2102 | |||
| 2103 | ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); | ||
| 2104 | if (!ctx) | ||
| 2105 | return -ENOMEM; | ||
| 2106 | |||
| 2107 | ctx->kfc.root = rdt_root; | ||
| 2108 | ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; | ||
| 2109 | fc->fs_private = &ctx->kfc; | ||
| 2110 | fc->ops = &rdt_fs_context_ops; | ||
| 2111 | if (fc->user_ns) | ||
| 2112 | put_user_ns(fc->user_ns); | ||
| 2113 | fc->user_ns = get_user_ns(&init_user_ns); | ||
| 2114 | fc->global = true; | ||
| 2115 | return 0; | ||
| 2070 | } | 2116 | } |
| 2071 | 2117 | ||
| 2072 | static int reset_all_ctrls(struct rdt_resource *r) | 2118 | static int reset_all_ctrls(struct rdt_resource *r) |
| @@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb) | |||
| 2239 | } | 2285 | } |
| 2240 | 2286 | ||
| 2241 | static struct file_system_type rdt_fs_type = { | 2287 | static struct file_system_type rdt_fs_type = { |
| 2242 | .name = "resctrl", | 2288 | .name = "resctrl", |
| 2243 | .mount = rdt_mount, | 2289 | .init_fs_context = rdt_init_fs_context, |
| 2244 | .kill_sb = rdt_kill_sb, | 2290 | .parameters = &rdt_fs_parameters, |
| 2291 | .kill_sb = rdt_kill_sb, | ||
| 2245 | }; | 2292 | }; |
| 2246 | 2293 | ||
| 2247 | static int mon_addfile(struct kernfs_node *parent_kn, const char *name, | 2294 | static int mon_addfile(struct kernfs_node *parent_kn, const char *name, |
diff --git a/fs/Kconfig b/fs/Kconfig index 2557506051a3..3e6d3101f3ff 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -8,6 +8,13 @@ menu "File systems" | |||
| 8 | config DCACHE_WORD_ACCESS | 8 | config DCACHE_WORD_ACCESS |
| 9 | bool | 9 | bool |
| 10 | 10 | ||
| 11 | config VALIDATE_FS_PARSER | ||
| 12 | bool "Validate filesystem parameter description" | ||
| 13 | default y | ||
| 14 | help | ||
| 15 | Enable this to perform validation of the parameter description for a | ||
| 16 | filesystem when it is registered. | ||
| 17 | |||
| 11 | if BLOCK | 18 | if BLOCK |
| 12 | 19 | ||
| 13 | config FS_IOMAP | 20 | config FS_IOMAP |
diff --git a/fs/Makefile b/fs/Makefile index 7bff9abecfa4..427fec226fae 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
| @@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \ | |||
| 13 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 13 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
| 14 | pnode.o splice.o sync.o utimes.o d_path.o \ | 14 | pnode.o splice.o sync.o utimes.o d_path.o \ |
| 15 | stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ | 15 | stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ |
| 16 | fs_types.o | 16 | fs_types.o fs_context.o fs_parser.o |
| 17 | 17 | ||
| 18 | ifeq ($(CONFIG_BLOCK),y) | 18 | ifeq ($(CONFIG_BLOCK),y) |
| 19 | obj-y += buffer.o block_dev.o direct-io.o mpage.o | 19 | obj-y += buffer.o block_dev.o direct-io.o mpage.o |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8871b9e8645f..bb1f244b2b3a 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
| @@ -36,15 +36,14 @@ | |||
| 36 | struct pagevec; | 36 | struct pagevec; |
| 37 | struct afs_call; | 37 | struct afs_call; |
| 38 | 38 | ||
| 39 | struct afs_mount_params { | 39 | struct afs_fs_context { |
| 40 | bool rwpath; /* T if the parent should be considered R/W */ | ||
| 41 | bool force; /* T to force cell type */ | 40 | bool force; /* T to force cell type */ |
| 42 | bool autocell; /* T if set auto mount operation */ | 41 | bool autocell; /* T if set auto mount operation */ |
| 43 | bool dyn_root; /* T if dynamic root */ | 42 | bool dyn_root; /* T if dynamic root */ |
| 43 | bool no_cell; /* T if the source is "none" (for dynroot) */ | ||
| 44 | afs_voltype_t type; /* type of volume requested */ | 44 | afs_voltype_t type; /* type of volume requested */ |
| 45 | int volnamesz; /* size of volume name */ | 45 | unsigned int volnamesz; /* size of volume name */ |
| 46 | const char *volname; /* name of volume to mount */ | 46 | const char *volname; /* name of volume to mount */ |
| 47 | struct net *net_ns; /* Network namespace in effect */ | ||
| 48 | struct afs_net *net; /* the AFS net namespace stuff */ | 47 | struct afs_net *net; /* the AFS net namespace stuff */ |
| 49 | struct afs_cell *cell; /* cell in which to find volume */ | 48 | struct afs_cell *cell; /* cell in which to find volume */ |
| 50 | struct afs_volume *volume; /* volume record */ | 49 | struct afs_volume *volume; /* volume record */ |
| @@ -1274,7 +1273,7 @@ static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume) | |||
| 1274 | return volume; | 1273 | return volume; |
| 1275 | } | 1274 | } |
| 1276 | 1275 | ||
| 1277 | extern struct afs_volume *afs_create_volume(struct afs_mount_params *); | 1276 | extern struct afs_volume *afs_create_volume(struct afs_fs_context *); |
| 1278 | extern void afs_activate_volume(struct afs_volume *); | 1277 | extern void afs_activate_volume(struct afs_volume *); |
| 1279 | extern void afs_deactivate_volume(struct afs_volume *); | 1278 | extern void afs_deactivate_volume(struct afs_volume *); |
| 1280 | extern void afs_put_volume(struct afs_cell *, struct afs_volume *); | 1279 | extern void afs_put_volume(struct afs_cell *, struct afs_volume *); |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 2e51c6994148..eecd8b699186 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/mount.h> | 17 | #include <linux/mount.h> |
| 18 | #include <linux/namei.h> | 18 | #include <linux/namei.h> |
| 19 | #include <linux/gfp.h> | 19 | #include <linux/gfp.h> |
| 20 | #include <linux/fs_context.h> | ||
| 20 | #include "internal.h" | 21 | #include "internal.h" |
| 21 | 22 | ||
| 22 | 23 | ||
| @@ -47,6 +48,8 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); | |||
| 47 | 48 | ||
| 48 | static unsigned long afs_mntpt_expiry_timeout = 10 * 60; | 49 | static unsigned long afs_mntpt_expiry_timeout = 10 * 60; |
| 49 | 50 | ||
| 51 | static const char afs_root_volume[] = "root.cell"; | ||
| 52 | |||
| 50 | /* | 53 | /* |
| 51 | * no valid lookup procedure on this sort of dir | 54 | * no valid lookup procedure on this sort of dir |
| 52 | */ | 55 | */ |
| @@ -68,108 +71,112 @@ static int afs_mntpt_open(struct inode *inode, struct file *file) | |||
| 68 | } | 71 | } |
| 69 | 72 | ||
| 70 | /* | 73 | /* |
| 71 | * create a vfsmount to be automounted | 74 | * Set the parameters for the proposed superblock. |
| 72 | */ | 75 | */ |
| 73 | static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | 76 | static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) |
| 74 | { | 77 | { |
| 75 | struct afs_super_info *as; | 78 | struct afs_fs_context *ctx = fc->fs_private; |
| 76 | struct vfsmount *mnt; | 79 | struct afs_super_info *src_as = AFS_FS_S(mntpt->d_sb); |
| 77 | struct afs_vnode *vnode; | 80 | struct afs_vnode *vnode = AFS_FS_I(d_inode(mntpt)); |
| 78 | struct page *page; | 81 | struct afs_cell *cell; |
| 79 | char *devname, *options; | 82 | const char *p; |
| 80 | bool rwpath = false; | ||
| 81 | int ret; | 83 | int ret; |
| 82 | 84 | ||
| 83 | _enter("{%pd}", mntpt); | 85 | if (fc->net_ns != src_as->net_ns) { |
| 84 | 86 | put_net(fc->net_ns); | |
| 85 | BUG_ON(!d_inode(mntpt)); | 87 | fc->net_ns = get_net(src_as->net_ns); |
| 86 | 88 | } | |
| 87 | ret = -ENOMEM; | ||
| 88 | devname = (char *) get_zeroed_page(GFP_KERNEL); | ||
| 89 | if (!devname) | ||
| 90 | goto error_no_devname; | ||
| 91 | |||
| 92 | options = (char *) get_zeroed_page(GFP_KERNEL); | ||
| 93 | if (!options) | ||
| 94 | goto error_no_options; | ||
| 95 | 89 | ||
| 96 | vnode = AFS_FS_I(d_inode(mntpt)); | 90 | if (src_as->volume && src_as->volume->type == AFSVL_RWVOL) { |
| 91 | ctx->type = AFSVL_RWVOL; | ||
| 92 | ctx->force = true; | ||
| 93 | } | ||
| 94 | if (ctx->cell) { | ||
| 95 | afs_put_cell(ctx->net, ctx->cell); | ||
| 96 | ctx->cell = NULL; | ||
| 97 | } | ||
| 97 | if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { | 98 | if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { |
| 98 | /* if the directory is a pseudo directory, use the d_name */ | 99 | /* if the directory is a pseudo directory, use the d_name */ |
| 99 | static const char afs_root_cell[] = ":root.cell."; | ||
| 100 | unsigned size = mntpt->d_name.len; | 100 | unsigned size = mntpt->d_name.len; |
| 101 | 101 | ||
| 102 | ret = -ENOENT; | 102 | if (size < 2) |
| 103 | if (size < 2 || size > AFS_MAXCELLNAME) | 103 | return -ENOENT; |
| 104 | goto error_no_page; | ||
| 105 | 104 | ||
| 105 | p = mntpt->d_name.name; | ||
| 106 | if (mntpt->d_name.name[0] == '.') { | 106 | if (mntpt->d_name.name[0] == '.') { |
| 107 | devname[0] = '%'; | 107 | size--; |
| 108 | memcpy(devname + 1, mntpt->d_name.name + 1, size - 1); | 108 | p++; |
| 109 | memcpy(devname + size, afs_root_cell, | 109 | ctx->type = AFSVL_RWVOL; |
| 110 | sizeof(afs_root_cell)); | 110 | ctx->force = true; |
| 111 | rwpath = true; | ||
| 112 | } else { | ||
| 113 | devname[0] = '#'; | ||
| 114 | memcpy(devname + 1, mntpt->d_name.name, size); | ||
| 115 | memcpy(devname + size + 1, afs_root_cell, | ||
| 116 | sizeof(afs_root_cell)); | ||
| 117 | } | 111 | } |
| 112 | if (size > AFS_MAXCELLNAME) | ||
| 113 | return -ENAMETOOLONG; | ||
| 114 | |||
| 115 | cell = afs_lookup_cell(ctx->net, p, size, NULL, false); | ||
| 116 | if (IS_ERR(cell)) { | ||
| 117 | pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt); | ||
| 118 | return PTR_ERR(cell); | ||
| 119 | } | ||
| 120 | ctx->cell = cell; | ||
| 121 | |||
| 122 | ctx->volname = afs_root_volume; | ||
| 123 | ctx->volnamesz = sizeof(afs_root_volume) - 1; | ||
| 118 | } else { | 124 | } else { |
| 119 | /* read the contents of the AFS special symlink */ | 125 | /* read the contents of the AFS special symlink */ |
| 126 | struct page *page; | ||
| 120 | loff_t size = i_size_read(d_inode(mntpt)); | 127 | loff_t size = i_size_read(d_inode(mntpt)); |
| 121 | char *buf; | 128 | char *buf; |
| 122 | 129 | ||
| 123 | ret = -EINVAL; | 130 | if (src_as->cell) |
| 131 | ctx->cell = afs_get_cell(src_as->cell); | ||
| 132 | |||
| 124 | if (size > PAGE_SIZE - 1) | 133 | if (size > PAGE_SIZE - 1) |
| 125 | goto error_no_page; | 134 | return -EINVAL; |
| 126 | 135 | ||
| 127 | page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL); | 136 | page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL); |
| 128 | if (IS_ERR(page)) { | 137 | if (IS_ERR(page)) |
| 129 | ret = PTR_ERR(page); | 138 | return PTR_ERR(page); |
| 130 | goto error_no_page; | ||
| 131 | } | ||
| 132 | 139 | ||
| 133 | if (PageError(page)) { | 140 | if (PageError(page)) { |
| 134 | ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt); | 141 | ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt); |
| 135 | goto error; | 142 | put_page(page); |
| 143 | return ret; | ||
| 136 | } | 144 | } |
| 137 | 145 | ||
| 138 | buf = kmap_atomic(page); | 146 | buf = kmap(page); |
| 139 | memcpy(devname, buf, size); | 147 | ret = vfs_parse_fs_string(fc, "source", buf, size); |
| 140 | kunmap_atomic(buf); | 148 | kunmap(page); |
| 141 | put_page(page); | 149 | put_page(page); |
| 142 | page = NULL; | 150 | if (ret < 0) |
| 151 | return ret; | ||
| 143 | } | 152 | } |
| 144 | 153 | ||
| 145 | /* work out what options we want */ | 154 | return 0; |
| 146 | as = AFS_FS_S(mntpt->d_sb); | 155 | } |
| 147 | if (as->cell) { | ||
| 148 | memcpy(options, "cell=", 5); | ||
| 149 | strcpy(options + 5, as->cell->name); | ||
| 150 | if ((as->volume && as->volume->type == AFSVL_RWVOL) || rwpath) | ||
| 151 | strcat(options, ",rwpath"); | ||
| 152 | } | ||
| 153 | 156 | ||
| 154 | /* try and do the mount */ | 157 | /* |
| 155 | _debug("--- attempting mount %s -o %s ---", devname, options); | 158 | * create a vfsmount to be automounted |
| 156 | mnt = vfs_submount(mntpt, &afs_fs_type, devname, options); | 159 | */ |
| 157 | _debug("--- mount result %p ---", mnt); | 160 | static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) |
| 161 | { | ||
| 162 | struct fs_context *fc; | ||
| 163 | struct vfsmount *mnt; | ||
| 164 | int ret; | ||
| 158 | 165 | ||
| 159 | free_page((unsigned long) devname); | 166 | BUG_ON(!d_inode(mntpt)); |
| 160 | free_page((unsigned long) options); | ||
| 161 | _leave(" = %p", mnt); | ||
| 162 | return mnt; | ||
| 163 | 167 | ||
| 164 | error: | 168 | fc = fs_context_for_submount(&afs_fs_type, mntpt); |
| 165 | put_page(page); | 169 | if (IS_ERR(fc)) |
| 166 | error_no_page: | 170 | return ERR_CAST(fc); |
| 167 | free_page((unsigned long) options); | 171 | |
| 168 | error_no_options: | 172 | ret = afs_mntpt_set_params(fc, mntpt); |
| 169 | free_page((unsigned long) devname); | 173 | if (!ret) |
| 170 | error_no_devname: | 174 | mnt = fc_mount(fc); |
| 171 | _leave(" = %d", ret); | 175 | else |
| 172 | return ERR_PTR(ret); | 176 | mnt = ERR_PTR(ret); |
| 177 | |||
| 178 | put_fs_context(fc); | ||
| 179 | return mnt; | ||
| 173 | } | 180 | } |
| 174 | 181 | ||
| 175 | /* | 182 | /* |
diff --git a/fs/afs/super.c b/fs/afs/super.c index e684f6769b15..5adf012b8e27 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* AFS superblock handling | 1 | /* AFS superblock handling |
| 2 | * | 2 | * |
| 3 | * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This software may be freely redistributed under the terms of the | 5 | * This software may be freely redistributed under the terms of the |
| 6 | * GNU General Public License. | 6 | * GNU General Public License. |
| @@ -21,7 +21,7 @@ | |||
| 21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
| 22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
| 23 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
| 24 | #include <linux/parser.h> | 24 | #include <linux/fs_parser.h> |
| 25 | #include <linux/statfs.h> | 25 | #include <linux/statfs.h> |
| 26 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
| 27 | #include <linux/nsproxy.h> | 27 | #include <linux/nsproxy.h> |
| @@ -30,21 +30,22 @@ | |||
| 30 | #include "internal.h" | 30 | #include "internal.h" |
| 31 | 31 | ||
| 32 | static void afs_i_init_once(void *foo); | 32 | static void afs_i_init_once(void *foo); |
| 33 | static struct dentry *afs_mount(struct file_system_type *fs_type, | ||
| 34 | int flags, const char *dev_name, void *data); | ||
| 35 | static void afs_kill_super(struct super_block *sb); | 33 | static void afs_kill_super(struct super_block *sb); |
| 36 | static struct inode *afs_alloc_inode(struct super_block *sb); | 34 | static struct inode *afs_alloc_inode(struct super_block *sb); |
| 37 | static void afs_destroy_inode(struct inode *inode); | 35 | static void afs_destroy_inode(struct inode *inode); |
| 38 | static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); | 36 | static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); |
| 39 | static int afs_show_devname(struct seq_file *m, struct dentry *root); | 37 | static int afs_show_devname(struct seq_file *m, struct dentry *root); |
| 40 | static int afs_show_options(struct seq_file *m, struct dentry *root); | 38 | static int afs_show_options(struct seq_file *m, struct dentry *root); |
| 39 | static int afs_init_fs_context(struct fs_context *fc); | ||
| 40 | static const struct fs_parameter_description afs_fs_parameters; | ||
| 41 | 41 | ||
| 42 | struct file_system_type afs_fs_type = { | 42 | struct file_system_type afs_fs_type = { |
| 43 | .owner = THIS_MODULE, | 43 | .owner = THIS_MODULE, |
| 44 | .name = "afs", | 44 | .name = "afs", |
| 45 | .mount = afs_mount, | 45 | .init_fs_context = afs_init_fs_context, |
| 46 | .kill_sb = afs_kill_super, | 46 | .parameters = &afs_fs_parameters, |
| 47 | .fs_flags = 0, | 47 | .kill_sb = afs_kill_super, |
| 48 | .fs_flags = 0, | ||
| 48 | }; | 49 | }; |
| 49 | MODULE_ALIAS_FS("afs"); | 50 | MODULE_ALIAS_FS("afs"); |
| 50 | 51 | ||
| @@ -63,22 +64,22 @@ static const struct super_operations afs_super_ops = { | |||
| 63 | static struct kmem_cache *afs_inode_cachep; | 64 | static struct kmem_cache *afs_inode_cachep; |
| 64 | static atomic_t afs_count_active_inodes; | 65 | static atomic_t afs_count_active_inodes; |
| 65 | 66 | ||
| 66 | enum { | 67 | enum afs_param { |
| 67 | afs_no_opt, | 68 | Opt_autocell, |
| 68 | afs_opt_cell, | 69 | Opt_dyn, |
| 69 | afs_opt_dyn, | 70 | Opt_source, |
| 70 | afs_opt_rwpath, | ||
| 71 | afs_opt_vol, | ||
| 72 | afs_opt_autocell, | ||
| 73 | }; | 71 | }; |
| 74 | 72 | ||
| 75 | static const match_table_t afs_options_list = { | 73 | static const struct fs_parameter_spec afs_param_specs[] = { |
| 76 | { afs_opt_cell, "cell=%s" }, | 74 | fsparam_flag ("autocell", Opt_autocell), |
| 77 | { afs_opt_dyn, "dyn" }, | 75 | fsparam_flag ("dyn", Opt_dyn), |
| 78 | { afs_opt_rwpath, "rwpath" }, | 76 | fsparam_string("source", Opt_source), |
| 79 | { afs_opt_vol, "vol=%s" }, | 77 | {} |
| 80 | { afs_opt_autocell, "autocell" }, | 78 | }; |
| 81 | { afs_no_opt, NULL }, | 79 | |
| 80 | static const struct fs_parameter_description afs_fs_parameters = { | ||
| 81 | .name = "kAFS", | ||
| 82 | .specs = afs_param_specs, | ||
| 82 | }; | 83 | }; |
| 83 | 84 | ||
| 84 | /* | 85 | /* |
| @@ -190,84 +191,23 @@ static int afs_show_options(struct seq_file *m, struct dentry *root) | |||
| 190 | } | 191 | } |
| 191 | 192 | ||
| 192 | /* | 193 | /* |
| 193 | * parse the mount options | 194 | * Parse the source name to get cell name, volume name, volume type and R/W |
| 194 | * - this function has been shamelessly adapted from the ext3 fs which | 195 | * selector. |
| 195 | * shamelessly adapted it from the msdos fs | 196 | * |
| 196 | */ | 197 | * This can be one of the following: |
| 197 | static int afs_parse_options(struct afs_mount_params *params, | ||
| 198 | char *options, const char **devname) | ||
| 199 | { | ||
| 200 | struct afs_cell *cell; | ||
| 201 | substring_t args[MAX_OPT_ARGS]; | ||
| 202 | char *p; | ||
| 203 | int token; | ||
| 204 | |||
| 205 | _enter("%s", options); | ||
| 206 | |||
| 207 | options[PAGE_SIZE - 1] = 0; | ||
| 208 | |||
| 209 | while ((p = strsep(&options, ","))) { | ||
| 210 | if (!*p) | ||
| 211 | continue; | ||
| 212 | |||
| 213 | token = match_token(p, afs_options_list, args); | ||
| 214 | switch (token) { | ||
| 215 | case afs_opt_cell: | ||
| 216 | rcu_read_lock(); | ||
| 217 | cell = afs_lookup_cell_rcu(params->net, | ||
| 218 | args[0].from, | ||
| 219 | args[0].to - args[0].from); | ||
| 220 | rcu_read_unlock(); | ||
| 221 | if (IS_ERR(cell)) | ||
| 222 | return PTR_ERR(cell); | ||
| 223 | afs_put_cell(params->net, params->cell); | ||
| 224 | params->cell = cell; | ||
| 225 | break; | ||
| 226 | |||
| 227 | case afs_opt_rwpath: | ||
| 228 | params->rwpath = true; | ||
| 229 | break; | ||
| 230 | |||
| 231 | case afs_opt_vol: | ||
| 232 | *devname = args[0].from; | ||
| 233 | break; | ||
| 234 | |||
| 235 | case afs_opt_autocell: | ||
| 236 | params->autocell = true; | ||
| 237 | break; | ||
| 238 | |||
| 239 | case afs_opt_dyn: | ||
| 240 | params->dyn_root = true; | ||
| 241 | break; | ||
| 242 | |||
| 243 | default: | ||
| 244 | printk(KERN_ERR "kAFS:" | ||
| 245 | " Unknown or invalid mount option: '%s'\n", p); | ||
| 246 | return -EINVAL; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | _leave(" = 0"); | ||
| 251 | return 0; | ||
| 252 | } | ||
| 253 | |||
| 254 | /* | ||
| 255 | * parse a device name to get cell name, volume name, volume type and R/W | ||
| 256 | * selector | ||
| 257 | * - this can be one of the following: | ||
| 258 | * "%[cell:]volume[.]" R/W volume | 198 | * "%[cell:]volume[.]" R/W volume |
| 259 | * "#[cell:]volume[.]" R/O or R/W volume (rwpath=0), | 199 | * "#[cell:]volume[.]" R/O or R/W volume (R/O parent), |
| 260 | * or R/W (rwpath=1) volume | 200 | * or R/W (R/W parent) volume |
| 261 | * "%[cell:]volume.readonly" R/O volume | 201 | * "%[cell:]volume.readonly" R/O volume |
| 262 | * "#[cell:]volume.readonly" R/O volume | 202 | * "#[cell:]volume.readonly" R/O volume |
| 263 | * "%[cell:]volume.backup" Backup volume | 203 | * "%[cell:]volume.backup" Backup volume |
| 264 | * "#[cell:]volume.backup" Backup volume | 204 | * "#[cell:]volume.backup" Backup volume |
| 265 | */ | 205 | */ |
| 266 | static int afs_parse_device_name(struct afs_mount_params *params, | 206 | static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) |
| 267 | const char *name) | ||
| 268 | { | 207 | { |
| 208 | struct afs_fs_context *ctx = fc->fs_private; | ||
| 269 | struct afs_cell *cell; | 209 | struct afs_cell *cell; |
| 270 | const char *cellname, *suffix; | 210 | const char *cellname, *suffix, *name = param->string; |
| 271 | int cellnamesz; | 211 | int cellnamesz; |
| 272 | 212 | ||
| 273 | _enter(",%s", name); | 213 | _enter(",%s", name); |
| @@ -278,69 +218,149 @@ static int afs_parse_device_name(struct afs_mount_params *params, | |||
| 278 | } | 218 | } |
| 279 | 219 | ||
| 280 | if ((name[0] != '%' && name[0] != '#') || !name[1]) { | 220 | if ((name[0] != '%' && name[0] != '#') || !name[1]) { |
| 221 | /* To use dynroot, we don't want to have to provide a source */ | ||
| 222 | if (strcmp(name, "none") == 0) { | ||
| 223 | ctx->no_cell = true; | ||
| 224 | return 0; | ||
| 225 | } | ||
| 281 | printk(KERN_ERR "kAFS: unparsable volume name\n"); | 226 | printk(KERN_ERR "kAFS: unparsable volume name\n"); |
| 282 | return -EINVAL; | 227 | return -EINVAL; |
| 283 | } | 228 | } |
| 284 | 229 | ||
| 285 | /* determine the type of volume we're looking for */ | 230 | /* determine the type of volume we're looking for */ |
| 286 | params->type = AFSVL_ROVOL; | 231 | if (name[0] == '%') { |
| 287 | params->force = false; | 232 | ctx->type = AFSVL_RWVOL; |
| 288 | if (params->rwpath || name[0] == '%') { | 233 | ctx->force = true; |
| 289 | params->type = AFSVL_RWVOL; | ||
| 290 | params->force = true; | ||
| 291 | } | 234 | } |
| 292 | name++; | 235 | name++; |
| 293 | 236 | ||
| 294 | /* split the cell name out if there is one */ | 237 | /* split the cell name out if there is one */ |
| 295 | params->volname = strchr(name, ':'); | 238 | ctx->volname = strchr(name, ':'); |
| 296 | if (params->volname) { | 239 | if (ctx->volname) { |
| 297 | cellname = name; | 240 | cellname = name; |
| 298 | cellnamesz = params->volname - name; | 241 | cellnamesz = ctx->volname - name; |
| 299 | params->volname++; | 242 | ctx->volname++; |
| 300 | } else { | 243 | } else { |
| 301 | params->volname = name; | 244 | ctx->volname = name; |
| 302 | cellname = NULL; | 245 | cellname = NULL; |
| 303 | cellnamesz = 0; | 246 | cellnamesz = 0; |
| 304 | } | 247 | } |
| 305 | 248 | ||
| 306 | /* the volume type is further affected by a possible suffix */ | 249 | /* the volume type is further affected by a possible suffix */ |
| 307 | suffix = strrchr(params->volname, '.'); | 250 | suffix = strrchr(ctx->volname, '.'); |
| 308 | if (suffix) { | 251 | if (suffix) { |
| 309 | if (strcmp(suffix, ".readonly") == 0) { | 252 | if (strcmp(suffix, ".readonly") == 0) { |
| 310 | params->type = AFSVL_ROVOL; | 253 | ctx->type = AFSVL_ROVOL; |
| 311 | params->force = true; | 254 | ctx->force = true; |
| 312 | } else if (strcmp(suffix, ".backup") == 0) { | 255 | } else if (strcmp(suffix, ".backup") == 0) { |
| 313 | params->type = AFSVL_BACKVOL; | 256 | ctx->type = AFSVL_BACKVOL; |
| 314 | params->force = true; | 257 | ctx->force = true; |
| 315 | } else if (suffix[1] == 0) { | 258 | } else if (suffix[1] == 0) { |
| 316 | } else { | 259 | } else { |
| 317 | suffix = NULL; | 260 | suffix = NULL; |
| 318 | } | 261 | } |
| 319 | } | 262 | } |
| 320 | 263 | ||
| 321 | params->volnamesz = suffix ? | 264 | ctx->volnamesz = suffix ? |
| 322 | suffix - params->volname : strlen(params->volname); | 265 | suffix - ctx->volname : strlen(ctx->volname); |
| 323 | 266 | ||
| 324 | _debug("cell %*.*s [%p]", | 267 | _debug("cell %*.*s [%p]", |
| 325 | cellnamesz, cellnamesz, cellname ?: "", params->cell); | 268 | cellnamesz, cellnamesz, cellname ?: "", ctx->cell); |
| 326 | 269 | ||
| 327 | /* lookup the cell record */ | 270 | /* lookup the cell record */ |
| 328 | if (cellname || !params->cell) { | 271 | if (cellname) { |
| 329 | cell = afs_lookup_cell(params->net, cellname, cellnamesz, | 272 | cell = afs_lookup_cell(ctx->net, cellname, cellnamesz, |
| 330 | NULL, false); | 273 | NULL, false); |
| 331 | if (IS_ERR(cell)) { | 274 | if (IS_ERR(cell)) { |
| 332 | printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", | 275 | pr_err("kAFS: unable to lookup cell '%*.*s'\n", |
| 333 | cellnamesz, cellnamesz, cellname ?: ""); | 276 | cellnamesz, cellnamesz, cellname ?: ""); |
| 334 | return PTR_ERR(cell); | 277 | return PTR_ERR(cell); |
| 335 | } | 278 | } |
| 336 | afs_put_cell(params->net, params->cell); | 279 | afs_put_cell(ctx->net, ctx->cell); |
| 337 | params->cell = cell; | 280 | ctx->cell = cell; |
| 338 | } | 281 | } |
| 339 | 282 | ||
| 340 | _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", | 283 | _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", |
| 341 | params->cell->name, params->cell, | 284 | ctx->cell->name, ctx->cell, |
| 342 | params->volnamesz, params->volnamesz, params->volname, | 285 | ctx->volnamesz, ctx->volnamesz, ctx->volname, |
| 343 | suffix ?: "-", params->type, params->force ? " FORCE" : ""); | 286 | suffix ?: "-", ctx->type, ctx->force ? " FORCE" : ""); |
| 287 | |||
| 288 | fc->source = param->string; | ||
| 289 | param->string = NULL; | ||
| 290 | return 0; | ||
| 291 | } | ||
| 292 | |||
| 293 | /* | ||
| 294 | * Parse a single mount parameter. | ||
| 295 | */ | ||
| 296 | static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param) | ||
| 297 | { | ||
| 298 | struct fs_parse_result result; | ||
| 299 | struct afs_fs_context *ctx = fc->fs_private; | ||
| 300 | int opt; | ||
| 301 | |||
| 302 | opt = fs_parse(fc, &afs_fs_parameters, param, &result); | ||
| 303 | if (opt < 0) | ||
| 304 | return opt; | ||
| 305 | |||
| 306 | switch (opt) { | ||
| 307 | case Opt_source: | ||
| 308 | return afs_parse_source(fc, param); | ||
| 309 | |||
| 310 | case Opt_autocell: | ||
| 311 | ctx->autocell = true; | ||
| 312 | break; | ||
| 313 | |||
| 314 | case Opt_dyn: | ||
| 315 | ctx->dyn_root = true; | ||
| 316 | break; | ||
| 317 | |||
| 318 | default: | ||
| 319 | return -EINVAL; | ||
| 320 | } | ||
| 321 | |||
| 322 | _leave(" = 0"); | ||
| 323 | return 0; | ||
| 324 | } | ||
| 325 | |||
| 326 | /* | ||
| 327 | * Validate the options, get the cell key and look up the volume. | ||
| 328 | */ | ||
| 329 | static int afs_validate_fc(struct fs_context *fc) | ||
| 330 | { | ||
| 331 | struct afs_fs_context *ctx = fc->fs_private; | ||
| 332 | struct afs_volume *volume; | ||
| 333 | struct key *key; | ||
| 334 | |||
| 335 | if (!ctx->dyn_root) { | ||
| 336 | if (ctx->no_cell) { | ||
| 337 | pr_warn("kAFS: Can only specify source 'none' with -o dyn\n"); | ||
| 338 | return -EINVAL; | ||
| 339 | } | ||
| 340 | |||
| 341 | if (!ctx->cell) { | ||
| 342 | pr_warn("kAFS: No cell specified\n"); | ||
| 343 | return -EDESTADDRREQ; | ||
| 344 | } | ||
| 345 | |||
| 346 | /* We try to do the mount securely. */ | ||
| 347 | key = afs_request_key(ctx->cell); | ||
| 348 | if (IS_ERR(key)) | ||
| 349 | return PTR_ERR(key); | ||
| 350 | |||
| 351 | ctx->key = key; | ||
| 352 | |||
| 353 | if (ctx->volume) { | ||
| 354 | afs_put_volume(ctx->cell, ctx->volume); | ||
| 355 | ctx->volume = NULL; | ||
| 356 | } | ||
| 357 | |||
| 358 | volume = afs_create_volume(ctx); | ||
| 359 | if (IS_ERR(volume)) | ||
| 360 | return PTR_ERR(volume); | ||
| 361 | |||
| 362 | ctx->volume = volume; | ||
| 363 | } | ||
| 344 | 364 | ||
| 345 | return 0; | 365 | return 0; |
| 346 | } | 366 | } |
| @@ -348,39 +368,34 @@ static int afs_parse_device_name(struct afs_mount_params *params, | |||
| 348 | /* | 368 | /* |
| 349 | * check a superblock to see if it's the one we're looking for | 369 | * check a superblock to see if it's the one we're looking for |
| 350 | */ | 370 | */ |
| 351 | static int afs_test_super(struct super_block *sb, void *data) | 371 | static int afs_test_super(struct super_block *sb, struct fs_context *fc) |
| 352 | { | 372 | { |
| 353 | struct afs_super_info *as1 = data; | 373 | struct afs_fs_context *ctx = fc->fs_private; |
| 354 | struct afs_super_info *as = AFS_FS_S(sb); | 374 | struct afs_super_info *as = AFS_FS_S(sb); |
| 355 | 375 | ||
| 356 | return (as->net_ns == as1->net_ns && | 376 | return (as->net_ns == fc->net_ns && |
| 357 | as->volume && | 377 | as->volume && |
| 358 | as->volume->vid == as1->volume->vid && | 378 | as->volume->vid == ctx->volume->vid && |
| 359 | !as->dyn_root); | 379 | !as->dyn_root); |
| 360 | } | 380 | } |
| 361 | 381 | ||
| 362 | static int afs_dynroot_test_super(struct super_block *sb, void *data) | 382 | static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc) |
| 363 | { | 383 | { |
| 364 | struct afs_super_info *as1 = data; | ||
| 365 | struct afs_super_info *as = AFS_FS_S(sb); | 384 | struct afs_super_info *as = AFS_FS_S(sb); |
| 366 | 385 | ||
| 367 | return (as->net_ns == as1->net_ns && | 386 | return (as->net_ns == fc->net_ns && |
| 368 | as->dyn_root); | 387 | as->dyn_root); |
| 369 | } | 388 | } |
| 370 | 389 | ||
| 371 | static int afs_set_super(struct super_block *sb, void *data) | 390 | static int afs_set_super(struct super_block *sb, struct fs_context *fc) |
| 372 | { | 391 | { |
| 373 | struct afs_super_info *as = data; | ||
| 374 | |||
| 375 | sb->s_fs_info = as; | ||
| 376 | return set_anon_super(sb, NULL); | 392 | return set_anon_super(sb, NULL); |
| 377 | } | 393 | } |
| 378 | 394 | ||
| 379 | /* | 395 | /* |
| 380 | * fill in the superblock | 396 | * fill in the superblock |
| 381 | */ | 397 | */ |
| 382 | static int afs_fill_super(struct super_block *sb, | 398 | static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) |
| 383 | struct afs_mount_params *params) | ||
| 384 | { | 399 | { |
| 385 | struct afs_super_info *as = AFS_FS_S(sb); | 400 | struct afs_super_info *as = AFS_FS_S(sb); |
| 386 | struct afs_fid fid; | 401 | struct afs_fid fid; |
| @@ -412,13 +427,13 @@ static int afs_fill_super(struct super_block *sb, | |||
| 412 | fid.vnode = 1; | 427 | fid.vnode = 1; |
| 413 | fid.vnode_hi = 0; | 428 | fid.vnode_hi = 0; |
| 414 | fid.unique = 1; | 429 | fid.unique = 1; |
| 415 | inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); | 430 | inode = afs_iget(sb, ctx->key, &fid, NULL, NULL, NULL); |
| 416 | } | 431 | } |
| 417 | 432 | ||
| 418 | if (IS_ERR(inode)) | 433 | if (IS_ERR(inode)) |
| 419 | return PTR_ERR(inode); | 434 | return PTR_ERR(inode); |
| 420 | 435 | ||
| 421 | if (params->autocell || params->dyn_root) | 436 | if (ctx->autocell || as->dyn_root) |
| 422 | set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); | 437 | set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); |
| 423 | 438 | ||
| 424 | ret = -ENOMEM; | 439 | ret = -ENOMEM; |
| @@ -443,17 +458,20 @@ error: | |||
| 443 | return ret; | 458 | return ret; |
| 444 | } | 459 | } |
| 445 | 460 | ||
| 446 | static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) | 461 | static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) |
| 447 | { | 462 | { |
| 463 | struct afs_fs_context *ctx = fc->fs_private; | ||
| 448 | struct afs_super_info *as; | 464 | struct afs_super_info *as; |
| 449 | 465 | ||
| 450 | as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); | 466 | as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); |
| 451 | if (as) { | 467 | if (as) { |
| 452 | as->net_ns = get_net(params->net_ns); | 468 | as->net_ns = get_net(fc->net_ns); |
| 453 | if (params->dyn_root) | 469 | if (ctx->dyn_root) { |
| 454 | as->dyn_root = true; | 470 | as->dyn_root = true; |
| 455 | else | 471 | } else { |
| 456 | as->cell = afs_get_cell(params->cell); | 472 | as->cell = afs_get_cell(ctx->cell); |
| 473 | as->volume = __afs_get_volume(ctx->volume); | ||
| 474 | } | ||
| 457 | } | 475 | } |
| 458 | return as; | 476 | return as; |
| 459 | } | 477 | } |
| @@ -475,7 +493,7 @@ static void afs_kill_super(struct super_block *sb) | |||
| 475 | 493 | ||
| 476 | if (as->dyn_root) | 494 | if (as->dyn_root) |
| 477 | afs_dynroot_depopulate(sb); | 495 | afs_dynroot_depopulate(sb); |
| 478 | 496 | ||
| 479 | /* Clear the callback interests (which will do ilookup5) before | 497 | /* Clear the callback interests (which will do ilookup5) before |
| 480 | * deactivating the superblock. | 498 | * deactivating the superblock. |
| 481 | */ | 499 | */ |
| @@ -488,111 +506,103 @@ static void afs_kill_super(struct super_block *sb) | |||
| 488 | } | 506 | } |
| 489 | 507 | ||
| 490 | /* | 508 | /* |
| 491 | * get an AFS superblock | 509 | * Get an AFS superblock and root directory. |
| 492 | */ | 510 | */ |
| 493 | static struct dentry *afs_mount(struct file_system_type *fs_type, | 511 | static int afs_get_tree(struct fs_context *fc) |
| 494 | int flags, const char *dev_name, void *options) | ||
| 495 | { | 512 | { |
| 496 | struct afs_mount_params params; | 513 | struct afs_fs_context *ctx = fc->fs_private; |
| 497 | struct super_block *sb; | 514 | struct super_block *sb; |
| 498 | struct afs_volume *candidate; | ||
| 499 | struct key *key; | ||
| 500 | struct afs_super_info *as; | 515 | struct afs_super_info *as; |
| 501 | int ret; | 516 | int ret; |
| 502 | 517 | ||
| 503 | _enter(",,%s,%p", dev_name, options); | 518 | ret = afs_validate_fc(fc); |
| 504 | 519 | if (ret) | |
| 505 | memset(¶ms, 0, sizeof(params)); | ||
| 506 | |||
| 507 | ret = -EINVAL; | ||
| 508 | if (current->nsproxy->net_ns != &init_net) | ||
| 509 | goto error; | 520 | goto error; |
| 510 | params.net_ns = current->nsproxy->net_ns; | ||
| 511 | params.net = afs_net(params.net_ns); | ||
| 512 | |||
| 513 | /* parse the options and device name */ | ||
| 514 | if (options) { | ||
| 515 | ret = afs_parse_options(¶ms, options, &dev_name); | ||
| 516 | if (ret < 0) | ||
| 517 | goto error; | ||
| 518 | } | ||
| 519 | |||
| 520 | if (!params.dyn_root) { | ||
| 521 | ret = afs_parse_device_name(¶ms, dev_name); | ||
| 522 | if (ret < 0) | ||
| 523 | goto error; | ||
| 524 | 521 | ||
| 525 | /* try and do the mount securely */ | 522 | _enter(""); |
| 526 | key = afs_request_key(params.cell); | ||
| 527 | if (IS_ERR(key)) { | ||
| 528 | _leave(" = %ld [key]", PTR_ERR(key)); | ||
| 529 | ret = PTR_ERR(key); | ||
| 530 | goto error; | ||
| 531 | } | ||
| 532 | params.key = key; | ||
| 533 | } | ||
| 534 | 523 | ||
| 535 | /* allocate a superblock info record */ | 524 | /* allocate a superblock info record */ |
| 536 | ret = -ENOMEM; | 525 | ret = -ENOMEM; |
| 537 | as = afs_alloc_sbi(¶ms); | 526 | as = afs_alloc_sbi(fc); |
| 538 | if (!as) | 527 | if (!as) |
| 539 | goto error_key; | 528 | goto error; |
| 540 | 529 | fc->s_fs_info = as; | |
| 541 | if (!params.dyn_root) { | ||
| 542 | /* Assume we're going to need a volume record; at the very | ||
| 543 | * least we can use it to update the volume record if we have | ||
| 544 | * one already. This checks that the volume exists within the | ||
| 545 | * cell. | ||
| 546 | */ | ||
| 547 | candidate = afs_create_volume(¶ms); | ||
| 548 | if (IS_ERR(candidate)) { | ||
| 549 | ret = PTR_ERR(candidate); | ||
| 550 | goto error_as; | ||
| 551 | } | ||
| 552 | |||
| 553 | as->volume = candidate; | ||
| 554 | } | ||
| 555 | 530 | ||
| 556 | /* allocate a deviceless superblock */ | 531 | /* allocate a deviceless superblock */ |
| 557 | sb = sget(fs_type, | 532 | sb = sget_fc(fc, |
| 558 | as->dyn_root ? afs_dynroot_test_super : afs_test_super, | 533 | as->dyn_root ? afs_dynroot_test_super : afs_test_super, |
| 559 | afs_set_super, flags, as); | 534 | afs_set_super); |
| 560 | if (IS_ERR(sb)) { | 535 | if (IS_ERR(sb)) { |
| 561 | ret = PTR_ERR(sb); | 536 | ret = PTR_ERR(sb); |
| 562 | goto error_as; | 537 | goto error; |
| 563 | } | 538 | } |
| 564 | 539 | ||
| 565 | if (!sb->s_root) { | 540 | if (!sb->s_root) { |
| 566 | /* initial superblock/root creation */ | 541 | /* initial superblock/root creation */ |
| 567 | _debug("create"); | 542 | _debug("create"); |
| 568 | ret = afs_fill_super(sb, ¶ms); | 543 | ret = afs_fill_super(sb, ctx); |
| 569 | if (ret < 0) | 544 | if (ret < 0) |
| 570 | goto error_sb; | 545 | goto error_sb; |
| 571 | as = NULL; | ||
| 572 | sb->s_flags |= SB_ACTIVE; | 546 | sb->s_flags |= SB_ACTIVE; |
| 573 | } else { | 547 | } else { |
| 574 | _debug("reuse"); | 548 | _debug("reuse"); |
| 575 | ASSERTCMP(sb->s_flags, &, SB_ACTIVE); | 549 | ASSERTCMP(sb->s_flags, &, SB_ACTIVE); |
| 576 | afs_destroy_sbi(as); | ||
| 577 | as = NULL; | ||
| 578 | } | 550 | } |
| 579 | 551 | ||
| 580 | afs_put_cell(params.net, params.cell); | 552 | fc->root = dget(sb->s_root); |
| 581 | key_put(params.key); | ||
| 582 | _leave(" = 0 [%p]", sb); | 553 | _leave(" = 0 [%p]", sb); |
| 583 | return dget(sb->s_root); | 554 | return 0; |
| 584 | 555 | ||
| 585 | error_sb: | 556 | error_sb: |
| 586 | deactivate_locked_super(sb); | 557 | deactivate_locked_super(sb); |
| 587 | goto error_key; | ||
| 588 | error_as: | ||
| 589 | afs_destroy_sbi(as); | ||
| 590 | error_key: | ||
| 591 | key_put(params.key); | ||
| 592 | error: | 558 | error: |
| 593 | afs_put_cell(params.net, params.cell); | ||
| 594 | _leave(" = %d", ret); | 559 | _leave(" = %d", ret); |
| 595 | return ERR_PTR(ret); | 560 | return ret; |
| 561 | } | ||
| 562 | |||
| 563 | static void afs_free_fc(struct fs_context *fc) | ||
| 564 | { | ||
| 565 | struct afs_fs_context *ctx = fc->fs_private; | ||
| 566 | |||
| 567 | afs_destroy_sbi(fc->s_fs_info); | ||
| 568 | afs_put_volume(ctx->cell, ctx->volume); | ||
| 569 | afs_put_cell(ctx->net, ctx->cell); | ||
| 570 | key_put(ctx->key); | ||
| 571 | kfree(ctx); | ||
| 572 | } | ||
| 573 | |||
| 574 | static const struct fs_context_operations afs_context_ops = { | ||
| 575 | .free = afs_free_fc, | ||
| 576 | .parse_param = afs_parse_param, | ||
| 577 | .get_tree = afs_get_tree, | ||
| 578 | }; | ||
| 579 | |||
| 580 | /* | ||
| 581 | * Set up the filesystem mount context. | ||
| 582 | */ | ||
| 583 | static int afs_init_fs_context(struct fs_context *fc) | ||
| 584 | { | ||
| 585 | struct afs_fs_context *ctx; | ||
| 586 | struct afs_cell *cell; | ||
| 587 | |||
| 588 | ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL); | ||
| 589 | if (!ctx) | ||
| 590 | return -ENOMEM; | ||
| 591 | |||
| 592 | ctx->type = AFSVL_ROVOL; | ||
| 593 | ctx->net = afs_net(fc->net_ns); | ||
| 594 | |||
| 595 | /* Default to the workstation cell. */ | ||
| 596 | rcu_read_lock(); | ||
| 597 | cell = afs_lookup_cell_rcu(ctx->net, NULL, 0); | ||
| 598 | rcu_read_unlock(); | ||
| 599 | if (IS_ERR(cell)) | ||
| 600 | cell = NULL; | ||
| 601 | ctx->cell = cell; | ||
| 602 | |||
| 603 | fc->fs_private = ctx; | ||
| 604 | fc->ops = &afs_context_ops; | ||
| 605 | return 0; | ||
| 596 | } | 606 | } |
| 597 | 607 | ||
| 598 | /* | 608 | /* |
diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 00975ed3640f..f6eba2def0a1 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c | |||
| @@ -21,7 +21,7 @@ static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" }; | |||
| 21 | /* | 21 | /* |
| 22 | * Allocate a volume record and load it up from a vldb record. | 22 | * Allocate a volume record and load it up from a vldb record. |
| 23 | */ | 23 | */ |
| 24 | static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, | 24 | static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, |
| 25 | struct afs_vldb_entry *vldb, | 25 | struct afs_vldb_entry *vldb, |
| 26 | unsigned long type_mask) | 26 | unsigned long type_mask) |
| 27 | { | 27 | { |
| @@ -113,7 +113,7 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, | |||
| 113 | * - Rule 3: If parent volume is R/W, then only mount R/W volume unless | 113 | * - Rule 3: If parent volume is R/W, then only mount R/W volume unless |
| 114 | * explicitly told otherwise | 114 | * explicitly told otherwise |
| 115 | */ | 115 | */ |
| 116 | struct afs_volume *afs_create_volume(struct afs_mount_params *params) | 116 | struct afs_volume *afs_create_volume(struct afs_fs_context *params) |
| 117 | { | 117 | { |
| 118 | struct afs_vldb_entry *vldb; | 118 | struct afs_vldb_entry *vldb; |
| 119 | struct afs_volume *volume; | 119 | struct afs_volume *volume; |
diff --git a/fs/filesystems.c b/fs/filesystems.c index b03f57b1105b..9135646e41ac 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| 17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
| 18 | #include <linux/uaccess.h> | 18 | #include <linux/uaccess.h> |
| 19 | #include <linux/fs_parser.h> | ||
| 19 | 20 | ||
| 20 | /* | 21 | /* |
| 21 | * Handling of filesystem drivers list. | 22 | * Handling of filesystem drivers list. |
| @@ -73,6 +74,9 @@ int register_filesystem(struct file_system_type * fs) | |||
| 73 | int res = 0; | 74 | int res = 0; |
| 74 | struct file_system_type ** p; | 75 | struct file_system_type ** p; |
| 75 | 76 | ||
| 77 | if (fs->parameters && !fs_validate_description(fs->parameters)) | ||
| 78 | return -EINVAL; | ||
| 79 | |||
| 76 | BUG_ON(strchr(fs->name, '.')); | 80 | BUG_ON(strchr(fs->name, '.')); |
| 77 | if (fs->next) | 81 | if (fs->next) |
| 78 | return -EBUSY; | 82 | return -EBUSY; |
diff --git a/fs/fs_context.c b/fs/fs_context.c new file mode 100644 index 000000000000..87e3546b9a52 --- /dev/null +++ b/fs/fs_context.c | |||
| @@ -0,0 +1,642 @@ | |||
| 1 | /* Provide a way to create a superblock configuration context within the kernel | ||
| 2 | * that allows a superblock to be set up prior to mounting. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. | ||
| 5 | * Written by David Howells (dhowells@redhat.com) | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public Licence | ||
| 9 | * as published by the Free Software Foundation; either version | ||
| 10 | * 2 of the Licence, or (at your option) any later version. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 14 | #include <linux/fs_context.h> | ||
| 15 | #include <linux/fs_parser.h> | ||
| 16 | #include <linux/fs.h> | ||
| 17 | #include <linux/mount.h> | ||
| 18 | #include <linux/nsproxy.h> | ||
| 19 | #include <linux/slab.h> | ||
| 20 | #include <linux/magic.h> | ||
| 21 | #include <linux/security.h> | ||
| 22 | #include <linux/mnt_namespace.h> | ||
| 23 | #include <linux/pid_namespace.h> | ||
| 24 | #include <linux/user_namespace.h> | ||
| 25 | #include <net/net_namespace.h> | ||
| 26 | #include "mount.h" | ||
| 27 | #include "internal.h" | ||
| 28 | |||
| 29 | enum legacy_fs_param { | ||
| 30 | LEGACY_FS_UNSET_PARAMS, | ||
| 31 | LEGACY_FS_MONOLITHIC_PARAMS, | ||
| 32 | LEGACY_FS_INDIVIDUAL_PARAMS, | ||
| 33 | }; | ||
| 34 | |||
| 35 | struct legacy_fs_context { | ||
| 36 | char *legacy_data; /* Data page for legacy filesystems */ | ||
| 37 | size_t data_size; | ||
| 38 | enum legacy_fs_param param_type; | ||
| 39 | }; | ||
| 40 | |||
| 41 | static int legacy_init_fs_context(struct fs_context *fc); | ||
| 42 | |||
| 43 | static const struct constant_table common_set_sb_flag[] = { | ||
| 44 | { "dirsync", SB_DIRSYNC }, | ||
| 45 | { "lazytime", SB_LAZYTIME }, | ||
| 46 | { "mand", SB_MANDLOCK }, | ||
| 47 | { "posixacl", SB_POSIXACL }, | ||
| 48 | { "ro", SB_RDONLY }, | ||
| 49 | { "sync", SB_SYNCHRONOUS }, | ||
| 50 | }; | ||
| 51 | |||
| 52 | static const struct constant_table common_clear_sb_flag[] = { | ||
| 53 | { "async", SB_SYNCHRONOUS }, | ||
| 54 | { "nolazytime", SB_LAZYTIME }, | ||
| 55 | { "nomand", SB_MANDLOCK }, | ||
| 56 | { "rw", SB_RDONLY }, | ||
| 57 | { "silent", SB_SILENT }, | ||
| 58 | }; | ||
| 59 | |||
| 60 | static const char *const forbidden_sb_flag[] = { | ||
| 61 | "bind", | ||
| 62 | "dev", | ||
| 63 | "exec", | ||
| 64 | "move", | ||
| 65 | "noatime", | ||
| 66 | "nodev", | ||
| 67 | "nodiratime", | ||
| 68 | "noexec", | ||
| 69 | "norelatime", | ||
| 70 | "nostrictatime", | ||
| 71 | "nosuid", | ||
| 72 | "private", | ||
| 73 | "rec", | ||
| 74 | "relatime", | ||
| 75 | "remount", | ||
| 76 | "shared", | ||
| 77 | "slave", | ||
| 78 | "strictatime", | ||
| 79 | "suid", | ||
| 80 | "unbindable", | ||
| 81 | }; | ||
| 82 | |||
| 83 | /* | ||
| 84 | * Check for a common mount option that manipulates s_flags. | ||
| 85 | */ | ||
| 86 | static int vfs_parse_sb_flag(struct fs_context *fc, const char *key) | ||
| 87 | { | ||
| 88 | unsigned int token; | ||
| 89 | unsigned int i; | ||
| 90 | |||
| 91 | for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++) | ||
| 92 | if (strcmp(key, forbidden_sb_flag[i]) == 0) | ||
| 93 | return -EINVAL; | ||
| 94 | |||
| 95 | token = lookup_constant(common_set_sb_flag, key, 0); | ||
| 96 | if (token) { | ||
| 97 | fc->sb_flags |= token; | ||
| 98 | fc->sb_flags_mask |= token; | ||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
| 102 | token = lookup_constant(common_clear_sb_flag, key, 0); | ||
| 103 | if (token) { | ||
| 104 | fc->sb_flags &= ~token; | ||
| 105 | fc->sb_flags_mask |= token; | ||
| 106 | return 0; | ||
| 107 | } | ||
| 108 | |||
| 109 | return -ENOPARAM; | ||
| 110 | } | ||
| 111 | |||
| 112 | /** | ||
| 113 | * vfs_parse_fs_param - Add a single parameter to a superblock config | ||
| 114 | * @fc: The filesystem context to modify | ||
| 115 | * @param: The parameter | ||
| 116 | * | ||
| 117 | * A single mount option in string form is applied to the filesystem context | ||
| 118 | * being set up. Certain standard options (for example "ro") are translated | ||
| 119 | * into flag bits without going to the filesystem. The active security module | ||
| 120 | * is allowed to observe and poach options. Any other options are passed over | ||
| 121 | * to the filesystem to parse. | ||
| 122 | * | ||
| 123 | * This may be called multiple times for a context. | ||
| 124 | * | ||
| 125 | * Returns 0 on success and a negative error code on failure. In the event of | ||
| 126 | * failure, supplementary error information may have been set. | ||
| 127 | */ | ||
| 128 | int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param) | ||
| 129 | { | ||
| 130 | int ret; | ||
| 131 | |||
| 132 | if (!param->key) | ||
| 133 | return invalf(fc, "Unnamed parameter\n"); | ||
| 134 | |||
| 135 | ret = vfs_parse_sb_flag(fc, param->key); | ||
| 136 | if (ret != -ENOPARAM) | ||
| 137 | return ret; | ||
| 138 | |||
| 139 | ret = security_fs_context_parse_param(fc, param); | ||
| 140 | if (ret != -ENOPARAM) | ||
| 141 | /* Param belongs to the LSM or is disallowed by the LSM; so | ||
| 142 | * don't pass to the FS. | ||
| 143 | */ | ||
| 144 | return ret; | ||
| 145 | |||
| 146 | if (fc->ops->parse_param) { | ||
| 147 | ret = fc->ops->parse_param(fc, param); | ||
| 148 | if (ret != -ENOPARAM) | ||
| 149 | return ret; | ||
| 150 | } | ||
| 151 | |||
| 152 | /* If the filesystem has no ->parse_param() or it returned -ENOPARAM | ||
| 153 | * for this parameter, apply the default handling of "source". | ||
| 154 | */ | ||
| 155 | if (strcmp(param->key, "source") == 0) { | ||
| 156 | if (param->type != fs_value_is_string) | ||
| 157 | return invalf(fc, "VFS: Non-string source"); | ||
| 158 | if (fc->source) | ||
| 159 | return invalf(fc, "VFS: Multiple sources"); | ||
| 160 | fc->source = param->string; | ||
| 161 | param->string = NULL; | ||
| 162 | return 0; | ||
| 163 | } | ||
| 164 | |||
| 165 | return invalf(fc, "%s: Unknown parameter '%s'", | ||
| 166 | fc->fs_type->name, param->key); | ||
| 167 | } | ||
| 168 | EXPORT_SYMBOL(vfs_parse_fs_param); | ||
| 169 | |||
| 170 | /** | ||
| 171 | * vfs_parse_fs_string - Convenience function to just parse a string. | ||
| 172 | */ | ||
| 173 | int vfs_parse_fs_string(struct fs_context *fc, const char *key, | ||
| 174 | const char *value, size_t v_size) | ||
| 175 | { | ||
| 176 | int ret; | ||
| 177 | |||
| 178 | struct fs_parameter param = { | ||
| 179 | .key = key, | ||
| 180 | .type = fs_value_is_string, | ||
| 181 | .size = v_size, | ||
| 182 | }; | ||
| 183 | |||
| 184 | if (v_size > 0) { | ||
| 185 | param.string = kmemdup_nul(value, v_size, GFP_KERNEL); | ||
| 186 | if (!param.string) | ||
| 187 | return -ENOMEM; | ||
| 188 | } | ||
| 189 | |||
| 190 | ret = vfs_parse_fs_param(fc, ¶m); | ||
| 191 | kfree(param.string); | ||
| 192 | return ret; | ||
| 193 | } | ||
| 194 | EXPORT_SYMBOL(vfs_parse_fs_string); | ||
| 195 | |||
| 196 | /** | ||
| 197 | * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data | ||
| 198 | * @ctx: The superblock configuration to fill in. | ||
| 199 | * @data: The data to parse | ||
| 200 | * | ||
| 201 | * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be | ||
| 202 | * called from the ->monolithic_mount_data() fs_context operation. | ||
| 203 | * | ||
| 204 | * Returns 0 on success or the error returned by the ->parse_option() fs_context | ||
| 205 | * operation on failure. | ||
| 206 | */ | ||
| 207 | int generic_parse_monolithic(struct fs_context *fc, void *data) | ||
| 208 | { | ||
| 209 | char *options = data, *key; | ||
| 210 | int ret = 0; | ||
| 211 | |||
| 212 | if (!options) | ||
| 213 | return 0; | ||
| 214 | |||
| 215 | ret = security_sb_eat_lsm_opts(options, &fc->security); | ||
| 216 | if (ret) | ||
| 217 | return ret; | ||
| 218 | |||
| 219 | while ((key = strsep(&options, ",")) != NULL) { | ||
| 220 | if (*key) { | ||
| 221 | size_t v_len = 0; | ||
| 222 | char *value = strchr(key, '='); | ||
| 223 | |||
| 224 | if (value) { | ||
| 225 | if (value == key) | ||
| 226 | continue; | ||
| 227 | *value++ = 0; | ||
| 228 | v_len = strlen(value); | ||
| 229 | } | ||
| 230 | ret = vfs_parse_fs_string(fc, key, value, v_len); | ||
| 231 | if (ret < 0) | ||
| 232 | break; | ||
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 | return ret; | ||
| 237 | } | ||
| 238 | EXPORT_SYMBOL(generic_parse_monolithic); | ||
| 239 | |||
| 240 | /** | ||
| 241 | * alloc_fs_context - Create a filesystem context. | ||
| 242 | * @fs_type: The filesystem type. | ||
| 243 | * @reference: The dentry from which this one derives (or NULL) | ||
| 244 | * @sb_flags: Filesystem/superblock flags (SB_*) | ||
| 245 | * @sb_flags_mask: Applicable members of @sb_flags | ||
| 246 | * @purpose: The purpose that this configuration shall be used for. | ||
| 247 | * | ||
| 248 | * Open a filesystem and create a mount context. The mount context is | ||
| 249 | * initialised with the supplied flags and, if a submount/automount from | ||
| 250 | * another superblock (referred to by @reference) is supplied, may have | ||
| 251 | * parameters such as namespaces copied across from that superblock. | ||
| 252 | */ | ||
| 253 | static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, | ||
| 254 | struct dentry *reference, | ||
| 255 | unsigned int sb_flags, | ||
| 256 | unsigned int sb_flags_mask, | ||
| 257 | enum fs_context_purpose purpose) | ||
| 258 | { | ||
| 259 | int (*init_fs_context)(struct fs_context *); | ||
| 260 | struct fs_context *fc; | ||
| 261 | int ret = -ENOMEM; | ||
| 262 | |||
| 263 | fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); | ||
| 264 | if (!fc) | ||
| 265 | return ERR_PTR(-ENOMEM); | ||
| 266 | |||
| 267 | fc->purpose = purpose; | ||
| 268 | fc->sb_flags = sb_flags; | ||
| 269 | fc->sb_flags_mask = sb_flags_mask; | ||
| 270 | fc->fs_type = get_filesystem(fs_type); | ||
| 271 | fc->cred = get_current_cred(); | ||
| 272 | fc->net_ns = get_net(current->nsproxy->net_ns); | ||
| 273 | |||
| 274 | switch (purpose) { | ||
| 275 | case FS_CONTEXT_FOR_MOUNT: | ||
| 276 | fc->user_ns = get_user_ns(fc->cred->user_ns); | ||
| 277 | break; | ||
| 278 | case FS_CONTEXT_FOR_SUBMOUNT: | ||
| 279 | fc->user_ns = get_user_ns(reference->d_sb->s_user_ns); | ||
| 280 | break; | ||
| 281 | case FS_CONTEXT_FOR_RECONFIGURE: | ||
| 282 | /* We don't pin any namespaces as the superblock's | ||
| 283 | * subscriptions cannot be changed at this point. | ||
| 284 | */ | ||
| 285 | atomic_inc(&reference->d_sb->s_active); | ||
| 286 | fc->root = dget(reference); | ||
| 287 | break; | ||
| 288 | } | ||
| 289 | |||
| 290 | /* TODO: Make all filesystems support this unconditionally */ | ||
| 291 | init_fs_context = fc->fs_type->init_fs_context; | ||
| 292 | if (!init_fs_context) | ||
| 293 | init_fs_context = legacy_init_fs_context; | ||
| 294 | |||
| 295 | ret = init_fs_context(fc); | ||
| 296 | if (ret < 0) | ||
| 297 | goto err_fc; | ||
| 298 | fc->need_free = true; | ||
| 299 | return fc; | ||
| 300 | |||
| 301 | err_fc: | ||
| 302 | put_fs_context(fc); | ||
| 303 | return ERR_PTR(ret); | ||
| 304 | } | ||
| 305 | |||
| 306 | struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, | ||
| 307 | unsigned int sb_flags) | ||
| 308 | { | ||
| 309 | return alloc_fs_context(fs_type, NULL, sb_flags, 0, | ||
| 310 | FS_CONTEXT_FOR_MOUNT); | ||
| 311 | } | ||
| 312 | EXPORT_SYMBOL(fs_context_for_mount); | ||
| 313 | |||
| 314 | struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, | ||
| 315 | unsigned int sb_flags, | ||
| 316 | unsigned int sb_flags_mask) | ||
| 317 | { | ||
| 318 | return alloc_fs_context(dentry->d_sb->s_type, dentry, sb_flags, | ||
| 319 | sb_flags_mask, FS_CONTEXT_FOR_RECONFIGURE); | ||
| 320 | } | ||
| 321 | EXPORT_SYMBOL(fs_context_for_reconfigure); | ||
| 322 | |||
| 323 | struct fs_context *fs_context_for_submount(struct file_system_type *type, | ||
| 324 | struct dentry *reference) | ||
| 325 | { | ||
| 326 | return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT); | ||
| 327 | } | ||
| 328 | EXPORT_SYMBOL(fs_context_for_submount); | ||
| 329 | |||
| 330 | void fc_drop_locked(struct fs_context *fc) | ||
| 331 | { | ||
| 332 | struct super_block *sb = fc->root->d_sb; | ||
| 333 | dput(fc->root); | ||
| 334 | fc->root = NULL; | ||
| 335 | deactivate_locked_super(sb); | ||
| 336 | } | ||
| 337 | |||
| 338 | static void legacy_fs_context_free(struct fs_context *fc); | ||
| 339 | |||
| 340 | /** | ||
| 341 | * vfs_dup_fc_config: Duplicate a filesystem context. | ||
| 342 | * @src_fc: The context to copy. | ||
| 343 | */ | ||
| 344 | struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc) | ||
| 345 | { | ||
| 346 | struct fs_context *fc; | ||
| 347 | int ret; | ||
| 348 | |||
| 349 | if (!src_fc->ops->dup) | ||
| 350 | return ERR_PTR(-EOPNOTSUPP); | ||
| 351 | |||
| 352 | fc = kmemdup(src_fc, sizeof(struct fs_context), GFP_KERNEL); | ||
| 353 | if (!fc) | ||
| 354 | return ERR_PTR(-ENOMEM); | ||
| 355 | |||
| 356 | fc->fs_private = NULL; | ||
| 357 | fc->s_fs_info = NULL; | ||
| 358 | fc->source = NULL; | ||
| 359 | fc->security = NULL; | ||
| 360 | get_filesystem(fc->fs_type); | ||
| 361 | get_net(fc->net_ns); | ||
| 362 | get_user_ns(fc->user_ns); | ||
| 363 | get_cred(fc->cred); | ||
| 364 | |||
| 365 | /* Can't call put until we've called ->dup */ | ||
| 366 | ret = fc->ops->dup(fc, src_fc); | ||
| 367 | if (ret < 0) | ||
| 368 | goto err_fc; | ||
| 369 | |||
| 370 | ret = security_fs_context_dup(fc, src_fc); | ||
| 371 | if (ret < 0) | ||
| 372 | goto err_fc; | ||
| 373 | return fc; | ||
| 374 | |||
| 375 | err_fc: | ||
| 376 | put_fs_context(fc); | ||
| 377 | return ERR_PTR(ret); | ||
| 378 | } | ||
| 379 | EXPORT_SYMBOL(vfs_dup_fs_context); | ||
| 380 | |||
| 381 | #ifdef CONFIG_PRINTK | ||
| 382 | /** | ||
| 383 | * logfc - Log a message to a filesystem context | ||
| 384 | * @fc: The filesystem context to log to. | ||
| 385 | * @fmt: The format of the buffer. | ||
| 386 | */ | ||
| 387 | void logfc(struct fs_context *fc, const char *fmt, ...) | ||
| 388 | { | ||
| 389 | va_list va; | ||
| 390 | |||
| 391 | va_start(va, fmt); | ||
| 392 | |||
| 393 | switch (fmt[0]) { | ||
| 394 | case 'w': | ||
| 395 | vprintk_emit(0, LOGLEVEL_WARNING, NULL, 0, fmt, va); | ||
| 396 | break; | ||
| 397 | case 'e': | ||
| 398 | vprintk_emit(0, LOGLEVEL_ERR, NULL, 0, fmt, va); | ||
| 399 | break; | ||
| 400 | default: | ||
| 401 | vprintk_emit(0, LOGLEVEL_NOTICE, NULL, 0, fmt, va); | ||
| 402 | break; | ||
| 403 | } | ||
| 404 | |||
| 405 | pr_cont("\n"); | ||
| 406 | va_end(va); | ||
| 407 | } | ||
| 408 | EXPORT_SYMBOL(logfc); | ||
| 409 | #endif | ||
| 410 | |||
| 411 | /** | ||
| 412 | * put_fs_context - Dispose of a superblock configuration context. | ||
| 413 | * @fc: The context to dispose of. | ||
| 414 | */ | ||
| 415 | void put_fs_context(struct fs_context *fc) | ||
| 416 | { | ||
| 417 | struct super_block *sb; | ||
| 418 | |||
| 419 | if (fc->root) { | ||
| 420 | sb = fc->root->d_sb; | ||
| 421 | dput(fc->root); | ||
| 422 | fc->root = NULL; | ||
| 423 | deactivate_super(sb); | ||
| 424 | } | ||
| 425 | |||
| 426 | if (fc->need_free && fc->ops && fc->ops->free) | ||
| 427 | fc->ops->free(fc); | ||
| 428 | |||
| 429 | security_free_mnt_opts(&fc->security); | ||
| 430 | put_net(fc->net_ns); | ||
| 431 | put_user_ns(fc->user_ns); | ||
| 432 | put_cred(fc->cred); | ||
| 433 | kfree(fc->subtype); | ||
| 434 | put_filesystem(fc->fs_type); | ||
| 435 | kfree(fc->source); | ||
| 436 | kfree(fc); | ||
| 437 | } | ||
| 438 | EXPORT_SYMBOL(put_fs_context); | ||
| 439 | |||
| 440 | /* | ||
| 441 | * Free the config for a filesystem that doesn't support fs_context. | ||
| 442 | */ | ||
| 443 | static void legacy_fs_context_free(struct fs_context *fc) | ||
| 444 | { | ||
| 445 | struct legacy_fs_context *ctx = fc->fs_private; | ||
| 446 | |||
| 447 | if (ctx) { | ||
| 448 | if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) | ||
| 449 | kfree(ctx->legacy_data); | ||
| 450 | kfree(ctx); | ||
| 451 | } | ||
| 452 | } | ||
| 453 | |||
| 454 | /* | ||
| 455 | * Duplicate a legacy config. | ||
| 456 | */ | ||
| 457 | static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) | ||
| 458 | { | ||
| 459 | struct legacy_fs_context *ctx; | ||
| 460 | struct legacy_fs_context *src_ctx = src_fc->fs_private; | ||
| 461 | |||
| 462 | ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL); | ||
| 463 | if (!ctx) | ||
| 464 | return -ENOMEM; | ||
| 465 | |||
| 466 | if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) { | ||
| 467 | ctx->legacy_data = kmemdup(src_ctx->legacy_data, | ||
| 468 | src_ctx->data_size, GFP_KERNEL); | ||
| 469 | if (!ctx->legacy_data) { | ||
| 470 | kfree(ctx); | ||
| 471 | return -ENOMEM; | ||
| 472 | } | ||
| 473 | } | ||
| 474 | |||
| 475 | fc->fs_private = ctx; | ||
| 476 | return 0; | ||
| 477 | } | ||
| 478 | |||
| 479 | /* | ||
| 480 | * Add a parameter to a legacy config. We build up a comma-separated list of | ||
| 481 | * options. | ||
| 482 | */ | ||
| 483 | static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) | ||
| 484 | { | ||
| 485 | struct legacy_fs_context *ctx = fc->fs_private; | ||
| 486 | unsigned int size = ctx->data_size; | ||
| 487 | size_t len = 0; | ||
| 488 | |||
| 489 | if (strcmp(param->key, "source") == 0) { | ||
| 490 | if (param->type != fs_value_is_string) | ||
| 491 | return invalf(fc, "VFS: Legacy: Non-string source"); | ||
| 492 | if (fc->source) | ||
| 493 | return invalf(fc, "VFS: Legacy: Multiple sources"); | ||
| 494 | fc->source = param->string; | ||
| 495 | param->string = NULL; | ||
| 496 | return 0; | ||
| 497 | } | ||
| 498 | |||
| 499 | if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) && | ||
| 500 | strcmp(param->key, "subtype") == 0) { | ||
| 501 | if (param->type != fs_value_is_string) | ||
| 502 | return invalf(fc, "VFS: Legacy: Non-string subtype"); | ||
| 503 | if (fc->subtype) | ||
| 504 | return invalf(fc, "VFS: Legacy: Multiple subtype"); | ||
| 505 | fc->subtype = param->string; | ||
| 506 | param->string = NULL; | ||
| 507 | return 0; | ||
| 508 | } | ||
| 509 | |||
| 510 | if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS) | ||
| 511 | return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options"); | ||
| 512 | |||
| 513 | switch (param->type) { | ||
| 514 | case fs_value_is_string: | ||
| 515 | len = 1 + param->size; | ||
| 516 | /* Fall through */ | ||
| 517 | case fs_value_is_flag: | ||
| 518 | len += strlen(param->key); | ||
| 519 | break; | ||
| 520 | default: | ||
| 521 | return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported", | ||
| 522 | param->key); | ||
| 523 | } | ||
| 524 | |||
| 525 | if (len > PAGE_SIZE - 2 - size) | ||
| 526 | return invalf(fc, "VFS: Legacy: Cumulative options too large"); | ||
| 527 | if (strchr(param->key, ',') || | ||
| 528 | (param->type == fs_value_is_string && | ||
| 529 | memchr(param->string, ',', param->size))) | ||
| 530 | return invalf(fc, "VFS: Legacy: Option '%s' contained comma", | ||
| 531 | param->key); | ||
| 532 | if (!ctx->legacy_data) { | ||
| 533 | ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
| 534 | if (!ctx->legacy_data) | ||
| 535 | return -ENOMEM; | ||
| 536 | } | ||
| 537 | |||
| 538 | ctx->legacy_data[size++] = ','; | ||
| 539 | len = strlen(param->key); | ||
| 540 | memcpy(ctx->legacy_data + size, param->key, len); | ||
| 541 | size += len; | ||
| 542 | if (param->type == fs_value_is_string) { | ||
| 543 | ctx->legacy_data[size++] = '='; | ||
| 544 | memcpy(ctx->legacy_data + size, param->string, param->size); | ||
| 545 | size += param->size; | ||
| 546 | } | ||
| 547 | ctx->legacy_data[size] = '\0'; | ||
| 548 | ctx->data_size = size; | ||
| 549 | ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS; | ||
| 550 | return 0; | ||
| 551 | } | ||
| 552 | |||
| 553 | /* | ||
| 554 | * Add monolithic mount data. | ||
| 555 | */ | ||
| 556 | static int legacy_parse_monolithic(struct fs_context *fc, void *data) | ||
| 557 | { | ||
| 558 | struct legacy_fs_context *ctx = fc->fs_private; | ||
| 559 | |||
| 560 | if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) { | ||
| 561 | pr_warn("VFS: Can't mix monolithic and individual options\n"); | ||
| 562 | return -EINVAL; | ||
| 563 | } | ||
| 564 | |||
| 565 | ctx->legacy_data = data; | ||
| 566 | ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS; | ||
| 567 | if (!ctx->legacy_data) | ||
| 568 | return 0; | ||
| 569 | |||
| 570 | if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA) | ||
| 571 | return 0; | ||
| 572 | return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security); | ||
| 573 | } | ||
| 574 | |||
| 575 | /* | ||
| 576 | * Get a mountable root with the legacy mount command. | ||
| 577 | */ | ||
| 578 | static int legacy_get_tree(struct fs_context *fc) | ||
| 579 | { | ||
| 580 | struct legacy_fs_context *ctx = fc->fs_private; | ||
| 581 | struct super_block *sb; | ||
| 582 | struct dentry *root; | ||
| 583 | |||
| 584 | root = fc->fs_type->mount(fc->fs_type, fc->sb_flags, | ||
| 585 | fc->source, ctx->legacy_data); | ||
| 586 | if (IS_ERR(root)) | ||
| 587 | return PTR_ERR(root); | ||
| 588 | |||
| 589 | sb = root->d_sb; | ||
| 590 | BUG_ON(!sb); | ||
| 591 | |||
| 592 | fc->root = root; | ||
| 593 | return 0; | ||
| 594 | } | ||
| 595 | |||
| 596 | /* | ||
| 597 | * Handle remount. | ||
| 598 | */ | ||
| 599 | static int legacy_reconfigure(struct fs_context *fc) | ||
| 600 | { | ||
| 601 | struct legacy_fs_context *ctx = fc->fs_private; | ||
| 602 | struct super_block *sb = fc->root->d_sb; | ||
| 603 | |||
| 604 | if (!sb->s_op->remount_fs) | ||
| 605 | return 0; | ||
| 606 | |||
| 607 | return sb->s_op->remount_fs(sb, &fc->sb_flags, | ||
| 608 | ctx ? ctx->legacy_data : NULL); | ||
| 609 | } | ||
| 610 | |||
| 611 | const struct fs_context_operations legacy_fs_context_ops = { | ||
| 612 | .free = legacy_fs_context_free, | ||
| 613 | .dup = legacy_fs_context_dup, | ||
| 614 | .parse_param = legacy_parse_param, | ||
| 615 | .parse_monolithic = legacy_parse_monolithic, | ||
| 616 | .get_tree = legacy_get_tree, | ||
| 617 | .reconfigure = legacy_reconfigure, | ||
| 618 | }; | ||
| 619 | |||
| 620 | /* | ||
| 621 | * Initialise a legacy context for a filesystem that doesn't support | ||
| 622 | * fs_context. | ||
| 623 | */ | ||
| 624 | static int legacy_init_fs_context(struct fs_context *fc) | ||
| 625 | { | ||
| 626 | fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); | ||
| 627 | if (!fc->fs_private) | ||
| 628 | return -ENOMEM; | ||
| 629 | fc->ops = &legacy_fs_context_ops; | ||
| 630 | return 0; | ||
| 631 | } | ||
| 632 | |||
| 633 | int parse_monolithic_mount_data(struct fs_context *fc, void *data) | ||
| 634 | { | ||
| 635 | int (*monolithic_mount_data)(struct fs_context *, void *); | ||
| 636 | |||
| 637 | monolithic_mount_data = fc->ops->parse_monolithic; | ||
| 638 | if (!monolithic_mount_data) | ||
| 639 | monolithic_mount_data = generic_parse_monolithic; | ||
| 640 | |||
| 641 | return monolithic_mount_data(fc, data); | ||
| 642 | } | ||
diff --git a/fs/fs_parser.c b/fs/fs_parser.c new file mode 100644 index 000000000000..842e8f749db6 --- /dev/null +++ b/fs/fs_parser.c | |||
| @@ -0,0 +1,447 @@ | |||
| 1 | /* Filesystem parameter parser. | ||
| 2 | * | ||
| 3 | * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public Licence | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the Licence, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/export.h> | ||
| 13 | #include <linux/fs_context.h> | ||
| 14 | #include <linux/fs_parser.h> | ||
| 15 | #include <linux/slab.h> | ||
| 16 | #include <linux/security.h> | ||
| 17 | #include <linux/namei.h> | ||
| 18 | #include "internal.h" | ||
| 19 | |||
| 20 | static const struct constant_table bool_names[] = { | ||
| 21 | { "0", false }, | ||
| 22 | { "1", true }, | ||
| 23 | { "false", false }, | ||
| 24 | { "no", false }, | ||
| 25 | { "true", true }, | ||
| 26 | { "yes", true }, | ||
| 27 | }; | ||
| 28 | |||
| 29 | /** | ||
| 30 | * lookup_constant - Look up a constant by name in an ordered table | ||
| 31 | * @tbl: The table of constants to search. | ||
| 32 | * @tbl_size: The size of the table. | ||
| 33 | * @name: The name to look up. | ||
| 34 | * @not_found: The value to return if the name is not found. | ||
| 35 | */ | ||
| 36 | int __lookup_constant(const struct constant_table *tbl, size_t tbl_size, | ||
| 37 | const char *name, int not_found) | ||
| 38 | { | ||
| 39 | unsigned int i; | ||
| 40 | |||
| 41 | for (i = 0; i < tbl_size; i++) | ||
| 42 | if (strcmp(name, tbl[i].name) == 0) | ||
| 43 | return tbl[i].value; | ||
| 44 | |||
| 45 | return not_found; | ||
| 46 | } | ||
| 47 | EXPORT_SYMBOL(__lookup_constant); | ||
| 48 | |||
| 49 | static const struct fs_parameter_spec *fs_lookup_key( | ||
| 50 | const struct fs_parameter_description *desc, | ||
| 51 | const char *name) | ||
| 52 | { | ||
| 53 | const struct fs_parameter_spec *p; | ||
| 54 | |||
| 55 | if (!desc->specs) | ||
| 56 | return NULL; | ||
| 57 | |||
| 58 | for (p = desc->specs; p->name; p++) | ||
| 59 | if (strcmp(p->name, name) == 0) | ||
| 60 | return p; | ||
| 61 | |||
| 62 | return NULL; | ||
| 63 | } | ||
| 64 | |||
| 65 | /* | ||
| 66 | * fs_parse - Parse a filesystem configuration parameter | ||
| 67 | * @fc: The filesystem context to log errors through. | ||
| 68 | * @desc: The parameter description to use. | ||
| 69 | * @param: The parameter. | ||
| 70 | * @result: Where to place the result of the parse | ||
| 71 | * | ||
| 72 | * Parse a filesystem configuration parameter and attempt a conversion for a | ||
| 73 | * simple parameter for which this is requested. If successful, the determined | ||
| 74 | * parameter ID is placed into @result->key, the desired type is indicated in | ||
| 75 | * @result->t and any converted value is placed into an appropriate member of | ||
| 76 | * the union in @result. | ||
| 77 | * | ||
| 78 | * The function returns the parameter number if the parameter was matched, | ||
| 79 | * -ENOPARAM if it wasn't matched and @desc->ignore_unknown indicated that | ||
| 80 | * unknown parameters are okay and -EINVAL if there was a conversion issue or | ||
| 81 | * the parameter wasn't recognised and unknowns aren't okay. | ||
| 82 | */ | ||
| 83 | int fs_parse(struct fs_context *fc, | ||
| 84 | const struct fs_parameter_description *desc, | ||
| 85 | struct fs_parameter *param, | ||
| 86 | struct fs_parse_result *result) | ||
| 87 | { | ||
| 88 | const struct fs_parameter_spec *p; | ||
| 89 | const struct fs_parameter_enum *e; | ||
| 90 | int ret = -ENOPARAM, b; | ||
| 91 | |||
| 92 | result->has_value = !!param->string; | ||
| 93 | result->negated = false; | ||
| 94 | result->uint_64 = 0; | ||
| 95 | |||
| 96 | p = fs_lookup_key(desc, param->key); | ||
| 97 | if (!p) { | ||
| 98 | /* If we didn't find something that looks like "noxxx", see if | ||
| 99 | * "xxx" takes the "no"-form negative - but only if there | ||
| 100 | * wasn't an value. | ||
| 101 | */ | ||
| 102 | if (result->has_value) | ||
| 103 | goto unknown_parameter; | ||
| 104 | if (param->key[0] != 'n' || param->key[1] != 'o' || !param->key[2]) | ||
| 105 | goto unknown_parameter; | ||
| 106 | |||
| 107 | p = fs_lookup_key(desc, param->key + 2); | ||
| 108 | if (!p) | ||
| 109 | goto unknown_parameter; | ||
| 110 | if (!(p->flags & fs_param_neg_with_no)) | ||
| 111 | goto unknown_parameter; | ||
| 112 | result->boolean = false; | ||
| 113 | result->negated = true; | ||
| 114 | } | ||
| 115 | |||
| 116 | if (p->flags & fs_param_deprecated) | ||
| 117 | warnf(fc, "%s: Deprecated parameter '%s'", | ||
| 118 | desc->name, param->key); | ||
| 119 | |||
| 120 | if (result->negated) | ||
| 121 | goto okay; | ||
| 122 | |||
| 123 | /* Certain parameter types only take a string and convert it. */ | ||
| 124 | switch (p->type) { | ||
| 125 | case __fs_param_wasnt_defined: | ||
| 126 | return -EINVAL; | ||
| 127 | case fs_param_is_u32: | ||
| 128 | case fs_param_is_u32_octal: | ||
| 129 | case fs_param_is_u32_hex: | ||
| 130 | case fs_param_is_s32: | ||
| 131 | case fs_param_is_u64: | ||
| 132 | case fs_param_is_enum: | ||
| 133 | case fs_param_is_string: | ||
| 134 | if (param->type != fs_value_is_string) | ||
| 135 | goto bad_value; | ||
| 136 | if (!result->has_value) { | ||
| 137 | if (p->flags & fs_param_v_optional) | ||
| 138 | goto okay; | ||
| 139 | goto bad_value; | ||
| 140 | } | ||
| 141 | /* Fall through */ | ||
| 142 | default: | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | |||
| 146 | /* Try to turn the type we were given into the type desired by the | ||
| 147 | * parameter and give an error if we can't. | ||
| 148 | */ | ||
| 149 | switch (p->type) { | ||
| 150 | case fs_param_is_flag: | ||
| 151 | if (param->type != fs_value_is_flag && | ||
| 152 | (param->type != fs_value_is_string || result->has_value)) | ||
| 153 | return invalf(fc, "%s: Unexpected value for '%s'", | ||
| 154 | desc->name, param->key); | ||
| 155 | result->boolean = true; | ||
| 156 | goto okay; | ||
| 157 | |||
| 158 | case fs_param_is_bool: | ||
| 159 | switch (param->type) { | ||
| 160 | case fs_value_is_flag: | ||
| 161 | result->boolean = true; | ||
| 162 | goto okay; | ||
| 163 | case fs_value_is_string: | ||
| 164 | if (param->size == 0) { | ||
| 165 | result->boolean = true; | ||
| 166 | goto okay; | ||
| 167 | } | ||
| 168 | b = lookup_constant(bool_names, param->string, -1); | ||
| 169 | if (b == -1) | ||
| 170 | goto bad_value; | ||
| 171 | result->boolean = b; | ||
| 172 | goto okay; | ||
| 173 | default: | ||
| 174 | goto bad_value; | ||
| 175 | } | ||
| 176 | |||
| 177 | case fs_param_is_u32: | ||
| 178 | ret = kstrtouint(param->string, 0, &result->uint_32); | ||
| 179 | goto maybe_okay; | ||
| 180 | case fs_param_is_u32_octal: | ||
| 181 | ret = kstrtouint(param->string, 8, &result->uint_32); | ||
| 182 | goto maybe_okay; | ||
| 183 | case fs_param_is_u32_hex: | ||
| 184 | ret = kstrtouint(param->string, 16, &result->uint_32); | ||
| 185 | goto maybe_okay; | ||
| 186 | case fs_param_is_s32: | ||
| 187 | ret = kstrtoint(param->string, 0, &result->int_32); | ||
| 188 | goto maybe_okay; | ||
| 189 | case fs_param_is_u64: | ||
| 190 | ret = kstrtoull(param->string, 0, &result->uint_64); | ||
| 191 | goto maybe_okay; | ||
| 192 | |||
| 193 | case fs_param_is_enum: | ||
| 194 | for (e = desc->enums; e->name[0]; e++) { | ||
| 195 | if (e->opt == p->opt && | ||
| 196 | strcmp(e->name, param->string) == 0) { | ||
| 197 | result->uint_32 = e->value; | ||
| 198 | goto okay; | ||
| 199 | } | ||
| 200 | } | ||
| 201 | goto bad_value; | ||
| 202 | |||
| 203 | case fs_param_is_string: | ||
| 204 | goto okay; | ||
| 205 | case fs_param_is_blob: | ||
| 206 | if (param->type != fs_value_is_blob) | ||
| 207 | goto bad_value; | ||
| 208 | goto okay; | ||
| 209 | |||
| 210 | case fs_param_is_fd: { | ||
| 211 | if (param->type != fs_value_is_file) | ||
| 212 | goto bad_value; | ||
| 213 | goto okay; | ||
| 214 | } | ||
| 215 | |||
| 216 | case fs_param_is_blockdev: | ||
| 217 | case fs_param_is_path: | ||
| 218 | goto okay; | ||
| 219 | default: | ||
| 220 | BUG(); | ||
| 221 | } | ||
| 222 | |||
| 223 | maybe_okay: | ||
| 224 | if (ret < 0) | ||
| 225 | goto bad_value; | ||
| 226 | okay: | ||
| 227 | return p->opt; | ||
| 228 | |||
| 229 | bad_value: | ||
| 230 | return invalf(fc, "%s: Bad value for '%s'", desc->name, param->key); | ||
| 231 | unknown_parameter: | ||
| 232 | return -ENOPARAM; | ||
| 233 | } | ||
| 234 | EXPORT_SYMBOL(fs_parse); | ||
| 235 | |||
| 236 | /** | ||
| 237 | * fs_lookup_param - Look up a path referred to by a parameter | ||
| 238 | * @fc: The filesystem context to log errors through. | ||
| 239 | * @param: The parameter. | ||
| 240 | * @want_bdev: T if want a blockdev | ||
| 241 | * @_path: The result of the lookup | ||
| 242 | */ | ||
| 243 | int fs_lookup_param(struct fs_context *fc, | ||
| 244 | struct fs_parameter *param, | ||
| 245 | bool want_bdev, | ||
| 246 | struct path *_path) | ||
| 247 | { | ||
| 248 | struct filename *f; | ||
| 249 | unsigned int flags = 0; | ||
| 250 | bool put_f; | ||
| 251 | int ret; | ||
| 252 | |||
| 253 | switch (param->type) { | ||
| 254 | case fs_value_is_string: | ||
| 255 | f = getname_kernel(param->string); | ||
| 256 | if (IS_ERR(f)) | ||
| 257 | return PTR_ERR(f); | ||
| 258 | put_f = true; | ||
| 259 | break; | ||
| 260 | case fs_value_is_filename_empty: | ||
| 261 | flags = LOOKUP_EMPTY; | ||
| 262 | /* Fall through */ | ||
| 263 | case fs_value_is_filename: | ||
| 264 | f = param->name; | ||
| 265 | put_f = false; | ||
| 266 | break; | ||
| 267 | default: | ||
| 268 | return invalf(fc, "%s: not usable as path", param->key); | ||
| 269 | } | ||
| 270 | |||
| 271 | ret = filename_lookup(param->dirfd, f, flags, _path, NULL); | ||
| 272 | if (ret < 0) { | ||
| 273 | errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name); | ||
| 274 | goto out; | ||
| 275 | } | ||
| 276 | |||
| 277 | if (want_bdev && | ||
| 278 | !S_ISBLK(d_backing_inode(_path->dentry)->i_mode)) { | ||
| 279 | path_put(_path); | ||
| 280 | _path->dentry = NULL; | ||
| 281 | _path->mnt = NULL; | ||
| 282 | errorf(fc, "%s: Non-blockdev passed as '%s'", | ||
| 283 | param->key, f->name); | ||
| 284 | ret = -ENOTBLK; | ||
| 285 | } | ||
| 286 | |||
| 287 | out: | ||
| 288 | if (put_f) | ||
| 289 | putname(f); | ||
| 290 | return ret; | ||
| 291 | } | ||
| 292 | EXPORT_SYMBOL(fs_lookup_param); | ||
| 293 | |||
| 294 | #ifdef CONFIG_VALIDATE_FS_PARSER | ||
| 295 | /** | ||
| 296 | * validate_constant_table - Validate a constant table | ||
| 297 | * @name: Name to use in reporting | ||
| 298 | * @tbl: The constant table to validate. | ||
| 299 | * @tbl_size: The size of the table. | ||
| 300 | * @low: The lowest permissible value. | ||
| 301 | * @high: The highest permissible value. | ||
| 302 | * @special: One special permissible value outside of the range. | ||
| 303 | */ | ||
| 304 | bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, | ||
| 305 | int low, int high, int special) | ||
| 306 | { | ||
| 307 | size_t i; | ||
| 308 | bool good = true; | ||
| 309 | |||
| 310 | if (tbl_size == 0) { | ||
| 311 | pr_warn("VALIDATE C-TBL: Empty\n"); | ||
| 312 | return true; | ||
| 313 | } | ||
| 314 | |||
| 315 | for (i = 0; i < tbl_size; i++) { | ||
| 316 | if (!tbl[i].name) { | ||
| 317 | pr_err("VALIDATE C-TBL[%zu]: Null\n", i); | ||
| 318 | good = false; | ||
| 319 | } else if (i > 0 && tbl[i - 1].name) { | ||
| 320 | int c = strcmp(tbl[i-1].name, tbl[i].name); | ||
| 321 | |||
| 322 | if (c == 0) { | ||
| 323 | pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n", | ||
| 324 | i, tbl[i].name); | ||
| 325 | good = false; | ||
| 326 | } | ||
| 327 | if (c > 0) { | ||
| 328 | pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n", | ||
| 329 | i, tbl[i-1].name, tbl[i].name); | ||
| 330 | good = false; | ||
| 331 | } | ||
| 332 | } | ||
| 333 | |||
| 334 | if (tbl[i].value != special && | ||
| 335 | (tbl[i].value < low || tbl[i].value > high)) { | ||
| 336 | pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n", | ||
| 337 | i, tbl[i].name, tbl[i].value, low, high); | ||
| 338 | good = false; | ||
| 339 | } | ||
| 340 | } | ||
| 341 | |||
| 342 | return good; | ||
| 343 | } | ||
| 344 | |||
| 345 | /** | ||
| 346 | * fs_validate_description - Validate a parameter description | ||
| 347 | * @desc: The parameter description to validate. | ||
| 348 | */ | ||
| 349 | bool fs_validate_description(const struct fs_parameter_description *desc) | ||
| 350 | { | ||
| 351 | const struct fs_parameter_spec *param, *p2; | ||
| 352 | const struct fs_parameter_enum *e; | ||
| 353 | const char *name = desc->name; | ||
| 354 | unsigned int nr_params = 0; | ||
| 355 | bool good = true, enums = false; | ||
| 356 | |||
| 357 | pr_notice("*** VALIDATE %s ***\n", name); | ||
| 358 | |||
| 359 | if (!name[0]) { | ||
| 360 | pr_err("VALIDATE Parser: No name\n"); | ||
| 361 | name = "Unknown"; | ||
| 362 | good = false; | ||
| 363 | } | ||
| 364 | |||
| 365 | if (desc->specs) { | ||
| 366 | for (param = desc->specs; param->name; param++) { | ||
| 367 | enum fs_parameter_type t = param->type; | ||
| 368 | |||
| 369 | /* Check that the type is in range */ | ||
| 370 | if (t == __fs_param_wasnt_defined || | ||
| 371 | t >= nr__fs_parameter_type) { | ||
| 372 | pr_err("VALIDATE %s: PARAM[%s] Bad type %u\n", | ||
| 373 | name, param->name, t); | ||
| 374 | good = false; | ||
| 375 | } else if (t == fs_param_is_enum) { | ||
| 376 | enums = true; | ||
| 377 | } | ||
| 378 | |||
| 379 | /* Check for duplicate parameter names */ | ||
| 380 | for (p2 = desc->specs; p2 < param; p2++) { | ||
| 381 | if (strcmp(param->name, p2->name) == 0) { | ||
| 382 | pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n", | ||
| 383 | name, param->name); | ||
| 384 | good = false; | ||
| 385 | } | ||
| 386 | } | ||
| 387 | } | ||
| 388 | |||
| 389 | nr_params = param - desc->specs; | ||
| 390 | } | ||
| 391 | |||
| 392 | if (desc->enums) { | ||
| 393 | if (!nr_params) { | ||
| 394 | pr_err("VALIDATE %s: Enum table but no parameters\n", | ||
| 395 | name); | ||
| 396 | good = false; | ||
| 397 | goto no_enums; | ||
| 398 | } | ||
| 399 | if (!enums) { | ||
| 400 | pr_err("VALIDATE %s: Enum table but no enum-type values\n", | ||
| 401 | name); | ||
| 402 | good = false; | ||
| 403 | goto no_enums; | ||
| 404 | } | ||
| 405 | |||
| 406 | for (e = desc->enums; e->name[0]; e++) { | ||
| 407 | /* Check that all entries in the enum table have at | ||
| 408 | * least one parameter that uses them. | ||
| 409 | */ | ||
| 410 | for (param = desc->specs; param->name; param++) { | ||
| 411 | if (param->opt == e->opt && | ||
| 412 | param->type != fs_param_is_enum) { | ||
| 413 | pr_err("VALIDATE %s: e[%lu] enum val for %s\n", | ||
| 414 | name, e - desc->enums, param->name); | ||
| 415 | good = false; | ||
| 416 | } | ||
| 417 | } | ||
| 418 | } | ||
| 419 | |||
| 420 | /* Check that all enum-type parameters have at least one enum | ||
| 421 | * value in the enum table. | ||
| 422 | */ | ||
| 423 | for (param = desc->specs; param->name; param++) { | ||
| 424 | if (param->type != fs_param_is_enum) | ||
| 425 | continue; | ||
| 426 | for (e = desc->enums; e->name[0]; e++) | ||
| 427 | if (e->opt == param->opt) | ||
| 428 | break; | ||
| 429 | if (!e->name[0]) { | ||
| 430 | pr_err("VALIDATE %s: PARAM[%s] enum with no values\n", | ||
| 431 | name, param->name); | ||
| 432 | good = false; | ||
| 433 | } | ||
| 434 | } | ||
| 435 | } else { | ||
| 436 | if (enums) { | ||
| 437 | pr_err("VALIDATE %s: enum-type values, but no enum table\n", | ||
| 438 | name); | ||
| 439 | good = false; | ||
| 440 | goto no_enums; | ||
| 441 | } | ||
| 442 | } | ||
| 443 | |||
| 444 | no_enums: | ||
| 445 | return good; | ||
| 446 | } | ||
| 447 | #endif /* CONFIG_VALIDATE_FS_PARSER */ | ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b0eef008de67..ec32fece5e1e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
| 28 | #include <linux/hugetlb.h> | 28 | #include <linux/hugetlb.h> |
| 29 | #include <linux/pagevec.h> | 29 | #include <linux/pagevec.h> |
| 30 | #include <linux/parser.h> | 30 | #include <linux/fs_parser.h> |
| 31 | #include <linux/mman.h> | 31 | #include <linux/mman.h> |
| 32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
| 33 | #include <linux/dnotify.h> | 33 | #include <linux/dnotify.h> |
| @@ -45,11 +45,17 @@ const struct file_operations hugetlbfs_file_operations; | |||
| 45 | static const struct inode_operations hugetlbfs_dir_inode_operations; | 45 | static const struct inode_operations hugetlbfs_dir_inode_operations; |
| 46 | static const struct inode_operations hugetlbfs_inode_operations; | 46 | static const struct inode_operations hugetlbfs_inode_operations; |
| 47 | 47 | ||
| 48 | struct hugetlbfs_config { | 48 | enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT }; |
| 49 | |||
| 50 | struct hugetlbfs_fs_context { | ||
| 49 | struct hstate *hstate; | 51 | struct hstate *hstate; |
| 52 | unsigned long long max_size_opt; | ||
| 53 | unsigned long long min_size_opt; | ||
| 50 | long max_hpages; | 54 | long max_hpages; |
| 51 | long nr_inodes; | 55 | long nr_inodes; |
| 52 | long min_hpages; | 56 | long min_hpages; |
| 57 | enum hugetlbfs_size_type max_val_type; | ||
| 58 | enum hugetlbfs_size_type min_val_type; | ||
| 53 | kuid_t uid; | 59 | kuid_t uid; |
| 54 | kgid_t gid; | 60 | kgid_t gid; |
| 55 | umode_t mode; | 61 | umode_t mode; |
| @@ -57,22 +63,30 @@ struct hugetlbfs_config { | |||
| 57 | 63 | ||
| 58 | int sysctl_hugetlb_shm_group; | 64 | int sysctl_hugetlb_shm_group; |
| 59 | 65 | ||
| 60 | enum { | 66 | enum hugetlb_param { |
| 61 | Opt_size, Opt_nr_inodes, | 67 | Opt_gid, |
| 62 | Opt_mode, Opt_uid, Opt_gid, | 68 | Opt_min_size, |
| 63 | Opt_pagesize, Opt_min_size, | 69 | Opt_mode, |
| 64 | Opt_err, | 70 | Opt_nr_inodes, |
| 71 | Opt_pagesize, | ||
| 72 | Opt_size, | ||
| 73 | Opt_uid, | ||
| 65 | }; | 74 | }; |
| 66 | 75 | ||
| 67 | static const match_table_t tokens = { | 76 | static const struct fs_parameter_spec hugetlb_param_specs[] = { |
| 68 | {Opt_size, "size=%s"}, | 77 | fsparam_u32 ("gid", Opt_gid), |
| 69 | {Opt_nr_inodes, "nr_inodes=%s"}, | 78 | fsparam_string("min_size", Opt_min_size), |
| 70 | {Opt_mode, "mode=%o"}, | 79 | fsparam_u32 ("mode", Opt_mode), |
| 71 | {Opt_uid, "uid=%u"}, | 80 | fsparam_string("nr_inodes", Opt_nr_inodes), |
| 72 | {Opt_gid, "gid=%u"}, | 81 | fsparam_string("pagesize", Opt_pagesize), |
| 73 | {Opt_pagesize, "pagesize=%s"}, | 82 | fsparam_string("size", Opt_size), |
| 74 | {Opt_min_size, "min_size=%s"}, | 83 | fsparam_u32 ("uid", Opt_uid), |
| 75 | {Opt_err, NULL}, | 84 | {} |
| 85 | }; | ||
| 86 | |||
| 87 | static const struct fs_parameter_description hugetlb_fs_parameters = { | ||
| 88 | .name = "hugetlbfs", | ||
| 89 | .specs = hugetlb_param_specs, | ||
| 76 | }; | 90 | }; |
| 77 | 91 | ||
| 78 | #ifdef CONFIG_NUMA | 92 | #ifdef CONFIG_NUMA |
| @@ -708,16 +722,16 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 708 | } | 722 | } |
| 709 | 723 | ||
| 710 | static struct inode *hugetlbfs_get_root(struct super_block *sb, | 724 | static struct inode *hugetlbfs_get_root(struct super_block *sb, |
| 711 | struct hugetlbfs_config *config) | 725 | struct hugetlbfs_fs_context *ctx) |
| 712 | { | 726 | { |
| 713 | struct inode *inode; | 727 | struct inode *inode; |
| 714 | 728 | ||
| 715 | inode = new_inode(sb); | 729 | inode = new_inode(sb); |
| 716 | if (inode) { | 730 | if (inode) { |
| 717 | inode->i_ino = get_next_ino(); | 731 | inode->i_ino = get_next_ino(); |
| 718 | inode->i_mode = S_IFDIR | config->mode; | 732 | inode->i_mode = S_IFDIR | ctx->mode; |
| 719 | inode->i_uid = config->uid; | 733 | inode->i_uid = ctx->uid; |
| 720 | inode->i_gid = config->gid; | 734 | inode->i_gid = ctx->gid; |
| 721 | inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); | 735 | inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); |
| 722 | inode->i_op = &hugetlbfs_dir_inode_operations; | 736 | inode->i_op = &hugetlbfs_dir_inode_operations; |
| 723 | inode->i_fop = &simple_dir_operations; | 737 | inode->i_fop = &simple_dir_operations; |
| @@ -1093,8 +1107,6 @@ static const struct super_operations hugetlbfs_ops = { | |||
| 1093 | .show_options = hugetlbfs_show_options, | 1107 | .show_options = hugetlbfs_show_options, |
| 1094 | }; | 1108 | }; |
| 1095 | 1109 | ||
| 1096 | enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT }; | ||
| 1097 | |||
| 1098 | /* | 1110 | /* |
| 1099 | * Convert size option passed from command line to number of huge pages | 1111 | * Convert size option passed from command line to number of huge pages |
| 1100 | * in the pool specified by hstate. Size option could be in bytes | 1112 | * in the pool specified by hstate. Size option could be in bytes |
| @@ -1117,170 +1129,151 @@ hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt, | |||
| 1117 | return size_opt; | 1129 | return size_opt; |
| 1118 | } | 1130 | } |
| 1119 | 1131 | ||
| 1120 | static int | 1132 | /* |
| 1121 | hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | 1133 | * Parse one mount parameter. |
| 1134 | */ | ||
| 1135 | static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *param) | ||
| 1122 | { | 1136 | { |
| 1123 | char *p, *rest; | 1137 | struct hugetlbfs_fs_context *ctx = fc->fs_private; |
| 1124 | substring_t args[MAX_OPT_ARGS]; | 1138 | struct fs_parse_result result; |
| 1125 | int option; | 1139 | char *rest; |
| 1126 | unsigned long long max_size_opt = 0, min_size_opt = 0; | 1140 | unsigned long ps; |
| 1127 | enum hugetlbfs_size_type max_val_type = NO_SIZE, min_val_type = NO_SIZE; | 1141 | int opt; |
| 1128 | 1142 | ||
| 1129 | if (!options) | 1143 | opt = fs_parse(fc, &hugetlb_fs_parameters, param, &result); |
| 1144 | if (opt < 0) | ||
| 1145 | return opt; | ||
| 1146 | |||
| 1147 | switch (opt) { | ||
| 1148 | case Opt_uid: | ||
| 1149 | ctx->uid = make_kuid(current_user_ns(), result.uint_32); | ||
| 1150 | if (!uid_valid(ctx->uid)) | ||
| 1151 | goto bad_val; | ||
| 1130 | return 0; | 1152 | return 0; |
| 1131 | 1153 | ||
| 1132 | while ((p = strsep(&options, ",")) != NULL) { | 1154 | case Opt_gid: |
| 1133 | int token; | 1155 | ctx->gid = make_kgid(current_user_ns(), result.uint_32); |
| 1134 | if (!*p) | 1156 | if (!gid_valid(ctx->gid)) |
| 1135 | continue; | 1157 | goto bad_val; |
| 1158 | return 0; | ||
| 1136 | 1159 | ||
| 1137 | token = match_token(p, tokens, args); | 1160 | case Opt_mode: |
| 1138 | switch (token) { | 1161 | ctx->mode = result.uint_32 & 01777U; |
| 1139 | case Opt_uid: | 1162 | return 0; |
| 1140 | if (match_int(&args[0], &option)) | ||
| 1141 | goto bad_val; | ||
| 1142 | pconfig->uid = make_kuid(current_user_ns(), option); | ||
| 1143 | if (!uid_valid(pconfig->uid)) | ||
| 1144 | goto bad_val; | ||
| 1145 | break; | ||
| 1146 | 1163 | ||
| 1147 | case Opt_gid: | 1164 | case Opt_size: |
| 1148 | if (match_int(&args[0], &option)) | 1165 | /* memparse() will accept a K/M/G without a digit */ |
| 1149 | goto bad_val; | 1166 | if (!isdigit(param->string[0])) |
| 1150 | pconfig->gid = make_kgid(current_user_ns(), option); | 1167 | goto bad_val; |
| 1151 | if (!gid_valid(pconfig->gid)) | 1168 | ctx->max_size_opt = memparse(param->string, &rest); |
| 1152 | goto bad_val; | 1169 | ctx->max_val_type = SIZE_STD; |
| 1153 | break; | 1170 | if (*rest == '%') |
| 1171 | ctx->max_val_type = SIZE_PERCENT; | ||
| 1172 | return 0; | ||
| 1154 | 1173 | ||
| 1155 | case Opt_mode: | 1174 | case Opt_nr_inodes: |
| 1156 | if (match_octal(&args[0], &option)) | 1175 | /* memparse() will accept a K/M/G without a digit */ |
| 1157 | goto bad_val; | 1176 | if (!isdigit(param->string[0])) |
| 1158 | pconfig->mode = option & 01777U; | 1177 | goto bad_val; |
| 1159 | break; | 1178 | ctx->nr_inodes = memparse(param->string, &rest); |
| 1179 | return 0; | ||
| 1160 | 1180 | ||
| 1161 | case Opt_size: { | 1181 | case Opt_pagesize: |
| 1162 | /* memparse() will accept a K/M/G without a digit */ | 1182 | ps = memparse(param->string, &rest); |
| 1163 | if (!isdigit(*args[0].from)) | 1183 | ctx->hstate = size_to_hstate(ps); |
| 1164 | goto bad_val; | 1184 | if (!ctx->hstate) { |
| 1165 | max_size_opt = memparse(args[0].from, &rest); | 1185 | pr_err("Unsupported page size %lu MB\n", ps >> 20); |
| 1166 | max_val_type = SIZE_STD; | 1186 | return -EINVAL; |
| 1167 | if (*rest == '%') | ||
| 1168 | max_val_type = SIZE_PERCENT; | ||
| 1169 | break; | ||
| 1170 | } | 1187 | } |
| 1188 | return 0; | ||
| 1171 | 1189 | ||
| 1172 | case Opt_nr_inodes: | 1190 | case Opt_min_size: |
| 1173 | /* memparse() will accept a K/M/G without a digit */ | 1191 | /* memparse() will accept a K/M/G without a digit */ |
| 1174 | if (!isdigit(*args[0].from)) | 1192 | if (!isdigit(param->string[0])) |
| 1175 | goto bad_val; | 1193 | goto bad_val; |
| 1176 | pconfig->nr_inodes = memparse(args[0].from, &rest); | 1194 | ctx->min_size_opt = memparse(param->string, &rest); |
| 1177 | break; | 1195 | ctx->min_val_type = SIZE_STD; |
| 1196 | if (*rest == '%') | ||
| 1197 | ctx->min_val_type = SIZE_PERCENT; | ||
| 1198 | return 0; | ||
| 1178 | 1199 | ||
| 1179 | case Opt_pagesize: { | 1200 | default: |
| 1180 | unsigned long ps; | 1201 | return -EINVAL; |
| 1181 | ps = memparse(args[0].from, &rest); | 1202 | } |
| 1182 | pconfig->hstate = size_to_hstate(ps); | ||
| 1183 | if (!pconfig->hstate) { | ||
| 1184 | pr_err("Unsupported page size %lu MB\n", | ||
| 1185 | ps >> 20); | ||
| 1186 | return -EINVAL; | ||
| 1187 | } | ||
| 1188 | break; | ||
| 1189 | } | ||
| 1190 | 1203 | ||
| 1191 | case Opt_min_size: { | 1204 | bad_val: |
| 1192 | /* memparse() will accept a K/M/G without a digit */ | 1205 | return invalf(fc, "hugetlbfs: Bad value '%s' for mount option '%s'\n", |
| 1193 | if (!isdigit(*args[0].from)) | 1206 | param->string, param->key); |
| 1194 | goto bad_val; | 1207 | } |
| 1195 | min_size_opt = memparse(args[0].from, &rest); | ||
| 1196 | min_val_type = SIZE_STD; | ||
| 1197 | if (*rest == '%') | ||
| 1198 | min_val_type = SIZE_PERCENT; | ||
| 1199 | break; | ||
| 1200 | } | ||
| 1201 | 1208 | ||
| 1202 | default: | 1209 | /* |
| 1203 | pr_err("Bad mount option: \"%s\"\n", p); | 1210 | * Validate the parsed options. |
| 1204 | return -EINVAL; | 1211 | */ |
| 1205 | break; | 1212 | static int hugetlbfs_validate(struct fs_context *fc) |
| 1206 | } | 1213 | { |
| 1207 | } | 1214 | struct hugetlbfs_fs_context *ctx = fc->fs_private; |
| 1208 | 1215 | ||
| 1209 | /* | 1216 | /* |
| 1210 | * Use huge page pool size (in hstate) to convert the size | 1217 | * Use huge page pool size (in hstate) to convert the size |
| 1211 | * options to number of huge pages. If NO_SIZE, -1 is returned. | 1218 | * options to number of huge pages. If NO_SIZE, -1 is returned. |
| 1212 | */ | 1219 | */ |
| 1213 | pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, | 1220 | ctx->max_hpages = hugetlbfs_size_to_hpages(ctx->hstate, |
| 1214 | max_size_opt, max_val_type); | 1221 | ctx->max_size_opt, |
| 1215 | pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, | 1222 | ctx->max_val_type); |
| 1216 | min_size_opt, min_val_type); | 1223 | ctx->min_hpages = hugetlbfs_size_to_hpages(ctx->hstate, |
| 1224 | ctx->min_size_opt, | ||
| 1225 | ctx->min_val_type); | ||
| 1217 | 1226 | ||
| 1218 | /* | 1227 | /* |
| 1219 | * If max_size was specified, then min_size must be smaller | 1228 | * If max_size was specified, then min_size must be smaller |
| 1220 | */ | 1229 | */ |
| 1221 | if (max_val_type > NO_SIZE && | 1230 | if (ctx->max_val_type > NO_SIZE && |
| 1222 | pconfig->min_hpages > pconfig->max_hpages) { | 1231 | ctx->min_hpages > ctx->max_hpages) { |
| 1223 | pr_err("minimum size can not be greater than maximum size\n"); | 1232 | pr_err("Minimum size can not be greater than maximum size\n"); |
| 1224 | return -EINVAL; | 1233 | return -EINVAL; |
| 1225 | } | 1234 | } |
| 1226 | 1235 | ||
| 1227 | return 0; | 1236 | return 0; |
| 1228 | |||
| 1229 | bad_val: | ||
| 1230 | pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p); | ||
| 1231 | return -EINVAL; | ||
| 1232 | } | 1237 | } |
| 1233 | 1238 | ||
| 1234 | static int | 1239 | static int |
| 1235 | hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) | 1240 | hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc) |
| 1236 | { | 1241 | { |
| 1237 | int ret; | 1242 | struct hugetlbfs_fs_context *ctx = fc->fs_private; |
| 1238 | struct hugetlbfs_config config; | ||
| 1239 | struct hugetlbfs_sb_info *sbinfo; | 1243 | struct hugetlbfs_sb_info *sbinfo; |
| 1240 | 1244 | ||
| 1241 | config.max_hpages = -1; /* No limit on size by default */ | ||
| 1242 | config.nr_inodes = -1; /* No limit on number of inodes by default */ | ||
| 1243 | config.uid = current_fsuid(); | ||
| 1244 | config.gid = current_fsgid(); | ||
| 1245 | config.mode = 0755; | ||
| 1246 | config.hstate = &default_hstate; | ||
| 1247 | config.min_hpages = -1; /* No default minimum size */ | ||
| 1248 | ret = hugetlbfs_parse_options(data, &config); | ||
| 1249 | if (ret) | ||
| 1250 | return ret; | ||
| 1251 | |||
| 1252 | sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); | 1245 | sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); |
| 1253 | if (!sbinfo) | 1246 | if (!sbinfo) |
| 1254 | return -ENOMEM; | 1247 | return -ENOMEM; |
| 1255 | sb->s_fs_info = sbinfo; | 1248 | sb->s_fs_info = sbinfo; |
| 1256 | sbinfo->hstate = config.hstate; | ||
| 1257 | spin_lock_init(&sbinfo->stat_lock); | 1249 | spin_lock_init(&sbinfo->stat_lock); |
| 1258 | sbinfo->max_inodes = config.nr_inodes; | 1250 | sbinfo->hstate = ctx->hstate; |
| 1259 | sbinfo->free_inodes = config.nr_inodes; | 1251 | sbinfo->max_inodes = ctx->nr_inodes; |
| 1260 | sbinfo->spool = NULL; | 1252 | sbinfo->free_inodes = ctx->nr_inodes; |
| 1261 | sbinfo->uid = config.uid; | 1253 | sbinfo->spool = NULL; |
| 1262 | sbinfo->gid = config.gid; | 1254 | sbinfo->uid = ctx->uid; |
| 1263 | sbinfo->mode = config.mode; | 1255 | sbinfo->gid = ctx->gid; |
| 1256 | sbinfo->mode = ctx->mode; | ||
| 1264 | 1257 | ||
| 1265 | /* | 1258 | /* |
| 1266 | * Allocate and initialize subpool if maximum or minimum size is | 1259 | * Allocate and initialize subpool if maximum or minimum size is |
| 1267 | * specified. Any needed reservations (for minimim size) are taken | 1260 | * specified. Any needed reservations (for minimim size) are taken |
| 1268 | * taken when the subpool is created. | 1261 | * taken when the subpool is created. |
| 1269 | */ | 1262 | */ |
| 1270 | if (config.max_hpages != -1 || config.min_hpages != -1) { | 1263 | if (ctx->max_hpages != -1 || ctx->min_hpages != -1) { |
| 1271 | sbinfo->spool = hugepage_new_subpool(config.hstate, | 1264 | sbinfo->spool = hugepage_new_subpool(ctx->hstate, |
| 1272 | config.max_hpages, | 1265 | ctx->max_hpages, |
| 1273 | config.min_hpages); | 1266 | ctx->min_hpages); |
| 1274 | if (!sbinfo->spool) | 1267 | if (!sbinfo->spool) |
| 1275 | goto out_free; | 1268 | goto out_free; |
| 1276 | } | 1269 | } |
| 1277 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 1270 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
| 1278 | sb->s_blocksize = huge_page_size(config.hstate); | 1271 | sb->s_blocksize = huge_page_size(ctx->hstate); |
| 1279 | sb->s_blocksize_bits = huge_page_shift(config.hstate); | 1272 | sb->s_blocksize_bits = huge_page_shift(ctx->hstate); |
| 1280 | sb->s_magic = HUGETLBFS_MAGIC; | 1273 | sb->s_magic = HUGETLBFS_MAGIC; |
| 1281 | sb->s_op = &hugetlbfs_ops; | 1274 | sb->s_op = &hugetlbfs_ops; |
| 1282 | sb->s_time_gran = 1; | 1275 | sb->s_time_gran = 1; |
| 1283 | sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config)); | 1276 | sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx)); |
| 1284 | if (!sb->s_root) | 1277 | if (!sb->s_root) |
| 1285 | goto out_free; | 1278 | goto out_free; |
| 1286 | return 0; | 1279 | return 0; |
| @@ -1290,16 +1283,52 @@ out_free: | |||
| 1290 | return -ENOMEM; | 1283 | return -ENOMEM; |
| 1291 | } | 1284 | } |
| 1292 | 1285 | ||
| 1293 | static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, | 1286 | static int hugetlbfs_get_tree(struct fs_context *fc) |
| 1294 | int flags, const char *dev_name, void *data) | 1287 | { |
| 1288 | int err = hugetlbfs_validate(fc); | ||
| 1289 | if (err) | ||
| 1290 | return err; | ||
| 1291 | return vfs_get_super(fc, vfs_get_independent_super, hugetlbfs_fill_super); | ||
| 1292 | } | ||
| 1293 | |||
| 1294 | static void hugetlbfs_fs_context_free(struct fs_context *fc) | ||
| 1295 | { | ||
| 1296 | kfree(fc->fs_private); | ||
| 1297 | } | ||
| 1298 | |||
| 1299 | static const struct fs_context_operations hugetlbfs_fs_context_ops = { | ||
| 1300 | .free = hugetlbfs_fs_context_free, | ||
| 1301 | .parse_param = hugetlbfs_parse_param, | ||
| 1302 | .get_tree = hugetlbfs_get_tree, | ||
| 1303 | }; | ||
| 1304 | |||
| 1305 | static int hugetlbfs_init_fs_context(struct fs_context *fc) | ||
| 1295 | { | 1306 | { |
| 1296 | return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super); | 1307 | struct hugetlbfs_fs_context *ctx; |
| 1308 | |||
| 1309 | ctx = kzalloc(sizeof(struct hugetlbfs_fs_context), GFP_KERNEL); | ||
| 1310 | if (!ctx) | ||
| 1311 | return -ENOMEM; | ||
| 1312 | |||
| 1313 | ctx->max_hpages = -1; /* No limit on size by default */ | ||
| 1314 | ctx->nr_inodes = -1; /* No limit on number of inodes by default */ | ||
| 1315 | ctx->uid = current_fsuid(); | ||
| 1316 | ctx->gid = current_fsgid(); | ||
| 1317 | ctx->mode = 0755; | ||
| 1318 | ctx->hstate = &default_hstate; | ||
| 1319 | ctx->min_hpages = -1; /* No default minimum size */ | ||
| 1320 | ctx->max_val_type = NO_SIZE; | ||
| 1321 | ctx->min_val_type = NO_SIZE; | ||
| 1322 | fc->fs_private = ctx; | ||
| 1323 | fc->ops = &hugetlbfs_fs_context_ops; | ||
| 1324 | return 0; | ||
| 1297 | } | 1325 | } |
| 1298 | 1326 | ||
| 1299 | static struct file_system_type hugetlbfs_fs_type = { | 1327 | static struct file_system_type hugetlbfs_fs_type = { |
| 1300 | .name = "hugetlbfs", | 1328 | .name = "hugetlbfs", |
| 1301 | .mount = hugetlbfs_mount, | 1329 | .init_fs_context = hugetlbfs_init_fs_context, |
| 1302 | .kill_sb = kill_litter_super, | 1330 | .parameters = &hugetlb_fs_parameters, |
| 1331 | .kill_sb = kill_litter_super, | ||
| 1303 | }; | 1332 | }; |
| 1304 | 1333 | ||
| 1305 | static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; | 1334 | static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; |
| @@ -1384,8 +1413,29 @@ out: | |||
| 1384 | return file; | 1413 | return file; |
| 1385 | } | 1414 | } |
| 1386 | 1415 | ||
| 1416 | static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h) | ||
| 1417 | { | ||
| 1418 | struct fs_context *fc; | ||
| 1419 | struct vfsmount *mnt; | ||
| 1420 | |||
| 1421 | fc = fs_context_for_mount(&hugetlbfs_fs_type, SB_KERNMOUNT); | ||
| 1422 | if (IS_ERR(fc)) { | ||
| 1423 | mnt = ERR_CAST(fc); | ||
| 1424 | } else { | ||
| 1425 | struct hugetlbfs_fs_context *ctx = fc->fs_private; | ||
| 1426 | ctx->hstate = h; | ||
| 1427 | mnt = fc_mount(fc); | ||
| 1428 | put_fs_context(fc); | ||
| 1429 | } | ||
| 1430 | if (IS_ERR(mnt)) | ||
| 1431 | pr_err("Cannot mount internal hugetlbfs for page size %uK", | ||
| 1432 | 1U << (h->order + PAGE_SHIFT - 10)); | ||
| 1433 | return mnt; | ||
| 1434 | } | ||
| 1435 | |||
| 1387 | static int __init init_hugetlbfs_fs(void) | 1436 | static int __init init_hugetlbfs_fs(void) |
| 1388 | { | 1437 | { |
| 1438 | struct vfsmount *mnt; | ||
| 1389 | struct hstate *h; | 1439 | struct hstate *h; |
| 1390 | int error; | 1440 | int error; |
| 1391 | int i; | 1441 | int i; |
| @@ -1408,24 +1458,16 @@ static int __init init_hugetlbfs_fs(void) | |||
| 1408 | 1458 | ||
| 1409 | i = 0; | 1459 | i = 0; |
| 1410 | for_each_hstate(h) { | 1460 | for_each_hstate(h) { |
| 1411 | char buf[50]; | 1461 | mnt = mount_one_hugetlbfs(h); |
| 1412 | unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10); | 1462 | if (IS_ERR(mnt) && i == 0) { |
| 1413 | 1463 | error = PTR_ERR(mnt); | |
| 1414 | snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb); | 1464 | goto out; |
| 1415 | hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type, | ||
| 1416 | buf); | ||
| 1417 | |||
| 1418 | if (IS_ERR(hugetlbfs_vfsmount[i])) { | ||
| 1419 | pr_err("Cannot mount internal hugetlbfs for " | ||
| 1420 | "page size %uK", ps_kb); | ||
| 1421 | error = PTR_ERR(hugetlbfs_vfsmount[i]); | ||
| 1422 | hugetlbfs_vfsmount[i] = NULL; | ||
| 1423 | } | 1465 | } |
| 1466 | hugetlbfs_vfsmount[i] = mnt; | ||
| 1424 | i++; | 1467 | i++; |
| 1425 | } | 1468 | } |
| 1426 | /* Non default hstates are optional */ | 1469 | |
| 1427 | if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx])) | 1470 | return 0; |
| 1428 | return 0; | ||
| 1429 | 1471 | ||
| 1430 | out: | 1472 | out: |
| 1431 | kmem_cache_destroy(hugetlbfs_inode_cachep); | 1473 | kmem_cache_destroy(hugetlbfs_inode_cachep); |
diff --git a/fs/internal.h b/fs/internal.h index d410186bc369..6a8b71643af4 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
| @@ -17,6 +17,7 @@ struct linux_binprm; | |||
| 17 | struct path; | 17 | struct path; |
| 18 | struct mount; | 18 | struct mount; |
| 19 | struct shrink_control; | 19 | struct shrink_control; |
| 20 | struct fs_context; | ||
| 20 | 21 | ||
| 21 | /* | 22 | /* |
| 22 | * block_dev.c | 23 | * block_dev.c |
| @@ -52,8 +53,16 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, | |||
| 52 | extern void __init chrdev_init(void); | 53 | extern void __init chrdev_init(void); |
| 53 | 54 | ||
| 54 | /* | 55 | /* |
| 56 | * fs_context.c | ||
| 57 | */ | ||
| 58 | extern int parse_monolithic_mount_data(struct fs_context *, void *); | ||
| 59 | extern void fc_drop_locked(struct fs_context *); | ||
| 60 | |||
| 61 | /* | ||
| 55 | * namei.c | 62 | * namei.c |
| 56 | */ | 63 | */ |
| 64 | extern int filename_lookup(int dfd, struct filename *name, unsigned flags, | ||
| 65 | struct path *path, struct path *root); | ||
| 57 | extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); | 66 | extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); |
| 58 | extern int vfs_path_lookup(struct dentry *, struct vfsmount *, | 67 | extern int vfs_path_lookup(struct dentry *, struct vfsmount *, |
| 59 | const char *, unsigned int, struct path *); | 68 | const char *, unsigned int, struct path *); |
| @@ -99,10 +108,8 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); | |||
| 99 | /* | 108 | /* |
| 100 | * super.c | 109 | * super.c |
| 101 | */ | 110 | */ |
| 102 | extern int do_remount_sb(struct super_block *, int, void *, int); | 111 | extern int reconfigure_super(struct fs_context *); |
| 103 | extern bool trylock_super(struct super_block *sb); | 112 | extern bool trylock_super(struct super_block *sb); |
| 104 | extern struct dentry *mount_fs(struct file_system_type *, | ||
| 105 | int, const char *, void *); | ||
| 106 | extern struct super_block *user_get_super(dev_t); | 113 | extern struct super_block *user_get_super(dev_t); |
| 107 | 114 | ||
| 108 | /* | 115 | /* |
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index dba810cd83b1..0b7d197a904c 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/xattr.h> | 17 | #include <linux/xattr.h> |
| 18 | 18 | ||
| 19 | #include <linux/kernfs.h> | 19 | #include <linux/kernfs.h> |
| 20 | #include <linux/fs_context.h> | ||
| 20 | 21 | ||
| 21 | struct kernfs_iattrs { | 22 | struct kernfs_iattrs { |
| 22 | struct iattr ia_iattr; | 23 | struct iattr ia_iattr; |
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index f3ac352699cf..9a4646eecb71 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c | |||
| @@ -22,16 +22,6 @@ | |||
| 22 | 22 | ||
| 23 | struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache; | 23 | struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache; |
| 24 | 24 | ||
| 25 | static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data) | ||
| 26 | { | ||
| 27 | struct kernfs_root *root = kernfs_info(sb)->root; | ||
| 28 | struct kernfs_syscall_ops *scops = root->syscall_ops; | ||
| 29 | |||
| 30 | if (scops && scops->remount_fs) | ||
| 31 | return scops->remount_fs(root, flags, data); | ||
| 32 | return 0; | ||
| 33 | } | ||
| 34 | |||
| 35 | static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) | 25 | static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) |
| 36 | { | 26 | { |
| 37 | struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry)); | 27 | struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry)); |
| @@ -60,7 +50,6 @@ const struct super_operations kernfs_sops = { | |||
| 60 | .drop_inode = generic_delete_inode, | 50 | .drop_inode = generic_delete_inode, |
| 61 | .evict_inode = kernfs_evict_inode, | 51 | .evict_inode = kernfs_evict_inode, |
| 62 | 52 | ||
| 63 | .remount_fs = kernfs_sop_remount_fs, | ||
| 64 | .show_options = kernfs_sop_show_options, | 53 | .show_options = kernfs_sop_show_options, |
| 65 | .show_path = kernfs_sop_show_path, | 54 | .show_path = kernfs_sop_show_path, |
| 66 | }; | 55 | }; |
| @@ -222,7 +211,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, | |||
| 222 | } while (true); | 211 | } while (true); |
| 223 | } | 212 | } |
| 224 | 213 | ||
| 225 | static int kernfs_fill_super(struct super_block *sb, unsigned long magic) | 214 | static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc) |
| 226 | { | 215 | { |
| 227 | struct kernfs_super_info *info = kernfs_info(sb); | 216 | struct kernfs_super_info *info = kernfs_info(sb); |
| 228 | struct inode *inode; | 217 | struct inode *inode; |
| @@ -233,7 +222,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) | |||
| 233 | sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; | 222 | sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; |
| 234 | sb->s_blocksize = PAGE_SIZE; | 223 | sb->s_blocksize = PAGE_SIZE; |
| 235 | sb->s_blocksize_bits = PAGE_SHIFT; | 224 | sb->s_blocksize_bits = PAGE_SHIFT; |
| 236 | sb->s_magic = magic; | 225 | sb->s_magic = kfc->magic; |
| 237 | sb->s_op = &kernfs_sops; | 226 | sb->s_op = &kernfs_sops; |
| 238 | sb->s_xattr = kernfs_xattr_handlers; | 227 | sb->s_xattr = kernfs_xattr_handlers; |
| 239 | if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) | 228 | if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) |
| @@ -263,21 +252,20 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) | |||
| 263 | return 0; | 252 | return 0; |
| 264 | } | 253 | } |
| 265 | 254 | ||
| 266 | static int kernfs_test_super(struct super_block *sb, void *data) | 255 | static int kernfs_test_super(struct super_block *sb, struct fs_context *fc) |
| 267 | { | 256 | { |
| 268 | struct kernfs_super_info *sb_info = kernfs_info(sb); | 257 | struct kernfs_super_info *sb_info = kernfs_info(sb); |
| 269 | struct kernfs_super_info *info = data; | 258 | struct kernfs_super_info *info = fc->s_fs_info; |
| 270 | 259 | ||
| 271 | return sb_info->root == info->root && sb_info->ns == info->ns; | 260 | return sb_info->root == info->root && sb_info->ns == info->ns; |
| 272 | } | 261 | } |
| 273 | 262 | ||
| 274 | static int kernfs_set_super(struct super_block *sb, void *data) | 263 | static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) |
| 275 | { | 264 | { |
| 276 | int error; | 265 | struct kernfs_fs_context *kfc = fc->fs_private; |
| 277 | error = set_anon_super(sb, data); | 266 | |
| 278 | if (!error) | 267 | kfc->ns_tag = NULL; |
| 279 | sb->s_fs_info = data; | 268 | return set_anon_super_fc(sb, fc); |
| 280 | return error; | ||
| 281 | } | 269 | } |
| 282 | 270 | ||
| 283 | /** | 271 | /** |
| @@ -294,63 +282,60 @@ const void *kernfs_super_ns(struct super_block *sb) | |||
| 294 | } | 282 | } |
| 295 | 283 | ||
| 296 | /** | 284 | /** |
| 297 | * kernfs_mount_ns - kernfs mount helper | 285 | * kernfs_get_tree - kernfs filesystem access/retrieval helper |
| 298 | * @fs_type: file_system_type of the fs being mounted | 286 | * @fc: The filesystem context. |
| 299 | * @flags: mount flags specified for the mount | ||
| 300 | * @root: kernfs_root of the hierarchy being mounted | ||
| 301 | * @magic: file system specific magic number | ||
| 302 | * @new_sb_created: tell the caller if we allocated a new superblock | ||
| 303 | * @ns: optional namespace tag of the mount | ||
| 304 | * | 287 | * |
| 305 | * This is to be called from each kernfs user's file_system_type->mount() | 288 | * This is to be called from each kernfs user's fs_context->ops->get_tree() |
| 306 | * implementation, which should pass through the specified @fs_type and | 289 | * implementation, which should set the specified ->@fs_type and ->@flags, and |
| 307 | * @flags, and specify the hierarchy and namespace tag to mount via @root | 290 | * specify the hierarchy and namespace tag to mount via ->@root and ->@ns, |
| 308 | * and @ns, respectively. | 291 | * respectively. |
| 309 | * | ||
| 310 | * The return value can be passed to the vfs layer verbatim. | ||
| 311 | */ | 292 | */ |
| 312 | struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, | 293 | int kernfs_get_tree(struct fs_context *fc) |
| 313 | struct kernfs_root *root, unsigned long magic, | ||
| 314 | bool *new_sb_created, const void *ns) | ||
| 315 | { | 294 | { |
| 295 | struct kernfs_fs_context *kfc = fc->fs_private; | ||
| 316 | struct super_block *sb; | 296 | struct super_block *sb; |
| 317 | struct kernfs_super_info *info; | 297 | struct kernfs_super_info *info; |
| 318 | int error; | 298 | int error; |
| 319 | 299 | ||
| 320 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 300 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
| 321 | if (!info) | 301 | if (!info) |
| 322 | return ERR_PTR(-ENOMEM); | 302 | return -ENOMEM; |
| 323 | 303 | ||
| 324 | info->root = root; | 304 | info->root = kfc->root; |
| 325 | info->ns = ns; | 305 | info->ns = kfc->ns_tag; |
| 326 | INIT_LIST_HEAD(&info->node); | 306 | INIT_LIST_HEAD(&info->node); |
| 327 | 307 | ||
| 328 | sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags, | 308 | fc->s_fs_info = info; |
| 329 | &init_user_ns, info); | 309 | sb = sget_fc(fc, kernfs_test_super, kernfs_set_super); |
| 330 | if (IS_ERR(sb) || sb->s_fs_info != info) | ||
| 331 | kfree(info); | ||
| 332 | if (IS_ERR(sb)) | 310 | if (IS_ERR(sb)) |
| 333 | return ERR_CAST(sb); | 311 | return PTR_ERR(sb); |
| 334 | |||
| 335 | if (new_sb_created) | ||
| 336 | *new_sb_created = !sb->s_root; | ||
| 337 | 312 | ||
| 338 | if (!sb->s_root) { | 313 | if (!sb->s_root) { |
| 339 | struct kernfs_super_info *info = kernfs_info(sb); | 314 | struct kernfs_super_info *info = kernfs_info(sb); |
| 340 | 315 | ||
| 341 | error = kernfs_fill_super(sb, magic); | 316 | kfc->new_sb_created = true; |
| 317 | |||
| 318 | error = kernfs_fill_super(sb, kfc); | ||
| 342 | if (error) { | 319 | if (error) { |
| 343 | deactivate_locked_super(sb); | 320 | deactivate_locked_super(sb); |
| 344 | return ERR_PTR(error); | 321 | return error; |
| 345 | } | 322 | } |
| 346 | sb->s_flags |= SB_ACTIVE; | 323 | sb->s_flags |= SB_ACTIVE; |
| 347 | 324 | ||
| 348 | mutex_lock(&kernfs_mutex); | 325 | mutex_lock(&kernfs_mutex); |
| 349 | list_add(&info->node, &root->supers); | 326 | list_add(&info->node, &info->root->supers); |
| 350 | mutex_unlock(&kernfs_mutex); | 327 | mutex_unlock(&kernfs_mutex); |
| 351 | } | 328 | } |
| 352 | 329 | ||
| 353 | return dget(sb->s_root); | 330 | fc->root = dget(sb->s_root); |
| 331 | return 0; | ||
| 332 | } | ||
| 333 | |||
| 334 | void kernfs_free_fs_context(struct fs_context *fc) | ||
| 335 | { | ||
| 336 | /* Note that we don't deal with kfc->ns_tag here. */ | ||
| 337 | kfree(fc->s_fs_info); | ||
| 338 | fc->s_fs_info = NULL; | ||
| 354 | } | 339 | } |
| 355 | 340 | ||
| 356 | /** | 341 | /** |
| @@ -377,36 +362,6 @@ void kernfs_kill_sb(struct super_block *sb) | |||
| 377 | kfree(info); | 362 | kfree(info); |
| 378 | } | 363 | } |
| 379 | 364 | ||
| 380 | /** | ||
| 381 | * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root | ||
| 382 | * @kernfs_root: the kernfs_root in question | ||
| 383 | * @ns: the namespace tag | ||
| 384 | * | ||
| 385 | * Pin the superblock so the superblock won't be destroyed in subsequent | ||
| 386 | * operations. This can be used to block ->kill_sb() which may be useful | ||
| 387 | * for kernfs users which dynamically manage superblocks. | ||
| 388 | * | ||
| 389 | * Returns NULL if there's no superblock associated to this kernfs_root, or | ||
| 390 | * -EINVAL if the superblock is being freed. | ||
| 391 | */ | ||
| 392 | struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns) | ||
| 393 | { | ||
| 394 | struct kernfs_super_info *info; | ||
| 395 | struct super_block *sb = NULL; | ||
| 396 | |||
| 397 | mutex_lock(&kernfs_mutex); | ||
| 398 | list_for_each_entry(info, &root->supers, node) { | ||
| 399 | if (info->ns == ns) { | ||
| 400 | sb = info->sb; | ||
| 401 | if (!atomic_inc_not_zero(&info->sb->s_active)) | ||
| 402 | sb = ERR_PTR(-EINVAL); | ||
| 403 | break; | ||
| 404 | } | ||
| 405 | } | ||
| 406 | mutex_unlock(&kernfs_mutex); | ||
| 407 | return sb; | ||
| 408 | } | ||
| 409 | |||
| 410 | void __init kernfs_init(void) | 365 | void __init kernfs_init(void) |
| 411 | { | 366 | { |
| 412 | 367 | ||
diff --git a/fs/mount.h b/fs/mount.h index f39bc9da4d73..6250de544760 100644 --- a/fs/mount.h +++ b/fs/mount.h | |||
| @@ -146,3 +146,8 @@ static inline bool is_local_mountpoint(struct dentry *dentry) | |||
| 146 | 146 | ||
| 147 | return __is_local_mountpoint(dentry); | 147 | return __is_local_mountpoint(dentry); |
| 148 | } | 148 | } |
| 149 | |||
| 150 | static inline bool is_anon_ns(struct mnt_namespace *ns) | ||
| 151 | { | ||
| 152 | return ns->seq == 0; | ||
| 153 | } | ||
diff --git a/fs/namei.c b/fs/namei.c index 3662a09830be..dede0147b3f6 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -2331,8 +2331,8 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path | |||
| 2331 | return err; | 2331 | return err; |
| 2332 | } | 2332 | } |
| 2333 | 2333 | ||
| 2334 | static int filename_lookup(int dfd, struct filename *name, unsigned flags, | 2334 | int filename_lookup(int dfd, struct filename *name, unsigned flags, |
| 2335 | struct path *path, struct path *root) | 2335 | struct path *path, struct path *root) |
| 2336 | { | 2336 | { |
| 2337 | int retval; | 2337 | int retval; |
| 2338 | struct nameidata nd; | 2338 | struct nameidata nd; |
diff --git a/fs/namespace.c b/fs/namespace.c index 98a8c182af4f..c9cab307fa77 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/task_work.h> | 27 | #include <linux/task_work.h> |
| 28 | #include <linux/sched/task.h> | 28 | #include <linux/sched/task.h> |
| 29 | #include <uapi/linux/mount.h> | 29 | #include <uapi/linux/mount.h> |
| 30 | #include <linux/fs_context.h> | ||
| 30 | 31 | ||
| 31 | #include "pnode.h" | 32 | #include "pnode.h" |
| 32 | #include "internal.h" | 33 | #include "internal.h" |
| @@ -940,38 +941,81 @@ static struct mount *skip_mnt_tree(struct mount *p) | |||
| 940 | return p; | 941 | return p; |
| 941 | } | 942 | } |
| 942 | 943 | ||
| 943 | struct vfsmount * | 944 | /** |
| 944 | vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) | 945 | * vfs_create_mount - Create a mount for a configured superblock |
| 946 | * @fc: The configuration context with the superblock attached | ||
| 947 | * | ||
| 948 | * Create a mount to an already configured superblock. If necessary, the | ||
| 949 | * caller should invoke vfs_get_tree() before calling this. | ||
| 950 | * | ||
| 951 | * Note that this does not attach the mount to anything. | ||
| 952 | */ | ||
| 953 | struct vfsmount *vfs_create_mount(struct fs_context *fc) | ||
| 945 | { | 954 | { |
| 946 | struct mount *mnt; | 955 | struct mount *mnt; |
| 947 | struct dentry *root; | ||
| 948 | 956 | ||
| 949 | if (!type) | 957 | if (!fc->root) |
| 950 | return ERR_PTR(-ENODEV); | 958 | return ERR_PTR(-EINVAL); |
| 951 | 959 | ||
| 952 | mnt = alloc_vfsmnt(name); | 960 | mnt = alloc_vfsmnt(fc->source ?: "none"); |
| 953 | if (!mnt) | 961 | if (!mnt) |
| 954 | return ERR_PTR(-ENOMEM); | 962 | return ERR_PTR(-ENOMEM); |
| 955 | 963 | ||
| 956 | if (flags & SB_KERNMOUNT) | 964 | if (fc->sb_flags & SB_KERNMOUNT) |
| 957 | mnt->mnt.mnt_flags = MNT_INTERNAL; | 965 | mnt->mnt.mnt_flags = MNT_INTERNAL; |
| 958 | 966 | ||
| 959 | root = mount_fs(type, flags, name, data); | 967 | atomic_inc(&fc->root->d_sb->s_active); |
| 960 | if (IS_ERR(root)) { | 968 | mnt->mnt.mnt_sb = fc->root->d_sb; |
| 961 | mnt_free_id(mnt); | 969 | mnt->mnt.mnt_root = dget(fc->root); |
| 962 | free_vfsmnt(mnt); | 970 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
| 963 | return ERR_CAST(root); | 971 | mnt->mnt_parent = mnt; |
| 964 | } | ||
| 965 | 972 | ||
| 966 | mnt->mnt.mnt_root = root; | ||
| 967 | mnt->mnt.mnt_sb = root->d_sb; | ||
| 968 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; | ||
| 969 | mnt->mnt_parent = mnt; | ||
| 970 | lock_mount_hash(); | 973 | lock_mount_hash(); |
| 971 | list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); | 974 | list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); |
| 972 | unlock_mount_hash(); | 975 | unlock_mount_hash(); |
| 973 | return &mnt->mnt; | 976 | return &mnt->mnt; |
| 974 | } | 977 | } |
| 978 | EXPORT_SYMBOL(vfs_create_mount); | ||
| 979 | |||
| 980 | struct vfsmount *fc_mount(struct fs_context *fc) | ||
| 981 | { | ||
| 982 | int err = vfs_get_tree(fc); | ||
| 983 | if (!err) { | ||
| 984 | up_write(&fc->root->d_sb->s_umount); | ||
| 985 | return vfs_create_mount(fc); | ||
| 986 | } | ||
| 987 | return ERR_PTR(err); | ||
| 988 | } | ||
| 989 | EXPORT_SYMBOL(fc_mount); | ||
| 990 | |||
| 991 | struct vfsmount *vfs_kern_mount(struct file_system_type *type, | ||
| 992 | int flags, const char *name, | ||
| 993 | void *data) | ||
| 994 | { | ||
| 995 | struct fs_context *fc; | ||
| 996 | struct vfsmount *mnt; | ||
| 997 | int ret = 0; | ||
| 998 | |||
| 999 | if (!type) | ||
| 1000 | return ERR_PTR(-EINVAL); | ||
| 1001 | |||
| 1002 | fc = fs_context_for_mount(type, flags); | ||
| 1003 | if (IS_ERR(fc)) | ||
| 1004 | return ERR_CAST(fc); | ||
| 1005 | |||
| 1006 | if (name) | ||
| 1007 | ret = vfs_parse_fs_string(fc, "source", | ||
| 1008 | name, strlen(name)); | ||
| 1009 | if (!ret) | ||
| 1010 | ret = parse_monolithic_mount_data(fc, data); | ||
| 1011 | if (!ret) | ||
| 1012 | mnt = fc_mount(fc); | ||
| 1013 | else | ||
| 1014 | mnt = ERR_PTR(ret); | ||
| 1015 | |||
| 1016 | put_fs_context(fc); | ||
| 1017 | return mnt; | ||
| 1018 | } | ||
| 975 | EXPORT_SYMBOL_GPL(vfs_kern_mount); | 1019 | EXPORT_SYMBOL_GPL(vfs_kern_mount); |
| 976 | 1020 | ||
| 977 | struct vfsmount * | 1021 | struct vfsmount * |
| @@ -1013,27 +1057,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, | |||
| 1013 | 1057 | ||
| 1014 | mnt->mnt.mnt_flags = old->mnt.mnt_flags; | 1058 | mnt->mnt.mnt_flags = old->mnt.mnt_flags; |
| 1015 | mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); | 1059 | mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); |
| 1016 | /* Don't allow unprivileged users to change mount flags */ | ||
| 1017 | if (flag & CL_UNPRIVILEGED) { | ||
| 1018 | mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; | ||
| 1019 | |||
| 1020 | if (mnt->mnt.mnt_flags & MNT_READONLY) | ||
| 1021 | mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; | ||
| 1022 | |||
| 1023 | if (mnt->mnt.mnt_flags & MNT_NODEV) | ||
| 1024 | mnt->mnt.mnt_flags |= MNT_LOCK_NODEV; | ||
| 1025 | |||
| 1026 | if (mnt->mnt.mnt_flags & MNT_NOSUID) | ||
| 1027 | mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID; | ||
| 1028 | |||
| 1029 | if (mnt->mnt.mnt_flags & MNT_NOEXEC) | ||
| 1030 | mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC; | ||
| 1031 | } | ||
| 1032 | |||
| 1033 | /* Don't allow unprivileged users to reveal what is under a mount */ | ||
| 1034 | if ((flag & CL_UNPRIVILEGED) && | ||
| 1035 | (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire))) | ||
| 1036 | mnt->mnt.mnt_flags |= MNT_LOCKED; | ||
| 1037 | 1060 | ||
| 1038 | atomic_inc(&sb->s_active); | 1061 | atomic_inc(&sb->s_active); |
| 1039 | mnt->mnt.mnt_sb = sb; | 1062 | mnt->mnt.mnt_sb = sb; |
| @@ -1464,6 +1487,29 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) | |||
| 1464 | 1487 | ||
| 1465 | static void shrink_submounts(struct mount *mnt); | 1488 | static void shrink_submounts(struct mount *mnt); |
| 1466 | 1489 | ||
| 1490 | static int do_umount_root(struct super_block *sb) | ||
| 1491 | { | ||
| 1492 | int ret = 0; | ||
| 1493 | |||
| 1494 | down_write(&sb->s_umount); | ||
| 1495 | if (!sb_rdonly(sb)) { | ||
| 1496 | struct fs_context *fc; | ||
| 1497 | |||
| 1498 | fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY, | ||
| 1499 | SB_RDONLY); | ||
| 1500 | if (IS_ERR(fc)) { | ||
| 1501 | ret = PTR_ERR(fc); | ||
| 1502 | } else { | ||
| 1503 | ret = parse_monolithic_mount_data(fc, NULL); | ||
| 1504 | if (!ret) | ||
| 1505 | ret = reconfigure_super(fc); | ||
| 1506 | put_fs_context(fc); | ||
| 1507 | } | ||
| 1508 | } | ||
| 1509 | up_write(&sb->s_umount); | ||
| 1510 | return ret; | ||
| 1511 | } | ||
| 1512 | |||
| 1467 | static int do_umount(struct mount *mnt, int flags) | 1513 | static int do_umount(struct mount *mnt, int flags) |
| 1468 | { | 1514 | { |
| 1469 | struct super_block *sb = mnt->mnt.mnt_sb; | 1515 | struct super_block *sb = mnt->mnt.mnt_sb; |
| @@ -1529,11 +1575,7 @@ static int do_umount(struct mount *mnt, int flags) | |||
| 1529 | */ | 1575 | */ |
| 1530 | if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) | 1576 | if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) |
| 1531 | return -EPERM; | 1577 | return -EPERM; |
| 1532 | down_write(&sb->s_umount); | 1578 | return do_umount_root(sb); |
| 1533 | if (!sb_rdonly(sb)) | ||
| 1534 | retval = do_remount_sb(sb, SB_RDONLY, NULL, 0); | ||
| 1535 | up_write(&sb->s_umount); | ||
| 1536 | return retval; | ||
| 1537 | } | 1579 | } |
| 1538 | 1580 | ||
| 1539 | namespace_lock(); | 1581 | namespace_lock(); |
| @@ -1839,6 +1881,33 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, | |||
| 1839 | return 0; | 1881 | return 0; |
| 1840 | } | 1882 | } |
| 1841 | 1883 | ||
| 1884 | static void lock_mnt_tree(struct mount *mnt) | ||
| 1885 | { | ||
| 1886 | struct mount *p; | ||
| 1887 | |||
| 1888 | for (p = mnt; p; p = next_mnt(p, mnt)) { | ||
| 1889 | int flags = p->mnt.mnt_flags; | ||
| 1890 | /* Don't allow unprivileged users to change mount flags */ | ||
| 1891 | flags |= MNT_LOCK_ATIME; | ||
| 1892 | |||
| 1893 | if (flags & MNT_READONLY) | ||
| 1894 | flags |= MNT_LOCK_READONLY; | ||
| 1895 | |||
| 1896 | if (flags & MNT_NODEV) | ||
| 1897 | flags |= MNT_LOCK_NODEV; | ||
| 1898 | |||
| 1899 | if (flags & MNT_NOSUID) | ||
| 1900 | flags |= MNT_LOCK_NOSUID; | ||
| 1901 | |||
| 1902 | if (flags & MNT_NOEXEC) | ||
| 1903 | flags |= MNT_LOCK_NOEXEC; | ||
| 1904 | /* Don't allow unprivileged users to reveal what is under a mount */ | ||
| 1905 | if (list_empty(&p->mnt_expire)) | ||
| 1906 | flags |= MNT_LOCKED; | ||
| 1907 | p->mnt.mnt_flags = flags; | ||
| 1908 | } | ||
| 1909 | } | ||
| 1910 | |||
| 1842 | static void cleanup_group_ids(struct mount *mnt, struct mount *end) | 1911 | static void cleanup_group_ids(struct mount *mnt, struct mount *end) |
| 1843 | { | 1912 | { |
| 1844 | struct mount *p; | 1913 | struct mount *p; |
| @@ -1956,6 +2025,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
| 1956 | struct mountpoint *dest_mp, | 2025 | struct mountpoint *dest_mp, |
| 1957 | struct path *parent_path) | 2026 | struct path *parent_path) |
| 1958 | { | 2027 | { |
| 2028 | struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; | ||
| 1959 | HLIST_HEAD(tree_list); | 2029 | HLIST_HEAD(tree_list); |
| 1960 | struct mnt_namespace *ns = dest_mnt->mnt_ns; | 2030 | struct mnt_namespace *ns = dest_mnt->mnt_ns; |
| 1961 | struct mountpoint *smp; | 2031 | struct mountpoint *smp; |
| @@ -2006,6 +2076,9 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
| 2006 | child->mnt_mountpoint); | 2076 | child->mnt_mountpoint); |
| 2007 | if (q) | 2077 | if (q) |
| 2008 | mnt_change_mountpoint(child, smp, q); | 2078 | mnt_change_mountpoint(child, smp, q); |
| 2079 | /* Notice when we are propagating across user namespaces */ | ||
| 2080 | if (child->mnt_parent->mnt_ns->user_ns != user_ns) | ||
| 2081 | lock_mnt_tree(child); | ||
| 2009 | commit_tree(child); | 2082 | commit_tree(child); |
| 2010 | } | 2083 | } |
| 2011 | put_mountpoint(smp); | 2084 | put_mountpoint(smp); |
| @@ -2313,7 +2386,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, | |||
| 2313 | int err; | 2386 | int err; |
| 2314 | struct super_block *sb = path->mnt->mnt_sb; | 2387 | struct super_block *sb = path->mnt->mnt_sb; |
| 2315 | struct mount *mnt = real_mount(path->mnt); | 2388 | struct mount *mnt = real_mount(path->mnt); |
| 2316 | void *sec_opts = NULL; | 2389 | struct fs_context *fc; |
| 2317 | 2390 | ||
| 2318 | if (!check_mnt(mnt)) | 2391 | if (!check_mnt(mnt)) |
| 2319 | return -EINVAL; | 2392 | return -EINVAL; |
| @@ -2324,24 +2397,22 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, | |||
| 2324 | if (!can_change_locked_flags(mnt, mnt_flags)) | 2397 | if (!can_change_locked_flags(mnt, mnt_flags)) |
| 2325 | return -EPERM; | 2398 | return -EPERM; |
| 2326 | 2399 | ||
| 2327 | if (data && !(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)) { | 2400 | fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK); |
| 2328 | err = security_sb_eat_lsm_opts(data, &sec_opts); | 2401 | if (IS_ERR(fc)) |
| 2329 | if (err) | 2402 | return PTR_ERR(fc); |
| 2330 | return err; | ||
| 2331 | } | ||
| 2332 | err = security_sb_remount(sb, sec_opts); | ||
| 2333 | security_free_mnt_opts(&sec_opts); | ||
| 2334 | if (err) | ||
| 2335 | return err; | ||
| 2336 | 2403 | ||
| 2337 | down_write(&sb->s_umount); | 2404 | err = parse_monolithic_mount_data(fc, data); |
| 2338 | err = -EPERM; | 2405 | if (!err) { |
| 2339 | if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { | 2406 | down_write(&sb->s_umount); |
| 2340 | err = do_remount_sb(sb, sb_flags, data, 0); | 2407 | err = -EPERM; |
| 2341 | if (!err) | 2408 | if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { |
| 2342 | set_mount_attributes(mnt, mnt_flags); | 2409 | err = reconfigure_super(fc); |
| 2410 | if (!err) | ||
| 2411 | set_mount_attributes(mnt, mnt_flags); | ||
| 2412 | } | ||
| 2413 | up_write(&sb->s_umount); | ||
| 2343 | } | 2414 | } |
| 2344 | up_write(&sb->s_umount); | 2415 | put_fs_context(fc); |
| 2345 | return err; | 2416 | return err; |
| 2346 | } | 2417 | } |
| 2347 | 2418 | ||
| @@ -2425,29 +2496,6 @@ out: | |||
| 2425 | return err; | 2496 | return err; |
| 2426 | } | 2497 | } |
| 2427 | 2498 | ||
| 2428 | static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) | ||
| 2429 | { | ||
| 2430 | int err; | ||
| 2431 | const char *subtype = strchr(fstype, '.'); | ||
| 2432 | if (subtype) { | ||
| 2433 | subtype++; | ||
| 2434 | err = -EINVAL; | ||
| 2435 | if (!subtype[0]) | ||
| 2436 | goto err; | ||
| 2437 | } else | ||
| 2438 | subtype = ""; | ||
| 2439 | |||
| 2440 | mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); | ||
| 2441 | err = -ENOMEM; | ||
| 2442 | if (!mnt->mnt_sb->s_subtype) | ||
| 2443 | goto err; | ||
| 2444 | return mnt; | ||
| 2445 | |||
| 2446 | err: | ||
| 2447 | mntput(mnt); | ||
| 2448 | return ERR_PTR(err); | ||
| 2449 | } | ||
| 2450 | |||
| 2451 | /* | 2499 | /* |
| 2452 | * add a mount into a namespace's mount tree | 2500 | * add a mount into a namespace's mount tree |
| 2453 | */ | 2501 | */ |
| @@ -2492,7 +2540,39 @@ unlock: | |||
| 2492 | return err; | 2540 | return err; |
| 2493 | } | 2541 | } |
| 2494 | 2542 | ||
| 2495 | static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags); | 2543 | static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags); |
| 2544 | |||
| 2545 | /* | ||
| 2546 | * Create a new mount using a superblock configuration and request it | ||
| 2547 | * be added to the namespace tree. | ||
| 2548 | */ | ||
| 2549 | static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint, | ||
| 2550 | unsigned int mnt_flags) | ||
| 2551 | { | ||
| 2552 | struct vfsmount *mnt; | ||
| 2553 | struct super_block *sb = fc->root->d_sb; | ||
| 2554 | int error; | ||
| 2555 | |||
| 2556 | error = security_sb_kern_mount(sb); | ||
| 2557 | if (!error && mount_too_revealing(sb, &mnt_flags)) | ||
| 2558 | error = -EPERM; | ||
| 2559 | |||
| 2560 | if (unlikely(error)) { | ||
| 2561 | fc_drop_locked(fc); | ||
| 2562 | return error; | ||
| 2563 | } | ||
| 2564 | |||
| 2565 | up_write(&sb->s_umount); | ||
| 2566 | |||
| 2567 | mnt = vfs_create_mount(fc); | ||
| 2568 | if (IS_ERR(mnt)) | ||
| 2569 | return PTR_ERR(mnt); | ||
| 2570 | |||
| 2571 | error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags); | ||
| 2572 | if (error < 0) | ||
| 2573 | mntput(mnt); | ||
| 2574 | return error; | ||
| 2575 | } | ||
| 2496 | 2576 | ||
| 2497 | /* | 2577 | /* |
| 2498 | * create a new mount for userspace and request it to be added into the | 2578 | * create a new mount for userspace and request it to be added into the |
| @@ -2502,8 +2582,9 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, | |||
| 2502 | int mnt_flags, const char *name, void *data) | 2582 | int mnt_flags, const char *name, void *data) |
| 2503 | { | 2583 | { |
| 2504 | struct file_system_type *type; | 2584 | struct file_system_type *type; |
| 2505 | struct vfsmount *mnt; | 2585 | struct fs_context *fc; |
| 2506 | int err; | 2586 | const char *subtype = NULL; |
| 2587 | int err = 0; | ||
| 2507 | 2588 | ||
| 2508 | if (!fstype) | 2589 | if (!fstype) |
| 2509 | return -EINVAL; | 2590 | return -EINVAL; |
| @@ -2512,23 +2593,37 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, | |||
| 2512 | if (!type) | 2593 | if (!type) |
| 2513 | return -ENODEV; | 2594 | return -ENODEV; |
| 2514 | 2595 | ||
| 2515 | mnt = vfs_kern_mount(type, sb_flags, name, data); | 2596 | if (type->fs_flags & FS_HAS_SUBTYPE) { |
| 2516 | if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && | 2597 | subtype = strchr(fstype, '.'); |
| 2517 | !mnt->mnt_sb->s_subtype) | 2598 | if (subtype) { |
| 2518 | mnt = fs_set_subtype(mnt, fstype); | 2599 | subtype++; |
| 2600 | if (!*subtype) { | ||
| 2601 | put_filesystem(type); | ||
| 2602 | return -EINVAL; | ||
| 2603 | } | ||
| 2604 | } else { | ||
| 2605 | subtype = ""; | ||
| 2606 | } | ||
| 2607 | } | ||
| 2519 | 2608 | ||
| 2609 | fc = fs_context_for_mount(type, sb_flags); | ||
| 2520 | put_filesystem(type); | 2610 | put_filesystem(type); |
| 2521 | if (IS_ERR(mnt)) | 2611 | if (IS_ERR(fc)) |
| 2522 | return PTR_ERR(mnt); | 2612 | return PTR_ERR(fc); |
| 2523 | 2613 | ||
| 2524 | if (mount_too_revealing(mnt, &mnt_flags)) { | 2614 | if (subtype) |
| 2525 | mntput(mnt); | 2615 | err = vfs_parse_fs_string(fc, "subtype", |
| 2526 | return -EPERM; | 2616 | subtype, strlen(subtype)); |
| 2527 | } | 2617 | if (!err && name) |
| 2618 | err = vfs_parse_fs_string(fc, "source", name, strlen(name)); | ||
| 2619 | if (!err) | ||
| 2620 | err = parse_monolithic_mount_data(fc, data); | ||
| 2621 | if (!err) | ||
| 2622 | err = vfs_get_tree(fc); | ||
| 2623 | if (!err) | ||
| 2624 | err = do_new_mount_fc(fc, path, mnt_flags); | ||
| 2528 | 2625 | ||
| 2529 | err = do_add_mount(real_mount(mnt), path, mnt_flags); | 2626 | put_fs_context(fc); |
| 2530 | if (err) | ||
| 2531 | mntput(mnt); | ||
| 2532 | return err; | 2627 | return err; |
| 2533 | } | 2628 | } |
| 2534 | 2629 | ||
| @@ -2863,7 +2958,8 @@ static void dec_mnt_namespaces(struct ucounts *ucounts) | |||
| 2863 | 2958 | ||
| 2864 | static void free_mnt_ns(struct mnt_namespace *ns) | 2959 | static void free_mnt_ns(struct mnt_namespace *ns) |
| 2865 | { | 2960 | { |
| 2866 | ns_free_inum(&ns->ns); | 2961 | if (!is_anon_ns(ns)) |
| 2962 | ns_free_inum(&ns->ns); | ||
| 2867 | dec_mnt_namespaces(ns->ucounts); | 2963 | dec_mnt_namespaces(ns->ucounts); |
| 2868 | put_user_ns(ns->user_ns); | 2964 | put_user_ns(ns->user_ns); |
| 2869 | kfree(ns); | 2965 | kfree(ns); |
| @@ -2878,7 +2974,7 @@ static void free_mnt_ns(struct mnt_namespace *ns) | |||
| 2878 | */ | 2974 | */ |
| 2879 | static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); | 2975 | static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); |
| 2880 | 2976 | ||
| 2881 | static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) | 2977 | static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon) |
| 2882 | { | 2978 | { |
| 2883 | struct mnt_namespace *new_ns; | 2979 | struct mnt_namespace *new_ns; |
| 2884 | struct ucounts *ucounts; | 2980 | struct ucounts *ucounts; |
| @@ -2888,28 +2984,27 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) | |||
| 2888 | if (!ucounts) | 2984 | if (!ucounts) |
| 2889 | return ERR_PTR(-ENOSPC); | 2985 | return ERR_PTR(-ENOSPC); |
| 2890 | 2986 | ||
| 2891 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); | 2987 | new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL); |
| 2892 | if (!new_ns) { | 2988 | if (!new_ns) { |
| 2893 | dec_mnt_namespaces(ucounts); | 2989 | dec_mnt_namespaces(ucounts); |
| 2894 | return ERR_PTR(-ENOMEM); | 2990 | return ERR_PTR(-ENOMEM); |
| 2895 | } | 2991 | } |
| 2896 | ret = ns_alloc_inum(&new_ns->ns); | 2992 | if (!anon) { |
| 2897 | if (ret) { | 2993 | ret = ns_alloc_inum(&new_ns->ns); |
| 2898 | kfree(new_ns); | 2994 | if (ret) { |
| 2899 | dec_mnt_namespaces(ucounts); | 2995 | kfree(new_ns); |
| 2900 | return ERR_PTR(ret); | 2996 | dec_mnt_namespaces(ucounts); |
| 2997 | return ERR_PTR(ret); | ||
| 2998 | } | ||
| 2901 | } | 2999 | } |
| 2902 | new_ns->ns.ops = &mntns_operations; | 3000 | new_ns->ns.ops = &mntns_operations; |
| 2903 | new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); | 3001 | if (!anon) |
| 3002 | new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); | ||
| 2904 | atomic_set(&new_ns->count, 1); | 3003 | atomic_set(&new_ns->count, 1); |
| 2905 | new_ns->root = NULL; | ||
| 2906 | INIT_LIST_HEAD(&new_ns->list); | 3004 | INIT_LIST_HEAD(&new_ns->list); |
| 2907 | init_waitqueue_head(&new_ns->poll); | 3005 | init_waitqueue_head(&new_ns->poll); |
| 2908 | new_ns->event = 0; | ||
| 2909 | new_ns->user_ns = get_user_ns(user_ns); | 3006 | new_ns->user_ns = get_user_ns(user_ns); |
| 2910 | new_ns->ucounts = ucounts; | 3007 | new_ns->ucounts = ucounts; |
| 2911 | new_ns->mounts = 0; | ||
| 2912 | new_ns->pending_mounts = 0; | ||
| 2913 | return new_ns; | 3008 | return new_ns; |
| 2914 | } | 3009 | } |
| 2915 | 3010 | ||
| @@ -2933,7 +3028,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, | |||
| 2933 | 3028 | ||
| 2934 | old = ns->root; | 3029 | old = ns->root; |
| 2935 | 3030 | ||
| 2936 | new_ns = alloc_mnt_ns(user_ns); | 3031 | new_ns = alloc_mnt_ns(user_ns, false); |
| 2937 | if (IS_ERR(new_ns)) | 3032 | if (IS_ERR(new_ns)) |
| 2938 | return new_ns; | 3033 | return new_ns; |
| 2939 | 3034 | ||
| @@ -2941,13 +3036,18 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, | |||
| 2941 | /* First pass: copy the tree topology */ | 3036 | /* First pass: copy the tree topology */ |
| 2942 | copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; | 3037 | copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; |
| 2943 | if (user_ns != ns->user_ns) | 3038 | if (user_ns != ns->user_ns) |
| 2944 | copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; | 3039 | copy_flags |= CL_SHARED_TO_SLAVE; |
| 2945 | new = copy_tree(old, old->mnt.mnt_root, copy_flags); | 3040 | new = copy_tree(old, old->mnt.mnt_root, copy_flags); |
| 2946 | if (IS_ERR(new)) { | 3041 | if (IS_ERR(new)) { |
| 2947 | namespace_unlock(); | 3042 | namespace_unlock(); |
| 2948 | free_mnt_ns(new_ns); | 3043 | free_mnt_ns(new_ns); |
| 2949 | return ERR_CAST(new); | 3044 | return ERR_CAST(new); |
| 2950 | } | 3045 | } |
| 3046 | if (user_ns != ns->user_ns) { | ||
| 3047 | lock_mount_hash(); | ||
| 3048 | lock_mnt_tree(new); | ||
| 3049 | unlock_mount_hash(); | ||
| 3050 | } | ||
| 2951 | new_ns->root = new; | 3051 | new_ns->root = new; |
| 2952 | list_add_tail(&new_ns->list, &new->mnt_list); | 3052 | list_add_tail(&new_ns->list, &new->mnt_list); |
| 2953 | 3053 | ||
| @@ -2988,37 +3088,25 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, | |||
| 2988 | return new_ns; | 3088 | return new_ns; |
| 2989 | } | 3089 | } |
| 2990 | 3090 | ||
| 2991 | /** | 3091 | struct dentry *mount_subtree(struct vfsmount *m, const char *name) |
| 2992 | * create_mnt_ns - creates a private namespace and adds a root filesystem | ||
| 2993 | * @mnt: pointer to the new root filesystem mountpoint | ||
| 2994 | */ | ||
| 2995 | static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) | ||
| 2996 | { | ||
| 2997 | struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns); | ||
| 2998 | if (!IS_ERR(new_ns)) { | ||
| 2999 | struct mount *mnt = real_mount(m); | ||
| 3000 | mnt->mnt_ns = new_ns; | ||
| 3001 | new_ns->root = mnt; | ||
| 3002 | new_ns->mounts++; | ||
| 3003 | list_add(&mnt->mnt_list, &new_ns->list); | ||
| 3004 | } else { | ||
| 3005 | mntput(m); | ||
| 3006 | } | ||
| 3007 | return new_ns; | ||
| 3008 | } | ||
| 3009 | |||
| 3010 | struct dentry *mount_subtree(struct vfsmount *mnt, const char *name) | ||
| 3011 | { | 3092 | { |
| 3093 | struct mount *mnt = real_mount(m); | ||
| 3012 | struct mnt_namespace *ns; | 3094 | struct mnt_namespace *ns; |
| 3013 | struct super_block *s; | 3095 | struct super_block *s; |
| 3014 | struct path path; | 3096 | struct path path; |
| 3015 | int err; | 3097 | int err; |
| 3016 | 3098 | ||
| 3017 | ns = create_mnt_ns(mnt); | 3099 | ns = alloc_mnt_ns(&init_user_ns, true); |
| 3018 | if (IS_ERR(ns)) | 3100 | if (IS_ERR(ns)) { |
| 3101 | mntput(m); | ||
| 3019 | return ERR_CAST(ns); | 3102 | return ERR_CAST(ns); |
| 3103 | } | ||
| 3104 | mnt->mnt_ns = ns; | ||
| 3105 | ns->root = mnt; | ||
| 3106 | ns->mounts++; | ||
| 3107 | list_add(&mnt->mnt_list, &ns->list); | ||
| 3020 | 3108 | ||
| 3021 | err = vfs_path_lookup(mnt->mnt_root, mnt, | 3109 | err = vfs_path_lookup(m->mnt_root, m, |
| 3022 | name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); | 3110 | name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); |
| 3023 | 3111 | ||
| 3024 | put_mnt_ns(ns); | 3112 | put_mnt_ns(ns); |
| @@ -3228,6 +3316,7 @@ out0: | |||
| 3228 | static void __init init_mount_tree(void) | 3316 | static void __init init_mount_tree(void) |
| 3229 | { | 3317 | { |
| 3230 | struct vfsmount *mnt; | 3318 | struct vfsmount *mnt; |
| 3319 | struct mount *m; | ||
| 3231 | struct mnt_namespace *ns; | 3320 | struct mnt_namespace *ns; |
| 3232 | struct path root; | 3321 | struct path root; |
| 3233 | struct file_system_type *type; | 3322 | struct file_system_type *type; |
| @@ -3240,10 +3329,14 @@ static void __init init_mount_tree(void) | |||
| 3240 | if (IS_ERR(mnt)) | 3329 | if (IS_ERR(mnt)) |
| 3241 | panic("Can't create rootfs"); | 3330 | panic("Can't create rootfs"); |
| 3242 | 3331 | ||
| 3243 | ns = create_mnt_ns(mnt); | 3332 | ns = alloc_mnt_ns(&init_user_ns, false); |
| 3244 | if (IS_ERR(ns)) | 3333 | if (IS_ERR(ns)) |
| 3245 | panic("Can't allocate initial namespace"); | 3334 | panic("Can't allocate initial namespace"); |
| 3246 | 3335 | m = real_mount(mnt); | |
| 3336 | m->mnt_ns = ns; | ||
| 3337 | ns->root = m; | ||
| 3338 | ns->mounts = 1; | ||
| 3339 | list_add(&m->mnt_list, &ns->list); | ||
| 3247 | init_task.nsproxy->mnt_ns = ns; | 3340 | init_task.nsproxy->mnt_ns = ns; |
| 3248 | get_mnt_ns(ns); | 3341 | get_mnt_ns(ns); |
| 3249 | 3342 | ||
| @@ -3297,10 +3390,10 @@ void put_mnt_ns(struct mnt_namespace *ns) | |||
| 3297 | free_mnt_ns(ns); | 3390 | free_mnt_ns(ns); |
| 3298 | } | 3391 | } |
| 3299 | 3392 | ||
| 3300 | struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) | 3393 | struct vfsmount *kern_mount(struct file_system_type *type) |
| 3301 | { | 3394 | { |
| 3302 | struct vfsmount *mnt; | 3395 | struct vfsmount *mnt; |
| 3303 | mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data); | 3396 | mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); |
| 3304 | if (!IS_ERR(mnt)) { | 3397 | if (!IS_ERR(mnt)) { |
| 3305 | /* | 3398 | /* |
| 3306 | * it is a longterm mount, don't release mnt until | 3399 | * it is a longterm mount, don't release mnt until |
| @@ -3310,7 +3403,7 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) | |||
| 3310 | } | 3403 | } |
| 3311 | return mnt; | 3404 | return mnt; |
| 3312 | } | 3405 | } |
| 3313 | EXPORT_SYMBOL_GPL(kern_mount_data); | 3406 | EXPORT_SYMBOL_GPL(kern_mount); |
| 3314 | 3407 | ||
| 3315 | void kern_unmount(struct vfsmount *mnt) | 3408 | void kern_unmount(struct vfsmount *mnt) |
| 3316 | { | 3409 | { |
| @@ -3352,7 +3445,8 @@ bool current_chrooted(void) | |||
| 3352 | return chrooted; | 3445 | return chrooted; |
| 3353 | } | 3446 | } |
| 3354 | 3447 | ||
| 3355 | static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new, | 3448 | static bool mnt_already_visible(struct mnt_namespace *ns, |
| 3449 | const struct super_block *sb, | ||
| 3356 | int *new_mnt_flags) | 3450 | int *new_mnt_flags) |
| 3357 | { | 3451 | { |
| 3358 | int new_flags = *new_mnt_flags; | 3452 | int new_flags = *new_mnt_flags; |
| @@ -3364,7 +3458,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new, | |||
| 3364 | struct mount *child; | 3458 | struct mount *child; |
| 3365 | int mnt_flags; | 3459 | int mnt_flags; |
| 3366 | 3460 | ||
| 3367 | if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type) | 3461 | if (mnt->mnt.mnt_sb->s_type != sb->s_type) |
| 3368 | continue; | 3462 | continue; |
| 3369 | 3463 | ||
| 3370 | /* This mount is not fully visible if it's root directory | 3464 | /* This mount is not fully visible if it's root directory |
| @@ -3415,7 +3509,7 @@ found: | |||
| 3415 | return visible; | 3509 | return visible; |
| 3416 | } | 3510 | } |
| 3417 | 3511 | ||
| 3418 | static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) | 3512 | static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags) |
| 3419 | { | 3513 | { |
| 3420 | const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV; | 3514 | const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV; |
| 3421 | struct mnt_namespace *ns = current->nsproxy->mnt_ns; | 3515 | struct mnt_namespace *ns = current->nsproxy->mnt_ns; |
| @@ -3425,7 +3519,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) | |||
| 3425 | return false; | 3519 | return false; |
| 3426 | 3520 | ||
| 3427 | /* Can this filesystem be too revealing? */ | 3521 | /* Can this filesystem be too revealing? */ |
| 3428 | s_iflags = mnt->mnt_sb->s_iflags; | 3522 | s_iflags = sb->s_iflags; |
| 3429 | if (!(s_iflags & SB_I_USERNS_VISIBLE)) | 3523 | if (!(s_iflags & SB_I_USERNS_VISIBLE)) |
| 3430 | return false; | 3524 | return false; |
| 3431 | 3525 | ||
| @@ -3435,7 +3529,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) | |||
| 3435 | return true; | 3529 | return true; |
| 3436 | } | 3530 | } |
| 3437 | 3531 | ||
| 3438 | return !mnt_already_visible(ns, mnt, new_mnt_flags); | 3532 | return !mnt_already_visible(ns, sb, new_mnt_flags); |
| 3439 | } | 3533 | } |
| 3440 | 3534 | ||
| 3441 | bool mnt_may_suid(struct vfsmount *mnt) | 3535 | bool mnt_may_suid(struct vfsmount *mnt) |
| @@ -3484,6 +3578,9 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns) | |||
| 3484 | !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) | 3578 | !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) |
| 3485 | return -EPERM; | 3579 | return -EPERM; |
| 3486 | 3580 | ||
| 3581 | if (is_anon_ns(mnt_ns)) | ||
| 3582 | return -EINVAL; | ||
| 3583 | |||
| 3487 | if (fs->users != 1) | 3584 | if (fs->users != 1) |
| 3488 | return -EINVAL; | 3585 | return -EINVAL; |
| 3489 | 3586 | ||
diff --git a/fs/pnode.c b/fs/pnode.c index 1100e810d855..7ea6cfb65077 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
| @@ -214,7 +214,6 @@ static struct mount *next_group(struct mount *m, struct mount *origin) | |||
| 214 | } | 214 | } |
| 215 | 215 | ||
| 216 | /* all accesses are serialized by namespace_sem */ | 216 | /* all accesses are serialized by namespace_sem */ |
| 217 | static struct user_namespace *user_ns; | ||
| 218 | static struct mount *last_dest, *first_source, *last_source, *dest_master; | 217 | static struct mount *last_dest, *first_source, *last_source, *dest_master; |
| 219 | static struct mountpoint *mp; | 218 | static struct mountpoint *mp; |
| 220 | static struct hlist_head *list; | 219 | static struct hlist_head *list; |
| @@ -260,9 +259,6 @@ static int propagate_one(struct mount *m) | |||
| 260 | type |= CL_MAKE_SHARED; | 259 | type |= CL_MAKE_SHARED; |
| 261 | } | 260 | } |
| 262 | 261 | ||
| 263 | /* Notice when we are propagating across user namespaces */ | ||
| 264 | if (m->mnt_ns->user_ns != user_ns) | ||
| 265 | type |= CL_UNPRIVILEGED; | ||
| 266 | child = copy_tree(last_source, last_source->mnt.mnt_root, type); | 262 | child = copy_tree(last_source, last_source->mnt.mnt_root, type); |
| 267 | if (IS_ERR(child)) | 263 | if (IS_ERR(child)) |
| 268 | return PTR_ERR(child); | 264 | return PTR_ERR(child); |
| @@ -303,7 +299,6 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, | |||
| 303 | * propagate_one(); everything is serialized by namespace_sem, | 299 | * propagate_one(); everything is serialized by namespace_sem, |
| 304 | * so globals will do just fine. | 300 | * so globals will do just fine. |
| 305 | */ | 301 | */ |
| 306 | user_ns = current->nsproxy->mnt_ns->user_ns; | ||
| 307 | last_dest = dest_mnt; | 302 | last_dest = dest_mnt; |
| 308 | first_source = source_mnt; | 303 | first_source = source_mnt; |
| 309 | last_source = source_mnt; | 304 | last_source = source_mnt; |
diff --git a/fs/pnode.h b/fs/pnode.h index dc87e65becd2..3960a83666cf 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
| @@ -27,8 +27,7 @@ | |||
| 27 | #define CL_MAKE_SHARED 0x08 | 27 | #define CL_MAKE_SHARED 0x08 |
| 28 | #define CL_PRIVATE 0x10 | 28 | #define CL_PRIVATE 0x10 |
| 29 | #define CL_SHARED_TO_SLAVE 0x20 | 29 | #define CL_SHARED_TO_SLAVE 0x20 |
| 30 | #define CL_UNPRIVILEGED 0x40 | 30 | #define CL_COPY_MNT_NS_FILE 0x40 |
| 31 | #define CL_COPY_MNT_NS_FILE 0x80 | ||
| 32 | 31 | ||
| 33 | #define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) | 32 | #define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) |
| 34 | 33 | ||
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index da649ccd6804..fc7e38def174 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #include <linux/seq_file.h> | 24 | #include <linux/seq_file.h> |
| 25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
| 26 | #include <linux/mount.h> | 26 | #include <linux/mount.h> |
| 27 | #include <linux/magic.h> | ||
| 28 | 27 | ||
| 29 | #include <linux/uaccess.h> | 28 | #include <linux/uaccess.h> |
| 30 | 29 | ||
| @@ -122,13 +121,12 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) | |||
| 122 | return 0; | 121 | return 0; |
| 123 | } | 122 | } |
| 124 | 123 | ||
| 125 | static const struct super_operations proc_sops = { | 124 | const struct super_operations proc_sops = { |
| 126 | .alloc_inode = proc_alloc_inode, | 125 | .alloc_inode = proc_alloc_inode, |
| 127 | .destroy_inode = proc_destroy_inode, | 126 | .destroy_inode = proc_destroy_inode, |
| 128 | .drop_inode = generic_delete_inode, | 127 | .drop_inode = generic_delete_inode, |
| 129 | .evict_inode = proc_evict_inode, | 128 | .evict_inode = proc_evict_inode, |
| 130 | .statfs = simple_statfs, | 129 | .statfs = simple_statfs, |
| 131 | .remount_fs = proc_remount, | ||
| 132 | .show_options = proc_show_options, | 130 | .show_options = proc_show_options, |
| 133 | }; | 131 | }; |
| 134 | 132 | ||
| @@ -488,51 +486,3 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | |||
| 488 | pde_put(de); | 486 | pde_put(de); |
| 489 | return inode; | 487 | return inode; |
| 490 | } | 488 | } |
| 491 | |||
| 492 | int proc_fill_super(struct super_block *s, void *data, int silent) | ||
| 493 | { | ||
| 494 | struct pid_namespace *ns = get_pid_ns(s->s_fs_info); | ||
| 495 | struct inode *root_inode; | ||
| 496 | int ret; | ||
| 497 | |||
| 498 | if (!proc_parse_options(data, ns)) | ||
| 499 | return -EINVAL; | ||
| 500 | |||
| 501 | /* User space would break if executables or devices appear on proc */ | ||
| 502 | s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; | ||
| 503 | s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; | ||
| 504 | s->s_blocksize = 1024; | ||
| 505 | s->s_blocksize_bits = 10; | ||
| 506 | s->s_magic = PROC_SUPER_MAGIC; | ||
| 507 | s->s_op = &proc_sops; | ||
| 508 | s->s_time_gran = 1; | ||
| 509 | |||
| 510 | /* | ||
| 511 | * procfs isn't actually a stacking filesystem; however, there is | ||
| 512 | * too much magic going on inside it to permit stacking things on | ||
| 513 | * top of it | ||
| 514 | */ | ||
| 515 | s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; | ||
| 516 | |||
| 517 | /* procfs dentries and inodes don't require IO to create */ | ||
| 518 | s->s_shrink.seeks = 0; | ||
| 519 | |||
| 520 | pde_get(&proc_root); | ||
| 521 | root_inode = proc_get_inode(s, &proc_root); | ||
| 522 | if (!root_inode) { | ||
| 523 | pr_err("proc_fill_super: get root inode failed\n"); | ||
| 524 | return -ENOMEM; | ||
| 525 | } | ||
| 526 | |||
| 527 | s->s_root = d_make_root(root_inode); | ||
| 528 | if (!s->s_root) { | ||
| 529 | pr_err("proc_fill_super: allocate dentry failed\n"); | ||
| 530 | return -ENOMEM; | ||
| 531 | } | ||
| 532 | |||
| 533 | ret = proc_setup_self(s); | ||
| 534 | if (ret) { | ||
| 535 | return ret; | ||
| 536 | } | ||
| 537 | return proc_setup_thread_self(s); | ||
| 538 | } | ||
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index ea575375f210..d1671e97f7fe 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
| @@ -207,13 +207,12 @@ struct pde_opener { | |||
| 207 | struct completion *c; | 207 | struct completion *c; |
| 208 | } __randomize_layout; | 208 | } __randomize_layout; |
| 209 | extern const struct inode_operations proc_link_inode_operations; | 209 | extern const struct inode_operations proc_link_inode_operations; |
| 210 | |||
| 211 | extern const struct inode_operations proc_pid_link_inode_operations; | 210 | extern const struct inode_operations proc_pid_link_inode_operations; |
| 211 | extern const struct super_operations proc_sops; | ||
| 212 | 212 | ||
| 213 | void proc_init_kmemcache(void); | 213 | void proc_init_kmemcache(void); |
| 214 | void set_proc_pid_nlink(void); | 214 | void set_proc_pid_nlink(void); |
| 215 | extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | 215 | extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); |
| 216 | extern int proc_fill_super(struct super_block *, void *data, int flags); | ||
| 217 | extern void proc_entry_rundown(struct proc_dir_entry *); | 216 | extern void proc_entry_rundown(struct proc_dir_entry *); |
| 218 | 217 | ||
| 219 | /* | 218 | /* |
| @@ -271,10 +270,8 @@ static inline void proc_tty_init(void) {} | |||
| 271 | * root.c | 270 | * root.c |
| 272 | */ | 271 | */ |
| 273 | extern struct proc_dir_entry proc_root; | 272 | extern struct proc_dir_entry proc_root; |
| 274 | extern int proc_parse_options(char *options, struct pid_namespace *pid); | ||
| 275 | 273 | ||
| 276 | extern void proc_self_init(void); | 274 | extern void proc_self_init(void); |
| 277 | extern int proc_remount(struct super_block *, int *, char *); | ||
| 278 | 275 | ||
| 279 | /* | 276 | /* |
| 280 | * task_[no]mmu.c | 277 | * task_[no]mmu.c |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 621e6ec322ca..8b145e7b9661 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
| @@ -19,86 +19,178 @@ | |||
| 19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
| 20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
| 21 | #include <linux/user_namespace.h> | 21 | #include <linux/user_namespace.h> |
| 22 | #include <linux/fs_context.h> | ||
| 22 | #include <linux/mount.h> | 23 | #include <linux/mount.h> |
| 23 | #include <linux/pid_namespace.h> | 24 | #include <linux/pid_namespace.h> |
| 24 | #include <linux/parser.h> | 25 | #include <linux/fs_parser.h> |
| 25 | #include <linux/cred.h> | 26 | #include <linux/cred.h> |
| 27 | #include <linux/magic.h> | ||
| 28 | #include <linux/slab.h> | ||
| 26 | 29 | ||
| 27 | #include "internal.h" | 30 | #include "internal.h" |
| 28 | 31 | ||
| 29 | enum { | 32 | struct proc_fs_context { |
| 30 | Opt_gid, Opt_hidepid, Opt_err, | 33 | struct pid_namespace *pid_ns; |
| 34 | unsigned int mask; | ||
| 35 | int hidepid; | ||
| 36 | int gid; | ||
| 31 | }; | 37 | }; |
| 32 | 38 | ||
| 33 | static const match_table_t tokens = { | 39 | enum proc_param { |
| 34 | {Opt_hidepid, "hidepid=%u"}, | 40 | Opt_gid, |
| 35 | {Opt_gid, "gid=%u"}, | 41 | Opt_hidepid, |
| 36 | {Opt_err, NULL}, | ||
| 37 | }; | 42 | }; |
| 38 | 43 | ||
| 39 | int proc_parse_options(char *options, struct pid_namespace *pid) | 44 | static const struct fs_parameter_spec proc_param_specs[] = { |
| 45 | fsparam_u32("gid", Opt_gid), | ||
| 46 | fsparam_u32("hidepid", Opt_hidepid), | ||
| 47 | {} | ||
| 48 | }; | ||
| 49 | |||
| 50 | static const struct fs_parameter_description proc_fs_parameters = { | ||
| 51 | .name = "proc", | ||
| 52 | .specs = proc_param_specs, | ||
| 53 | }; | ||
| 54 | |||
| 55 | static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) | ||
| 40 | { | 56 | { |
| 41 | char *p; | 57 | struct proc_fs_context *ctx = fc->fs_private; |
| 42 | substring_t args[MAX_OPT_ARGS]; | 58 | struct fs_parse_result result; |
| 43 | int option; | 59 | int opt; |
| 44 | 60 | ||
| 45 | if (!options) | 61 | opt = fs_parse(fc, &proc_fs_parameters, param, &result); |
| 46 | return 1; | 62 | if (opt < 0) |
| 47 | 63 | return opt; | |
| 48 | while ((p = strsep(&options, ",")) != NULL) { | 64 | |
| 49 | int token; | 65 | switch (opt) { |
| 50 | if (!*p) | 66 | case Opt_gid: |
| 51 | continue; | 67 | ctx->gid = result.uint_32; |
| 52 | 68 | break; | |
| 53 | args[0].to = args[0].from = NULL; | 69 | |
| 54 | token = match_token(p, tokens, args); | 70 | case Opt_hidepid: |
| 55 | switch (token) { | 71 | ctx->hidepid = result.uint_32; |
| 56 | case Opt_gid: | 72 | if (ctx->hidepid < HIDEPID_OFF || |
| 57 | if (match_int(&args[0], &option)) | 73 | ctx->hidepid > HIDEPID_INVISIBLE) |
| 58 | return 0; | 74 | return invalf(fc, "proc: hidepid value must be between 0 and 2.\n"); |
| 59 | pid->pid_gid = make_kgid(current_user_ns(), option); | 75 | break; |
| 60 | break; | 76 | |
| 61 | case Opt_hidepid: | 77 | default: |
| 62 | if (match_int(&args[0], &option)) | 78 | return -EINVAL; |
| 63 | return 0; | ||
| 64 | if (option < HIDEPID_OFF || | ||
| 65 | option > HIDEPID_INVISIBLE) { | ||
| 66 | pr_err("proc: hidepid value must be between 0 and 2.\n"); | ||
| 67 | return 0; | ||
| 68 | } | ||
| 69 | pid->hide_pid = option; | ||
| 70 | break; | ||
| 71 | default: | ||
| 72 | pr_err("proc: unrecognized mount option \"%s\" " | ||
| 73 | "or missing value\n", p); | ||
| 74 | return 0; | ||
| 75 | } | ||
| 76 | } | 79 | } |
| 77 | 80 | ||
| 78 | return 1; | 81 | ctx->mask |= 1 << opt; |
| 82 | return 0; | ||
| 79 | } | 83 | } |
| 80 | 84 | ||
| 81 | int proc_remount(struct super_block *sb, int *flags, char *data) | 85 | static void proc_apply_options(struct super_block *s, |
| 86 | struct fs_context *fc, | ||
| 87 | struct pid_namespace *pid_ns, | ||
| 88 | struct user_namespace *user_ns) | ||
| 82 | { | 89 | { |
| 90 | struct proc_fs_context *ctx = fc->fs_private; | ||
| 91 | |||
| 92 | if (ctx->mask & (1 << Opt_gid)) | ||
| 93 | pid_ns->pid_gid = make_kgid(user_ns, ctx->gid); | ||
| 94 | if (ctx->mask & (1 << Opt_hidepid)) | ||
| 95 | pid_ns->hide_pid = ctx->hidepid; | ||
| 96 | } | ||
| 97 | |||
| 98 | static int proc_fill_super(struct super_block *s, struct fs_context *fc) | ||
| 99 | { | ||
| 100 | struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info); | ||
| 101 | struct inode *root_inode; | ||
| 102 | int ret; | ||
| 103 | |||
| 104 | proc_apply_options(s, fc, pid_ns, current_user_ns()); | ||
| 105 | |||
| 106 | /* User space would break if executables or devices appear on proc */ | ||
| 107 | s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; | ||
| 108 | s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; | ||
| 109 | s->s_blocksize = 1024; | ||
| 110 | s->s_blocksize_bits = 10; | ||
| 111 | s->s_magic = PROC_SUPER_MAGIC; | ||
| 112 | s->s_op = &proc_sops; | ||
| 113 | s->s_time_gran = 1; | ||
| 114 | |||
| 115 | /* | ||
| 116 | * procfs isn't actually a stacking filesystem; however, there is | ||
| 117 | * too much magic going on inside it to permit stacking things on | ||
| 118 | * top of it | ||
| 119 | */ | ||
| 120 | s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; | ||
| 121 | |||
| 122 | /* procfs dentries and inodes don't require IO to create */ | ||
| 123 | s->s_shrink.seeks = 0; | ||
| 124 | |||
| 125 | pde_get(&proc_root); | ||
| 126 | root_inode = proc_get_inode(s, &proc_root); | ||
| 127 | if (!root_inode) { | ||
| 128 | pr_err("proc_fill_super: get root inode failed\n"); | ||
| 129 | return -ENOMEM; | ||
| 130 | } | ||
| 131 | |||
| 132 | s->s_root = d_make_root(root_inode); | ||
| 133 | if (!s->s_root) { | ||
| 134 | pr_err("proc_fill_super: allocate dentry failed\n"); | ||
| 135 | return -ENOMEM; | ||
| 136 | } | ||
| 137 | |||
| 138 | ret = proc_setup_self(s); | ||
| 139 | if (ret) { | ||
| 140 | return ret; | ||
| 141 | } | ||
| 142 | return proc_setup_thread_self(s); | ||
| 143 | } | ||
| 144 | |||
| 145 | static int proc_reconfigure(struct fs_context *fc) | ||
| 146 | { | ||
| 147 | struct super_block *sb = fc->root->d_sb; | ||
| 83 | struct pid_namespace *pid = sb->s_fs_info; | 148 | struct pid_namespace *pid = sb->s_fs_info; |
| 84 | 149 | ||
| 85 | sync_filesystem(sb); | 150 | sync_filesystem(sb); |
| 86 | return !proc_parse_options(data, pid); | 151 | |
| 152 | proc_apply_options(sb, fc, pid, current_user_ns()); | ||
| 153 | return 0; | ||
| 87 | } | 154 | } |
| 88 | 155 | ||
| 89 | static struct dentry *proc_mount(struct file_system_type *fs_type, | 156 | static int proc_get_tree(struct fs_context *fc) |
| 90 | int flags, const char *dev_name, void *data) | ||
| 91 | { | 157 | { |
| 92 | struct pid_namespace *ns; | 158 | struct proc_fs_context *ctx = fc->fs_private; |
| 93 | 159 | ||
| 94 | if (flags & SB_KERNMOUNT) { | 160 | put_user_ns(fc->user_ns); |
| 95 | ns = data; | 161 | fc->user_ns = get_user_ns(ctx->pid_ns->user_ns); |
| 96 | data = NULL; | 162 | fc->s_fs_info = ctx->pid_ns; |
| 97 | } else { | 163 | return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super); |
| 98 | ns = task_active_pid_ns(current); | 164 | } |
| 99 | } | ||
| 100 | 165 | ||
| 101 | return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super); | 166 | static void proc_fs_context_free(struct fs_context *fc) |
| 167 | { | ||
| 168 | struct proc_fs_context *ctx = fc->fs_private; | ||
| 169 | |||
| 170 | if (ctx->pid_ns) | ||
| 171 | put_pid_ns(ctx->pid_ns); | ||
| 172 | kfree(ctx); | ||
| 173 | } | ||
| 174 | |||
| 175 | static const struct fs_context_operations proc_fs_context_ops = { | ||
| 176 | .free = proc_fs_context_free, | ||
| 177 | .parse_param = proc_parse_param, | ||
| 178 | .get_tree = proc_get_tree, | ||
| 179 | .reconfigure = proc_reconfigure, | ||
| 180 | }; | ||
| 181 | |||
| 182 | static int proc_init_fs_context(struct fs_context *fc) | ||
| 183 | { | ||
| 184 | struct proc_fs_context *ctx; | ||
| 185 | |||
| 186 | ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL); | ||
| 187 | if (!ctx) | ||
| 188 | return -ENOMEM; | ||
| 189 | |||
| 190 | ctx->pid_ns = get_pid_ns(task_active_pid_ns(current)); | ||
| 191 | fc->fs_private = ctx; | ||
| 192 | fc->ops = &proc_fs_context_ops; | ||
| 193 | return 0; | ||
| 102 | } | 194 | } |
| 103 | 195 | ||
| 104 | static void proc_kill_sb(struct super_block *sb) | 196 | static void proc_kill_sb(struct super_block *sb) |
| @@ -115,10 +207,11 @@ static void proc_kill_sb(struct super_block *sb) | |||
| 115 | } | 207 | } |
| 116 | 208 | ||
| 117 | static struct file_system_type proc_fs_type = { | 209 | static struct file_system_type proc_fs_type = { |
| 118 | .name = "proc", | 210 | .name = "proc", |
| 119 | .mount = proc_mount, | 211 | .init_fs_context = proc_init_fs_context, |
| 120 | .kill_sb = proc_kill_sb, | 212 | .parameters = &proc_fs_parameters, |
| 121 | .fs_flags = FS_USERNS_MOUNT, | 213 | .kill_sb = proc_kill_sb, |
| 214 | .fs_flags = FS_USERNS_MOUNT, | ||
| 122 | }; | 215 | }; |
| 123 | 216 | ||
| 124 | void __init proc_root_init(void) | 217 | void __init proc_root_init(void) |
| @@ -156,7 +249,7 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr | |||
| 156 | { | 249 | { |
| 157 | if (!proc_pid_lookup(dentry, flags)) | 250 | if (!proc_pid_lookup(dentry, flags)) |
| 158 | return NULL; | 251 | return NULL; |
| 159 | 252 | ||
| 160 | return proc_lookup(dir, dentry, flags); | 253 | return proc_lookup(dir, dentry, flags); |
| 161 | } | 254 | } |
| 162 | 255 | ||
| @@ -209,9 +302,28 @@ struct proc_dir_entry proc_root = { | |||
| 209 | 302 | ||
| 210 | int pid_ns_prepare_proc(struct pid_namespace *ns) | 303 | int pid_ns_prepare_proc(struct pid_namespace *ns) |
| 211 | { | 304 | { |
| 305 | struct proc_fs_context *ctx; | ||
| 306 | struct fs_context *fc; | ||
| 212 | struct vfsmount *mnt; | 307 | struct vfsmount *mnt; |
| 213 | 308 | ||
| 214 | mnt = kern_mount_data(&proc_fs_type, ns); | 309 | fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT); |
| 310 | if (IS_ERR(fc)) | ||
| 311 | return PTR_ERR(fc); | ||
| 312 | |||
| 313 | if (fc->user_ns != ns->user_ns) { | ||
| 314 | put_user_ns(fc->user_ns); | ||
| 315 | fc->user_ns = get_user_ns(ns->user_ns); | ||
| 316 | } | ||
| 317 | |||
| 318 | ctx = fc->fs_private; | ||
| 319 | if (ctx->pid_ns != ns) { | ||
| 320 | put_pid_ns(ctx->pid_ns); | ||
| 321 | get_pid_ns(ns); | ||
| 322 | ctx->pid_ns = ns; | ||
| 323 | } | ||
| 324 | |||
| 325 | mnt = fc_mount(fc); | ||
| 326 | put_fs_context(fc); | ||
| 215 | if (IS_ERR(mnt)) | 327 | if (IS_ERR(mnt)) |
| 216 | return PTR_ERR(mnt); | 328 | return PTR_ERR(mnt); |
| 217 | 329 | ||
diff --git a/fs/super.c b/fs/super.c index 48e25eba8465..583a0124bc39 100644 --- a/fs/super.c +++ b/fs/super.c | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | #include <linux/fsnotify.h> | 35 | #include <linux/fsnotify.h> |
| 36 | #include <linux/lockdep.h> | 36 | #include <linux/lockdep.h> |
| 37 | #include <linux/user_namespace.h> | 37 | #include <linux/user_namespace.h> |
| 38 | #include <linux/fs_context.h> | ||
| 38 | #include <uapi/linux/mount.h> | 39 | #include <uapi/linux/mount.h> |
| 39 | #include "internal.h" | 40 | #include "internal.h" |
| 40 | 41 | ||
| @@ -476,6 +477,94 @@ void generic_shutdown_super(struct super_block *sb) | |||
| 476 | EXPORT_SYMBOL(generic_shutdown_super); | 477 | EXPORT_SYMBOL(generic_shutdown_super); |
| 477 | 478 | ||
| 478 | /** | 479 | /** |
| 480 | * sget_fc - Find or create a superblock | ||
| 481 | * @fc: Filesystem context. | ||
| 482 | * @test: Comparison callback | ||
| 483 | * @set: Setup callback | ||
| 484 | * | ||
| 485 | * Find or create a superblock using the parameters stored in the filesystem | ||
| 486 | * context and the two callback functions. | ||
| 487 | * | ||
| 488 | * If an extant superblock is matched, then that will be returned with an | ||
| 489 | * elevated reference count that the caller must transfer or discard. | ||
| 490 | * | ||
| 491 | * If no match is made, a new superblock will be allocated and basic | ||
| 492 | * initialisation will be performed (s_type, s_fs_info and s_id will be set and | ||
| 493 | * the set() callback will be invoked), the superblock will be published and it | ||
| 494 | * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE | ||
| 495 | * as yet unset. | ||
| 496 | */ | ||
| 497 | struct super_block *sget_fc(struct fs_context *fc, | ||
| 498 | int (*test)(struct super_block *, struct fs_context *), | ||
| 499 | int (*set)(struct super_block *, struct fs_context *)) | ||
| 500 | { | ||
| 501 | struct super_block *s = NULL; | ||
| 502 | struct super_block *old; | ||
| 503 | struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns; | ||
| 504 | int err; | ||
| 505 | |||
| 506 | if (!(fc->sb_flags & SB_KERNMOUNT) && | ||
| 507 | fc->purpose != FS_CONTEXT_FOR_SUBMOUNT) { | ||
| 508 | /* Don't allow mounting unless the caller has CAP_SYS_ADMIN | ||
| 509 | * over the namespace. | ||
| 510 | */ | ||
| 511 | if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) { | ||
| 512 | if (!capable(CAP_SYS_ADMIN)) | ||
| 513 | return ERR_PTR(-EPERM); | ||
| 514 | } else { | ||
| 515 | if (!ns_capable(fc->user_ns, CAP_SYS_ADMIN)) | ||
| 516 | return ERR_PTR(-EPERM); | ||
| 517 | } | ||
| 518 | } | ||
| 519 | |||
| 520 | retry: | ||
| 521 | spin_lock(&sb_lock); | ||
| 522 | if (test) { | ||
| 523 | hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) { | ||
| 524 | if (test(old, fc)) | ||
| 525 | goto share_extant_sb; | ||
| 526 | } | ||
| 527 | } | ||
| 528 | if (!s) { | ||
| 529 | spin_unlock(&sb_lock); | ||
| 530 | s = alloc_super(fc->fs_type, fc->sb_flags, user_ns); | ||
| 531 | if (!s) | ||
| 532 | return ERR_PTR(-ENOMEM); | ||
| 533 | goto retry; | ||
| 534 | } | ||
| 535 | |||
| 536 | s->s_fs_info = fc->s_fs_info; | ||
| 537 | err = set(s, fc); | ||
| 538 | if (err) { | ||
| 539 | s->s_fs_info = NULL; | ||
| 540 | spin_unlock(&sb_lock); | ||
| 541 | destroy_unused_super(s); | ||
| 542 | return ERR_PTR(err); | ||
| 543 | } | ||
| 544 | fc->s_fs_info = NULL; | ||
| 545 | s->s_type = fc->fs_type; | ||
| 546 | strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id)); | ||
| 547 | list_add_tail(&s->s_list, &super_blocks); | ||
| 548 | hlist_add_head(&s->s_instances, &s->s_type->fs_supers); | ||
| 549 | spin_unlock(&sb_lock); | ||
| 550 | get_filesystem(s->s_type); | ||
| 551 | register_shrinker_prepared(&s->s_shrink); | ||
| 552 | return s; | ||
| 553 | |||
| 554 | share_extant_sb: | ||
| 555 | if (user_ns != old->s_user_ns) { | ||
| 556 | spin_unlock(&sb_lock); | ||
| 557 | destroy_unused_super(s); | ||
| 558 | return ERR_PTR(-EBUSY); | ||
| 559 | } | ||
| 560 | if (!grab_super(old)) | ||
| 561 | goto retry; | ||
| 562 | destroy_unused_super(s); | ||
| 563 | return old; | ||
| 564 | } | ||
| 565 | EXPORT_SYMBOL(sget_fc); | ||
| 566 | |||
| 567 | /** | ||
| 479 | * sget_userns - find or create a superblock | 568 | * sget_userns - find or create a superblock |
| 480 | * @type: filesystem type superblock should belong to | 569 | * @type: filesystem type superblock should belong to |
| 481 | * @test: comparison callback | 570 | * @test: comparison callback |
| @@ -835,28 +924,35 @@ rescan: | |||
| 835 | } | 924 | } |
| 836 | 925 | ||
| 837 | /** | 926 | /** |
| 838 | * do_remount_sb - asks filesystem to change mount options. | 927 | * reconfigure_super - asks filesystem to change superblock parameters |
| 839 | * @sb: superblock in question | 928 | * @fc: The superblock and configuration |
| 840 | * @sb_flags: revised superblock flags | ||
| 841 | * @data: the rest of options | ||
| 842 | * @force: whether or not to force the change | ||
| 843 | * | 929 | * |
| 844 | * Alters the mount options of a mounted file system. | 930 | * Alters the configuration parameters of a live superblock. |
| 845 | */ | 931 | */ |
| 846 | int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) | 932 | int reconfigure_super(struct fs_context *fc) |
| 847 | { | 933 | { |
| 934 | struct super_block *sb = fc->root->d_sb; | ||
| 848 | int retval; | 935 | int retval; |
| 849 | int remount_ro; | 936 | bool remount_ro = false; |
| 937 | bool force = fc->sb_flags & SB_FORCE; | ||
| 850 | 938 | ||
| 939 | if (fc->sb_flags_mask & ~MS_RMT_MASK) | ||
| 940 | return -EINVAL; | ||
| 851 | if (sb->s_writers.frozen != SB_UNFROZEN) | 941 | if (sb->s_writers.frozen != SB_UNFROZEN) |
| 852 | return -EBUSY; | 942 | return -EBUSY; |
| 853 | 943 | ||
| 944 | retval = security_sb_remount(sb, fc->security); | ||
| 945 | if (retval) | ||
| 946 | return retval; | ||
| 947 | |||
| 948 | if (fc->sb_flags_mask & SB_RDONLY) { | ||
| 854 | #ifdef CONFIG_BLOCK | 949 | #ifdef CONFIG_BLOCK |
| 855 | if (!(sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) | 950 | if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) |
| 856 | return -EACCES; | 951 | return -EACCES; |
| 857 | #endif | 952 | #endif |
| 858 | 953 | ||
| 859 | remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); | 954 | remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); |
| 955 | } | ||
| 860 | 956 | ||
| 861 | if (remount_ro) { | 957 | if (remount_ro) { |
| 862 | if (!hlist_empty(&sb->s_pins)) { | 958 | if (!hlist_empty(&sb->s_pins)) { |
| @@ -867,13 +963,14 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) | |||
| 867 | return 0; | 963 | return 0; |
| 868 | if (sb->s_writers.frozen != SB_UNFROZEN) | 964 | if (sb->s_writers.frozen != SB_UNFROZEN) |
| 869 | return -EBUSY; | 965 | return -EBUSY; |
| 870 | remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); | 966 | remount_ro = !sb_rdonly(sb); |
| 871 | } | 967 | } |
| 872 | } | 968 | } |
| 873 | shrink_dcache_sb(sb); | 969 | shrink_dcache_sb(sb); |
| 874 | 970 | ||
| 875 | /* If we are remounting RDONLY and current sb is read/write, | 971 | /* If we are reconfiguring to RDONLY and current sb is read/write, |
| 876 | make sure there are no rw files opened */ | 972 | * make sure there are no files open for writing. |
| 973 | */ | ||
| 877 | if (remount_ro) { | 974 | if (remount_ro) { |
| 878 | if (force) { | 975 | if (force) { |
| 879 | sb->s_readonly_remount = 1; | 976 | sb->s_readonly_remount = 1; |
| @@ -885,8 +982,8 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) | |||
| 885 | } | 982 | } |
| 886 | } | 983 | } |
| 887 | 984 | ||
| 888 | if (sb->s_op->remount_fs) { | 985 | if (fc->ops->reconfigure) { |
| 889 | retval = sb->s_op->remount_fs(sb, &sb_flags, data); | 986 | retval = fc->ops->reconfigure(fc); |
| 890 | if (retval) { | 987 | if (retval) { |
| 891 | if (!force) | 988 | if (!force) |
| 892 | goto cancel_readonly; | 989 | goto cancel_readonly; |
| @@ -895,7 +992,9 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) | |||
| 895 | sb->s_type->name, retval); | 992 | sb->s_type->name, retval); |
| 896 | } | 993 | } |
| 897 | } | 994 | } |
| 898 | sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (sb_flags & MS_RMT_MASK); | 995 | |
| 996 | WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | | ||
| 997 | (fc->sb_flags & fc->sb_flags_mask))); | ||
| 899 | /* Needs to be ordered wrt mnt_is_readonly() */ | 998 | /* Needs to be ordered wrt mnt_is_readonly() */ |
| 900 | smp_wmb(); | 999 | smp_wmb(); |
| 901 | sb->s_readonly_remount = 0; | 1000 | sb->s_readonly_remount = 0; |
| @@ -922,10 +1021,15 @@ static void do_emergency_remount_callback(struct super_block *sb) | |||
| 922 | down_write(&sb->s_umount); | 1021 | down_write(&sb->s_umount); |
| 923 | if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && | 1022 | if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && |
| 924 | !sb_rdonly(sb)) { | 1023 | !sb_rdonly(sb)) { |
| 925 | /* | 1024 | struct fs_context *fc; |
| 926 | * What lock protects sb->s_flags?? | 1025 | |
| 927 | */ | 1026 | fc = fs_context_for_reconfigure(sb->s_root, |
| 928 | do_remount_sb(sb, SB_RDONLY, NULL, 1); | 1027 | SB_RDONLY | SB_FORCE, SB_RDONLY); |
| 1028 | if (!IS_ERR(fc)) { | ||
| 1029 | if (parse_monolithic_mount_data(fc, NULL) == 0) | ||
| 1030 | (void)reconfigure_super(fc); | ||
| 1031 | put_fs_context(fc); | ||
| 1032 | } | ||
| 929 | } | 1033 | } |
| 930 | up_write(&sb->s_umount); | 1034 | up_write(&sb->s_umount); |
| 931 | } | 1035 | } |
| @@ -1087,6 +1191,89 @@ struct dentry *mount_ns(struct file_system_type *fs_type, | |||
| 1087 | 1191 | ||
| 1088 | EXPORT_SYMBOL(mount_ns); | 1192 | EXPORT_SYMBOL(mount_ns); |
| 1089 | 1193 | ||
| 1194 | int set_anon_super_fc(struct super_block *sb, struct fs_context *fc) | ||
| 1195 | { | ||
| 1196 | return set_anon_super(sb, NULL); | ||
| 1197 | } | ||
| 1198 | EXPORT_SYMBOL(set_anon_super_fc); | ||
| 1199 | |||
| 1200 | static int test_keyed_super(struct super_block *sb, struct fs_context *fc) | ||
| 1201 | { | ||
| 1202 | return sb->s_fs_info == fc->s_fs_info; | ||
| 1203 | } | ||
| 1204 | |||
| 1205 | static int test_single_super(struct super_block *s, struct fs_context *fc) | ||
| 1206 | { | ||
| 1207 | return 1; | ||
| 1208 | } | ||
| 1209 | |||
| 1210 | /** | ||
| 1211 | * vfs_get_super - Get a superblock with a search key set in s_fs_info. | ||
| 1212 | * @fc: The filesystem context holding the parameters | ||
| 1213 | * @keying: How to distinguish superblocks | ||
| 1214 | * @fill_super: Helper to initialise a new superblock | ||
| 1215 | * | ||
| 1216 | * Search for a superblock and create a new one if not found. The search | ||
| 1217 | * criterion is controlled by @keying. If the search fails, a new superblock | ||
| 1218 | * is created and @fill_super() is called to initialise it. | ||
| 1219 | * | ||
| 1220 | * @keying can take one of a number of values: | ||
| 1221 | * | ||
| 1222 | * (1) vfs_get_single_super - Only one superblock of this type may exist on the | ||
| 1223 | * system. This is typically used for special system filesystems. | ||
| 1224 | * | ||
| 1225 | * (2) vfs_get_keyed_super - Multiple superblocks may exist, but they must have | ||
| 1226 | * distinct keys (where the key is in s_fs_info). Searching for the same | ||
| 1227 | * key again will turn up the superblock for that key. | ||
| 1228 | * | ||
| 1229 | * (3) vfs_get_independent_super - Multiple superblocks may exist and are | ||
| 1230 | * unkeyed. Each call will get a new superblock. | ||
| 1231 | * | ||
| 1232 | * A permissions check is made by sget_fc() unless we're getting a superblock | ||
| 1233 | * for a kernel-internal mount or a submount. | ||
| 1234 | */ | ||
| 1235 | int vfs_get_super(struct fs_context *fc, | ||
| 1236 | enum vfs_get_super_keying keying, | ||
| 1237 | int (*fill_super)(struct super_block *sb, | ||
| 1238 | struct fs_context *fc)) | ||
| 1239 | { | ||
| 1240 | int (*test)(struct super_block *, struct fs_context *); | ||
| 1241 | struct super_block *sb; | ||
| 1242 | |||
| 1243 | switch (keying) { | ||
| 1244 | case vfs_get_single_super: | ||
| 1245 | test = test_single_super; | ||
| 1246 | break; | ||
| 1247 | case vfs_get_keyed_super: | ||
| 1248 | test = test_keyed_super; | ||
| 1249 | break; | ||
| 1250 | case vfs_get_independent_super: | ||
| 1251 | test = NULL; | ||
| 1252 | break; | ||
| 1253 | default: | ||
| 1254 | BUG(); | ||
| 1255 | } | ||
| 1256 | |||
| 1257 | sb = sget_fc(fc, test, set_anon_super_fc); | ||
| 1258 | if (IS_ERR(sb)) | ||
| 1259 | return PTR_ERR(sb); | ||
| 1260 | |||
| 1261 | if (!sb->s_root) { | ||
| 1262 | int err = fill_super(sb, fc); | ||
| 1263 | if (err) { | ||
| 1264 | deactivate_locked_super(sb); | ||
| 1265 | return err; | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | sb->s_flags |= SB_ACTIVE; | ||
| 1269 | } | ||
| 1270 | |||
| 1271 | BUG_ON(fc->root); | ||
| 1272 | fc->root = dget(sb->s_root); | ||
| 1273 | return 0; | ||
| 1274 | } | ||
| 1275 | EXPORT_SYMBOL(vfs_get_super); | ||
| 1276 | |||
| 1090 | #ifdef CONFIG_BLOCK | 1277 | #ifdef CONFIG_BLOCK |
| 1091 | static int set_bdev_super(struct super_block *s, void *data) | 1278 | static int set_bdev_super(struct super_block *s, void *data) |
| 1092 | { | 1279 | { |
| @@ -1212,6 +1399,31 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, | |||
| 1212 | } | 1399 | } |
| 1213 | EXPORT_SYMBOL(mount_nodev); | 1400 | EXPORT_SYMBOL(mount_nodev); |
| 1214 | 1401 | ||
| 1402 | static int reconfigure_single(struct super_block *s, | ||
| 1403 | int flags, void *data) | ||
| 1404 | { | ||
| 1405 | struct fs_context *fc; | ||
| 1406 | int ret; | ||
| 1407 | |||
| 1408 | /* The caller really needs to be passing fc down into mount_single(), | ||
| 1409 | * then a chunk of this can be removed. [Bollocks -- AV] | ||
| 1410 | * Better yet, reconfiguration shouldn't happen, but rather the second | ||
| 1411 | * mount should be rejected if the parameters are not compatible. | ||
| 1412 | */ | ||
| 1413 | fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK); | ||
| 1414 | if (IS_ERR(fc)) | ||
| 1415 | return PTR_ERR(fc); | ||
| 1416 | |||
| 1417 | ret = parse_monolithic_mount_data(fc, data); | ||
| 1418 | if (ret < 0) | ||
| 1419 | goto out; | ||
| 1420 | |||
| 1421 | ret = reconfigure_super(fc); | ||
| 1422 | out: | ||
| 1423 | put_fs_context(fc); | ||
| 1424 | return ret; | ||
| 1425 | } | ||
| 1426 | |||
| 1215 | static int compare_single(struct super_block *s, void *p) | 1427 | static int compare_single(struct super_block *s, void *p) |
| 1216 | { | 1428 | { |
| 1217 | return 1; | 1429 | return 1; |
| @@ -1229,41 +1441,64 @@ struct dentry *mount_single(struct file_system_type *fs_type, | |||
| 1229 | return ERR_CAST(s); | 1441 | return ERR_CAST(s); |
| 1230 | if (!s->s_root) { | 1442 | if (!s->s_root) { |
| 1231 | error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); | 1443 | error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); |
| 1232 | if (error) { | 1444 | if (!error) |
| 1233 | deactivate_locked_super(s); | 1445 | s->s_flags |= SB_ACTIVE; |
| 1234 | return ERR_PTR(error); | ||
| 1235 | } | ||
| 1236 | s->s_flags |= SB_ACTIVE; | ||
| 1237 | } else { | 1446 | } else { |
| 1238 | do_remount_sb(s, flags, data, 0); | 1447 | error = reconfigure_single(s, flags, data); |
| 1448 | } | ||
| 1449 | if (unlikely(error)) { | ||
| 1450 | deactivate_locked_super(s); | ||
| 1451 | return ERR_PTR(error); | ||
| 1239 | } | 1452 | } |
| 1240 | return dget(s->s_root); | 1453 | return dget(s->s_root); |
| 1241 | } | 1454 | } |
| 1242 | EXPORT_SYMBOL(mount_single); | 1455 | EXPORT_SYMBOL(mount_single); |
| 1243 | 1456 | ||
| 1244 | struct dentry * | 1457 | /** |
| 1245 | mount_fs(struct file_system_type *type, int flags, const char *name, void *data) | 1458 | * vfs_get_tree - Get the mountable root |
| 1459 | * @fc: The superblock configuration context. | ||
| 1460 | * | ||
| 1461 | * The filesystem is invoked to get or create a superblock which can then later | ||
| 1462 | * be used for mounting. The filesystem places a pointer to the root to be | ||
| 1463 | * used for mounting in @fc->root. | ||
| 1464 | */ | ||
| 1465 | int vfs_get_tree(struct fs_context *fc) | ||
| 1246 | { | 1466 | { |
| 1247 | struct dentry *root; | ||
| 1248 | struct super_block *sb; | 1467 | struct super_block *sb; |
| 1249 | int error = -ENOMEM; | 1468 | int error; |
| 1250 | void *sec_opts = NULL; | ||
| 1251 | 1469 | ||
| 1252 | if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { | 1470 | if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) { |
| 1253 | error = security_sb_eat_lsm_opts(data, &sec_opts); | 1471 | errorf(fc, "Filesystem requires source device"); |
| 1254 | if (error) | 1472 | return -ENOENT; |
| 1255 | return ERR_PTR(error); | ||
| 1256 | } | 1473 | } |
| 1257 | 1474 | ||
| 1258 | root = type->mount(type, flags, name, data); | 1475 | if (fc->root) |
| 1259 | if (IS_ERR(root)) { | 1476 | return -EBUSY; |
| 1260 | error = PTR_ERR(root); | 1477 | |
| 1261 | goto out_free_secdata; | 1478 | /* Get the mountable root in fc->root, with a ref on the root and a ref |
| 1479 | * on the superblock. | ||
| 1480 | */ | ||
| 1481 | error = fc->ops->get_tree(fc); | ||
| 1482 | if (error < 0) | ||
| 1483 | return error; | ||
| 1484 | |||
| 1485 | if (!fc->root) { | ||
| 1486 | pr_err("Filesystem %s get_tree() didn't set fc->root\n", | ||
| 1487 | fc->fs_type->name); | ||
| 1488 | /* We don't know what the locking state of the superblock is - | ||
| 1489 | * if there is a superblock. | ||
| 1490 | */ | ||
| 1491 | BUG(); | ||
| 1262 | } | 1492 | } |
| 1263 | sb = root->d_sb; | 1493 | |
| 1264 | BUG_ON(!sb); | 1494 | sb = fc->root->d_sb; |
| 1265 | WARN_ON(!sb->s_bdi); | 1495 | WARN_ON(!sb->s_bdi); |
| 1266 | 1496 | ||
| 1497 | if (fc->subtype && !sb->s_subtype) { | ||
| 1498 | sb->s_subtype = fc->subtype; | ||
| 1499 | fc->subtype = NULL; | ||
| 1500 | } | ||
| 1501 | |||
| 1267 | /* | 1502 | /* |
| 1268 | * Write barrier is for super_cache_count(). We place it before setting | 1503 | * Write barrier is for super_cache_count(). We place it before setting |
| 1269 | * SB_BORN as the data dependency between the two functions is the | 1504 | * SB_BORN as the data dependency between the two functions is the |
| @@ -1273,14 +1508,10 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) | |||
| 1273 | smp_wmb(); | 1508 | smp_wmb(); |
| 1274 | sb->s_flags |= SB_BORN; | 1509 | sb->s_flags |= SB_BORN; |
| 1275 | 1510 | ||
| 1276 | error = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL); | 1511 | error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL); |
| 1277 | if (error) | 1512 | if (unlikely(error)) { |
| 1278 | goto out_sb; | 1513 | fc_drop_locked(fc); |
| 1279 | 1514 | return error; | |
| 1280 | if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT))) { | ||
| 1281 | error = security_sb_kern_mount(sb); | ||
| 1282 | if (error) | ||
| 1283 | goto out_sb; | ||
| 1284 | } | 1515 | } |
| 1285 | 1516 | ||
| 1286 | /* | 1517 | /* |
| @@ -1290,18 +1521,11 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) | |||
| 1290 | * violate this rule. | 1521 | * violate this rule. |
| 1291 | */ | 1522 | */ |
| 1292 | WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " | 1523 | WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " |
| 1293 | "negative value (%lld)\n", type->name, sb->s_maxbytes); | 1524 | "negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes); |
| 1294 | 1525 | ||
| 1295 | up_write(&sb->s_umount); | 1526 | return 0; |
| 1296 | security_free_mnt_opts(&sec_opts); | ||
| 1297 | return root; | ||
| 1298 | out_sb: | ||
| 1299 | dput(root); | ||
| 1300 | deactivate_locked_super(sb); | ||
| 1301 | out_free_secdata: | ||
| 1302 | security_free_mnt_opts(&sec_opts); | ||
| 1303 | return ERR_PTR(error); | ||
| 1304 | } | 1527 | } |
| 1528 | EXPORT_SYMBOL(vfs_get_tree); | ||
| 1305 | 1529 | ||
| 1306 | /* | 1530 | /* |
| 1307 | * Setup private BDI for given superblock. It gets automatically cleaned up | 1531 | * Setup private BDI for given superblock. It gets automatically cleaned up |
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 92682fcc41f6..4cb21b558a85 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
| @@ -13,34 +13,69 @@ | |||
| 13 | #include <linux/magic.h> | 13 | #include <linux/magic.h> |
| 14 | #include <linux/mount.h> | 14 | #include <linux/mount.h> |
| 15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
| 16 | #include <linux/slab.h> | ||
| 16 | #include <linux/user_namespace.h> | 17 | #include <linux/user_namespace.h> |
| 18 | #include <linux/fs_context.h> | ||
| 19 | #include <net/net_namespace.h> | ||
| 17 | 20 | ||
| 18 | #include "sysfs.h" | 21 | #include "sysfs.h" |
| 19 | 22 | ||
| 20 | static struct kernfs_root *sysfs_root; | 23 | static struct kernfs_root *sysfs_root; |
| 21 | struct kernfs_node *sysfs_root_kn; | 24 | struct kernfs_node *sysfs_root_kn; |
| 22 | 25 | ||
| 23 | static struct dentry *sysfs_mount(struct file_system_type *fs_type, | 26 | static int sysfs_get_tree(struct fs_context *fc) |
| 24 | int flags, const char *dev_name, void *data) | ||
| 25 | { | 27 | { |
| 26 | struct dentry *root; | 28 | struct kernfs_fs_context *kfc = fc->fs_private; |
| 27 | void *ns; | 29 | int ret; |
| 28 | bool new_sb = false; | ||
| 29 | 30 | ||
| 30 | if (!(flags & SB_KERNMOUNT)) { | 31 | ret = kernfs_get_tree(fc); |
| 32 | if (ret) | ||
| 33 | return ret; | ||
| 34 | |||
| 35 | if (kfc->new_sb_created) | ||
| 36 | fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; | ||
| 37 | return 0; | ||
| 38 | } | ||
| 39 | |||
| 40 | static void sysfs_fs_context_free(struct fs_context *fc) | ||
| 41 | { | ||
| 42 | struct kernfs_fs_context *kfc = fc->fs_private; | ||
| 43 | |||
| 44 | if (kfc->ns_tag) | ||
| 45 | kobj_ns_drop(KOBJ_NS_TYPE_NET, kfc->ns_tag); | ||
| 46 | kernfs_free_fs_context(fc); | ||
| 47 | kfree(kfc); | ||
| 48 | } | ||
| 49 | |||
| 50 | static const struct fs_context_operations sysfs_fs_context_ops = { | ||
| 51 | .free = sysfs_fs_context_free, | ||
| 52 | .get_tree = sysfs_get_tree, | ||
| 53 | }; | ||
| 54 | |||
| 55 | static int sysfs_init_fs_context(struct fs_context *fc) | ||
| 56 | { | ||
| 57 | struct kernfs_fs_context *kfc; | ||
| 58 | struct net *netns; | ||
| 59 | |||
| 60 | if (!(fc->sb_flags & SB_KERNMOUNT)) { | ||
| 31 | if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) | 61 | if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) |
| 32 | return ERR_PTR(-EPERM); | 62 | return -EPERM; |
| 33 | } | 63 | } |
| 34 | 64 | ||
| 35 | ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); | 65 | kfc = kzalloc(sizeof(struct kernfs_fs_context), GFP_KERNEL); |
| 36 | root = kernfs_mount_ns(fs_type, flags, sysfs_root, | 66 | if (!kfc) |
| 37 | SYSFS_MAGIC, &new_sb, ns); | 67 | return -ENOMEM; |
| 38 | if (!new_sb) | ||
| 39 | kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); | ||
| 40 | else if (!IS_ERR(root)) | ||
| 41 | root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; | ||
| 42 | 68 | ||
| 43 | return root; | 69 | kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); |
| 70 | kfc->root = sysfs_root; | ||
| 71 | kfc->magic = SYSFS_MAGIC; | ||
| 72 | fc->fs_private = kfc; | ||
| 73 | fc->ops = &sysfs_fs_context_ops; | ||
| 74 | if (fc->user_ns) | ||
| 75 | put_user_ns(fc->user_ns); | ||
| 76 | fc->user_ns = get_user_ns(netns->user_ns); | ||
| 77 | fc->global = true; | ||
| 78 | return 0; | ||
| 44 | } | 79 | } |
| 45 | 80 | ||
| 46 | static void sysfs_kill_sb(struct super_block *sb) | 81 | static void sysfs_kill_sb(struct super_block *sb) |
| @@ -52,10 +87,10 @@ static void sysfs_kill_sb(struct super_block *sb) | |||
| 52 | } | 87 | } |
| 53 | 88 | ||
| 54 | static struct file_system_type sysfs_fs_type = { | 89 | static struct file_system_type sysfs_fs_type = { |
| 55 | .name = "sysfs", | 90 | .name = "sysfs", |
| 56 | .mount = sysfs_mount, | 91 | .init_fs_context = sysfs_init_fs_context, |
| 57 | .kill_sb = sysfs_kill_sb, | 92 | .kill_sb = sysfs_kill_sb, |
| 58 | .fs_flags = FS_USERNS_MOUNT, | 93 | .fs_flags = FS_USERNS_MOUNT, |
| 59 | }; | 94 | }; |
| 60 | 95 | ||
| 61 | int __init sysfs_init(void) | 96 | int __init sysfs_init(void) |
diff --git a/include/linux/errno.h b/include/linux/errno.h index 3cba627577d6..d73f597a2484 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ | 18 | #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ |
| 19 | #define EPROBE_DEFER 517 /* Driver requests probe retry */ | 19 | #define EPROBE_DEFER 517 /* Driver requests probe retry */ |
| 20 | #define EOPENSTALE 518 /* open found a stale dentry */ | 20 | #define EOPENSTALE 518 /* open found a stale dentry */ |
| 21 | #define ENOPARAM 519 /* Parameter not supported */ | ||
| 21 | 22 | ||
| 22 | /* Defined for the NFSv3 protocol */ | 23 | /* Defined for the NFSv3 protocol */ |
| 23 | #define EBADHANDLE 521 /* Illegal NFS file handle */ | 24 | #define EBADHANDLE 521 /* Illegal NFS file handle */ |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 80c6a4093b46..8b42df09b04c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -64,6 +64,8 @@ struct workqueue_struct; | |||
| 64 | struct iov_iter; | 64 | struct iov_iter; |
| 65 | struct fscrypt_info; | 65 | struct fscrypt_info; |
| 66 | struct fscrypt_operations; | 66 | struct fscrypt_operations; |
| 67 | struct fs_context; | ||
| 68 | struct fs_parameter_description; | ||
| 67 | 69 | ||
| 68 | extern void __init inode_init(void); | 70 | extern void __init inode_init(void); |
| 69 | extern void __init inode_init_early(void); | 71 | extern void __init inode_init_early(void); |
| @@ -1349,6 +1351,7 @@ extern int send_sigurg(struct fown_struct *fown); | |||
| 1349 | 1351 | ||
| 1350 | /* These sb flags are internal to the kernel */ | 1352 | /* These sb flags are internal to the kernel */ |
| 1351 | #define SB_SUBMOUNT (1<<26) | 1353 | #define SB_SUBMOUNT (1<<26) |
| 1354 | #define SB_FORCE (1<<27) | ||
| 1352 | #define SB_NOSEC (1<<28) | 1355 | #define SB_NOSEC (1<<28) |
| 1353 | #define SB_BORN (1<<29) | 1356 | #define SB_BORN (1<<29) |
| 1354 | #define SB_ACTIVE (1<<30) | 1357 | #define SB_ACTIVE (1<<30) |
| @@ -1459,7 +1462,7 @@ struct super_block { | |||
| 1459 | * Filesystem subtype. If non-empty the filesystem type field | 1462 | * Filesystem subtype. If non-empty the filesystem type field |
| 1460 | * in /proc/mounts will be "type.subtype" | 1463 | * in /proc/mounts will be "type.subtype" |
| 1461 | */ | 1464 | */ |
| 1462 | char *s_subtype; | 1465 | const char *s_subtype; |
| 1463 | 1466 | ||
| 1464 | const struct dentry_operations *s_d_op; /* default d_op for dentries */ | 1467 | const struct dentry_operations *s_d_op; /* default d_op for dentries */ |
| 1465 | 1468 | ||
| @@ -2170,6 +2173,8 @@ struct file_system_type { | |||
| 2170 | #define FS_HAS_SUBTYPE 4 | 2173 | #define FS_HAS_SUBTYPE 4 |
| 2171 | #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ | 2174 | #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ |
| 2172 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ | 2175 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ |
| 2176 | int (*init_fs_context)(struct fs_context *); | ||
| 2177 | const struct fs_parameter_description *parameters; | ||
| 2173 | struct dentry *(*mount) (struct file_system_type *, int, | 2178 | struct dentry *(*mount) (struct file_system_type *, int, |
| 2174 | const char *, void *); | 2179 | const char *, void *); |
| 2175 | void (*kill_sb) (struct super_block *); | 2180 | void (*kill_sb) (struct super_block *); |
| @@ -2225,8 +2230,12 @@ void kill_litter_super(struct super_block *sb); | |||
| 2225 | void deactivate_super(struct super_block *sb); | 2230 | void deactivate_super(struct super_block *sb); |
| 2226 | void deactivate_locked_super(struct super_block *sb); | 2231 | void deactivate_locked_super(struct super_block *sb); |
| 2227 | int set_anon_super(struct super_block *s, void *data); | 2232 | int set_anon_super(struct super_block *s, void *data); |
| 2233 | int set_anon_super_fc(struct super_block *s, struct fs_context *fc); | ||
| 2228 | int get_anon_bdev(dev_t *); | 2234 | int get_anon_bdev(dev_t *); |
| 2229 | void free_anon_bdev(dev_t); | 2235 | void free_anon_bdev(dev_t); |
| 2236 | struct super_block *sget_fc(struct fs_context *fc, | ||
| 2237 | int (*test)(struct super_block *, struct fs_context *), | ||
| 2238 | int (*set)(struct super_block *, struct fs_context *)); | ||
| 2230 | struct super_block *sget_userns(struct file_system_type *type, | 2239 | struct super_block *sget_userns(struct file_system_type *type, |
| 2231 | int (*test)(struct super_block *,void *), | 2240 | int (*test)(struct super_block *,void *), |
| 2232 | int (*set)(struct super_block *,void *), | 2241 | int (*set)(struct super_block *,void *), |
| @@ -2269,8 +2278,7 @@ mount_pseudo(struct file_system_type *fs_type, char *name, | |||
| 2269 | 2278 | ||
| 2270 | extern int register_filesystem(struct file_system_type *); | 2279 | extern int register_filesystem(struct file_system_type *); |
| 2271 | extern int unregister_filesystem(struct file_system_type *); | 2280 | extern int unregister_filesystem(struct file_system_type *); |
| 2272 | extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); | 2281 | extern struct vfsmount *kern_mount(struct file_system_type *); |
| 2273 | #define kern_mount(type) kern_mount_data(type, NULL) | ||
| 2274 | extern void kern_unmount(struct vfsmount *mnt); | 2282 | extern void kern_unmount(struct vfsmount *mnt); |
| 2275 | extern int may_umount_tree(struct vfsmount *); | 2283 | extern int may_umount_tree(struct vfsmount *); |
| 2276 | extern int may_umount(struct vfsmount *); | 2284 | extern int may_umount(struct vfsmount *); |
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h new file mode 100644 index 000000000000..eaca452088fa --- /dev/null +++ b/include/linux/fs_context.h | |||
| @@ -0,0 +1,188 @@ | |||
| 1 | /* Filesystem superblock creation and reconfiguration context. | ||
| 2 | * | ||
| 3 | * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public Licence | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the Licence, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #ifndef _LINUX_FS_CONTEXT_H | ||
| 13 | #define _LINUX_FS_CONTEXT_H | ||
| 14 | |||
| 15 | #include <linux/kernel.h> | ||
| 16 | #include <linux/errno.h> | ||
| 17 | #include <linux/security.h> | ||
| 18 | |||
| 19 | struct cred; | ||
| 20 | struct dentry; | ||
| 21 | struct file_operations; | ||
| 22 | struct file_system_type; | ||
| 23 | struct mnt_namespace; | ||
| 24 | struct net; | ||
| 25 | struct pid_namespace; | ||
| 26 | struct super_block; | ||
| 27 | struct user_namespace; | ||
| 28 | struct vfsmount; | ||
| 29 | struct path; | ||
| 30 | |||
| 31 | enum fs_context_purpose { | ||
| 32 | FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */ | ||
| 33 | FS_CONTEXT_FOR_SUBMOUNT, /* New superblock for automatic submount */ | ||
| 34 | FS_CONTEXT_FOR_RECONFIGURE, /* Superblock reconfiguration (remount) */ | ||
| 35 | }; | ||
| 36 | |||
| 37 | /* | ||
| 38 | * Type of parameter value. | ||
| 39 | */ | ||
| 40 | enum fs_value_type { | ||
| 41 | fs_value_is_undefined, | ||
| 42 | fs_value_is_flag, /* Parameter was not given a value */ | ||
| 43 | fs_value_is_string, /* Value is a string */ | ||
| 44 | fs_value_is_blob, /* Value is a binary blob */ | ||
| 45 | fs_value_is_filename, /* Value is a filename* + dirfd */ | ||
| 46 | fs_value_is_filename_empty, /* Value is a filename* + dirfd + AT_EMPTY_PATH */ | ||
| 47 | fs_value_is_file, /* Value is a file* */ | ||
| 48 | }; | ||
| 49 | |||
| 50 | /* | ||
| 51 | * Configuration parameter. | ||
| 52 | */ | ||
| 53 | struct fs_parameter { | ||
| 54 | const char *key; /* Parameter name */ | ||
| 55 | enum fs_value_type type:8; /* The type of value here */ | ||
| 56 | union { | ||
| 57 | char *string; | ||
| 58 | void *blob; | ||
| 59 | struct filename *name; | ||
| 60 | struct file *file; | ||
| 61 | }; | ||
| 62 | size_t size; | ||
| 63 | int dirfd; | ||
| 64 | }; | ||
| 65 | |||
| 66 | /* | ||
| 67 | * Filesystem context for holding the parameters used in the creation or | ||
| 68 | * reconfiguration of a superblock. | ||
| 69 | * | ||
| 70 | * Superblock creation fills in ->root whereas reconfiguration begins with this | ||
| 71 | * already set. | ||
| 72 | * | ||
| 73 | * See Documentation/filesystems/mount_api.txt | ||
| 74 | */ | ||
| 75 | struct fs_context { | ||
| 76 | const struct fs_context_operations *ops; | ||
| 77 | struct file_system_type *fs_type; | ||
| 78 | void *fs_private; /* The filesystem's context */ | ||
| 79 | struct dentry *root; /* The root and superblock */ | ||
| 80 | struct user_namespace *user_ns; /* The user namespace for this mount */ | ||
| 81 | struct net *net_ns; /* The network namespace for this mount */ | ||
| 82 | const struct cred *cred; /* The mounter's credentials */ | ||
| 83 | const char *source; /* The source name (eg. dev path) */ | ||
| 84 | const char *subtype; /* The subtype to set on the superblock */ | ||
| 85 | void *security; /* LSM options */ | ||
| 86 | void *s_fs_info; /* Proposed s_fs_info */ | ||
| 87 | unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ | ||
| 88 | unsigned int sb_flags_mask; /* Superblock flags that were changed */ | ||
| 89 | unsigned int lsm_flags; /* Information flags from the fs to the LSM */ | ||
| 90 | enum fs_context_purpose purpose:8; | ||
| 91 | bool need_free:1; /* Need to call ops->free() */ | ||
| 92 | bool global:1; /* Goes into &init_user_ns */ | ||
| 93 | }; | ||
| 94 | |||
| 95 | struct fs_context_operations { | ||
| 96 | void (*free)(struct fs_context *fc); | ||
| 97 | int (*dup)(struct fs_context *fc, struct fs_context *src_fc); | ||
| 98 | int (*parse_param)(struct fs_context *fc, struct fs_parameter *param); | ||
| 99 | int (*parse_monolithic)(struct fs_context *fc, void *data); | ||
| 100 | int (*get_tree)(struct fs_context *fc); | ||
| 101 | int (*reconfigure)(struct fs_context *fc); | ||
| 102 | }; | ||
| 103 | |||
| 104 | /* | ||
| 105 | * fs_context manipulation functions. | ||
| 106 | */ | ||
| 107 | extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, | ||
| 108 | unsigned int sb_flags); | ||
| 109 | extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, | ||
| 110 | unsigned int sb_flags, | ||
| 111 | unsigned int sb_flags_mask); | ||
| 112 | extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_type, | ||
| 113 | struct dentry *reference); | ||
| 114 | |||
| 115 | extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); | ||
| 116 | extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); | ||
| 117 | extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, | ||
| 118 | const char *value, size_t v_size); | ||
| 119 | extern int generic_parse_monolithic(struct fs_context *fc, void *data); | ||
| 120 | extern int vfs_get_tree(struct fs_context *fc); | ||
| 121 | extern void put_fs_context(struct fs_context *fc); | ||
| 122 | |||
| 123 | /* | ||
| 124 | * sget() wrapper to be called from the ->get_tree() op. | ||
| 125 | */ | ||
| 126 | enum vfs_get_super_keying { | ||
| 127 | vfs_get_single_super, /* Only one such superblock may exist */ | ||
| 128 | vfs_get_keyed_super, /* Superblocks with different s_fs_info keys may exist */ | ||
| 129 | vfs_get_independent_super, /* Multiple independent superblocks may exist */ | ||
| 130 | }; | ||
| 131 | extern int vfs_get_super(struct fs_context *fc, | ||
| 132 | enum vfs_get_super_keying keying, | ||
| 133 | int (*fill_super)(struct super_block *sb, | ||
| 134 | struct fs_context *fc)); | ||
| 135 | |||
| 136 | extern const struct file_operations fscontext_fops; | ||
| 137 | |||
| 138 | #ifdef CONFIG_PRINTK | ||
| 139 | extern __attribute__((format(printf, 2, 3))) | ||
| 140 | void logfc(struct fs_context *fc, const char *fmt, ...); | ||
| 141 | #else | ||
| 142 | static inline __attribute__((format(printf, 2, 3))) | ||
| 143 | void logfc(struct fs_context *fc, const char *fmt, ...) | ||
| 144 | { | ||
| 145 | } | ||
| 146 | #endif | ||
| 147 | |||
| 148 | /** | ||
| 149 | * infof - Store supplementary informational message | ||
| 150 | * @fc: The context in which to log the informational message | ||
| 151 | * @fmt: The format string | ||
| 152 | * | ||
| 153 | * Store the supplementary informational message for the process if the process | ||
| 154 | * has enabled the facility. | ||
| 155 | */ | ||
| 156 | #define infof(fc, fmt, ...) ({ logfc(fc, "i "fmt, ## __VA_ARGS__); }) | ||
| 157 | |||
| 158 | /** | ||
| 159 | * warnf - Store supplementary warning message | ||
| 160 | * @fc: The context in which to log the warning message | ||
| 161 | * @fmt: The format string | ||
| 162 | * | ||
| 163 | * Store the supplementary warning message for the process if the process has | ||
| 164 | * enabled the facility. | ||
| 165 | */ | ||
| 166 | #define warnf(fc, fmt, ...) ({ logfc(fc, "w "fmt, ## __VA_ARGS__); }) | ||
| 167 | |||
| 168 | /** | ||
| 169 | * errorf - Store supplementary error message | ||
| 170 | * @fc: The context in which to log the error message | ||
| 171 | * @fmt: The format string | ||
| 172 | * | ||
| 173 | * Store the supplementary error message for the process if the process has | ||
| 174 | * enabled the facility. | ||
| 175 | */ | ||
| 176 | #define errorf(fc, fmt, ...) ({ logfc(fc, "e "fmt, ## __VA_ARGS__); }) | ||
| 177 | |||
| 178 | /** | ||
| 179 | * invalf - Store supplementary invalid argument error message | ||
| 180 | * @fc: The context in which to log the error message | ||
| 181 | * @fmt: The format string | ||
| 182 | * | ||
| 183 | * Store the supplementary error message for the process if the process has | ||
| 184 | * enabled the facility and return -EINVAL. | ||
| 185 | */ | ||
| 186 | #define invalf(fc, fmt, ...) ({ errorf(fc, fmt, ## __VA_ARGS__); -EINVAL; }) | ||
| 187 | |||
| 188 | #endif /* _LINUX_FS_CONTEXT_H */ | ||
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h new file mode 100644 index 000000000000..d966f96ffe62 --- /dev/null +++ b/include/linux/fs_parser.h | |||
| @@ -0,0 +1,151 @@ | |||
| 1 | /* Filesystem parameter description and parser | ||
| 2 | * | ||
| 3 | * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public Licence | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the Licence, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #ifndef _LINUX_FS_PARSER_H | ||
| 13 | #define _LINUX_FS_PARSER_H | ||
| 14 | |||
| 15 | #include <linux/fs_context.h> | ||
| 16 | |||
| 17 | struct path; | ||
| 18 | |||
| 19 | struct constant_table { | ||
| 20 | const char *name; | ||
| 21 | int value; | ||
| 22 | }; | ||
| 23 | |||
| 24 | /* | ||
| 25 | * The type of parameter expected. | ||
| 26 | */ | ||
| 27 | enum fs_parameter_type { | ||
| 28 | __fs_param_wasnt_defined, | ||
| 29 | fs_param_is_flag, | ||
| 30 | fs_param_is_bool, | ||
| 31 | fs_param_is_u32, | ||
| 32 | fs_param_is_u32_octal, | ||
| 33 | fs_param_is_u32_hex, | ||
| 34 | fs_param_is_s32, | ||
| 35 | fs_param_is_u64, | ||
| 36 | fs_param_is_enum, | ||
| 37 | fs_param_is_string, | ||
| 38 | fs_param_is_blob, | ||
| 39 | fs_param_is_blockdev, | ||
| 40 | fs_param_is_path, | ||
| 41 | fs_param_is_fd, | ||
| 42 | nr__fs_parameter_type, | ||
| 43 | }; | ||
| 44 | |||
| 45 | /* | ||
| 46 | * Specification of the type of value a parameter wants. | ||
| 47 | * | ||
| 48 | * Note that the fsparam_flag(), fsparam_string(), fsparam_u32(), ... macros | ||
| 49 | * should be used to generate elements of this type. | ||
| 50 | */ | ||
| 51 | struct fs_parameter_spec { | ||
| 52 | const char *name; | ||
| 53 | u8 opt; /* Option number (returned by fs_parse()) */ | ||
| 54 | enum fs_parameter_type type:8; /* The desired parameter type */ | ||
| 55 | unsigned short flags; | ||
| 56 | #define fs_param_v_optional 0x0001 /* The value is optional */ | ||
| 57 | #define fs_param_neg_with_no 0x0002 /* "noxxx" is negative param */ | ||
| 58 | #define fs_param_neg_with_empty 0x0004 /* "xxx=" is negative param */ | ||
| 59 | #define fs_param_deprecated 0x0008 /* The param is deprecated */ | ||
| 60 | }; | ||
| 61 | |||
| 62 | struct fs_parameter_enum { | ||
| 63 | u8 opt; /* Option number (as fs_parameter_spec::opt) */ | ||
| 64 | char name[14]; | ||
| 65 | u8 value; | ||
| 66 | }; | ||
| 67 | |||
| 68 | struct fs_parameter_description { | ||
| 69 | const char name[16]; /* Name for logging purposes */ | ||
| 70 | const struct fs_parameter_spec *specs; /* List of param specifications */ | ||
| 71 | const struct fs_parameter_enum *enums; /* Enum values */ | ||
| 72 | }; | ||
| 73 | |||
| 74 | /* | ||
| 75 | * Result of parse. | ||
| 76 | */ | ||
| 77 | struct fs_parse_result { | ||
| 78 | bool negated; /* T if param was "noxxx" */ | ||
| 79 | bool has_value; /* T if value supplied to param */ | ||
| 80 | union { | ||
| 81 | bool boolean; /* For spec_bool */ | ||
| 82 | int int_32; /* For spec_s32/spec_enum */ | ||
| 83 | unsigned int uint_32; /* For spec_u32{,_octal,_hex}/spec_enum */ | ||
| 84 | u64 uint_64; /* For spec_u64 */ | ||
| 85 | }; | ||
| 86 | }; | ||
| 87 | |||
| 88 | extern int fs_parse(struct fs_context *fc, | ||
| 89 | const struct fs_parameter_description *desc, | ||
| 90 | struct fs_parameter *value, | ||
| 91 | struct fs_parse_result *result); | ||
| 92 | extern int fs_lookup_param(struct fs_context *fc, | ||
| 93 | struct fs_parameter *param, | ||
| 94 | bool want_bdev, | ||
| 95 | struct path *_path); | ||
| 96 | |||
| 97 | extern int __lookup_constant(const struct constant_table tbl[], size_t tbl_size, | ||
| 98 | const char *name, int not_found); | ||
| 99 | #define lookup_constant(t, n, nf) __lookup_constant(t, ARRAY_SIZE(t), (n), (nf)) | ||
| 100 | |||
| 101 | #ifdef CONFIG_VALIDATE_FS_PARSER | ||
| 102 | extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, | ||
| 103 | int low, int high, int special); | ||
| 104 | extern bool fs_validate_description(const struct fs_parameter_description *desc); | ||
| 105 | #else | ||
| 106 | static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, | ||
| 107 | int low, int high, int special) | ||
| 108 | { return true; } | ||
| 109 | static inline bool fs_validate_description(const struct fs_parameter_description *desc) | ||
| 110 | { return true; } | ||
| 111 | #endif | ||
| 112 | |||
| 113 | /* | ||
| 114 | * Parameter type, name, index and flags element constructors. Use as: | ||
| 115 | * | ||
| 116 | * fsparam_xxxx("foo", Opt_foo) | ||
| 117 | * | ||
| 118 | * If existing helpers are not enough, direct use of __fsparam() would | ||
| 119 | * work, but any such case is probably a sign that a new helper is needed. | ||
| 120 | * Helpers will remain stable; low-level implementation may change. | ||
| 121 | */ | ||
| 122 | #define __fsparam(TYPE, NAME, OPT, FLAGS) \ | ||
| 123 | { \ | ||
| 124 | .name = NAME, \ | ||
| 125 | .opt = OPT, \ | ||
| 126 | .type = TYPE, \ | ||
| 127 | .flags = FLAGS \ | ||
| 128 | } | ||
| 129 | |||
| 130 | #define fsparam_flag(NAME, OPT) __fsparam(fs_param_is_flag, NAME, OPT, 0) | ||
| 131 | #define fsparam_flag_no(NAME, OPT) \ | ||
| 132 | __fsparam(fs_param_is_flag, NAME, OPT, \ | ||
| 133 | fs_param_neg_with_no) | ||
| 134 | #define fsparam_bool(NAME, OPT) __fsparam(fs_param_is_bool, NAME, OPT, 0) | ||
| 135 | #define fsparam_u32(NAME, OPT) __fsparam(fs_param_is_u32, NAME, OPT, 0) | ||
| 136 | #define fsparam_u32oct(NAME, OPT) \ | ||
| 137 | __fsparam(fs_param_is_u32_octal, NAME, OPT, 0) | ||
| 138 | #define fsparam_u32hex(NAME, OPT) \ | ||
| 139 | __fsparam(fs_param_is_u32_hex, NAME, OPT, 0) | ||
| 140 | #define fsparam_s32(NAME, OPT) __fsparam(fs_param_is_s32, NAME, OPT, 0) | ||
| 141 | #define fsparam_u64(NAME, OPT) __fsparam(fs_param_is_u64, NAME, OPT, 0) | ||
| 142 | #define fsparam_enum(NAME, OPT) __fsparam(fs_param_is_enum, NAME, OPT, 0) | ||
| 143 | #define fsparam_string(NAME, OPT) \ | ||
| 144 | __fsparam(fs_param_is_string, NAME, OPT, 0) | ||
| 145 | #define fsparam_blob(NAME, OPT) __fsparam(fs_param_is_blob, NAME, OPT, 0) | ||
| 146 | #define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0) | ||
| 147 | #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0) | ||
| 148 | #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0) | ||
| 149 | |||
| 150 | |||
| 151 | #endif /* _LINUX_FS_PARSER_H */ | ||
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 0cac1207bb00..c8893f663470 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h | |||
| @@ -26,7 +26,9 @@ struct vm_area_struct; | |||
| 26 | struct super_block; | 26 | struct super_block; |
| 27 | struct file_system_type; | 27 | struct file_system_type; |
| 28 | struct poll_table_struct; | 28 | struct poll_table_struct; |
| 29 | struct fs_context; | ||
| 29 | 30 | ||
| 31 | struct kernfs_fs_context; | ||
| 30 | struct kernfs_open_node; | 32 | struct kernfs_open_node; |
| 31 | struct kernfs_iattrs; | 33 | struct kernfs_iattrs; |
| 32 | 34 | ||
| @@ -168,7 +170,6 @@ struct kernfs_node { | |||
| 168 | * kernfs_node parameter. | 170 | * kernfs_node parameter. |
| 169 | */ | 171 | */ |
| 170 | struct kernfs_syscall_ops { | 172 | struct kernfs_syscall_ops { |
| 171 | int (*remount_fs)(struct kernfs_root *root, int *flags, char *data); | ||
| 172 | int (*show_options)(struct seq_file *sf, struct kernfs_root *root); | 173 | int (*show_options)(struct seq_file *sf, struct kernfs_root *root); |
| 173 | 174 | ||
| 174 | int (*mkdir)(struct kernfs_node *parent, const char *name, | 175 | int (*mkdir)(struct kernfs_node *parent, const char *name, |
| @@ -272,6 +273,18 @@ struct kernfs_ops { | |||
| 272 | #endif | 273 | #endif |
| 273 | }; | 274 | }; |
| 274 | 275 | ||
| 276 | /* | ||
| 277 | * The kernfs superblock creation/mount parameter context. | ||
| 278 | */ | ||
| 279 | struct kernfs_fs_context { | ||
| 280 | struct kernfs_root *root; /* Root of the hierarchy being mounted */ | ||
| 281 | void *ns_tag; /* Namespace tag of the mount (or NULL) */ | ||
| 282 | unsigned long magic; /* File system specific magic number */ | ||
| 283 | |||
| 284 | /* The following are set/used by kernfs_get_tree() */ | ||
| 285 | bool new_sb_created; /* Set to T if we allocated a new sb */ | ||
| 286 | }; | ||
| 287 | |||
| 275 | #ifdef CONFIG_KERNFS | 288 | #ifdef CONFIG_KERNFS |
| 276 | 289 | ||
| 277 | static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) | 290 | static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) |
| @@ -359,11 +372,9 @@ __poll_t kernfs_generic_poll(struct kernfs_open_file *of, | |||
| 359 | void kernfs_notify(struct kernfs_node *kn); | 372 | void kernfs_notify(struct kernfs_node *kn); |
| 360 | 373 | ||
| 361 | const void *kernfs_super_ns(struct super_block *sb); | 374 | const void *kernfs_super_ns(struct super_block *sb); |
| 362 | struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, | 375 | int kernfs_get_tree(struct fs_context *fc); |
| 363 | struct kernfs_root *root, unsigned long magic, | 376 | void kernfs_free_fs_context(struct fs_context *fc); |
| 364 | bool *new_sb_created, const void *ns); | ||
| 365 | void kernfs_kill_sb(struct super_block *sb); | 377 | void kernfs_kill_sb(struct super_block *sb); |
| 366 | struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns); | ||
| 367 | 378 | ||
| 368 | void kernfs_init(void); | 379 | void kernfs_init(void); |
| 369 | 380 | ||
| @@ -465,11 +476,10 @@ static inline void kernfs_notify(struct kernfs_node *kn) { } | |||
| 465 | static inline const void *kernfs_super_ns(struct super_block *sb) | 476 | static inline const void *kernfs_super_ns(struct super_block *sb) |
| 466 | { return NULL; } | 477 | { return NULL; } |
| 467 | 478 | ||
| 468 | static inline struct dentry * | 479 | static inline int kernfs_get_tree(struct fs_context *fc) |
| 469 | kernfs_mount_ns(struct file_system_type *fs_type, int flags, | 480 | { return -ENOSYS; } |
| 470 | struct kernfs_root *root, unsigned long magic, | 481 | |
| 471 | bool *new_sb_created, const void *ns) | 482 | static inline void kernfs_free_fs_context(struct fs_context *fc) { } |
| 472 | { return ERR_PTR(-ENOSYS); } | ||
| 473 | 483 | ||
| 474 | static inline void kernfs_kill_sb(struct super_block *sb) { } | 484 | static inline void kernfs_kill_sb(struct super_block *sb) { } |
| 475 | 485 | ||
| @@ -552,13 +562,4 @@ static inline int kernfs_rename(struct kernfs_node *kn, | |||
| 552 | return kernfs_rename_ns(kn, new_parent, new_name, NULL); | 562 | return kernfs_rename_ns(kn, new_parent, new_name, NULL); |
| 553 | } | 563 | } |
| 554 | 564 | ||
| 555 | static inline struct dentry * | ||
| 556 | kernfs_mount(struct file_system_type *fs_type, int flags, | ||
| 557 | struct kernfs_root *root, unsigned long magic, | ||
| 558 | bool *new_sb_created) | ||
| 559 | { | ||
| 560 | return kernfs_mount_ns(fs_type, flags, root, | ||
| 561 | magic, new_sb_created, NULL); | ||
| 562 | } | ||
| 563 | |||
| 564 | #endif /* __LINUX_KERNFS_H */ | 565 | #endif /* __LINUX_KERNFS_H */ |
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 85a301632cf1..a9b8ff578b6b 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h | |||
| @@ -76,6 +76,22 @@ | |||
| 76 | * changes on the process such as clearing out non-inheritable signal | 76 | * changes on the process such as clearing out non-inheritable signal |
| 77 | * state. This is called immediately after commit_creds(). | 77 | * state. This is called immediately after commit_creds(). |
| 78 | * | 78 | * |
| 79 | * Security hooks for mount using fs_context. | ||
| 80 | * [See also Documentation/filesystems/mount_api.txt] | ||
| 81 | * | ||
| 82 | * @fs_context_dup: | ||
| 83 | * Allocate and attach a security structure to fc->security. This pointer | ||
| 84 | * is initialised to NULL by the caller. | ||
| 85 | * @fc indicates the new filesystem context. | ||
| 86 | * @src_fc indicates the original filesystem context. | ||
| 87 | * @fs_context_parse_param: | ||
| 88 | * Userspace provided a parameter to configure a superblock. The LSM may | ||
| 89 | * reject it with an error and may use it for itself, in which case it | ||
| 90 | * should return 0; otherwise it should return -ENOPARAM to pass it on to | ||
| 91 | * the filesystem. | ||
| 92 | * @fc indicates the filesystem context. | ||
| 93 | * @param The parameter | ||
| 94 | * | ||
| 79 | * Security hooks for filesystem operations. | 95 | * Security hooks for filesystem operations. |
| 80 | * | 96 | * |
| 81 | * @sb_alloc_security: | 97 | * @sb_alloc_security: |
| @@ -1460,6 +1476,9 @@ union security_list_options { | |||
| 1460 | void (*bprm_committing_creds)(struct linux_binprm *bprm); | 1476 | void (*bprm_committing_creds)(struct linux_binprm *bprm); |
| 1461 | void (*bprm_committed_creds)(struct linux_binprm *bprm); | 1477 | void (*bprm_committed_creds)(struct linux_binprm *bprm); |
| 1462 | 1478 | ||
| 1479 | int (*fs_context_dup)(struct fs_context *fc, struct fs_context *src_sc); | ||
| 1480 | int (*fs_context_parse_param)(struct fs_context *fc, struct fs_parameter *param); | ||
| 1481 | |||
| 1463 | int (*sb_alloc_security)(struct super_block *sb); | 1482 | int (*sb_alloc_security)(struct super_block *sb); |
| 1464 | void (*sb_free_security)(struct super_block *sb); | 1483 | void (*sb_free_security)(struct super_block *sb); |
| 1465 | void (*sb_free_mnt_opts)(void *mnt_opts); | 1484 | void (*sb_free_mnt_opts)(void *mnt_opts); |
| @@ -1800,6 +1819,8 @@ struct security_hook_heads { | |||
| 1800 | struct hlist_head bprm_check_security; | 1819 | struct hlist_head bprm_check_security; |
| 1801 | struct hlist_head bprm_committing_creds; | 1820 | struct hlist_head bprm_committing_creds; |
| 1802 | struct hlist_head bprm_committed_creds; | 1821 | struct hlist_head bprm_committed_creds; |
| 1822 | struct hlist_head fs_context_dup; | ||
| 1823 | struct hlist_head fs_context_parse_param; | ||
| 1803 | struct hlist_head sb_alloc_security; | 1824 | struct hlist_head sb_alloc_security; |
| 1804 | struct hlist_head sb_free_security; | 1825 | struct hlist_head sb_free_security; |
| 1805 | struct hlist_head sb_free_mnt_opts; | 1826 | struct hlist_head sb_free_mnt_opts; |
diff --git a/include/linux/mount.h b/include/linux/mount.h index 037eed52164b..9197ddbf35fb 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h | |||
| @@ -21,6 +21,7 @@ struct super_block; | |||
| 21 | struct vfsmount; | 21 | struct vfsmount; |
| 22 | struct dentry; | 22 | struct dentry; |
| 23 | struct mnt_namespace; | 23 | struct mnt_namespace; |
| 24 | struct fs_context; | ||
| 24 | 25 | ||
| 25 | #define MNT_NOSUID 0x01 | 26 | #define MNT_NOSUID 0x01 |
| 26 | #define MNT_NODEV 0x02 | 27 | #define MNT_NODEV 0x02 |
| @@ -88,6 +89,8 @@ struct path; | |||
| 88 | extern struct vfsmount *clone_private_mount(const struct path *path); | 89 | extern struct vfsmount *clone_private_mount(const struct path *path); |
| 89 | 90 | ||
| 90 | struct file_system_type; | 91 | struct file_system_type; |
| 92 | extern struct vfsmount *fc_mount(struct fs_context *fc); | ||
| 93 | extern struct vfsmount *vfs_create_mount(struct fs_context *fc); | ||
| 91 | extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, | 94 | extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, |
| 92 | int flags, const char *name, | 95 | int flags, const char *name, |
| 93 | void *data); | 96 | void *data); |
diff --git a/include/linux/security.h b/include/linux/security.h index 2b35a43d11d6..49f2685324b0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h | |||
| @@ -53,6 +53,9 @@ struct msg_msg; | |||
| 53 | struct xattr; | 53 | struct xattr; |
| 54 | struct xfrm_sec_ctx; | 54 | struct xfrm_sec_ctx; |
| 55 | struct mm_struct; | 55 | struct mm_struct; |
| 56 | struct fs_context; | ||
| 57 | struct fs_parameter; | ||
| 58 | enum fs_value_type; | ||
| 56 | 59 | ||
| 57 | /* Default (no) options for the capable function */ | 60 | /* Default (no) options for the capable function */ |
| 58 | #define CAP_OPT_NONE 0x0 | 61 | #define CAP_OPT_NONE 0x0 |
| @@ -61,7 +64,7 @@ struct mm_struct; | |||
| 61 | /* If capable is being called by a setid function */ | 64 | /* If capable is being called by a setid function */ |
| 62 | #define CAP_OPT_INSETID BIT(2) | 65 | #define CAP_OPT_INSETID BIT(2) |
| 63 | 66 | ||
| 64 | /* LSM Agnostic defines for sb_set_mnt_opts */ | 67 | /* LSM Agnostic defines for fs_context::lsm_flags */ |
| 65 | #define SECURITY_LSM_NATIVE_LABELS 1 | 68 | #define SECURITY_LSM_NATIVE_LABELS 1 |
| 66 | 69 | ||
| 67 | struct ctl_table; | 70 | struct ctl_table; |
| @@ -223,6 +226,8 @@ int security_bprm_set_creds(struct linux_binprm *bprm); | |||
| 223 | int security_bprm_check(struct linux_binprm *bprm); | 226 | int security_bprm_check(struct linux_binprm *bprm); |
| 224 | void security_bprm_committing_creds(struct linux_binprm *bprm); | 227 | void security_bprm_committing_creds(struct linux_binprm *bprm); |
| 225 | void security_bprm_committed_creds(struct linux_binprm *bprm); | 228 | void security_bprm_committed_creds(struct linux_binprm *bprm); |
| 229 | int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc); | ||
| 230 | int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param); | ||
| 226 | int security_sb_alloc(struct super_block *sb); | 231 | int security_sb_alloc(struct super_block *sb); |
| 227 | void security_sb_free(struct super_block *sb); | 232 | void security_sb_free(struct super_block *sb); |
| 228 | void security_free_mnt_opts(void **mnt_opts); | 233 | void security_free_mnt_opts(void **mnt_opts); |
| @@ -519,6 +524,17 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm) | |||
| 519 | { | 524 | { |
| 520 | } | 525 | } |
| 521 | 526 | ||
| 527 | static inline int security_fs_context_dup(struct fs_context *fc, | ||
| 528 | struct fs_context *src_fc) | ||
| 529 | { | ||
| 530 | return 0; | ||
| 531 | } | ||
| 532 | static inline int security_fs_context_parse_param(struct fs_context *fc, | ||
| 533 | struct fs_parameter *param) | ||
| 534 | { | ||
| 535 | return -ENOPARAM; | ||
| 536 | } | ||
| 537 | |||
| 522 | static inline int security_sb_alloc(struct super_block *sb) | 538 | static inline int security_sb_alloc(struct super_block *sb) |
| 523 | { | 539 | { |
| 524 | return 0; | 540 | return 0; |
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c839bf83231d..aea30530c472 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
| 19 | #include <linux/file.h> | 19 | #include <linux/file.h> |
| 20 | #include <linux/mount.h> | 20 | #include <linux/mount.h> |
| 21 | #include <linux/fs_context.h> | ||
| 21 | #include <linux/namei.h> | 22 | #include <linux/namei.h> |
| 22 | #include <linux/sysctl.h> | 23 | #include <linux/sysctl.h> |
| 23 | #include <linux/poll.h> | 24 | #include <linux/poll.h> |
| @@ -42,6 +43,10 @@ | |||
| 42 | #include <net/sock.h> | 43 | #include <net/sock.h> |
| 43 | #include "util.h" | 44 | #include "util.h" |
| 44 | 45 | ||
| 46 | struct mqueue_fs_context { | ||
| 47 | struct ipc_namespace *ipc_ns; | ||
| 48 | }; | ||
| 49 | |||
| 45 | #define MQUEUE_MAGIC 0x19800202 | 50 | #define MQUEUE_MAGIC 0x19800202 |
| 46 | #define DIRENT_SIZE 20 | 51 | #define DIRENT_SIZE 20 |
| 47 | #define FILENT_SIZE 80 | 52 | #define FILENT_SIZE 80 |
| @@ -87,9 +92,11 @@ struct mqueue_inode_info { | |||
| 87 | unsigned long qsize; /* size of queue in memory (sum of all msgs) */ | 92 | unsigned long qsize; /* size of queue in memory (sum of all msgs) */ |
| 88 | }; | 93 | }; |
| 89 | 94 | ||
| 95 | static struct file_system_type mqueue_fs_type; | ||
| 90 | static const struct inode_operations mqueue_dir_inode_operations; | 96 | static const struct inode_operations mqueue_dir_inode_operations; |
| 91 | static const struct file_operations mqueue_file_operations; | 97 | static const struct file_operations mqueue_file_operations; |
| 92 | static const struct super_operations mqueue_super_ops; | 98 | static const struct super_operations mqueue_super_ops; |
| 99 | static const struct fs_context_operations mqueue_fs_context_ops; | ||
| 93 | static void remove_notification(struct mqueue_inode_info *info); | 100 | static void remove_notification(struct mqueue_inode_info *info); |
| 94 | 101 | ||
| 95 | static struct kmem_cache *mqueue_inode_cachep; | 102 | static struct kmem_cache *mqueue_inode_cachep; |
| @@ -322,7 +329,7 @@ err: | |||
| 322 | return ERR_PTR(ret); | 329 | return ERR_PTR(ret); |
| 323 | } | 330 | } |
| 324 | 331 | ||
| 325 | static int mqueue_fill_super(struct super_block *sb, void *data, int silent) | 332 | static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc) |
| 326 | { | 333 | { |
| 327 | struct inode *inode; | 334 | struct inode *inode; |
| 328 | struct ipc_namespace *ns = sb->s_fs_info; | 335 | struct ipc_namespace *ns = sb->s_fs_info; |
| @@ -343,18 +350,56 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) | |||
| 343 | return 0; | 350 | return 0; |
| 344 | } | 351 | } |
| 345 | 352 | ||
| 346 | static struct dentry *mqueue_mount(struct file_system_type *fs_type, | 353 | static int mqueue_get_tree(struct fs_context *fc) |
| 347 | int flags, const char *dev_name, | ||
| 348 | void *data) | ||
| 349 | { | 354 | { |
| 350 | struct ipc_namespace *ns; | 355 | struct mqueue_fs_context *ctx = fc->fs_private; |
| 351 | if (flags & SB_KERNMOUNT) { | 356 | |
| 352 | ns = data; | 357 | put_user_ns(fc->user_ns); |
| 353 | data = NULL; | 358 | fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns); |
| 354 | } else { | 359 | fc->s_fs_info = ctx->ipc_ns; |
| 355 | ns = current->nsproxy->ipc_ns; | 360 | return vfs_get_super(fc, vfs_get_keyed_super, mqueue_fill_super); |
| 356 | } | 361 | } |
| 357 | return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super); | 362 | |
| 363 | static void mqueue_fs_context_free(struct fs_context *fc) | ||
| 364 | { | ||
| 365 | struct mqueue_fs_context *ctx = fc->fs_private; | ||
| 366 | |||
| 367 | if (ctx->ipc_ns) | ||
| 368 | put_ipc_ns(ctx->ipc_ns); | ||
| 369 | kfree(ctx); | ||
| 370 | } | ||
| 371 | |||
| 372 | static int mqueue_init_fs_context(struct fs_context *fc) | ||
| 373 | { | ||
| 374 | struct mqueue_fs_context *ctx; | ||
| 375 | |||
| 376 | ctx = kzalloc(sizeof(struct mqueue_fs_context), GFP_KERNEL); | ||
| 377 | if (!ctx) | ||
| 378 | return -ENOMEM; | ||
| 379 | |||
| 380 | ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); | ||
| 381 | fc->fs_private = ctx; | ||
| 382 | fc->ops = &mqueue_fs_context_ops; | ||
| 383 | return 0; | ||
| 384 | } | ||
| 385 | |||
| 386 | static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) | ||
| 387 | { | ||
| 388 | struct mqueue_fs_context *ctx; | ||
| 389 | struct fs_context *fc; | ||
| 390 | struct vfsmount *mnt; | ||
| 391 | |||
| 392 | fc = fs_context_for_mount(&mqueue_fs_type, SB_KERNMOUNT); | ||
| 393 | if (IS_ERR(fc)) | ||
| 394 | return ERR_CAST(fc); | ||
| 395 | |||
| 396 | ctx = fc->fs_private; | ||
| 397 | put_ipc_ns(ctx->ipc_ns); | ||
| 398 | ctx->ipc_ns = get_ipc_ns(ns); | ||
| 399 | |||
| 400 | mnt = fc_mount(fc); | ||
| 401 | put_fs_context(fc); | ||
| 402 | return mnt; | ||
| 358 | } | 403 | } |
| 359 | 404 | ||
| 360 | static void init_once(void *foo) | 405 | static void init_once(void *foo) |
| @@ -1522,15 +1567,22 @@ static const struct super_operations mqueue_super_ops = { | |||
| 1522 | .statfs = simple_statfs, | 1567 | .statfs = simple_statfs, |
| 1523 | }; | 1568 | }; |
| 1524 | 1569 | ||
| 1570 | static const struct fs_context_operations mqueue_fs_context_ops = { | ||
| 1571 | .free = mqueue_fs_context_free, | ||
| 1572 | .get_tree = mqueue_get_tree, | ||
| 1573 | }; | ||
| 1574 | |||
| 1525 | static struct file_system_type mqueue_fs_type = { | 1575 | static struct file_system_type mqueue_fs_type = { |
| 1526 | .name = "mqueue", | 1576 | .name = "mqueue", |
| 1527 | .mount = mqueue_mount, | 1577 | .init_fs_context = mqueue_init_fs_context, |
| 1528 | .kill_sb = kill_litter_super, | 1578 | .kill_sb = kill_litter_super, |
| 1529 | .fs_flags = FS_USERNS_MOUNT, | 1579 | .fs_flags = FS_USERNS_MOUNT, |
| 1530 | }; | 1580 | }; |
| 1531 | 1581 | ||
| 1532 | int mq_init_ns(struct ipc_namespace *ns) | 1582 | int mq_init_ns(struct ipc_namespace *ns) |
| 1533 | { | 1583 | { |
| 1584 | struct vfsmount *m; | ||
| 1585 | |||
| 1534 | ns->mq_queues_count = 0; | 1586 | ns->mq_queues_count = 0; |
| 1535 | ns->mq_queues_max = DFLT_QUEUESMAX; | 1587 | ns->mq_queues_max = DFLT_QUEUESMAX; |
| 1536 | ns->mq_msg_max = DFLT_MSGMAX; | 1588 | ns->mq_msg_max = DFLT_MSGMAX; |
| @@ -1538,12 +1590,10 @@ int mq_init_ns(struct ipc_namespace *ns) | |||
| 1538 | ns->mq_msg_default = DFLT_MSG; | 1590 | ns->mq_msg_default = DFLT_MSG; |
| 1539 | ns->mq_msgsize_default = DFLT_MSGSIZE; | 1591 | ns->mq_msgsize_default = DFLT_MSGSIZE; |
| 1540 | 1592 | ||
| 1541 | ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); | 1593 | m = mq_create_mount(ns); |
| 1542 | if (IS_ERR(ns->mq_mnt)) { | 1594 | if (IS_ERR(m)) |
| 1543 | int err = PTR_ERR(ns->mq_mnt); | 1595 | return PTR_ERR(m); |
| 1544 | ns->mq_mnt = NULL; | 1596 | ns->mq_mnt = m; |
| 1545 | return err; | ||
| 1546 | } | ||
| 1547 | return 0; | 1597 | return 0; |
| 1548 | } | 1598 | } |
| 1549 | 1599 | ||
diff --git a/ipc/namespace.c b/ipc/namespace.c index 21607791d62c..b3ca1476ca51 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c | |||
| @@ -42,7 +42,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, | |||
| 42 | goto fail; | 42 | goto fail; |
| 43 | 43 | ||
| 44 | err = -ENOMEM; | 44 | err = -ENOMEM; |
| 45 | ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); | 45 | ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL); |
| 46 | if (ns == NULL) | 46 | if (ns == NULL) |
| 47 | goto fail_dec; | 47 | goto fail_dec; |
| 48 | 48 | ||
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index c9a35f09e4b9..30e39f3932ad 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <linux/workqueue.h> | 7 | #include <linux/workqueue.h> |
| 8 | #include <linux/list.h> | 8 | #include <linux/list.h> |
| 9 | #include <linux/refcount.h> | 9 | #include <linux/refcount.h> |
| 10 | #include <linux/fs_context.h> | ||
| 10 | 11 | ||
| 11 | #define TRACE_CGROUP_PATH_LEN 1024 | 12 | #define TRACE_CGROUP_PATH_LEN 1024 |
| 12 | extern spinlock_t trace_cgroup_path_lock; | 13 | extern spinlock_t trace_cgroup_path_lock; |
| @@ -37,6 +38,31 @@ extern void __init enable_debug_cgroup(void); | |||
| 37 | } while (0) | 38 | } while (0) |
| 38 | 39 | ||
| 39 | /* | 40 | /* |
| 41 | * The cgroup filesystem superblock creation/mount context. | ||
| 42 | */ | ||
| 43 | struct cgroup_fs_context { | ||
| 44 | struct kernfs_fs_context kfc; | ||
| 45 | struct cgroup_root *root; | ||
| 46 | struct cgroup_namespace *ns; | ||
| 47 | unsigned int flags; /* CGRP_ROOT_* flags */ | ||
| 48 | |||
| 49 | /* cgroup1 bits */ | ||
| 50 | bool cpuset_clone_children; | ||
| 51 | bool none; /* User explicitly requested empty subsystem */ | ||
| 52 | bool all_ss; /* Seen 'all' option */ | ||
| 53 | u16 subsys_mask; /* Selected subsystems */ | ||
| 54 | char *name; /* Hierarchy name */ | ||
| 55 | char *release_agent; /* Path for release notifications */ | ||
| 56 | }; | ||
| 57 | |||
| 58 | static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) | ||
| 59 | { | ||
| 60 | struct kernfs_fs_context *kfc = fc->fs_private; | ||
| 61 | |||
| 62 | return container_of(kfc, struct cgroup_fs_context, kfc); | ||
| 63 | } | ||
| 64 | |||
| 65 | /* | ||
| 40 | * A cgroup can be associated with multiple css_sets as different tasks may | 66 | * A cgroup can be associated with multiple css_sets as different tasks may |
| 41 | * belong to different cgroups on different hierarchies. In the other | 67 | * belong to different cgroups on different hierarchies. In the other |
| 42 | * direction, a css_set is naturally associated with multiple cgroups. | 68 | * direction, a css_set is naturally associated with multiple cgroups. |
| @@ -117,16 +143,6 @@ struct cgroup_mgctx { | |||
| 117 | #define DEFINE_CGROUP_MGCTX(name) \ | 143 | #define DEFINE_CGROUP_MGCTX(name) \ |
| 118 | struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) | 144 | struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) |
| 119 | 145 | ||
| 120 | struct cgroup_sb_opts { | ||
| 121 | u16 subsys_mask; | ||
| 122 | unsigned int flags; | ||
| 123 | char *release_agent; | ||
| 124 | bool cpuset_clone_children; | ||
| 125 | char *name; | ||
| 126 | /* User explicitly requested empty subsystem */ | ||
| 127 | bool none; | ||
| 128 | }; | ||
| 129 | |||
| 130 | extern struct mutex cgroup_mutex; | 146 | extern struct mutex cgroup_mutex; |
| 131 | extern spinlock_t css_set_lock; | 147 | extern spinlock_t css_set_lock; |
| 132 | extern struct cgroup_subsys *cgroup_subsys[]; | 148 | extern struct cgroup_subsys *cgroup_subsys[]; |
| @@ -197,12 +213,10 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, | |||
| 197 | struct cgroup_namespace *ns); | 213 | struct cgroup_namespace *ns); |
| 198 | 214 | ||
| 199 | void cgroup_free_root(struct cgroup_root *root); | 215 | void cgroup_free_root(struct cgroup_root *root); |
| 200 | void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); | 216 | void init_cgroup_root(struct cgroup_fs_context *ctx); |
| 201 | int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); | 217 | int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); |
| 202 | int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); | 218 | int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); |
| 203 | struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, | 219 | int cgroup_do_get_tree(struct fs_context *fc); |
| 204 | struct cgroup_root *root, unsigned long magic, | ||
| 205 | struct cgroup_namespace *ns); | ||
| 206 | 220 | ||
| 207 | int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); | 221 | int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); |
| 208 | void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); | 222 | void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); |
| @@ -246,14 +260,15 @@ extern const struct proc_ns_operations cgroupns_operations; | |||
| 246 | */ | 260 | */ |
| 247 | extern struct cftype cgroup1_base_files[]; | 261 | extern struct cftype cgroup1_base_files[]; |
| 248 | extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; | 262 | extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; |
| 263 | extern const struct fs_parameter_description cgroup1_fs_parameters; | ||
| 249 | 264 | ||
| 250 | int proc_cgroupstats_show(struct seq_file *m, void *v); | 265 | int proc_cgroupstats_show(struct seq_file *m, void *v); |
| 251 | bool cgroup1_ssid_disabled(int ssid); | 266 | bool cgroup1_ssid_disabled(int ssid); |
| 252 | void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); | 267 | void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); |
| 253 | void cgroup1_release_agent(struct work_struct *work); | 268 | void cgroup1_release_agent(struct work_struct *work); |
| 254 | void cgroup1_check_for_release(struct cgroup *cgrp); | 269 | void cgroup1_check_for_release(struct cgroup *cgrp); |
| 255 | struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, | 270 | int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param); |
| 256 | void *data, unsigned long magic, | 271 | int cgroup1_get_tree(struct fs_context *fc); |
| 257 | struct cgroup_namespace *ns); | 272 | int cgroup1_reconfigure(struct fs_context *ctx); |
| 258 | 273 | ||
| 259 | #endif /* __CGROUP_INTERNAL_H */ | 274 | #endif /* __CGROUP_INTERNAL_H */ |
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index f94a7229974e..c126b34fd4ff 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c | |||
| @@ -13,9 +13,12 @@ | |||
| 13 | #include <linux/delayacct.h> | 13 | #include <linux/delayacct.h> |
| 14 | #include <linux/pid_namespace.h> | 14 | #include <linux/pid_namespace.h> |
| 15 | #include <linux/cgroupstats.h> | 15 | #include <linux/cgroupstats.h> |
| 16 | #include <linux/fs_parser.h> | ||
| 16 | 17 | ||
| 17 | #include <trace/events/cgroup.h> | 18 | #include <trace/events/cgroup.h> |
| 18 | 19 | ||
| 20 | #define cg_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) | ||
| 21 | |||
| 19 | /* | 22 | /* |
| 20 | * pidlists linger the following amount before being destroyed. The goal | 23 | * pidlists linger the following amount before being destroyed. The goal |
| 21 | * is avoiding frequent destruction in the middle of consecutive read calls | 24 | * is avoiding frequent destruction in the middle of consecutive read calls |
| @@ -906,172 +909,195 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo | |||
| 906 | return 0; | 909 | return 0; |
| 907 | } | 910 | } |
| 908 | 911 | ||
| 909 | static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | 912 | enum cgroup1_param { |
| 910 | { | 913 | Opt_all, |
| 911 | char *token, *o = data; | 914 | Opt_clone_children, |
| 912 | bool all_ss = false, one_ss = false; | 915 | Opt_cpuset_v2_mode, |
| 913 | u16 mask = U16_MAX; | 916 | Opt_name, |
| 914 | struct cgroup_subsys *ss; | 917 | Opt_none, |
| 915 | int nr_opts = 0; | 918 | Opt_noprefix, |
| 916 | int i; | 919 | Opt_release_agent, |
| 917 | 920 | Opt_xattr, | |
| 918 | #ifdef CONFIG_CPUSETS | 921 | }; |
| 919 | mask = ~((u16)1 << cpuset_cgrp_id); | ||
| 920 | #endif | ||
| 921 | |||
| 922 | memset(opts, 0, sizeof(*opts)); | ||
| 923 | 922 | ||
| 924 | while ((token = strsep(&o, ",")) != NULL) { | 923 | static const struct fs_parameter_spec cgroup1_param_specs[] = { |
| 925 | nr_opts++; | 924 | fsparam_flag ("all", Opt_all), |
| 925 | fsparam_flag ("clone_children", Opt_clone_children), | ||
| 926 | fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), | ||
| 927 | fsparam_string("name", Opt_name), | ||
| 928 | fsparam_flag ("none", Opt_none), | ||
| 929 | fsparam_flag ("noprefix", Opt_noprefix), | ||
| 930 | fsparam_string("release_agent", Opt_release_agent), | ||
| 931 | fsparam_flag ("xattr", Opt_xattr), | ||
| 932 | {} | ||
| 933 | }; | ||
| 926 | 934 | ||
| 927 | if (!*token) | 935 | const struct fs_parameter_description cgroup1_fs_parameters = { |
| 928 | return -EINVAL; | 936 | .name = "cgroup1", |
| 929 | if (!strcmp(token, "none")) { | 937 | .specs = cgroup1_param_specs, |
| 930 | /* Explicitly have no subsystems */ | 938 | }; |
| 931 | opts->none = true; | ||
| 932 | continue; | ||
| 933 | } | ||
| 934 | if (!strcmp(token, "all")) { | ||
| 935 | /* Mutually exclusive option 'all' + subsystem name */ | ||
| 936 | if (one_ss) | ||
| 937 | return -EINVAL; | ||
| 938 | all_ss = true; | ||
| 939 | continue; | ||
| 940 | } | ||
| 941 | if (!strcmp(token, "noprefix")) { | ||
| 942 | opts->flags |= CGRP_ROOT_NOPREFIX; | ||
| 943 | continue; | ||
| 944 | } | ||
| 945 | if (!strcmp(token, "clone_children")) { | ||
| 946 | opts->cpuset_clone_children = true; | ||
| 947 | continue; | ||
| 948 | } | ||
| 949 | if (!strcmp(token, "cpuset_v2_mode")) { | ||
| 950 | opts->flags |= CGRP_ROOT_CPUSET_V2_MODE; | ||
| 951 | continue; | ||
| 952 | } | ||
| 953 | if (!strcmp(token, "xattr")) { | ||
| 954 | opts->flags |= CGRP_ROOT_XATTR; | ||
| 955 | continue; | ||
| 956 | } | ||
| 957 | if (!strncmp(token, "release_agent=", 14)) { | ||
| 958 | /* Specifying two release agents is forbidden */ | ||
| 959 | if (opts->release_agent) | ||
| 960 | return -EINVAL; | ||
| 961 | opts->release_agent = | ||
| 962 | kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); | ||
| 963 | if (!opts->release_agent) | ||
| 964 | return -ENOMEM; | ||
| 965 | continue; | ||
| 966 | } | ||
| 967 | if (!strncmp(token, "name=", 5)) { | ||
| 968 | const char *name = token + 5; | ||
| 969 | |||
| 970 | /* blocked by boot param? */ | ||
| 971 | if (cgroup_no_v1_named) | ||
| 972 | return -ENOENT; | ||
| 973 | /* Can't specify an empty name */ | ||
| 974 | if (!strlen(name)) | ||
| 975 | return -EINVAL; | ||
| 976 | /* Must match [\w.-]+ */ | ||
| 977 | for (i = 0; i < strlen(name); i++) { | ||
| 978 | char c = name[i]; | ||
| 979 | if (isalnum(c)) | ||
| 980 | continue; | ||
| 981 | if ((c == '.') || (c == '-') || (c == '_')) | ||
| 982 | continue; | ||
| 983 | return -EINVAL; | ||
| 984 | } | ||
| 985 | /* Specifying two names is forbidden */ | ||
| 986 | if (opts->name) | ||
| 987 | return -EINVAL; | ||
| 988 | opts->name = kstrndup(name, | ||
| 989 | MAX_CGROUP_ROOT_NAMELEN - 1, | ||
| 990 | GFP_KERNEL); | ||
| 991 | if (!opts->name) | ||
| 992 | return -ENOMEM; | ||
| 993 | 939 | ||
| 994 | continue; | 940 | int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) |
| 941 | { | ||
| 942 | struct cgroup_fs_context *ctx = cgroup_fc2context(fc); | ||
| 943 | struct cgroup_subsys *ss; | ||
| 944 | struct fs_parse_result result; | ||
| 945 | int opt, i; | ||
| 946 | |||
| 947 | opt = fs_parse(fc, &cgroup1_fs_parameters, param, &result); | ||
| 948 | if (opt == -ENOPARAM) { | ||
| 949 | if (strcmp(param->key, "source") == 0) { | ||
| 950 | fc->source = param->string; | ||
| 951 | param->string = NULL; | ||
| 952 | return 0; | ||
| 995 | } | 953 | } |
| 996 | |||
| 997 | for_each_subsys(ss, i) { | 954 | for_each_subsys(ss, i) { |
| 998 | if (strcmp(token, ss->legacy_name)) | 955 | if (strcmp(param->key, ss->legacy_name)) |
| 999 | continue; | 956 | continue; |
| 1000 | if (!cgroup_ssid_enabled(i)) | 957 | ctx->subsys_mask |= (1 << i); |
| 958 | return 0; | ||
| 959 | } | ||
| 960 | return cg_invalf(fc, "cgroup1: Unknown subsys name '%s'", param->key); | ||
| 961 | } | ||
| 962 | if (opt < 0) | ||
| 963 | return opt; | ||
| 964 | |||
| 965 | switch (opt) { | ||
| 966 | case Opt_none: | ||
| 967 | /* Explicitly have no subsystems */ | ||
| 968 | ctx->none = true; | ||
| 969 | break; | ||
| 970 | case Opt_all: | ||
| 971 | ctx->all_ss = true; | ||
| 972 | break; | ||
| 973 | case Opt_noprefix: | ||
| 974 | ctx->flags |= CGRP_ROOT_NOPREFIX; | ||
| 975 | break; | ||
| 976 | case Opt_clone_children: | ||
| 977 | ctx->cpuset_clone_children = true; | ||
| 978 | break; | ||
| 979 | case Opt_cpuset_v2_mode: | ||
| 980 | ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; | ||
| 981 | break; | ||
| 982 | case Opt_xattr: | ||
| 983 | ctx->flags |= CGRP_ROOT_XATTR; | ||
| 984 | break; | ||
| 985 | case Opt_release_agent: | ||
| 986 | /* Specifying two release agents is forbidden */ | ||
| 987 | if (ctx->release_agent) | ||
| 988 | return cg_invalf(fc, "cgroup1: release_agent respecified"); | ||
| 989 | ctx->release_agent = param->string; | ||
| 990 | param->string = NULL; | ||
| 991 | break; | ||
| 992 | case Opt_name: | ||
| 993 | /* blocked by boot param? */ | ||
| 994 | if (cgroup_no_v1_named) | ||
| 995 | return -ENOENT; | ||
| 996 | /* Can't specify an empty name */ | ||
| 997 | if (!param->size) | ||
| 998 | return cg_invalf(fc, "cgroup1: Empty name"); | ||
| 999 | if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1) | ||
| 1000 | return cg_invalf(fc, "cgroup1: Name too long"); | ||
| 1001 | /* Must match [\w.-]+ */ | ||
| 1002 | for (i = 0; i < param->size; i++) { | ||
| 1003 | char c = param->string[i]; | ||
| 1004 | if (isalnum(c)) | ||
| 1001 | continue; | 1005 | continue; |
| 1002 | if (cgroup1_ssid_disabled(i)) | 1006 | if ((c == '.') || (c == '-') || (c == '_')) |
| 1003 | continue; | 1007 | continue; |
| 1004 | 1008 | return cg_invalf(fc, "cgroup1: Invalid name"); | |
| 1005 | /* Mutually exclusive option 'all' + subsystem name */ | ||
| 1006 | if (all_ss) | ||
| 1007 | return -EINVAL; | ||
| 1008 | opts->subsys_mask |= (1 << i); | ||
| 1009 | one_ss = true; | ||
| 1010 | |||
| 1011 | break; | ||
| 1012 | } | 1009 | } |
| 1013 | if (i == CGROUP_SUBSYS_COUNT) | 1010 | /* Specifying two names is forbidden */ |
| 1014 | return -ENOENT; | 1011 | if (ctx->name) |
| 1012 | return cg_invalf(fc, "cgroup1: name respecified"); | ||
| 1013 | ctx->name = param->string; | ||
| 1014 | param->string = NULL; | ||
| 1015 | break; | ||
| 1015 | } | 1016 | } |
| 1017 | return 0; | ||
| 1018 | } | ||
| 1019 | |||
| 1020 | static int check_cgroupfs_options(struct fs_context *fc) | ||
| 1021 | { | ||
| 1022 | struct cgroup_fs_context *ctx = cgroup_fc2context(fc); | ||
| 1023 | u16 mask = U16_MAX; | ||
| 1024 | u16 enabled = 0; | ||
| 1025 | struct cgroup_subsys *ss; | ||
| 1026 | int i; | ||
| 1027 | |||
| 1028 | #ifdef CONFIG_CPUSETS | ||
| 1029 | mask = ~((u16)1 << cpuset_cgrp_id); | ||
| 1030 | #endif | ||
| 1031 | for_each_subsys(ss, i) | ||
| 1032 | if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i)) | ||
| 1033 | enabled |= 1 << i; | ||
| 1034 | |||
| 1035 | ctx->subsys_mask &= enabled; | ||
| 1016 | 1036 | ||
| 1017 | /* | 1037 | /* |
| 1018 | * If the 'all' option was specified select all the subsystems, | 1038 | * In absense of 'none', 'name=' or subsystem name options, |
| 1019 | * otherwise if 'none', 'name=' and a subsystem name options were | 1039 | * let's default to 'all'. |
| 1020 | * not specified, let's default to 'all' | ||
| 1021 | */ | 1040 | */ |
| 1022 | if (all_ss || (!one_ss && !opts->none && !opts->name)) | 1041 | if (!ctx->subsys_mask && !ctx->none && !ctx->name) |
| 1023 | for_each_subsys(ss, i) | 1042 | ctx->all_ss = true; |
| 1024 | if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i)) | 1043 | |
| 1025 | opts->subsys_mask |= (1 << i); | 1044 | if (ctx->all_ss) { |
| 1045 | /* Mutually exclusive option 'all' + subsystem name */ | ||
| 1046 | if (ctx->subsys_mask) | ||
| 1047 | return cg_invalf(fc, "cgroup1: subsys name conflicts with all"); | ||
| 1048 | /* 'all' => select all the subsystems */ | ||
| 1049 | ctx->subsys_mask = enabled; | ||
| 1050 | } | ||
| 1026 | 1051 | ||
| 1027 | /* | 1052 | /* |
| 1028 | * We either have to specify by name or by subsystems. (So all | 1053 | * We either have to specify by name or by subsystems. (So all |
| 1029 | * empty hierarchies must have a name). | 1054 | * empty hierarchies must have a name). |
| 1030 | */ | 1055 | */ |
| 1031 | if (!opts->subsys_mask && !opts->name) | 1056 | if (!ctx->subsys_mask && !ctx->name) |
| 1032 | return -EINVAL; | 1057 | return cg_invalf(fc, "cgroup1: Need name or subsystem set"); |
| 1033 | 1058 | ||
| 1034 | /* | 1059 | /* |
| 1035 | * Option noprefix was introduced just for backward compatibility | 1060 | * Option noprefix was introduced just for backward compatibility |
| 1036 | * with the old cpuset, so we allow noprefix only if mounting just | 1061 | * with the old cpuset, so we allow noprefix only if mounting just |
| 1037 | * the cpuset subsystem. | 1062 | * the cpuset subsystem. |
| 1038 | */ | 1063 | */ |
| 1039 | if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask)) | 1064 | if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask)) |
| 1040 | return -EINVAL; | 1065 | return cg_invalf(fc, "cgroup1: noprefix used incorrectly"); |
| 1041 | 1066 | ||
| 1042 | /* Can't specify "none" and some subsystems */ | 1067 | /* Can't specify "none" and some subsystems */ |
| 1043 | if (opts->subsys_mask && opts->none) | 1068 | if (ctx->subsys_mask && ctx->none) |
| 1044 | return -EINVAL; | 1069 | return cg_invalf(fc, "cgroup1: none used incorrectly"); |
| 1045 | 1070 | ||
| 1046 | return 0; | 1071 | return 0; |
| 1047 | } | 1072 | } |
| 1048 | 1073 | ||
| 1049 | static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) | 1074 | int cgroup1_reconfigure(struct fs_context *fc) |
| 1050 | { | 1075 | { |
| 1051 | int ret = 0; | 1076 | struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
| 1077 | struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb); | ||
| 1052 | struct cgroup_root *root = cgroup_root_from_kf(kf_root); | 1078 | struct cgroup_root *root = cgroup_root_from_kf(kf_root); |
| 1053 | struct cgroup_sb_opts opts; | 1079 | int ret = 0; |
| 1054 | u16 added_mask, removed_mask; | 1080 | u16 added_mask, removed_mask; |
| 1055 | 1081 | ||
| 1056 | cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); | 1082 | cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); |
| 1057 | 1083 | ||
| 1058 | /* See what subsystems are wanted */ | 1084 | /* See what subsystems are wanted */ |
| 1059 | ret = parse_cgroupfs_options(data, &opts); | 1085 | ret = check_cgroupfs_options(fc); |
| 1060 | if (ret) | 1086 | if (ret) |
| 1061 | goto out_unlock; | 1087 | goto out_unlock; |
| 1062 | 1088 | ||
| 1063 | if (opts.subsys_mask != root->subsys_mask || opts.release_agent) | 1089 | if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent) |
| 1064 | pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n", | 1090 | pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n", |
| 1065 | task_tgid_nr(current), current->comm); | 1091 | task_tgid_nr(current), current->comm); |
| 1066 | 1092 | ||
| 1067 | added_mask = opts.subsys_mask & ~root->subsys_mask; | 1093 | added_mask = ctx->subsys_mask & ~root->subsys_mask; |
| 1068 | removed_mask = root->subsys_mask & ~opts.subsys_mask; | 1094 | removed_mask = root->subsys_mask & ~ctx->subsys_mask; |
| 1069 | 1095 | ||
| 1070 | /* Don't allow flags or name to change at remount */ | 1096 | /* Don't allow flags or name to change at remount */ |
| 1071 | if ((opts.flags ^ root->flags) || | 1097 | if ((ctx->flags ^ root->flags) || |
| 1072 | (opts.name && strcmp(opts.name, root->name))) { | 1098 | (ctx->name && strcmp(ctx->name, root->name))) { |
| 1073 | pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", | 1099 | cg_invalf(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"", |
| 1074 | opts.flags, opts.name ?: "", root->flags, root->name); | 1100 | ctx->flags, ctx->name ?: "", root->flags, root->name); |
| 1075 | ret = -EINVAL; | 1101 | ret = -EINVAL; |
| 1076 | goto out_unlock; | 1102 | goto out_unlock; |
| 1077 | } | 1103 | } |
| @@ -1088,17 +1114,15 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) | |||
| 1088 | 1114 | ||
| 1089 | WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask)); | 1115 | WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask)); |
| 1090 | 1116 | ||
| 1091 | if (opts.release_agent) { | 1117 | if (ctx->release_agent) { |
| 1092 | spin_lock(&release_agent_path_lock); | 1118 | spin_lock(&release_agent_path_lock); |
| 1093 | strcpy(root->release_agent_path, opts.release_agent); | 1119 | strcpy(root->release_agent_path, ctx->release_agent); |
| 1094 | spin_unlock(&release_agent_path_lock); | 1120 | spin_unlock(&release_agent_path_lock); |
| 1095 | } | 1121 | } |
| 1096 | 1122 | ||
| 1097 | trace_cgroup_remount(root); | 1123 | trace_cgroup_remount(root); |
| 1098 | 1124 | ||
| 1099 | out_unlock: | 1125 | out_unlock: |
| 1100 | kfree(opts.release_agent); | ||
| 1101 | kfree(opts.name); | ||
| 1102 | mutex_unlock(&cgroup_mutex); | 1126 | mutex_unlock(&cgroup_mutex); |
| 1103 | return ret; | 1127 | return ret; |
| 1104 | } | 1128 | } |
| @@ -1106,28 +1130,30 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) | |||
| 1106 | struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { | 1130 | struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { |
| 1107 | .rename = cgroup1_rename, | 1131 | .rename = cgroup1_rename, |
| 1108 | .show_options = cgroup1_show_options, | 1132 | .show_options = cgroup1_show_options, |
| 1109 | .remount_fs = cgroup1_remount, | ||
| 1110 | .mkdir = cgroup_mkdir, | 1133 | .mkdir = cgroup_mkdir, |
| 1111 | .rmdir = cgroup_rmdir, | 1134 | .rmdir = cgroup_rmdir, |
| 1112 | .show_path = cgroup_show_path, | 1135 | .show_path = cgroup_show_path, |
| 1113 | }; | 1136 | }; |
| 1114 | 1137 | ||
| 1115 | struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, | 1138 | /* |
| 1116 | void *data, unsigned long magic, | 1139 | * The guts of cgroup1 mount - find or create cgroup_root to use. |
| 1117 | struct cgroup_namespace *ns) | 1140 | * Called with cgroup_mutex held; returns 0 on success, -E... on |
| 1141 | * error and positive - in case when the candidate is busy dying. | ||
| 1142 | * On success it stashes a reference to cgroup_root into given | ||
| 1143 | * cgroup_fs_context; that reference is *NOT* counting towards the | ||
| 1144 | * cgroup_root refcount. | ||
| 1145 | */ | ||
| 1146 | static int cgroup1_root_to_use(struct fs_context *fc) | ||
| 1118 | { | 1147 | { |
| 1119 | struct cgroup_sb_opts opts; | 1148 | struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
| 1120 | struct cgroup_root *root; | 1149 | struct cgroup_root *root; |
| 1121 | struct cgroup_subsys *ss; | 1150 | struct cgroup_subsys *ss; |
| 1122 | struct dentry *dentry; | ||
| 1123 | int i, ret; | 1151 | int i, ret; |
| 1124 | 1152 | ||
| 1125 | cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); | ||
| 1126 | |||
| 1127 | /* First find the desired set of subsystems */ | 1153 | /* First find the desired set of subsystems */ |
| 1128 | ret = parse_cgroupfs_options(data, &opts); | 1154 | ret = check_cgroupfs_options(fc); |
| 1129 | if (ret) | 1155 | if (ret) |
| 1130 | goto out_unlock; | 1156 | return ret; |
| 1131 | 1157 | ||
| 1132 | /* | 1158 | /* |
| 1133 | * Destruction of cgroup root is asynchronous, so subsystems may | 1159 | * Destruction of cgroup root is asynchronous, so subsystems may |
| @@ -1137,16 +1163,12 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, | |||
| 1137 | * starting. Testing ref liveliness is good enough. | 1163 | * starting. Testing ref liveliness is good enough. |
| 1138 | */ | 1164 | */ |
| 1139 | for_each_subsys(ss, i) { | 1165 | for_each_subsys(ss, i) { |
| 1140 | if (!(opts.subsys_mask & (1 << i)) || | 1166 | if (!(ctx->subsys_mask & (1 << i)) || |
| 1141 | ss->root == &cgrp_dfl_root) | 1167 | ss->root == &cgrp_dfl_root) |
| 1142 | continue; | 1168 | continue; |
| 1143 | 1169 | ||
| 1144 | if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) { | 1170 | if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) |
| 1145 | mutex_unlock(&cgroup_mutex); | 1171 | return 1; /* restart */ |
| 1146 | msleep(10); | ||
| 1147 | ret = restart_syscall(); | ||
| 1148 | goto out_free; | ||
| 1149 | } | ||
| 1150 | cgroup_put(&ss->root->cgrp); | 1172 | cgroup_put(&ss->root->cgrp); |
| 1151 | } | 1173 | } |
| 1152 | 1174 | ||
| @@ -1161,8 +1183,8 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, | |||
| 1161 | * name matches but sybsys_mask doesn't, we should fail. | 1183 | * name matches but sybsys_mask doesn't, we should fail. |
| 1162 | * Remember whether name matched. | 1184 | * Remember whether name matched. |
| 1163 | */ | 1185 | */ |
| 1164 | if (opts.name) { | 1186 | if (ctx->name) { |
| 1165 | if (strcmp(opts.name, root->name)) | 1187 | if (strcmp(ctx->name, root->name)) |
| 1166 | continue; | 1188 | continue; |
| 1167 | name_match = true; | 1189 | name_match = true; |
| 1168 | } | 1190 | } |
| @@ -1171,19 +1193,18 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, | |||
| 1171 | * If we asked for subsystems (or explicitly for no | 1193 | * If we asked for subsystems (or explicitly for no |
| 1172 | * subsystems) then they must match. | 1194 | * subsystems) then they must match. |
| 1173 | */ | 1195 | */ |
| 1174 | if ((opts.subsys_mask || opts.none) && | 1196 | if ((ctx->subsys_mask || ctx->none) && |
| 1175 | (opts.subsys_mask != root->subsys_mask)) { | 1197 | (ctx->subsys_mask != root->subsys_mask)) { |
| 1176 | if (!name_match) | 1198 | if (!name_match) |
| 1177 | continue; | 1199 | continue; |
| 1178 | ret = -EBUSY; | 1200 | return -EBUSY; |
| 1179 | goto out_unlock; | ||
| 1180 | } | 1201 | } |
| 1181 | 1202 | ||
| 1182 | if (root->flags ^ opts.flags) | 1203 | if (root->flags ^ ctx->flags) |
| 1183 | pr_warn("new mount options do not match the existing superblock, will be ignored\n"); | 1204 | pr_warn("new mount options do not match the existing superblock, will be ignored\n"); |
| 1184 | 1205 | ||
| 1185 | ret = 0; | 1206 | ctx->root = root; |
| 1186 | goto out_unlock; | 1207 | return 0; |
| 1187 | } | 1208 | } |
| 1188 | 1209 | ||
| 1189 | /* | 1210 | /* |
| @@ -1191,55 +1212,58 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, | |||
| 1191 | * specification is allowed for already existing hierarchies but we | 1212 | * specification is allowed for already existing hierarchies but we |
| 1192 | * can't create new one without subsys specification. | 1213 | * can't create new one without subsys specification. |
| 1193 | */ | 1214 | */ |
| 1194 | if (!opts.subsys_mask && !opts.none) { | 1215 | if (!ctx->subsys_mask && !ctx->none) |
| 1195 | ret = -EINVAL; | 1216 | return cg_invalf(fc, "cgroup1: No subsys list or none specified"); |
| 1196 | goto out_unlock; | ||
| 1197 | } | ||
| 1198 | 1217 | ||
| 1199 | /* Hierarchies may only be created in the initial cgroup namespace. */ | 1218 | /* Hierarchies may only be created in the initial cgroup namespace. */ |
| 1200 | if (ns != &init_cgroup_ns) { | 1219 | if (ctx->ns != &init_cgroup_ns) |
| 1201 | ret = -EPERM; | 1220 | return -EPERM; |
| 1202 | goto out_unlock; | ||
| 1203 | } | ||
| 1204 | 1221 | ||
| 1205 | root = kzalloc(sizeof(*root), GFP_KERNEL); | 1222 | root = kzalloc(sizeof(*root), GFP_KERNEL); |
| 1206 | if (!root) { | 1223 | if (!root) |
| 1207 | ret = -ENOMEM; | 1224 | return -ENOMEM; |
| 1208 | goto out_unlock; | ||
| 1209 | } | ||
| 1210 | 1225 | ||
| 1211 | init_cgroup_root(root, &opts); | 1226 | ctx->root = root; |
| 1227 | init_cgroup_root(ctx); | ||
| 1212 | 1228 | ||
| 1213 | ret = cgroup_setup_root(root, opts.subsys_mask); | 1229 | ret = cgroup_setup_root(root, ctx->subsys_mask); |
| 1214 | if (ret) | 1230 | if (ret) |
| 1215 | cgroup_free_root(root); | 1231 | cgroup_free_root(root); |
| 1232 | return ret; | ||
| 1233 | } | ||
| 1216 | 1234 | ||
| 1217 | out_unlock: | 1235 | int cgroup1_get_tree(struct fs_context *fc) |
| 1218 | if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { | 1236 | { |
| 1219 | mutex_unlock(&cgroup_mutex); | 1237 | struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
| 1220 | msleep(10); | 1238 | int ret; |
| 1221 | ret = restart_syscall(); | ||
| 1222 | goto out_free; | ||
| 1223 | } | ||
| 1224 | mutex_unlock(&cgroup_mutex); | ||
| 1225 | out_free: | ||
| 1226 | kfree(opts.release_agent); | ||
| 1227 | kfree(opts.name); | ||
| 1228 | 1239 | ||
| 1229 | if (ret) | 1240 | /* Check if the caller has permission to mount. */ |
| 1230 | return ERR_PTR(ret); | 1241 | if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN)) |
| 1242 | return -EPERM; | ||
| 1243 | |||
| 1244 | cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); | ||
| 1245 | |||
| 1246 | ret = cgroup1_root_to_use(fc); | ||
| 1247 | if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt)) | ||
| 1248 | ret = 1; /* restart */ | ||
| 1231 | 1249 | ||
| 1232 | dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, | 1250 | mutex_unlock(&cgroup_mutex); |
| 1233 | CGROUP_SUPER_MAGIC, ns); | ||
| 1234 | 1251 | ||
| 1235 | if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { | 1252 | if (!ret) |
| 1236 | struct super_block *sb = dentry->d_sb; | 1253 | ret = cgroup_do_get_tree(fc); |
| 1237 | dput(dentry); | 1254 | |
| 1255 | if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { | ||
| 1256 | struct super_block *sb = fc->root->d_sb; | ||
| 1257 | dput(fc->root); | ||
| 1238 | deactivate_locked_super(sb); | 1258 | deactivate_locked_super(sb); |
| 1259 | ret = 1; | ||
| 1260 | } | ||
| 1261 | |||
| 1262 | if (unlikely(ret > 0)) { | ||
| 1239 | msleep(10); | 1263 | msleep(10); |
| 1240 | dentry = ERR_PTR(restart_syscall()); | 1264 | return restart_syscall(); |
| 1241 | } | 1265 | } |
| 1242 | return dentry; | 1266 | return ret; |
| 1243 | } | 1267 | } |
| 1244 | 1268 | ||
| 1245 | static int __init cgroup1_wq_init(void) | 1269 | static int __init cgroup1_wq_init(void) |
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index eef24a25bda7..3f2b4bde0f9c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c | |||
| @@ -54,6 +54,7 @@ | |||
| 54 | #include <linux/proc_ns.h> | 54 | #include <linux/proc_ns.h> |
| 55 | #include <linux/nsproxy.h> | 55 | #include <linux/nsproxy.h> |
| 56 | #include <linux/file.h> | 56 | #include <linux/file.h> |
| 57 | #include <linux/fs_parser.h> | ||
| 57 | #include <linux/sched/cputime.h> | 58 | #include <linux/sched/cputime.h> |
| 58 | #include <linux/psi.h> | 59 | #include <linux/psi.h> |
| 59 | #include <net/sock.h> | 60 | #include <net/sock.h> |
| @@ -1772,26 +1773,37 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, | |||
| 1772 | return len; | 1773 | return len; |
| 1773 | } | 1774 | } |
| 1774 | 1775 | ||
| 1775 | static int parse_cgroup_root_flags(char *data, unsigned int *root_flags) | 1776 | enum cgroup2_param { |
| 1776 | { | 1777 | Opt_nsdelegate, |
| 1777 | char *token; | 1778 | nr__cgroup2_params |
| 1779 | }; | ||
| 1778 | 1780 | ||
| 1779 | *root_flags = 0; | 1781 | static const struct fs_parameter_spec cgroup2_param_specs[] = { |
| 1782 | fsparam_flag ("nsdelegate", Opt_nsdelegate), | ||
| 1783 | {} | ||
| 1784 | }; | ||
| 1780 | 1785 | ||
| 1781 | if (!data || *data == '\0') | 1786 | static const struct fs_parameter_description cgroup2_fs_parameters = { |
| 1782 | return 0; | 1787 | .name = "cgroup2", |
| 1788 | .specs = cgroup2_param_specs, | ||
| 1789 | }; | ||
| 1783 | 1790 | ||
| 1784 | while ((token = strsep(&data, ",")) != NULL) { | 1791 | static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param) |
| 1785 | if (!strcmp(token, "nsdelegate")) { | 1792 | { |
| 1786 | *root_flags |= CGRP_ROOT_NS_DELEGATE; | 1793 | struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
| 1787 | continue; | 1794 | struct fs_parse_result result; |
| 1788 | } | 1795 | int opt; |
| 1789 | 1796 | ||
| 1790 | pr_err("cgroup2: unknown option \"%s\"\n", token); | 1797 | opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result); |
| 1791 | return -EINVAL; | 1798 | if (opt < 0) |
| 1792 | } | 1799 | return opt; |
| 1793 | 1800 | ||
| 1794 | return 0; | 1801 | switch (opt) { |
| 1802 | case Opt_nsdelegate: | ||
| 1803 | ctx->flags |= CGRP_ROOT_NS_DELEGATE; | ||
| 1804 | return 0; | ||
| 1805 | } | ||
| 1806 | return -EINVAL; | ||
| 1795 | } | 1807 | } |
| 1796 | 1808 | ||
| 1797 | static void apply_cgroup_root_flags(unsigned int root_flags) | 1809 | static void apply_cgroup_root_flags(unsigned int root_flags) |
| @@ -1811,16 +1823,11 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root | |||
| 1811 | return 0; | 1823 | return 0; |
| 1812 | } | 1824 | } |
| 1813 | 1825 | ||
| 1814 | static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) | 1826 | static int cgroup_reconfigure(struct fs_context *fc) |
| 1815 | { | 1827 | { |
| 1816 | unsigned int root_flags; | 1828 | struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
| 1817 | int ret; | ||
| 1818 | |||
| 1819 | ret = parse_cgroup_root_flags(data, &root_flags); | ||
| 1820 | if (ret) | ||
| 1821 | return ret; | ||
| 1822 | 1829 | ||
| 1823 | apply_cgroup_root_flags(root_flags); | 1830 | apply_cgroup_root_flags(ctx->flags); |
| 1824 | return 0; | 1831 | return 0; |
| 1825 | } | 1832 | } |
| 1826 | 1833 | ||
| @@ -1908,8 +1915,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
| 1908 | INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); | 1915 | INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); |
| 1909 | } | 1916 | } |
| 1910 | 1917 | ||
| 1911 | void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) | 1918 | void init_cgroup_root(struct cgroup_fs_context *ctx) |
| 1912 | { | 1919 | { |
| 1920 | struct cgroup_root *root = ctx->root; | ||
| 1913 | struct cgroup *cgrp = &root->cgrp; | 1921 | struct cgroup *cgrp = &root->cgrp; |
| 1914 | 1922 | ||
| 1915 | INIT_LIST_HEAD(&root->root_list); | 1923 | INIT_LIST_HEAD(&root->root_list); |
| @@ -1918,12 +1926,12 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) | |||
| 1918 | init_cgroup_housekeeping(cgrp); | 1926 | init_cgroup_housekeeping(cgrp); |
| 1919 | idr_init(&root->cgroup_idr); | 1927 | idr_init(&root->cgroup_idr); |
| 1920 | 1928 | ||
| 1921 | root->flags = opts->flags; | 1929 | root->flags = ctx->flags; |
| 1922 | if (opts->release_agent) | 1930 | if (ctx->release_agent) |
| 1923 | strscpy(root->release_agent_path, opts->release_agent, PATH_MAX); | 1931 | strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX); |
| 1924 | if (opts->name) | 1932 | if (ctx->name) |
| 1925 | strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); | 1933 | strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN); |
| 1926 | if (opts->cpuset_clone_children) | 1934 | if (ctx->cpuset_clone_children) |
| 1927 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); | 1935 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); |
| 1928 | } | 1936 | } |
| 1929 | 1937 | ||
| @@ -2028,60 +2036,104 @@ out: | |||
| 2028 | return ret; | 2036 | return ret; |
| 2029 | } | 2037 | } |
| 2030 | 2038 | ||
| 2031 | struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, | 2039 | int cgroup_do_get_tree(struct fs_context *fc) |
| 2032 | struct cgroup_root *root, unsigned long magic, | ||
| 2033 | struct cgroup_namespace *ns) | ||
| 2034 | { | 2040 | { |
| 2035 | struct dentry *dentry; | 2041 | struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
| 2036 | bool new_sb = false; | 2042 | int ret; |
| 2037 | 2043 | ||
| 2038 | dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb); | 2044 | ctx->kfc.root = ctx->root->kf_root; |
| 2045 | if (fc->fs_type == &cgroup2_fs_type) | ||
| 2046 | ctx->kfc.magic = CGROUP2_SUPER_MAGIC; | ||
| 2047 | else | ||
| 2048 | ctx->kfc.magic = CGROUP_SUPER_MAGIC; | ||
| 2049 | ret = kernfs_get_tree(fc); | ||
| 2039 | 2050 | ||
| 2040 | /* | 2051 | /* |
| 2041 | * In non-init cgroup namespace, instead of root cgroup's dentry, | 2052 | * In non-init cgroup namespace, instead of root cgroup's dentry, |
| 2042 | * we return the dentry corresponding to the cgroupns->root_cgrp. | 2053 | * we return the dentry corresponding to the cgroupns->root_cgrp. |
| 2043 | */ | 2054 | */ |
| 2044 | if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { | 2055 | if (!ret && ctx->ns != &init_cgroup_ns) { |
| 2045 | struct dentry *nsdentry; | 2056 | struct dentry *nsdentry; |
| 2046 | struct super_block *sb = dentry->d_sb; | 2057 | struct super_block *sb = fc->root->d_sb; |
| 2047 | struct cgroup *cgrp; | 2058 | struct cgroup *cgrp; |
| 2048 | 2059 | ||
| 2049 | mutex_lock(&cgroup_mutex); | 2060 | mutex_lock(&cgroup_mutex); |
| 2050 | spin_lock_irq(&css_set_lock); | 2061 | spin_lock_irq(&css_set_lock); |
| 2051 | 2062 | ||
| 2052 | cgrp = cset_cgroup_from_root(ns->root_cset, root); | 2063 | cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root); |
| 2053 | 2064 | ||
| 2054 | spin_unlock_irq(&css_set_lock); | 2065 | spin_unlock_irq(&css_set_lock); |
| 2055 | mutex_unlock(&cgroup_mutex); | 2066 | mutex_unlock(&cgroup_mutex); |
| 2056 | 2067 | ||
| 2057 | nsdentry = kernfs_node_dentry(cgrp->kn, sb); | 2068 | nsdentry = kernfs_node_dentry(cgrp->kn, sb); |
| 2058 | dput(dentry); | 2069 | dput(fc->root); |
| 2059 | if (IS_ERR(nsdentry)) | 2070 | fc->root = nsdentry; |
| 2071 | if (IS_ERR(nsdentry)) { | ||
| 2072 | ret = PTR_ERR(nsdentry); | ||
| 2060 | deactivate_locked_super(sb); | 2073 | deactivate_locked_super(sb); |
| 2061 | dentry = nsdentry; | 2074 | } |
| 2062 | } | 2075 | } |
| 2063 | 2076 | ||
| 2064 | if (!new_sb) | 2077 | if (!ctx->kfc.new_sb_created) |
| 2065 | cgroup_put(&root->cgrp); | 2078 | cgroup_put(&ctx->root->cgrp); |
| 2066 | 2079 | ||
| 2067 | return dentry; | 2080 | return ret; |
| 2068 | } | 2081 | } |
| 2069 | 2082 | ||
| 2070 | static struct dentry *cgroup_mount(struct file_system_type *fs_type, | 2083 | /* |
| 2071 | int flags, const char *unused_dev_name, | 2084 | * Destroy a cgroup filesystem context. |
| 2072 | void *data) | 2085 | */ |
| 2086 | static void cgroup_fs_context_free(struct fs_context *fc) | ||
| 2073 | { | 2087 | { |
| 2074 | struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; | 2088 | struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
| 2075 | struct dentry *dentry; | 2089 | |
| 2090 | kfree(ctx->name); | ||
| 2091 | kfree(ctx->release_agent); | ||
| 2092 | put_cgroup_ns(ctx->ns); | ||
| 2093 | kernfs_free_fs_context(fc); | ||
| 2094 | kfree(ctx); | ||
| 2095 | } | ||
| 2096 | |||
| 2097 | static int cgroup_get_tree(struct fs_context *fc) | ||
| 2098 | { | ||
| 2099 | struct cgroup_fs_context *ctx = cgroup_fc2context(fc); | ||
| 2076 | int ret; | 2100 | int ret; |
| 2077 | 2101 | ||
| 2078 | get_cgroup_ns(ns); | 2102 | cgrp_dfl_visible = true; |
| 2103 | cgroup_get_live(&cgrp_dfl_root.cgrp); | ||
| 2104 | ctx->root = &cgrp_dfl_root; | ||
| 2079 | 2105 | ||
| 2080 | /* Check if the caller has permission to mount. */ | 2106 | ret = cgroup_do_get_tree(fc); |
| 2081 | if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) { | 2107 | if (!ret) |
| 2082 | put_cgroup_ns(ns); | 2108 | apply_cgroup_root_flags(ctx->flags); |
| 2083 | return ERR_PTR(-EPERM); | 2109 | return ret; |
| 2084 | } | 2110 | } |
| 2111 | |||
| 2112 | static const struct fs_context_operations cgroup_fs_context_ops = { | ||
| 2113 | .free = cgroup_fs_context_free, | ||
| 2114 | .parse_param = cgroup2_parse_param, | ||
| 2115 | .get_tree = cgroup_get_tree, | ||
| 2116 | .reconfigure = cgroup_reconfigure, | ||
| 2117 | }; | ||
| 2118 | |||
| 2119 | static const struct fs_context_operations cgroup1_fs_context_ops = { | ||
| 2120 | .free = cgroup_fs_context_free, | ||
| 2121 | .parse_param = cgroup1_parse_param, | ||
| 2122 | .get_tree = cgroup1_get_tree, | ||
| 2123 | .reconfigure = cgroup1_reconfigure, | ||
| 2124 | }; | ||
| 2125 | |||
| 2126 | /* | ||
| 2127 | * Initialise the cgroup filesystem creation/reconfiguration context. Notably, | ||
| 2128 | * we select the namespace we're going to use. | ||
| 2129 | */ | ||
| 2130 | static int cgroup_init_fs_context(struct fs_context *fc) | ||
| 2131 | { | ||
| 2132 | struct cgroup_fs_context *ctx; | ||
| 2133 | |||
| 2134 | ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL); | ||
| 2135 | if (!ctx) | ||
| 2136 | return -ENOMEM; | ||
| 2085 | 2137 | ||
| 2086 | /* | 2138 | /* |
| 2087 | * The first time anyone tries to mount a cgroup, enable the list | 2139 | * The first time anyone tries to mount a cgroup, enable the list |
| @@ -2090,29 +2142,18 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 2090 | if (!use_task_css_set_links) | 2142 | if (!use_task_css_set_links) |
| 2091 | cgroup_enable_task_cg_lists(); | 2143 | cgroup_enable_task_cg_lists(); |
| 2092 | 2144 | ||
| 2093 | if (fs_type == &cgroup2_fs_type) { | 2145 | ctx->ns = current->nsproxy->cgroup_ns; |
| 2094 | unsigned int root_flags; | 2146 | get_cgroup_ns(ctx->ns); |
| 2095 | 2147 | fc->fs_private = &ctx->kfc; | |
| 2096 | ret = parse_cgroup_root_flags(data, &root_flags); | 2148 | if (fc->fs_type == &cgroup2_fs_type) |
| 2097 | if (ret) { | 2149 | fc->ops = &cgroup_fs_context_ops; |
| 2098 | put_cgroup_ns(ns); | 2150 | else |
| 2099 | return ERR_PTR(ret); | 2151 | fc->ops = &cgroup1_fs_context_ops; |
| 2100 | } | 2152 | if (fc->user_ns) |
| 2101 | 2153 | put_user_ns(fc->user_ns); | |
| 2102 | cgrp_dfl_visible = true; | 2154 | fc->user_ns = get_user_ns(ctx->ns->user_ns); |
| 2103 | cgroup_get_live(&cgrp_dfl_root.cgrp); | 2155 | fc->global = true; |
| 2104 | 2156 | return 0; | |
| 2105 | dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, | ||
| 2106 | CGROUP2_SUPER_MAGIC, ns); | ||
| 2107 | if (!IS_ERR(dentry)) | ||
| 2108 | apply_cgroup_root_flags(root_flags); | ||
| 2109 | } else { | ||
| 2110 | dentry = cgroup1_mount(&cgroup_fs_type, flags, data, | ||
| 2111 | CGROUP_SUPER_MAGIC, ns); | ||
| 2112 | } | ||
| 2113 | |||
| 2114 | put_cgroup_ns(ns); | ||
| 2115 | return dentry; | ||
| 2116 | } | 2157 | } |
| 2117 | 2158 | ||
| 2118 | static void cgroup_kill_sb(struct super_block *sb) | 2159 | static void cgroup_kill_sb(struct super_block *sb) |
| @@ -2135,17 +2176,19 @@ static void cgroup_kill_sb(struct super_block *sb) | |||
| 2135 | } | 2176 | } |
| 2136 | 2177 | ||
| 2137 | struct file_system_type cgroup_fs_type = { | 2178 | struct file_system_type cgroup_fs_type = { |
| 2138 | .name = "cgroup", | 2179 | .name = "cgroup", |
| 2139 | .mount = cgroup_mount, | 2180 | .init_fs_context = cgroup_init_fs_context, |
| 2140 | .kill_sb = cgroup_kill_sb, | 2181 | .parameters = &cgroup1_fs_parameters, |
| 2141 | .fs_flags = FS_USERNS_MOUNT, | 2182 | .kill_sb = cgroup_kill_sb, |
| 2183 | .fs_flags = FS_USERNS_MOUNT, | ||
| 2142 | }; | 2184 | }; |
| 2143 | 2185 | ||
| 2144 | static struct file_system_type cgroup2_fs_type = { | 2186 | static struct file_system_type cgroup2_fs_type = { |
| 2145 | .name = "cgroup2", | 2187 | .name = "cgroup2", |
| 2146 | .mount = cgroup_mount, | 2188 | .init_fs_context = cgroup_init_fs_context, |
| 2147 | .kill_sb = cgroup_kill_sb, | 2189 | .parameters = &cgroup2_fs_parameters, |
| 2148 | .fs_flags = FS_USERNS_MOUNT, | 2190 | .kill_sb = cgroup_kill_sb, |
| 2191 | .fs_flags = FS_USERNS_MOUNT, | ||
| 2149 | }; | 2192 | }; |
| 2150 | 2193 | ||
| 2151 | int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, | 2194 | int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, |
| @@ -5280,7 +5323,6 @@ int cgroup_rmdir(struct kernfs_node *kn) | |||
| 5280 | 5323 | ||
| 5281 | static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { | 5324 | static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { |
| 5282 | .show_options = cgroup_show_options, | 5325 | .show_options = cgroup_show_options, |
| 5283 | .remount_fs = cgroup_remount, | ||
| 5284 | .mkdir = cgroup_mkdir, | 5326 | .mkdir = cgroup_mkdir, |
| 5285 | .rmdir = cgroup_rmdir, | 5327 | .rmdir = cgroup_rmdir, |
| 5286 | .show_path = cgroup_show_path, | 5328 | .show_path = cgroup_show_path, |
| @@ -5347,11 +5389,12 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) | |||
| 5347 | */ | 5389 | */ |
| 5348 | int __init cgroup_init_early(void) | 5390 | int __init cgroup_init_early(void) |
| 5349 | { | 5391 | { |
| 5350 | static struct cgroup_sb_opts __initdata opts; | 5392 | static struct cgroup_fs_context __initdata ctx; |
| 5351 | struct cgroup_subsys *ss; | 5393 | struct cgroup_subsys *ss; |
| 5352 | int i; | 5394 | int i; |
| 5353 | 5395 | ||
| 5354 | init_cgroup_root(&cgrp_dfl_root, &opts); | 5396 | ctx.root = &cgrp_dfl_root; |
| 5397 | init_cgroup_root(&ctx); | ||
| 5355 | cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; | 5398 | cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; |
| 5356 | 5399 | ||
| 5357 | RCU_INIT_POINTER(init_task.cgroups, &init_css_set); | 5400 | RCU_INIT_POINTER(init_task.cgroups, &init_css_set); |
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 72afd55f70c6..4834c4214e9c 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <linux/memory.h> | 39 | #include <linux/memory.h> |
| 40 | #include <linux/export.h> | 40 | #include <linux/export.h> |
| 41 | #include <linux/mount.h> | 41 | #include <linux/mount.h> |
| 42 | #include <linux/fs_context.h> | ||
| 42 | #include <linux/namei.h> | 43 | #include <linux/namei.h> |
| 43 | #include <linux/pagemap.h> | 44 | #include <linux/pagemap.h> |
| 44 | #include <linux/proc_fs.h> | 45 | #include <linux/proc_fs.h> |
| @@ -359,25 +360,52 @@ static inline bool is_in_v2_mode(void) | |||
| 359 | * users. If someone tries to mount the "cpuset" filesystem, we | 360 | * users. If someone tries to mount the "cpuset" filesystem, we |
| 360 | * silently switch it to mount "cgroup" instead | 361 | * silently switch it to mount "cgroup" instead |
| 361 | */ | 362 | */ |
| 362 | static struct dentry *cpuset_mount(struct file_system_type *fs_type, | 363 | static int cpuset_get_tree(struct fs_context *fc) |
| 363 | int flags, const char *unused_dev_name, void *data) | 364 | { |
| 364 | { | 365 | struct file_system_type *cgroup_fs; |
| 365 | struct file_system_type *cgroup_fs = get_fs_type("cgroup"); | 366 | struct fs_context *new_fc; |
| 366 | struct dentry *ret = ERR_PTR(-ENODEV); | 367 | int ret; |
| 367 | if (cgroup_fs) { | 368 | |
| 368 | char mountopts[] = | 369 | cgroup_fs = get_fs_type("cgroup"); |
| 369 | "cpuset,noprefix," | 370 | if (!cgroup_fs) |
| 370 | "release_agent=/sbin/cpuset_release_agent"; | 371 | return -ENODEV; |
| 371 | ret = cgroup_fs->mount(cgroup_fs, flags, | 372 | |
| 372 | unused_dev_name, mountopts); | 373 | new_fc = fs_context_for_mount(cgroup_fs, fc->sb_flags); |
| 373 | put_filesystem(cgroup_fs); | 374 | if (IS_ERR(new_fc)) { |
| 375 | ret = PTR_ERR(new_fc); | ||
| 376 | } else { | ||
| 377 | static const char agent_path[] = "/sbin/cpuset_release_agent"; | ||
| 378 | ret = vfs_parse_fs_string(new_fc, "cpuset", NULL, 0); | ||
| 379 | if (!ret) | ||
| 380 | ret = vfs_parse_fs_string(new_fc, "noprefix", NULL, 0); | ||
| 381 | if (!ret) | ||
| 382 | ret = vfs_parse_fs_string(new_fc, "release_agent", | ||
| 383 | agent_path, sizeof(agent_path) - 1); | ||
| 384 | if (!ret) | ||
| 385 | ret = vfs_get_tree(new_fc); | ||
| 386 | if (!ret) { /* steal the result */ | ||
| 387 | fc->root = new_fc->root; | ||
| 388 | new_fc->root = NULL; | ||
| 389 | } | ||
| 390 | put_fs_context(new_fc); | ||
| 374 | } | 391 | } |
| 392 | put_filesystem(cgroup_fs); | ||
| 375 | return ret; | 393 | return ret; |
| 376 | } | 394 | } |
| 377 | 395 | ||
| 396 | static const struct fs_context_operations cpuset_fs_context_ops = { | ||
| 397 | .get_tree = cpuset_get_tree, | ||
| 398 | }; | ||
| 399 | |||
| 400 | static int cpuset_init_fs_context(struct fs_context *fc) | ||
| 401 | { | ||
| 402 | fc->ops = &cpuset_fs_context_ops; | ||
| 403 | return 0; | ||
| 404 | } | ||
| 405 | |||
| 378 | static struct file_system_type cpuset_fs_type = { | 406 | static struct file_system_type cpuset_fs_type = { |
| 379 | .name = "cpuset", | 407 | .name = "cpuset", |
| 380 | .mount = cpuset_mount, | 408 | .init_fs_context = cpuset_init_fs_context, |
| 381 | }; | 409 | }; |
| 382 | 410 | ||
| 383 | /* | 411 | /* |
diff --git a/security/security.c b/security/security.c index 301b141b9a32..23cbb1a295a3 100644 --- a/security/security.c +++ b/security/security.c | |||
| @@ -764,6 +764,16 @@ void security_bprm_committed_creds(struct linux_binprm *bprm) | |||
| 764 | call_void_hook(bprm_committed_creds, bprm); | 764 | call_void_hook(bprm_committed_creds, bprm); |
| 765 | } | 765 | } |
| 766 | 766 | ||
| 767 | int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) | ||
| 768 | { | ||
| 769 | return call_int_hook(fs_context_dup, 0, fc, src_fc); | ||
| 770 | } | ||
| 771 | |||
| 772 | int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) | ||
| 773 | { | ||
| 774 | return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param); | ||
| 775 | } | ||
| 776 | |||
| 767 | int security_sb_alloc(struct super_block *sb) | 777 | int security_sb_alloc(struct super_block *sb) |
| 768 | { | 778 | { |
| 769 | return call_int_hook(sb_alloc_security, 0, sb); | 779 | return call_int_hook(sb_alloc_security, 0, sb); |
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 333606b3a8ef..c5363f0c67ef 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
| @@ -48,6 +48,8 @@ | |||
| 48 | #include <linux/fdtable.h> | 48 | #include <linux/fdtable.h> |
| 49 | #include <linux/namei.h> | 49 | #include <linux/namei.h> |
| 50 | #include <linux/mount.h> | 50 | #include <linux/mount.h> |
| 51 | #include <linux/fs_context.h> | ||
| 52 | #include <linux/fs_parser.h> | ||
| 51 | #include <linux/netfilter_ipv4.h> | 53 | #include <linux/netfilter_ipv4.h> |
| 52 | #include <linux/netfilter_ipv6.h> | 54 | #include <linux/netfilter_ipv6.h> |
| 53 | #include <linux/tty.h> | 55 | #include <linux/tty.h> |
| @@ -410,11 +412,11 @@ static inline int inode_doinit(struct inode *inode) | |||
| 410 | 412 | ||
| 411 | enum { | 413 | enum { |
| 412 | Opt_error = -1, | 414 | Opt_error = -1, |
| 413 | Opt_context = 1, | 415 | Opt_context = 0, |
| 416 | Opt_defcontext = 1, | ||
| 414 | Opt_fscontext = 2, | 417 | Opt_fscontext = 2, |
| 415 | Opt_defcontext = 3, | 418 | Opt_rootcontext = 3, |
| 416 | Opt_rootcontext = 4, | 419 | Opt_seclabel = 4, |
| 417 | Opt_seclabel = 5, | ||
| 418 | }; | 420 | }; |
| 419 | 421 | ||
| 420 | #define A(s, has_arg) {#s, sizeof(#s) - 1, Opt_##s, has_arg} | 422 | #define A(s, has_arg) {#s, sizeof(#s) - 1, Opt_##s, has_arg} |
| @@ -1067,6 +1069,7 @@ static int show_sid(struct seq_file *m, u32 sid) | |||
| 1067 | if (!rc) { | 1069 | if (!rc) { |
| 1068 | bool has_comma = context && strchr(context, ','); | 1070 | bool has_comma = context && strchr(context, ','); |
| 1069 | 1071 | ||
| 1072 | seq_putc(m, '='); | ||
| 1070 | if (has_comma) | 1073 | if (has_comma) |
| 1071 | seq_putc(m, '\"'); | 1074 | seq_putc(m, '\"'); |
| 1072 | seq_escape(m, context, "\"\n\\"); | 1075 | seq_escape(m, context, "\"\n\\"); |
| @@ -1120,7 +1123,7 @@ static int selinux_sb_show_options(struct seq_file *m, struct super_block *sb) | |||
| 1120 | } | 1123 | } |
| 1121 | if (sbsec->flags & SBLABEL_MNT) { | 1124 | if (sbsec->flags & SBLABEL_MNT) { |
| 1122 | seq_putc(m, ','); | 1125 | seq_putc(m, ','); |
| 1123 | seq_puts(m, LABELSUPP_STR); | 1126 | seq_puts(m, SECLABEL_STR); |
| 1124 | } | 1127 | } |
| 1125 | return 0; | 1128 | return 0; |
| 1126 | } | 1129 | } |
| @@ -2739,6 +2742,76 @@ static int selinux_umount(struct vfsmount *mnt, int flags) | |||
| 2739 | FILESYSTEM__UNMOUNT, NULL); | 2742 | FILESYSTEM__UNMOUNT, NULL); |
| 2740 | } | 2743 | } |
| 2741 | 2744 | ||
| 2745 | static int selinux_fs_context_dup(struct fs_context *fc, | ||
| 2746 | struct fs_context *src_fc) | ||
| 2747 | { | ||
| 2748 | const struct selinux_mnt_opts *src = src_fc->security; | ||
| 2749 | struct selinux_mnt_opts *opts; | ||
| 2750 | |||
| 2751 | if (!src) | ||
| 2752 | return 0; | ||
| 2753 | |||
| 2754 | fc->security = kzalloc(sizeof(struct selinux_mnt_opts), GFP_KERNEL); | ||
| 2755 | if (!fc->security) | ||
| 2756 | return -ENOMEM; | ||
| 2757 | |||
| 2758 | opts = fc->security; | ||
| 2759 | |||
| 2760 | if (src->fscontext) { | ||
| 2761 | opts->fscontext = kstrdup(src->fscontext, GFP_KERNEL); | ||
| 2762 | if (!opts->fscontext) | ||
| 2763 | return -ENOMEM; | ||
| 2764 | } | ||
| 2765 | if (src->context) { | ||
| 2766 | opts->context = kstrdup(src->context, GFP_KERNEL); | ||
| 2767 | if (!opts->context) | ||
| 2768 | return -ENOMEM; | ||
| 2769 | } | ||
| 2770 | if (src->rootcontext) { | ||
| 2771 | opts->rootcontext = kstrdup(src->rootcontext, GFP_KERNEL); | ||
| 2772 | if (!opts->rootcontext) | ||
| 2773 | return -ENOMEM; | ||
| 2774 | } | ||
| 2775 | if (src->defcontext) { | ||
| 2776 | opts->defcontext = kstrdup(src->defcontext, GFP_KERNEL); | ||
| 2777 | if (!opts->defcontext) | ||
| 2778 | return -ENOMEM; | ||
| 2779 | } | ||
| 2780 | return 0; | ||
| 2781 | } | ||
| 2782 | |||
| 2783 | static const struct fs_parameter_spec selinux_param_specs[] = { | ||
| 2784 | fsparam_string(CONTEXT_STR, Opt_context), | ||
| 2785 | fsparam_string(DEFCONTEXT_STR, Opt_defcontext), | ||
| 2786 | fsparam_string(FSCONTEXT_STR, Opt_fscontext), | ||
| 2787 | fsparam_string(ROOTCONTEXT_STR, Opt_rootcontext), | ||
| 2788 | fsparam_flag (SECLABEL_STR, Opt_seclabel), | ||
| 2789 | {} | ||
| 2790 | }; | ||
| 2791 | |||
| 2792 | static const struct fs_parameter_description selinux_fs_parameters = { | ||
| 2793 | .name = "SELinux", | ||
| 2794 | .specs = selinux_param_specs, | ||
| 2795 | }; | ||
| 2796 | |||
| 2797 | static int selinux_fs_context_parse_param(struct fs_context *fc, | ||
| 2798 | struct fs_parameter *param) | ||
| 2799 | { | ||
| 2800 | struct fs_parse_result result; | ||
| 2801 | int opt, rc; | ||
| 2802 | |||
| 2803 | opt = fs_parse(fc, &selinux_fs_parameters, param, &result); | ||
| 2804 | if (opt < 0) | ||
| 2805 | return opt; | ||
| 2806 | |||
| 2807 | rc = selinux_add_opt(opt, param->string, &fc->security); | ||
| 2808 | if (!rc) { | ||
| 2809 | param->string = NULL; | ||
| 2810 | rc = 1; | ||
| 2811 | } | ||
| 2812 | return rc; | ||
| 2813 | } | ||
| 2814 | |||
| 2742 | /* inode security operations */ | 2815 | /* inode security operations */ |
| 2743 | 2816 | ||
| 2744 | static int selinux_inode_alloc_security(struct inode *inode) | 2817 | static int selinux_inode_alloc_security(struct inode *inode) |
| @@ -6592,6 +6665,9 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { | |||
| 6592 | LSM_HOOK_INIT(bprm_committing_creds, selinux_bprm_committing_creds), | 6665 | LSM_HOOK_INIT(bprm_committing_creds, selinux_bprm_committing_creds), |
| 6593 | LSM_HOOK_INIT(bprm_committed_creds, selinux_bprm_committed_creds), | 6666 | LSM_HOOK_INIT(bprm_committed_creds, selinux_bprm_committed_creds), |
| 6594 | 6667 | ||
| 6668 | LSM_HOOK_INIT(fs_context_dup, selinux_fs_context_dup), | ||
| 6669 | LSM_HOOK_INIT(fs_context_parse_param, selinux_fs_context_parse_param), | ||
| 6670 | |||
| 6595 | LSM_HOOK_INIT(sb_alloc_security, selinux_sb_alloc_security), | 6671 | LSM_HOOK_INIT(sb_alloc_security, selinux_sb_alloc_security), |
| 6596 | LSM_HOOK_INIT(sb_free_security, selinux_sb_free_security), | 6672 | LSM_HOOK_INIT(sb_free_security, selinux_sb_free_security), |
| 6597 | LSM_HOOK_INIT(sb_eat_lsm_opts, selinux_sb_eat_lsm_opts), | 6673 | LSM_HOOK_INIT(sb_eat_lsm_opts, selinux_sb_eat_lsm_opts), |
| @@ -6837,6 +6913,8 @@ static __init int selinux_init(void) | |||
| 6837 | else | 6913 | else |
| 6838 | pr_debug("SELinux: Starting in permissive mode\n"); | 6914 | pr_debug("SELinux: Starting in permissive mode\n"); |
| 6839 | 6915 | ||
| 6916 | fs_validate_description(&selinux_fs_parameters); | ||
| 6917 | |||
| 6840 | return 0; | 6918 | return 0; |
| 6841 | } | 6919 | } |
| 6842 | 6920 | ||
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index f68fb25b5702..b5b7c5aade8c 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h | |||
| @@ -59,11 +59,11 @@ | |||
| 59 | #define SE_SBPROC 0x0200 | 59 | #define SE_SBPROC 0x0200 |
| 60 | #define SE_SBGENFS 0x0400 | 60 | #define SE_SBGENFS 0x0400 |
| 61 | 61 | ||
| 62 | #define CONTEXT_STR "context=" | 62 | #define CONTEXT_STR "context" |
| 63 | #define FSCONTEXT_STR "fscontext=" | 63 | #define FSCONTEXT_STR "fscontext" |
| 64 | #define ROOTCONTEXT_STR "rootcontext=" | 64 | #define ROOTCONTEXT_STR "rootcontext" |
| 65 | #define DEFCONTEXT_STR "defcontext=" | 65 | #define DEFCONTEXT_STR "defcontext" |
| 66 | #define LABELSUPP_STR "seclabel" | 66 | #define SECLABEL_STR "seclabel" |
| 67 | 67 | ||
| 68 | struct netlbl_lsm_secattr; | 68 | struct netlbl_lsm_secattr; |
| 69 | 69 | ||
diff --git a/security/smack/smack.h b/security/smack/smack.h index 9c7c95a5c497..cf52af77d15e 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h | |||
| @@ -196,22 +196,13 @@ struct smack_known_list_elem { | |||
| 196 | 196 | ||
| 197 | enum { | 197 | enum { |
| 198 | Opt_error = -1, | 198 | Opt_error = -1, |
| 199 | Opt_fsdefault = 1, | 199 | Opt_fsdefault = 0, |
| 200 | Opt_fsfloor = 2, | 200 | Opt_fsfloor = 1, |
| 201 | Opt_fshat = 3, | 201 | Opt_fshat = 2, |
| 202 | Opt_fsroot = 4, | 202 | Opt_fsroot = 3, |
| 203 | Opt_fstransmute = 5, | 203 | Opt_fstransmute = 4, |
| 204 | }; | 204 | }; |
| 205 | 205 | ||
| 206 | /* | ||
| 207 | * Mount options | ||
| 208 | */ | ||
| 209 | #define SMK_FSDEFAULT "smackfsdef=" | ||
| 210 | #define SMK_FSFLOOR "smackfsfloor=" | ||
| 211 | #define SMK_FSHAT "smackfshat=" | ||
| 212 | #define SMK_FSROOT "smackfsroot=" | ||
| 213 | #define SMK_FSTRANS "smackfstransmute=" | ||
| 214 | |||
| 215 | #define SMACK_DELETE_OPTION "-DELETE" | 206 | #define SMACK_DELETE_OPTION "-DELETE" |
| 216 | #define SMACK_CIPSO_OPTION "-CIPSO" | 207 | #define SMACK_CIPSO_OPTION "-CIPSO" |
| 217 | 208 | ||
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 424bce4ef21d..5c1613519d5a 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c | |||
| @@ -43,6 +43,8 @@ | |||
| 43 | #include <linux/shm.h> | 43 | #include <linux/shm.h> |
| 44 | #include <linux/binfmts.h> | 44 | #include <linux/binfmts.h> |
| 45 | #include <linux/parser.h> | 45 | #include <linux/parser.h> |
| 46 | #include <linux/fs_context.h> | ||
| 47 | #include <linux/fs_parser.h> | ||
| 46 | #include "smack.h" | 48 | #include "smack.h" |
| 47 | 49 | ||
| 48 | #define TRANS_TRUE "TRUE" | 50 | #define TRANS_TRUE "TRUE" |
| @@ -526,7 +528,6 @@ static int smack_syslog(int typefrom_file) | |||
| 526 | return rc; | 528 | return rc; |
| 527 | } | 529 | } |
| 528 | 530 | ||
| 529 | |||
| 530 | /* | 531 | /* |
| 531 | * Superblock Hooks. | 532 | * Superblock Hooks. |
| 532 | */ | 533 | */ |
| @@ -631,6 +632,92 @@ out_opt_err: | |||
| 631 | return -EINVAL; | 632 | return -EINVAL; |
| 632 | } | 633 | } |
| 633 | 634 | ||
| 635 | /** | ||
| 636 | * smack_fs_context_dup - Duplicate the security data on fs_context duplication | ||
| 637 | * @fc: The new filesystem context. | ||
| 638 | * @src_fc: The source filesystem context being duplicated. | ||
| 639 | * | ||
| 640 | * Returns 0 on success or -ENOMEM on error. | ||
| 641 | */ | ||
| 642 | static int smack_fs_context_dup(struct fs_context *fc, | ||
| 643 | struct fs_context *src_fc) | ||
| 644 | { | ||
| 645 | struct smack_mnt_opts *dst, *src = src_fc->security; | ||
| 646 | |||
| 647 | if (!src) | ||
| 648 | return 0; | ||
| 649 | |||
| 650 | fc->security = kzalloc(sizeof(struct smack_mnt_opts), GFP_KERNEL); | ||
| 651 | if (!fc->security) | ||
| 652 | return -ENOMEM; | ||
| 653 | dst = fc->security; | ||
| 654 | |||
| 655 | if (src->fsdefault) { | ||
| 656 | dst->fsdefault = kstrdup(src->fsdefault, GFP_KERNEL); | ||
| 657 | if (!dst->fsdefault) | ||
| 658 | return -ENOMEM; | ||
| 659 | } | ||
| 660 | if (src->fsfloor) { | ||
| 661 | dst->fsfloor = kstrdup(src->fsfloor, GFP_KERNEL); | ||
| 662 | if (!dst->fsfloor) | ||
| 663 | return -ENOMEM; | ||
| 664 | } | ||
| 665 | if (src->fshat) { | ||
| 666 | dst->fshat = kstrdup(src->fshat, GFP_KERNEL); | ||
| 667 | if (!dst->fshat) | ||
| 668 | return -ENOMEM; | ||
| 669 | } | ||
| 670 | if (src->fsroot) { | ||
| 671 | dst->fsroot = kstrdup(src->fsroot, GFP_KERNEL); | ||
| 672 | if (!dst->fsroot) | ||
| 673 | return -ENOMEM; | ||
| 674 | } | ||
| 675 | if (src->fstransmute) { | ||
| 676 | dst->fstransmute = kstrdup(src->fstransmute, GFP_KERNEL); | ||
| 677 | if (!dst->fstransmute) | ||
| 678 | return -ENOMEM; | ||
| 679 | } | ||
| 680 | return 0; | ||
| 681 | } | ||
| 682 | |||
| 683 | static const struct fs_parameter_spec smack_param_specs[] = { | ||
| 684 | fsparam_string("fsdefault", Opt_fsdefault), | ||
| 685 | fsparam_string("fsfloor", Opt_fsfloor), | ||
| 686 | fsparam_string("fshat", Opt_fshat), | ||
| 687 | fsparam_string("fsroot", Opt_fsroot), | ||
| 688 | fsparam_string("fstransmute", Opt_fstransmute), | ||
| 689 | {} | ||
| 690 | }; | ||
| 691 | |||
| 692 | static const struct fs_parameter_description smack_fs_parameters = { | ||
| 693 | .name = "smack", | ||
| 694 | .specs = smack_param_specs, | ||
| 695 | }; | ||
| 696 | |||
| 697 | /** | ||
| 698 | * smack_fs_context_parse_param - Parse a single mount parameter | ||
| 699 | * @fc: The new filesystem context being constructed. | ||
| 700 | * @param: The parameter. | ||
| 701 | * | ||
| 702 | * Returns 0 on success, -ENOPARAM to pass the parameter on or anything else on | ||
| 703 | * error. | ||
| 704 | */ | ||
| 705 | static int smack_fs_context_parse_param(struct fs_context *fc, | ||
| 706 | struct fs_parameter *param) | ||
| 707 | { | ||
| 708 | struct fs_parse_result result; | ||
| 709 | int opt, rc; | ||
| 710 | |||
| 711 | opt = fs_parse(fc, &smack_fs_parameters, param, &result); | ||
| 712 | if (opt < 0) | ||
| 713 | return opt; | ||
| 714 | |||
| 715 | rc = smack_add_opt(opt, param->string, &fc->security); | ||
| 716 | if (!rc) | ||
| 717 | param->string = NULL; | ||
| 718 | return rc; | ||
| 719 | } | ||
| 720 | |||
| 634 | static int smack_sb_eat_lsm_opts(char *options, void **mnt_opts) | 721 | static int smack_sb_eat_lsm_opts(char *options, void **mnt_opts) |
| 635 | { | 722 | { |
| 636 | char *from = options, *to = options; | 723 | char *from = options, *to = options; |
| @@ -4495,6 +4582,9 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { | |||
| 4495 | LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme), | 4582 | LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme), |
| 4496 | LSM_HOOK_INIT(syslog, smack_syslog), | 4583 | LSM_HOOK_INIT(syslog, smack_syslog), |
| 4497 | 4584 | ||
| 4585 | LSM_HOOK_INIT(fs_context_dup, smack_fs_context_dup), | ||
| 4586 | LSM_HOOK_INIT(fs_context_parse_param, smack_fs_context_parse_param), | ||
| 4587 | |||
| 4498 | LSM_HOOK_INIT(sb_alloc_security, smack_sb_alloc_security), | 4588 | LSM_HOOK_INIT(sb_alloc_security, smack_sb_alloc_security), |
| 4499 | LSM_HOOK_INIT(sb_free_security, smack_sb_free_security), | 4589 | LSM_HOOK_INIT(sb_free_security, smack_sb_free_security), |
| 4500 | LSM_HOOK_INIT(sb_free_mnt_opts, smack_free_mnt_opts), | 4590 | LSM_HOOK_INIT(sb_free_mnt_opts, smack_free_mnt_opts), |
