diff options
author | Nick Piggin <npiggin@kernel.dk> | 2011-01-07 01:49:58 -0500 |
---|---|---|
committer | Nick Piggin <npiggin@kernel.dk> | 2011-01-07 01:50:29 -0500 |
commit | b74c79e99389cd79b31fcc08f82c24e492e63c7e (patch) | |
tree | 763c6b412517306670bc625e90035f2d16bb739f /Documentation/filesystems | |
parent | 34286d6662308d82aed891852d04c7c3a2649b16 (diff) |
fs: provide rcu-walk aware permission i_ops
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/Locking | 6 | ||||
-rw-r--r-- | Documentation/filesystems/path-lookup.txt | 44 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 5 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 10 |
4 files changed, 58 insertions, 7 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index e90ffe61eb65..977d8919cc69 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -47,8 +47,8 @@ ata *); | |||
47 | void * (*follow_link) (struct dentry *, struct nameidata *); | 47 | void * (*follow_link) (struct dentry *, struct nameidata *); |
48 | void (*put_link) (struct dentry *, struct nameidata *, void *); | 48 | void (*put_link) (struct dentry *, struct nameidata *, void *); |
49 | void (*truncate) (struct inode *); | 49 | void (*truncate) (struct inode *); |
50 | int (*permission) (struct inode *, int, struct nameidata *); | 50 | int (*permission) (struct inode *, int, unsigned int); |
51 | int (*check_acl)(struct inode *, int); | 51 | int (*check_acl)(struct inode *, int, unsigned int); |
52 | int (*setattr) (struct dentry *, struct iattr *); | 52 | int (*setattr) (struct dentry *, struct iattr *); |
53 | int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); | 53 | int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); |
54 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); | 54 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); |
@@ -76,7 +76,7 @@ follow_link: no | |||
76 | put_link: no | 76 | put_link: no |
77 | truncate: yes (see below) | 77 | truncate: yes (see below) |
78 | setattr: yes | 78 | setattr: yes |
79 | permission: no | 79 | permission: no (may not block if called in rcu-walk mode) |
80 | check_acl: no | 80 | check_acl: no |
81 | getattr: no | 81 | getattr: no |
82 | setxattr: yes | 82 | setxattr: yes |
diff --git a/Documentation/filesystems/path-lookup.txt b/Documentation/filesystems/path-lookup.txt index 8789d1810bed..eb59c8b44be9 100644 --- a/Documentation/filesystems/path-lookup.txt +++ b/Documentation/filesystems/path-lookup.txt | |||
@@ -316,11 +316,9 @@ The detailed design for rcu-walk is like this: | |||
316 | 316 | ||
317 | The cases where rcu-walk cannot continue are: | 317 | The cases where rcu-walk cannot continue are: |
318 | * NULL dentry (ie. any uncached path element) | 318 | * NULL dentry (ie. any uncached path element) |
319 | * parent with d_inode->i_op->permission or ACLs | ||
320 | * Following links | 319 | * Following links |
321 | 320 | ||
322 | In future patches, permission checks become rcu-walk aware. It may be possible | 321 | It may be possible eventually to make following links rcu-walk aware. |
323 | eventually to make following links rcu-walk aware. | ||
324 | 322 | ||
325 | Uncached path elements will always require dropping to ref-walk mode, at the | 323 | Uncached path elements will always require dropping to ref-walk mode, at the |
326 | very least because i_mutex needs to be grabbed, and objects allocated. | 324 | very least because i_mutex needs to be grabbed, and objects allocated. |
@@ -336,9 +334,49 @@ or stored into. The result is massive improvements in performance and | |||
336 | scalability of path resolution. | 334 | scalability of path resolution. |
337 | 335 | ||
338 | 336 | ||
337 | Interesting statistics | ||
338 | ====================== | ||
339 | |||
340 | The following table gives rcu lookup statistics for a few simple workloads | ||
341 | (2s12c24t Westmere, debian non-graphical system). Ungraceful are attempts to | ||
342 | drop rcu that fail due to d_seq failure and require the entire path lookup | ||
343 | again. Other cases are successful rcu-drops that are required before the final | ||
344 | element, nodentry for missing dentry, revalidate for filesystem revalidate | ||
345 | routine requiring rcu drop, permission for permission check requiring drop, | ||
346 | and link for symlink traversal requiring drop. | ||
347 | |||
348 | rcu-lookups restart nodentry link revalidate permission | ||
349 | bootup 47121 0 4624 1010 10283 7852 | ||
350 | dbench 25386793 0 6778659(26.7%) 55 549 1156 | ||
351 | kbuild 2696672 10 64442(2.3%) 108764(4.0%) 1 1590 | ||
352 | git diff 39605 0 28 2 0 106 | ||
353 | vfstest 24185492 4945 708725(2.9%) 1076136(4.4%) 0 2651 | ||
354 | |||
355 | What this shows is that failed rcu-walk lookups, ie. ones that are restarted | ||
356 | entirely with ref-walk, are quite rare. Even the "vfstest" case which | ||
357 | specifically has concurrent renames/mkdir/rmdir/creat/unlink/etc to exercise | ||
358 | such races is not showing a huge amount of restarts. | ||
359 | |||
360 | Dropping from rcu-walk to ref-walk means that we have encountered a dentry where | ||
361 | the reference count needs to be taken for some reason. This is either because | ||
362 | we have reached the target of the path walk, or because we have encountered a | ||
363 | condition that can't be resolved in rcu-walk mode. Ideally, we drop rcu-walk | ||
364 | only when we have reached the target dentry, so the other statistics show where | ||
365 | this does not happen. | ||
366 | |||
367 | Note that a graceful drop from rcu-walk mode due to something such as the | ||
368 | dentry not existing (which can be common) is not necessarily a failure of | ||
369 | the rcu-walk scheme, because some elements of the path may have been walked in | ||
370 | rcu-walk mode. The further we get from common path elements (such as cwd or | ||
371 | root), the less contended the dentry is likely to be. The closer we are to | ||
372 | common path elements, the more likely they will exist in dentry cache. | ||
373 | |||
374 | |||
339 | Papers and other documentation on dcache locking | 375 | Papers and other documentation on dcache locking |
340 | ================================================ | 376 | ================================================ |
341 | 377 | ||
342 | 1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124). | 378 | 1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124). |
343 | 379 | ||
344 | 2. http://lse.sourceforge.net/locking/dcache/dcache.html | 380 | 2. http://lse.sourceforge.net/locking/dcache/dcache.html |
381 | |||
382 | |||
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index cd9756a2709d..07a32b42cf9c 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -380,3 +380,8 @@ the filesystem provides it), which requires dropping out of rcu-walk mode. This | |||
380 | may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be | 380 | may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be |
381 | returned if the filesystem cannot handle rcu-walk. See | 381 | returned if the filesystem cannot handle rcu-walk. See |
382 | Documentation/filesystems/vfs.txt for more details. | 382 | Documentation/filesystems/vfs.txt for more details. |
383 | |||
384 | permission and check_acl are inode permission checks that are called | ||
385 | on many or all directory inodes on the way down a path walk (to check for | ||
386 | exec permission). These must now be rcu-walk aware (flags & IPERM_RCU). See | ||
387 | Documentation/filesystems/vfs.txt for more details. | ||
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c936b4912383..fbb324e2bd43 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -325,7 +325,8 @@ struct inode_operations { | |||
325 | void * (*follow_link) (struct dentry *, struct nameidata *); | 325 | void * (*follow_link) (struct dentry *, struct nameidata *); |
326 | void (*put_link) (struct dentry *, struct nameidata *, void *); | 326 | void (*put_link) (struct dentry *, struct nameidata *, void *); |
327 | void (*truncate) (struct inode *); | 327 | void (*truncate) (struct inode *); |
328 | int (*permission) (struct inode *, int, struct nameidata *); | 328 | int (*permission) (struct inode *, int, unsigned int); |
329 | int (*check_acl)(struct inode *, int, unsigned int); | ||
329 | int (*setattr) (struct dentry *, struct iattr *); | 330 | int (*setattr) (struct dentry *, struct iattr *); |
330 | int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); | 331 | int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); |
331 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); | 332 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); |
@@ -414,6 +415,13 @@ otherwise noted. | |||
414 | permission: called by the VFS to check for access rights on a POSIX-like | 415 | permission: called by the VFS to check for access rights on a POSIX-like |
415 | filesystem. | 416 | filesystem. |
416 | 417 | ||
418 | May be called in rcu-walk mode (flags & IPERM_RCU). If in rcu-walk | ||
419 | mode, the filesystem must check the permission without blocking or | ||
420 | storing to the inode. | ||
421 | |||
422 | If a situation is encountered that rcu-walk cannot handle, return | ||
423 | -ECHILD and it will be called again in ref-walk mode. | ||
424 | |||
417 | setattr: called by the VFS to set attributes for a file. This method | 425 | setattr: called by the VFS to set attributes for a file. This method |
418 | is called by chmod(2) and related system calls. | 426 | is called by chmod(2) and related system calls. |
419 | 427 | ||