diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/filesystems/Locking | 22 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 45 |
2 files changed, 57 insertions, 10 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 96d4293607ec..bbcc15651a21 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -92,8 +92,8 @@ prototypes: | |||
92 | void (*destroy_inode)(struct inode *); | 92 | void (*destroy_inode)(struct inode *); |
93 | void (*dirty_inode) (struct inode *); | 93 | void (*dirty_inode) (struct inode *); |
94 | int (*write_inode) (struct inode *, int); | 94 | int (*write_inode) (struct inode *, int); |
95 | void (*drop_inode) (struct inode *); | 95 | int (*drop_inode) (struct inode *); |
96 | void (*delete_inode) (struct inode *); | 96 | void (*evict_inode) (struct inode *); |
97 | void (*put_super) (struct super_block *); | 97 | void (*put_super) (struct super_block *); |
98 | void (*write_super) (struct super_block *); | 98 | void (*write_super) (struct super_block *); |
99 | int (*sync_fs)(struct super_block *sb, int wait); | 99 | int (*sync_fs)(struct super_block *sb, int wait); |
@@ -101,14 +101,13 @@ prototypes: | |||
101 | int (*unfreeze_fs) (struct super_block *); | 101 | int (*unfreeze_fs) (struct super_block *); |
102 | int (*statfs) (struct dentry *, struct kstatfs *); | 102 | int (*statfs) (struct dentry *, struct kstatfs *); |
103 | int (*remount_fs) (struct super_block *, int *, char *); | 103 | int (*remount_fs) (struct super_block *, int *, char *); |
104 | void (*clear_inode) (struct inode *); | ||
105 | void (*umount_begin) (struct super_block *); | 104 | void (*umount_begin) (struct super_block *); |
106 | int (*show_options)(struct seq_file *, struct vfsmount *); | 105 | int (*show_options)(struct seq_file *, struct vfsmount *); |
107 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | 106 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); |
108 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 107 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
109 | 108 | ||
110 | locking rules: | 109 | locking rules: |
111 | All may block. | 110 | All may block [not true, see below] |
112 | None have BKL | 111 | None have BKL |
113 | s_umount | 112 | s_umount |
114 | alloc_inode: | 113 | alloc_inode: |
@@ -116,22 +115,25 @@ destroy_inode: | |||
116 | dirty_inode: (must not sleep) | 115 | dirty_inode: (must not sleep) |
117 | write_inode: | 116 | write_inode: |
118 | drop_inode: !!!inode_lock!!! | 117 | drop_inode: !!!inode_lock!!! |
119 | delete_inode: | 118 | evict_inode: |
120 | put_super: write | 119 | put_super: write |
121 | write_super: read | 120 | write_super: read |
122 | sync_fs: read | 121 | sync_fs: read |
123 | freeze_fs: read | 122 | freeze_fs: read |
124 | unfreeze_fs: read | 123 | unfreeze_fs: read |
125 | statfs: no | 124 | statfs: maybe(read) (see below) |
126 | remount_fs: maybe (see below) | 125 | remount_fs: write |
127 | clear_inode: | ||
128 | umount_begin: no | 126 | umount_begin: no |
129 | show_options: no (namespace_sem) | 127 | show_options: no (namespace_sem) |
130 | quota_read: no (see below) | 128 | quota_read: no (see below) |
131 | quota_write: no (see below) | 129 | quota_write: no (see below) |
132 | 130 | ||
133 | ->remount_fs() will have the s_umount exclusive lock if it's already mounted. | 131 | ->statfs() has s_umount (shared) when called by ustat(2) (native or |
134 | When called from get_sb_single, it does NOT have the s_umount lock. | 132 | compat), but that's an accident of bad API; s_umount is used to pin |
133 | the superblock down when we only have dev_t given us by userland to | ||
134 | identify the superblock. Everything else (statfs(), fstatfs(), etc.) | ||
135 | doesn't hold it when calling ->statfs() - superblock is pinned down | ||
136 | by resolving the pathname passed to syscall. | ||
135 | ->quota_read() and ->quota_write() functions are both guaranteed to | 137 | ->quota_read() and ->quota_write() functions are both guaranteed to |
136 | be the only ones operating on the quota file by the quota code (via | 138 | be the only ones operating on the quota file by the quota code (via |
137 | dqio_sem) (unless an admin really wants to screw up something and | 139 | dqio_sem) (unless an admin really wants to screw up something and |
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index a7e9746ee7ea..b12c89538680 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -273,3 +273,48 @@ it's safe to remove it. If you don't need it, remove it. | |||
273 | deliberate; as soon as struct block_device * is propagated in a reasonable | 273 | deliberate; as soon as struct block_device * is propagated in a reasonable |
274 | way by that code fixing will become trivial; until then nothing can be | 274 | way by that code fixing will become trivial; until then nothing can be |
275 | done. | 275 | done. |
276 | |||
277 | [mandatory] | ||
278 | |||
279 | block truncatation on error exit from ->write_begin, and ->direct_IO | ||
280 | moved from generic methods (block_write_begin, cont_write_begin, | ||
281 | nobh_write_begin, blockdev_direct_IO*) to callers. Take a look at | ||
282 | ext2_write_failed and callers for an example. | ||
283 | |||
284 | [mandatory] | ||
285 | |||
286 | ->truncate is going away. The whole truncate sequence needs to be | ||
287 | implemented in ->setattr, which is now mandatory for filesystems | ||
288 | implementing on-disk size changes. Start with a copy of the old inode_setattr | ||
289 | and vmtruncate, and the reorder the vmtruncate + foofs_vmtruncate sequence to | ||
290 | be in order of zeroing blocks using block_truncate_page or similar helpers, | ||
291 | size update and on finally on-disk truncation which should not fail. | ||
292 | inode_change_ok now includes the size checks for ATTR_SIZE and must be called | ||
293 | in the beginning of ->setattr unconditionally. | ||
294 | |||
295 | [mandatory] | ||
296 | |||
297 | ->clear_inode() and ->delete_inode() are gone; ->evict_inode() should | ||
298 | be used instead. It gets called whenever the inode is evicted, whether it has | ||
299 | remaining links or not. Caller does *not* evict the pagecache or inode-associated | ||
300 | metadata buffers; getting rid of those is responsibility of method, as it had | ||
301 | been for ->delete_inode(). | ||
302 | ->drop_inode() returns int now; it's called on final iput() with inode_lock | ||
303 | held and it returns true if filesystems wants the inode to be dropped. As before, | ||
304 | generic_drop_inode() is still the default and it's been updated appropriately. | ||
305 | generic_delete_inode() is also alive and it consists simply of return 1. Note that | ||
306 | all actual eviction work is done by caller after ->drop_inode() returns. | ||
307 | clear_inode() is gone; use end_writeback() instead. As before, it must | ||
308 | be called exactly once on each call of ->evict_inode() (as it used to be for | ||
309 | each call of ->delete_inode()). Unlike before, if you are using inode-associated | ||
310 | metadata buffers (i.e. mark_buffer_dirty_inode()), it's your responsibility to | ||
311 | call invalidate_inode_buffers() before end_writeback(). | ||
312 | No async writeback (and thus no calls of ->write_inode()) will happen | ||
313 | after end_writeback() returns, so actions that should not overlap with ->write_inode() | ||
314 | (e.g. freeing on-disk inode if i_nlink is 0) ought to be done after that call. | ||
315 | |||
316 | NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out | ||
317 | if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput() | ||
318 | may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly | ||
319 | free the on-disk inode, you may end up doing that while ->write_inode() is writing | ||
320 | to it. | ||