diff options
Diffstat (limited to 'Documentation/filesystems/porting')
| -rw-r--r-- | Documentation/filesystems/porting | 266 |
1 files changed, 266 insertions, 0 deletions
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting new file mode 100644 index 000000000000..2f388460cbe7 --- /dev/null +++ b/Documentation/filesystems/porting | |||
| @@ -0,0 +1,266 @@ | |||
| 1 | Changes since 2.5.0: | ||
| 2 | |||
| 3 | --- | ||
| 4 | [recommended] | ||
| 5 | |||
| 6 | New helpers: sb_bread(), sb_getblk(), sb_find_get_block(), set_bh(), | ||
| 7 | sb_set_blocksize() and sb_min_blocksize(). | ||
| 8 | |||
| 9 | Use them. | ||
| 10 | |||
| 11 | (sb_find_get_block() replaces 2.4's get_hash_table()) | ||
| 12 | |||
| 13 | --- | ||
| 14 | [recommended] | ||
| 15 | |||
| 16 | New methods: ->alloc_inode() and ->destroy_inode(). | ||
| 17 | |||
| 18 | Remove inode->u.foo_inode_i | ||
| 19 | Declare | ||
| 20 | struct foo_inode_info { | ||
| 21 | /* fs-private stuff */ | ||
| 22 | struct inode vfs_inode; | ||
| 23 | }; | ||
| 24 | static inline struct foo_inode_info *FOO_I(struct inode *inode) | ||
| 25 | { | ||
| 26 | return list_entry(inode, struct foo_inode_info, vfs_inode); | ||
| 27 | } | ||
| 28 | |||
| 29 | Use FOO_I(inode) instead of &inode->u.foo_inode_i; | ||
| 30 | |||
| 31 | Add foo_alloc_inode() and foo_destroy_inode() - the former should allocate | ||
| 32 | foo_inode_info and return the address of ->vfs_inode, the latter should free | ||
| 33 | FOO_I(inode) (see in-tree filesystems for examples). | ||
| 34 | |||
| 35 | Make them ->alloc_inode and ->destroy_inode in your super_operations. | ||
| 36 | |||
| 37 | Keep in mind that now you need explicit initialization of private data - | ||
| 38 | typically in ->read_inode() and after getting an inode from new_inode(). | ||
| 39 | |||
| 40 | At some point that will become mandatory. | ||
| 41 | |||
| 42 | --- | ||
| 43 | [mandatory] | ||
| 44 | |||
| 45 | Change of file_system_type method (->read_super to ->get_sb) | ||
| 46 | |||
| 47 | ->read_super() is no more. Ditto for DECLARE_FSTYPE and DECLARE_FSTYPE_DEV. | ||
| 48 | |||
| 49 | Turn your foo_read_super() into a function that would return 0 in case of | ||
| 50 | success and negative number in case of error (-EINVAL unless you have more | ||
| 51 | informative error value to report). Call it foo_fill_super(). Now declare | ||
| 52 | |||
| 53 | struct super_block *foo_get_sb(struct file_system_type *fs_type, | ||
| 54 | int flags, const char *dev_name, void *data) | ||
| 55 | { | ||
| 56 | return get_sb_bdev(fs_type, flags, dev_name, data, foo_fill_super); | ||
| 57 | } | ||
| 58 | |||
| 59 | (or similar with s/bdev/nodev/ or s/bdev/single/, depending on the kind of | ||
| 60 | filesystem). | ||
| 61 | |||
| 62 | Replace DECLARE_FSTYPE... with explicit initializer and have ->get_sb set as | ||
| 63 | foo_get_sb. | ||
| 64 | |||
| 65 | --- | ||
| 66 | [mandatory] | ||
| 67 | |||
| 68 | Locking change: ->s_vfs_rename_sem is taken only by cross-directory renames. | ||
| 69 | Most likely there is no need to change anything, but if you relied on | ||
| 70 | global exclusion between renames for some internal purpose - you need to | ||
| 71 | change your internal locking. Otherwise exclusion guarantees remain the | ||
| 72 | same (i.e. parents and victim are locked, etc.). | ||
| 73 | |||
| 74 | --- | ||
| 75 | [informational] | ||
| 76 | |||
| 77 | Now we have the exclusion between ->lookup() and directory removal (by | ||
| 78 | ->rmdir() and ->rename()). If you used to need that exclusion and did | ||
| 79 | it by internal locking (most filesystems couldn't care less) - you | ||
| 80 | can relax your locking. | ||
| 81 | |||
| 82 | --- | ||
| 83 | [mandatory] | ||
| 84 | |||
| 85 | ->lookup(), ->truncate(), ->create(), ->unlink(), ->mknod(), ->mkdir(), | ||
| 86 | ->rmdir(), ->link(), ->lseek(), ->symlink(), ->rename() | ||
| 87 | and ->readdir() are called without BKL now. Grab it on entry, drop upon return | ||
| 88 | - that will guarantee the same locking you used to have. If your method or its | ||
| 89 | parts do not need BKL - better yet, now you can shift lock_kernel() and | ||
| 90 | unlock_kernel() so that they would protect exactly what needs to be | ||
| 91 | protected. | ||
| 92 | |||
| 93 | --- | ||
| 94 | [mandatory] | ||
| 95 | |||
| 96 | BKL is also moved from around sb operations. ->write_super() is now called | ||
| 97 | without BKL held. BKL should have been shifted into individual fs sb_op | ||
| 98 | functions. If you don't need it, remove it. | ||
| 99 | |||
| 100 | --- | ||
| 101 | [informational] | ||
| 102 | |||
| 103 | Check for ->link() target not being a directory is done by callers. Feel | ||
| 104 | free to drop it... | ||
| 105 | |||
| 106 | --- | ||
| 107 | [informational] | ||
| 108 | |||
| 109 | ->link() callers hold ->i_sem on the object we are linking to. Some of your | ||
| 110 | problems might be over... | ||
| 111 | |||
| 112 | --- | ||
| 113 | [mandatory] | ||
| 114 | |||
| 115 | New file_system_type method - kill_sb(superblock). If you are converting | ||
| 116 | an existing filesystem, set it according to ->fs_flags: | ||
| 117 | FS_REQUIRES_DEV - kill_block_super | ||
| 118 | FS_LITTER - kill_litter_super | ||
| 119 | neither - kill_anon_super | ||
| 120 | FS_LITTER is gone - just remove it from fs_flags. | ||
| 121 | |||
| 122 | --- | ||
| 123 | [mandatory] | ||
| 124 | |||
| 125 | FS_SINGLE is gone (actually, that had happened back when ->get_sb() | ||
| 126 | went in - and hadn't been documented ;-/). Just remove it from fs_flags | ||
| 127 | (and see ->get_sb() entry for other actions). | ||
| 128 | |||
| 129 | --- | ||
| 130 | [mandatory] | ||
| 131 | |||
| 132 | ->setattr() is called without BKL now. Caller _always_ holds ->i_sem, so | ||
| 133 | watch for ->i_sem-grabbing code that might be used by your ->setattr(). | ||
| 134 | Callers of notify_change() need ->i_sem now. | ||
| 135 | |||
| 136 | --- | ||
| 137 | [recommended] | ||
| 138 | |||
| 139 | New super_block field "struct export_operations *s_export_op" for | ||
| 140 | explicit support for exporting, e.g. via NFS. The structure is fully | ||
| 141 | documented at its declaration in include/linux/fs.h, and in | ||
| 142 | Documentation/filesystems/Exporting. | ||
| 143 | |||
| 144 | Briefly, it allows for the definition of encode_fh and decode_fh operations | ||
| 145 | to encode and decode filehandles, and allows the filesystem to use | ||
| 146 | a standard helper function for decode_fh, and provide file-system specific | ||
| 147 | support for this helper, particularly get_parent. | ||
| 148 | |||
| 149 | It is planned that this will be required for exporting once the code | ||
| 150 | settles down a bit. | ||
| 151 | |||
| 152 | [mandatory] | ||
| 153 | |||
| 154 | s_export_op is now required for exporting a filesystem. | ||
| 155 | isofs, ext2, ext3, reiserfs, fat | ||
| 156 | can be used as examples of very different filesystems. | ||
| 157 | |||
| 158 | --- | ||
| 159 | [mandatory] | ||
| 160 | |||
| 161 | iget4() and the read_inode2 callback have been superseded by iget5_locked() | ||
| 162 | which has the following prototype, | ||
| 163 | |||
| 164 | struct inode *iget5_locked(struct super_block *sb, unsigned long ino, | ||
| 165 | int (*test)(struct inode *, void *), | ||
| 166 | int (*set)(struct inode *, void *), | ||
| 167 | void *data); | ||
| 168 | |||
| 169 | 'test' is an additional function that can be used when the inode | ||
| 170 | number is not sufficient to identify the actual file object. 'set' | ||
| 171 | should be a non-blocking function that initializes those parts of a | ||
| 172 | newly created inode to allow the test function to succeed. 'data' is | ||
| 173 | passed as an opaque value to both test and set functions. | ||
| 174 | |||
| 175 | When the inode has been created by iget5_locked(), it will be returned with | ||
| 176 | the I_NEW flag set and will still be locked. read_inode has not been | ||
| 177 | called so the file system still has to finalize the initialization. Once | ||
| 178 | the inode is initialized it must be unlocked by calling unlock_new_inode(). | ||
| 179 | |||
| 180 | The filesystem is responsible for setting (and possibly testing) i_ino | ||
| 181 | when appropriate. There is also a simpler iget_locked function that | ||
| 182 | just takes the superblock and inode number as arguments and does the | ||
| 183 | test and set for you. | ||
| 184 | |||
| 185 | e.g. | ||
| 186 | inode = iget_locked(sb, ino); | ||
| 187 | if (inode->i_state & I_NEW) { | ||
| 188 | read_inode_from_disk(inode); | ||
| 189 | unlock_new_inode(inode); | ||
| 190 | } | ||
| 191 | |||
| 192 | --- | ||
| 193 | [recommended] | ||
| 194 | |||
| 195 | ->getattr() finally getting used. See instances in nfs, minix, etc. | ||
| 196 | |||
| 197 | --- | ||
| 198 | [mandatory] | ||
| 199 | |||
| 200 | ->revalidate() is gone. If your filesystem had it - provide ->getattr() | ||
| 201 | and let it call whatever you had as ->revalidate() + (for symlinks that | ||
| 202 | had ->revalidate()) add calls in ->follow_link()/->readlink(). | ||
| 203 | |||
| 204 | --- | ||
| 205 | [mandatory] | ||
| 206 | |||
| 207 | ->d_parent changes are not protected by BKL anymore. Read access is safe | ||
| 208 | if at least one of the following is true: | ||
| 209 | * filesystem has no cross-directory rename() | ||
| 210 | * dcache_lock is held | ||
| 211 | * we know that parent had been locked (e.g. we are looking at | ||
| 212 | ->d_parent of ->lookup() argument). | ||
| 213 | * we are called from ->rename(). | ||
| 214 | * the child's ->d_lock is held | ||
| 215 | Audit your code and add locking if needed. Notice that any place that is | ||
| 216 | not protected by the conditions above is risky even in the old tree - you | ||
| 217 | had been relying on BKL and that's prone to screwups. Old tree had quite | ||
| 218 | a few holes of that kind - unprotected access to ->d_parent leading to | ||
| 219 | anything from oops to silent memory corruption. | ||
| 220 | |||
| 221 | --- | ||
| 222 | [mandatory] | ||
| 223 | |||
| 224 | FS_NOMOUNT is gone. If you use it - just set MS_NOUSER in flags | ||
| 225 | (see rootfs for one kind of solution and bdev/socket/pipe for another). | ||
| 226 | |||
| 227 | --- | ||
| 228 | [recommended] | ||
| 229 | |||
| 230 | Use bdev_read_only(bdev) instead of is_read_only(kdev). The latter | ||
| 231 | is still alive, but only because of the mess in drivers/s390/block/dasd.c. | ||
| 232 | As soon as it gets fixed, is_read_only() will die. | ||
| 233 | |||
| 234 | --- | ||
| 235 | [mandatory] | ||
| 236 | |||
| 237 | ->permission() is called without BKL now. Grab it on entry, drop upon | ||
| 238 | return - that will guarantee the same locking you used to have. If | ||
| 239 | your method or its parts do not need BKL - better yet, now you can | ||
| 240 | shift lock_kernel() and unlock_kernel() so that they would protect | ||
| 241 | exactly what needs to be protected. | ||
| 242 | |||
| 243 | --- | ||
| 244 | [mandatory] | ||
| 245 | |||
| 246 | ->statfs() is now called without BKL held. BKL should have been | ||
| 247 | shifted into individual fs sb_op functions where it's not clear that | ||
| 248 | it's safe to remove it. If you don't need it, remove it. | ||
| 249 | |||
| 250 | --- | ||
| 251 | [mandatory] | ||
| 252 | |||
| 253 | is_read_only() is gone; use bdev_read_only() instead. | ||
| 254 | |||
| 255 | --- | ||
| 256 | [mandatory] | ||
| 257 | |||
| 258 | destroy_buffers() is gone; use invalidate_bdev(). | ||
| 259 | |||
| 260 | --- | ||
| 261 | [mandatory] | ||
| 262 | |||
| 263 | fsync_dev() is gone; use fsync_bdev(). NOTE: lvm breakage is | ||
| 264 | deliberate; as soon as struct block_device * is propagated in a reasonable | ||
| 265 | way by that code fixing will become trivial; until then nothing can be | ||
| 266 | done. | ||
