diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /Documentation/filesystems/Locking | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'Documentation/filesystems/Locking')
-rw-r--r-- | Documentation/filesystems/Locking | 270 |
1 files changed, 139 insertions, 131 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 2db4283efa8d..57d827d6071d 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -9,24 +9,30 @@ be able to use diff(1). | |||
9 | 9 | ||
10 | --------------------------- dentry_operations -------------------------- | 10 | --------------------------- dentry_operations -------------------------- |
11 | prototypes: | 11 | prototypes: |
12 | int (*d_revalidate)(struct dentry *, int); | 12 | int (*d_revalidate)(struct dentry *, struct nameidata *); |
13 | int (*d_hash) (struct dentry *, struct qstr *); | 13 | int (*d_hash)(const struct dentry *, const struct inode *, |
14 | int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); | 14 | struct qstr *); |
15 | int (*d_compare)(const struct dentry *, const struct inode *, | ||
16 | const struct dentry *, const struct inode *, | ||
17 | unsigned int, const char *, const struct qstr *); | ||
15 | int (*d_delete)(struct dentry *); | 18 | int (*d_delete)(struct dentry *); |
16 | void (*d_release)(struct dentry *); | 19 | void (*d_release)(struct dentry *); |
17 | void (*d_iput)(struct dentry *, struct inode *); | 20 | void (*d_iput)(struct dentry *, struct inode *); |
18 | char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); | 21 | char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); |
22 | struct vfsmount *(*d_automount)(struct path *path); | ||
23 | int (*d_manage)(struct dentry *, bool); | ||
19 | 24 | ||
20 | locking rules: | 25 | locking rules: |
21 | none have BKL | 26 | rename_lock ->d_lock may block rcu-walk |
22 | dcache_lock rename_lock ->d_lock may block | 27 | d_revalidate: no no yes (ref-walk) maybe |
23 | d_revalidate: no no no yes | 28 | d_hash no no no maybe |
24 | d_hash no no no yes | 29 | d_compare: yes no no maybe |
25 | d_compare: no yes no no | 30 | d_delete: no yes no no |
26 | d_delete: yes no yes no | 31 | d_release: no no yes no |
27 | d_release: no no no yes | 32 | d_iput: no no yes no |
28 | d_iput: no no no yes | ||
29 | d_dname: no no no no | 33 | d_dname: no no no no |
34 | d_automount: no no yes no | ||
35 | d_manage: no no yes (ref-walk) maybe | ||
30 | 36 | ||
31 | --------------------------- inode_operations --------------------------- | 37 | --------------------------- inode_operations --------------------------- |
32 | prototypes: | 38 | prototypes: |
@@ -42,18 +48,22 @@ ata *); | |||
42 | int (*rename) (struct inode *, struct dentry *, | 48 | int (*rename) (struct inode *, struct dentry *, |
43 | struct inode *, struct dentry *); | 49 | struct inode *, struct dentry *); |
44 | int (*readlink) (struct dentry *, char __user *,int); | 50 | int (*readlink) (struct dentry *, char __user *,int); |
45 | int (*follow_link) (struct dentry *, struct nameidata *); | 51 | void * (*follow_link) (struct dentry *, struct nameidata *); |
52 | void (*put_link) (struct dentry *, struct nameidata *, void *); | ||
46 | void (*truncate) (struct inode *); | 53 | void (*truncate) (struct inode *); |
47 | int (*permission) (struct inode *, int, struct nameidata *); | 54 | int (*permission) (struct inode *, int, unsigned int); |
55 | int (*check_acl)(struct inode *, int, unsigned int); | ||
48 | int (*setattr) (struct dentry *, struct iattr *); | 56 | int (*setattr) (struct dentry *, struct iattr *); |
49 | int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); | 57 | int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); |
50 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); | 58 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); |
51 | ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); | 59 | ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); |
52 | ssize_t (*listxattr) (struct dentry *, char *, size_t); | 60 | ssize_t (*listxattr) (struct dentry *, char *, size_t); |
53 | int (*removexattr) (struct dentry *, const char *); | 61 | int (*removexattr) (struct dentry *, const char *); |
62 | void (*truncate_range)(struct inode *, loff_t, loff_t); | ||
63 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); | ||
54 | 64 | ||
55 | locking rules: | 65 | locking rules: |
56 | all may block, none have BKL | 66 | all may block |
57 | i_mutex(inode) | 67 | i_mutex(inode) |
58 | lookup: yes | 68 | lookup: yes |
59 | create: yes | 69 | create: yes |
@@ -66,19 +76,23 @@ rmdir: yes (both) (see below) | |||
66 | rename: yes (all) (see below) | 76 | rename: yes (all) (see below) |
67 | readlink: no | 77 | readlink: no |
68 | follow_link: no | 78 | follow_link: no |
79 | put_link: no | ||
69 | truncate: yes (see below) | 80 | truncate: yes (see below) |
70 | setattr: yes | 81 | setattr: yes |
71 | permission: no | 82 | permission: no (may not block if called in rcu-walk mode) |
83 | check_acl: no | ||
72 | getattr: no | 84 | getattr: no |
73 | setxattr: yes | 85 | setxattr: yes |
74 | getxattr: no | 86 | getxattr: no |
75 | listxattr: no | 87 | listxattr: no |
76 | removexattr: yes | 88 | removexattr: yes |
89 | truncate_range: yes | ||
90 | fiemap: no | ||
77 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on | 91 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on |
78 | victim. | 92 | victim. |
79 | cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. | 93 | cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. |
80 | ->truncate() is never called directly - it's a callback, not a | 94 | ->truncate() is never called directly - it's a callback, not a |
81 | method. It's called by vmtruncate() - library function normally used by | 95 | method. It's called by vmtruncate() - deprecated library function used by |
82 | ->setattr(). Locking information above applies to that call (i.e. is | 96 | ->setattr(). Locking information above applies to that call (i.e. is |
83 | inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been | 97 | inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been |
84 | passed). | 98 | passed). |
@@ -90,8 +104,8 @@ of the locking scheme for directory operations. | |||
90 | prototypes: | 104 | prototypes: |
91 | struct inode *(*alloc_inode)(struct super_block *sb); | 105 | struct inode *(*alloc_inode)(struct super_block *sb); |
92 | void (*destroy_inode)(struct inode *); | 106 | void (*destroy_inode)(struct inode *); |
93 | void (*dirty_inode) (struct inode *); | 107 | void (*dirty_inode) (struct inode *, int flags); |
94 | int (*write_inode) (struct inode *, int); | 108 | int (*write_inode) (struct inode *, struct writeback_control *wbc); |
95 | int (*drop_inode) (struct inode *); | 109 | int (*drop_inode) (struct inode *); |
96 | void (*evict_inode) (struct inode *); | 110 | void (*evict_inode) (struct inode *); |
97 | void (*put_super) (struct super_block *); | 111 | void (*put_super) (struct super_block *); |
@@ -105,16 +119,16 @@ prototypes: | |||
105 | int (*show_options)(struct seq_file *, struct vfsmount *); | 119 | int (*show_options)(struct seq_file *, struct vfsmount *); |
106 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | 120 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); |
107 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 121 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
122 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); | ||
108 | 123 | ||
109 | locking rules: | 124 | locking rules: |
110 | All may block [not true, see below] | 125 | All may block [not true, see below] |
111 | None have BKL | ||
112 | s_umount | 126 | s_umount |
113 | alloc_inode: | 127 | alloc_inode: |
114 | destroy_inode: | 128 | destroy_inode: |
115 | dirty_inode: (must not sleep) | 129 | dirty_inode: |
116 | write_inode: | 130 | write_inode: |
117 | drop_inode: !!!inode_lock!!! | 131 | drop_inode: !!!inode->i_lock!!! |
118 | evict_inode: | 132 | evict_inode: |
119 | put_super: write | 133 | put_super: write |
120 | write_super: read | 134 | write_super: read |
@@ -127,6 +141,7 @@ umount_begin: no | |||
127 | show_options: no (namespace_sem) | 141 | show_options: no (namespace_sem) |
128 | quota_read: no (see below) | 142 | quota_read: no (see below) |
129 | quota_write: no (see below) | 143 | quota_write: no (see below) |
144 | bdev_try_to_free_page: no (see below) | ||
130 | 145 | ||
131 | ->statfs() has s_umount (shared) when called by ustat(2) (native or | 146 | ->statfs() has s_umount (shared) when called by ustat(2) (native or |
132 | compat), but that's an accident of bad API; s_umount is used to pin | 147 | compat), but that's an accident of bad API; s_umount is used to pin |
@@ -139,19 +154,23 @@ be the only ones operating on the quota file by the quota code (via | |||
139 | dqio_sem) (unless an admin really wants to screw up something and | 154 | dqio_sem) (unless an admin really wants to screw up something and |
140 | writes to quota files with quotas on). For other details about locking | 155 | writes to quota files with quotas on). For other details about locking |
141 | see also dquot_operations section. | 156 | see also dquot_operations section. |
157 | ->bdev_try_to_free_page is called from the ->releasepage handler of | ||
158 | the block device inode. See there for more details. | ||
142 | 159 | ||
143 | --------------------------- file_system_type --------------------------- | 160 | --------------------------- file_system_type --------------------------- |
144 | prototypes: | 161 | prototypes: |
145 | int (*get_sb) (struct file_system_type *, int, | 162 | int (*get_sb) (struct file_system_type *, int, |
146 | const char *, void *, struct vfsmount *); | 163 | const char *, void *, struct vfsmount *); |
164 | struct dentry *(*mount) (struct file_system_type *, int, | ||
165 | const char *, void *); | ||
147 | void (*kill_sb) (struct super_block *); | 166 | void (*kill_sb) (struct super_block *); |
148 | locking rules: | 167 | locking rules: |
149 | may block BKL | 168 | may block |
150 | get_sb yes no | 169 | mount yes |
151 | kill_sb yes no | 170 | kill_sb yes |
152 | 171 | ||
153 | ->get_sb() returns error or 0 with locked superblock attached to the vfsmount | 172 | ->mount() returns ERR_PTR or the root dentry; its superblock should be locked |
154 | (exclusive on ->s_umount). | 173 | on return. |
155 | ->kill_sb() takes a write-locked superblock, does all shutdown work on it, | 174 | ->kill_sb() takes a write-locked superblock, does all shutdown work on it, |
156 | unlocks and drops the reference. | 175 | unlocks and drops the reference. |
157 | 176 | ||
@@ -173,28 +192,38 @@ prototypes: | |||
173 | sector_t (*bmap)(struct address_space *, sector_t); | 192 | sector_t (*bmap)(struct address_space *, sector_t); |
174 | int (*invalidatepage) (struct page *, unsigned long); | 193 | int (*invalidatepage) (struct page *, unsigned long); |
175 | int (*releasepage) (struct page *, int); | 194 | int (*releasepage) (struct page *, int); |
195 | void (*freepage)(struct page *); | ||
176 | int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, | 196 | int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, |
177 | loff_t offset, unsigned long nr_segs); | 197 | loff_t offset, unsigned long nr_segs); |
178 | int (*launder_page) (struct page *); | 198 | int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, |
199 | unsigned long *); | ||
200 | int (*migratepage)(struct address_space *, struct page *, struct page *); | ||
201 | int (*launder_page)(struct page *); | ||
202 | int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long); | ||
203 | int (*error_remove_page)(struct address_space *, struct page *); | ||
179 | 204 | ||
180 | locking rules: | 205 | locking rules: |
181 | All except set_page_dirty may block | 206 | All except set_page_dirty and freepage may block |
182 | 207 | ||
183 | BKL PageLocked(page) i_mutex | 208 | PageLocked(page) i_mutex |
184 | writepage: no yes, unlocks (see below) | 209 | writepage: yes, unlocks (see below) |
185 | readpage: no yes, unlocks | 210 | readpage: yes, unlocks |
186 | sync_page: no maybe | 211 | sync_page: maybe |
187 | writepages: no | 212 | writepages: |
188 | set_page_dirty no no | 213 | set_page_dirty no |
189 | readpages: no | 214 | readpages: |
190 | write_begin: no locks the page yes | 215 | write_begin: locks the page yes |
191 | write_end: no yes, unlocks yes | 216 | write_end: yes, unlocks yes |
192 | perform_write: no n/a yes | 217 | bmap: |
193 | bmap: no | 218 | invalidatepage: yes |
194 | invalidatepage: no yes | 219 | releasepage: yes |
195 | releasepage: no yes | 220 | freepage: yes |
196 | direct_IO: no | 221 | direct_IO: |
197 | launder_page: no yes | 222 | get_xip_mem: maybe |
223 | migratepage: yes (both) | ||
224 | launder_page: yes | ||
225 | is_partially_uptodate: yes | ||
226 | error_remove_page: yes | ||
198 | 227 | ||
199 | ->write_begin(), ->write_end(), ->sync_page() and ->readpage() | 228 | ->write_begin(), ->write_end(), ->sync_page() and ->readpage() |
200 | may be called from the request handler (/dev/loop). | 229 | may be called from the request handler (/dev/loop). |
@@ -274,9 +303,8 @@ under spinlock (it cannot block) and is sometimes called with the page | |||
274 | not locked. | 303 | not locked. |
275 | 304 | ||
276 | ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some | 305 | ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some |
277 | filesystems and by the swapper. The latter will eventually go away. All | 306 | filesystems and by the swapper. The latter will eventually go away. Please, |
278 | instances do not actually need the BKL. Please, keep it that way and don't | 307 | keep it that way and don't breed new callers. |
279 | breed new callers. | ||
280 | 308 | ||
281 | ->invalidatepage() is called when the filesystem must attempt to drop | 309 | ->invalidatepage() is called when the filesystem must attempt to drop |
282 | some or all of the buffers from the page when it is being truncated. It | 310 | some or all of the buffers from the page when it is being truncated. It |
@@ -288,55 +316,44 @@ buffers from the page in preparation for freeing it. It returns zero to | |||
288 | indicate that the buffers are (or may be) freeable. If ->releasepage is zero, | 316 | indicate that the buffers are (or may be) freeable. If ->releasepage is zero, |
289 | the kernel assumes that the fs has no private interest in the buffers. | 317 | the kernel assumes that the fs has no private interest in the buffers. |
290 | 318 | ||
319 | ->freepage() is called when the kernel is done dropping the page | ||
320 | from the page cache. | ||
321 | |||
291 | ->launder_page() may be called prior to releasing a page if | 322 | ->launder_page() may be called prior to releasing a page if |
292 | it is still found to be dirty. It returns zero if the page was successfully | 323 | it is still found to be dirty. It returns zero if the page was successfully |
293 | cleaned, or an error value if not. Note that in order to prevent the page | 324 | cleaned, or an error value if not. Note that in order to prevent the page |
294 | getting mapped back in and redirtied, it needs to be kept locked | 325 | getting mapped back in and redirtied, it needs to be kept locked |
295 | across the entire operation. | 326 | across the entire operation. |
296 | 327 | ||
297 | Note: currently almost all instances of address_space methods are | ||
298 | using BKL for internal serialization and that's one of the worst sources | ||
299 | of contention. Normally they are calling library functions (in fs/buffer.c) | ||
300 | and pass foo_get_block() as a callback (on local block-based filesystems, | ||
301 | indeed). BKL is not needed for library stuff and is usually taken by | ||
302 | foo_get_block(). It's an overkill, since block bitmaps can be protected by | ||
303 | internal fs locking and real critical areas are much smaller than the areas | ||
304 | filesystems protect now. | ||
305 | |||
306 | ----------------------- file_lock_operations ------------------------------ | 328 | ----------------------- file_lock_operations ------------------------------ |
307 | prototypes: | 329 | prototypes: |
308 | void (*fl_insert)(struct file_lock *); /* lock insertion callback */ | ||
309 | void (*fl_remove)(struct file_lock *); /* lock removal callback */ | ||
310 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | 330 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); |
311 | void (*fl_release_private)(struct file_lock *); | 331 | void (*fl_release_private)(struct file_lock *); |
312 | 332 | ||
313 | 333 | ||
314 | locking rules: | 334 | locking rules: |
315 | BKL may block | 335 | file_lock_lock may block |
316 | fl_insert: yes no | 336 | fl_copy_lock: yes no |
317 | fl_remove: yes no | 337 | fl_release_private: maybe no |
318 | fl_copy_lock: yes no | ||
319 | fl_release_private: yes yes | ||
320 | 338 | ||
321 | ----------------------- lock_manager_operations --------------------------- | 339 | ----------------------- lock_manager_operations --------------------------- |
322 | prototypes: | 340 | prototypes: |
323 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); | 341 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); |
324 | void (*fl_notify)(struct file_lock *); /* unblock callback */ | 342 | void (*fl_notify)(struct file_lock *); /* unblock callback */ |
325 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | 343 | int (*fl_grant)(struct file_lock *, struct file_lock *, int); |
326 | void (*fl_release_private)(struct file_lock *); | 344 | void (*fl_release_private)(struct file_lock *); |
327 | void (*fl_break)(struct file_lock *); /* break_lease callback */ | 345 | void (*fl_break)(struct file_lock *); /* break_lease callback */ |
346 | int (*fl_change)(struct file_lock **, int); | ||
328 | 347 | ||
329 | locking rules: | 348 | locking rules: |
330 | BKL may block | 349 | file_lock_lock may block |
331 | fl_compare_owner: yes no | 350 | fl_compare_owner: yes no |
332 | fl_notify: yes no | 351 | fl_notify: yes no |
333 | fl_copy_lock: yes no | 352 | fl_grant: no no |
334 | fl_release_private: yes yes | 353 | fl_release_private: maybe no |
335 | fl_break: yes no | 354 | fl_break: yes no |
336 | 355 | fl_change yes no | |
337 | Currently only NFSD and NLM provide instances of this class. None of | 356 | |
338 | them block. If you have out-of-tree instances - please, show up. Locking | ||
339 | in that area will change. | ||
340 | --------------------------- buffer_head ----------------------------------- | 357 | --------------------------- buffer_head ----------------------------------- |
341 | prototypes: | 358 | prototypes: |
342 | void (*b_end_io)(struct buffer_head *bh, int uptodate); | 359 | void (*b_end_io)(struct buffer_head *bh, int uptodate); |
@@ -349,21 +366,36 @@ call this method upon the IO completion. | |||
349 | 366 | ||
350 | --------------------------- block_device_operations ----------------------- | 367 | --------------------------- block_device_operations ----------------------- |
351 | prototypes: | 368 | prototypes: |
352 | int (*open) (struct inode *, struct file *); | 369 | int (*open) (struct block_device *, fmode_t); |
353 | int (*release) (struct inode *, struct file *); | 370 | int (*release) (struct gendisk *, fmode_t); |
354 | int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); | 371 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
372 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | ||
373 | int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); | ||
355 | int (*media_changed) (struct gendisk *); | 374 | int (*media_changed) (struct gendisk *); |
375 | void (*unlock_native_capacity) (struct gendisk *); | ||
356 | int (*revalidate_disk) (struct gendisk *); | 376 | int (*revalidate_disk) (struct gendisk *); |
377 | int (*getgeo)(struct block_device *, struct hd_geometry *); | ||
378 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | ||
357 | 379 | ||
358 | locking rules: | 380 | locking rules: |
359 | BKL bd_sem | 381 | bd_mutex |
360 | open: yes yes | 382 | open: yes |
361 | release: yes yes | 383 | release: yes |
362 | ioctl: yes no | 384 | ioctl: no |
363 | media_changed: no no | 385 | compat_ioctl: no |
364 | revalidate_disk: no no | 386 | direct_access: no |
387 | media_changed: no | ||
388 | unlock_native_capacity: no | ||
389 | revalidate_disk: no | ||
390 | getgeo: no | ||
391 | swap_slot_free_notify: no (see below) | ||
392 | |||
393 | media_changed, unlock_native_capacity and revalidate_disk are called only from | ||
394 | check_disk_change(). | ||
395 | |||
396 | swap_slot_free_notify is called with swap_lock and sometimes the page lock | ||
397 | held. | ||
365 | 398 | ||
366 | The last two are called only from check_disk_change(). | ||
367 | 399 | ||
368 | --------------------------- file_operations ------------------------------- | 400 | --------------------------- file_operations ------------------------------- |
369 | prototypes: | 401 | prototypes: |
@@ -395,34 +427,22 @@ prototypes: | |||
395 | unsigned long (*get_unmapped_area)(struct file *, unsigned long, | 427 | unsigned long (*get_unmapped_area)(struct file *, unsigned long, |
396 | unsigned long, unsigned long, unsigned long); | 428 | unsigned long, unsigned long, unsigned long); |
397 | int (*check_flags)(int); | 429 | int (*check_flags)(int); |
430 | int (*flock) (struct file *, int, struct file_lock *); | ||
431 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, | ||
432 | size_t, unsigned int); | ||
433 | ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, | ||
434 | size_t, unsigned int); | ||
435 | int (*setlease)(struct file *, long, struct file_lock **); | ||
436 | long (*fallocate)(struct file *, int, loff_t, loff_t); | ||
398 | }; | 437 | }; |
399 | 438 | ||
400 | locking rules: | 439 | locking rules: |
401 | All may block. | 440 | All may block except for ->setlease. |
402 | BKL | 441 | No VFS locks held on entry except for ->fsync and ->setlease. |
403 | llseek: no (see below) | 442 | |
404 | read: no | 443 | ->fsync() has i_mutex on inode. |
405 | aio_read: no | 444 | |
406 | write: no | 445 | ->setlease has the file_list_lock held and must not sleep. |
407 | aio_write: no | ||
408 | readdir: no | ||
409 | poll: no | ||
410 | unlocked_ioctl: no | ||
411 | compat_ioctl: no | ||
412 | mmap: no | ||
413 | open: no | ||
414 | flush: no | ||
415 | release: no | ||
416 | fsync: no (see below) | ||
417 | aio_fsync: no | ||
418 | fasync: no | ||
419 | lock: yes | ||
420 | readv: no | ||
421 | writev: no | ||
422 | sendfile: no | ||
423 | sendpage: no | ||
424 | get_unmapped_area: no | ||
425 | check_flags: no | ||
426 | 446 | ||
427 | ->llseek() locking has moved from llseek to the individual llseek | 447 | ->llseek() locking has moved from llseek to the individual llseek |
428 | implementations. If your fs is not using generic_file_llseek, you | 448 | implementations. If your fs is not using generic_file_llseek, you |
@@ -432,17 +452,10 @@ mutex or just to use i_size_read() instead. | |||
432 | Note: this does not protect the file->f_pos against concurrent modifications | 452 | Note: this does not protect the file->f_pos against concurrent modifications |
433 | since this is something the userspace has to take care of. | 453 | since this is something the userspace has to take care of. |
434 | 454 | ||
435 | Note: ext2_release() was *the* source of contention on fs-intensive | 455 | ->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags. |
436 | loads and dropping BKL on ->release() helps to get rid of that (we still | 456 | Most instances call fasync_helper(), which does that maintenance, so it's |
437 | grab BKL for cases when we close a file that had been opened r/w, but that | 457 | not normally something one needs to worry about. Return values > 0 will be |
438 | can and should be done using the internal locking with smaller critical areas). | 458 | mapped to zero in the VFS layer. |
439 | Current worst offender is ext2_get_block()... | ||
440 | |||
441 | ->fasync() is called without BKL protection, and is responsible for | ||
442 | maintaining the FASYNC bit in filp->f_flags. Most instances call | ||
443 | fasync_helper(), which does that maintenance, so it's not normally | ||
444 | something one needs to worry about. Return values > 0 will be mapped to | ||
445 | zero in the VFS layer. | ||
446 | 459 | ||
447 | ->readdir() and ->ioctl() on directories must be changed. Ideally we would | 460 | ->readdir() and ->ioctl() on directories must be changed. Ideally we would |
448 | move ->readdir() to inode_operations and use a separate method for directory | 461 | move ->readdir() to inode_operations and use a separate method for directory |
@@ -453,8 +466,6 @@ components. And there are other reasons why the current interface is a mess... | |||
453 | ->read on directories probably must go away - we should just enforce -EISDIR | 466 | ->read on directories probably must go away - we should just enforce -EISDIR |
454 | in sys_read() and friends. | 467 | in sys_read() and friends. |
455 | 468 | ||
456 | ->fsync() has i_mutex on inode. | ||
457 | |||
458 | --------------------------- dquot_operations ------------------------------- | 469 | --------------------------- dquot_operations ------------------------------- |
459 | prototypes: | 470 | prototypes: |
460 | int (*write_dquot) (struct dquot *); | 471 | int (*write_dquot) (struct dquot *); |
@@ -489,12 +500,12 @@ prototypes: | |||
489 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); | 500 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); |
490 | 501 | ||
491 | locking rules: | 502 | locking rules: |
492 | BKL mmap_sem PageLocked(page) | 503 | mmap_sem PageLocked(page) |
493 | open: no yes | 504 | open: yes |
494 | close: no yes | 505 | close: yes |
495 | fault: no yes can return with page locked | 506 | fault: yes can return with page locked |
496 | page_mkwrite: no yes can return with page locked | 507 | page_mkwrite: yes can return with page locked |
497 | access: no yes | 508 | access: yes |
498 | 509 | ||
499 | ->fault() is called when a previously not present pte is about | 510 | ->fault() is called when a previously not present pte is about |
500 | to be faulted in. The filesystem must find and return the page associated | 511 | to be faulted in. The filesystem must find and return the page associated |
@@ -521,6 +532,3 @@ VM_IO | VM_PFNMAP VMAs. | |||
521 | 532 | ||
522 | (if you break something or notice that it is broken and do not fix it yourself | 533 | (if you break something or notice that it is broken and do not fix it yourself |
523 | - at least put it here) | 534 | - at least put it here) |
524 | |||
525 | ipc/shm.c::shm_delete() - may need BKL. | ||
526 | ->read() and ->write() in many drivers are (probably) missing BKL. | ||