diff options
author | Mauro Carvalho Chehab <mchehab+samsung@kernel.org> | 2019-07-26 08:51:27 -0400 |
---|---|---|
committer | Jonathan Corbet <corbet@lwn.net> | 2019-07-31 15:31:05 -0400 |
commit | ec23eb54fbc7a07405d416d77e8115e575ce3adc (patch) | |
tree | 8cfd014d305628f35f42f75e24b8f5db46537ad5 | |
parent | 5a5e045bb3b839405e3a58b02a3333d33812214c (diff) |
docs: fs: convert docs without extension to ReST
There are 3 remaining files without an extension inside the fs docs
dir.
Manually convert them to ReST.
In the case of the nfs/exporting.rst file, as the nfs docs
aren't ported yet, I opted to convert and add a :orphan: there,
which should be removed when it gets added into an nfs-specific
part of the fs documentation.
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
-rw-r--r-- | Documentation/filesystems/directory-locking.rst (renamed from Documentation/filesystems/directory-locking) | 40 | ||||
-rw-r--r-- | Documentation/filesystems/index.rst | 2 | ||||
-rw-r--r-- | Documentation/filesystems/locking.rst (renamed from Documentation/filesystems/Locking) | 259 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/exporting.rst (renamed from Documentation/filesystems/nfs/Exporting) | 31 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.rst | 2 | ||||
-rw-r--r-- | fs/cifs/export.c | 2 | ||||
-rw-r--r-- | fs/exportfs/expfs.c | 2 | ||||
-rw-r--r-- | fs/isofs/export.c | 2 | ||||
-rw-r--r-- | fs/orangefs/file.c | 2 | ||||
-rw-r--r-- | include/linux/dcache.h | 2 | ||||
-rw-r--r-- | include/linux/exportfs.h | 2 |
11 files changed, 226 insertions, 120 deletions
diff --git a/Documentation/filesystems/directory-locking b/Documentation/filesystems/directory-locking.rst index 4e32cb961e5b..de12016ee419 100644 --- a/Documentation/filesystems/directory-locking +++ b/Documentation/filesystems/directory-locking.rst | |||
@@ -1,12 +1,17 @@ | |||
1 | Locking scheme used for directory operations is based on two | 1 | ================= |
2 | Directory Locking | ||
3 | ================= | ||
4 | |||
5 | |||
6 | Locking scheme used for directory operations is based on two | ||
2 | kinds of locks - per-inode (->i_rwsem) and per-filesystem | 7 | kinds of locks - per-inode (->i_rwsem) and per-filesystem |
3 | (->s_vfs_rename_mutex). | 8 | (->s_vfs_rename_mutex). |
4 | 9 | ||
5 | When taking the i_rwsem on multiple non-directory objects, we | 10 | When taking the i_rwsem on multiple non-directory objects, we |
6 | always acquire the locks in order by increasing address. We'll call | 11 | always acquire the locks in order by increasing address. We'll call |
7 | that "inode pointer" order in the following. | 12 | that "inode pointer" order in the following. |
8 | 13 | ||
9 | For our purposes all operations fall in 5 classes: | 14 | For our purposes all operations fall in 5 classes: |
10 | 15 | ||
11 | 1) read access. Locking rules: caller locks directory we are accessing. | 16 | 1) read access. Locking rules: caller locks directory we are accessing. |
12 | The lock is taken shared. | 17 | The lock is taken shared. |
@@ -27,25 +32,29 @@ NB: we might get away with locking the source (and target in exchange | |||
27 | case) shared. | 32 | case) shared. |
28 | 33 | ||
29 | 5) link creation. Locking rules: | 34 | 5) link creation. Locking rules: |
35 | |||
30 | * lock parent | 36 | * lock parent |
31 | * check that source is not a directory | 37 | * check that source is not a directory |
32 | * lock source | 38 | * lock source |
33 | * call the method. | 39 | * call the method. |
40 | |||
34 | All locks are exclusive. | 41 | All locks are exclusive. |
35 | 42 | ||
36 | 6) cross-directory rename. The trickiest in the whole bunch. Locking | 43 | 6) cross-directory rename. The trickiest in the whole bunch. Locking |
37 | rules: | 44 | rules: |
45 | |||
38 | * lock the filesystem | 46 | * lock the filesystem |
39 | * lock parents in "ancestors first" order. | 47 | * lock parents in "ancestors first" order. |
40 | * find source and target. | 48 | * find source and target. |
41 | * if old parent is equal to or is a descendent of target | 49 | * if old parent is equal to or is a descendent of target |
42 | fail with -ENOTEMPTY | 50 | fail with -ENOTEMPTY |
43 | * if new parent is equal to or is a descendent of source | 51 | * if new parent is equal to or is a descendent of source |
44 | fail with -ELOOP | 52 | fail with -ELOOP |
45 | * If it's an exchange, lock both the source and the target. | 53 | * If it's an exchange, lock both the source and the target. |
46 | * If the target exists, lock it. If the source is a non-directory, | 54 | * If the target exists, lock it. If the source is a non-directory, |
47 | lock it. If we need to lock both, do so in inode pointer order. | 55 | lock it. If we need to lock both, do so in inode pointer order. |
48 | * call the method. | 56 | * call the method. |
57 | |||
49 | All ->i_rwsem are taken exclusive. Again, we might get away with locking | 58 | All ->i_rwsem are taken exclusive. Again, we might get away with locking |
50 | the source (and target in exchange case) shared. | 59 | the source (and target in exchange case) shared. |
51 | 60 | ||
@@ -54,10 +63,11 @@ read, modified or removed by method will be locked by caller. | |||
54 | 63 | ||
55 | 64 | ||
56 | If no directory is its own ancestor, the scheme above is deadlock-free. | 65 | If no directory is its own ancestor, the scheme above is deadlock-free. |
66 | |||
57 | Proof: | 67 | Proof: |
58 | 68 | ||
59 | First of all, at any moment we have a partial ordering of the | 69 | First of all, at any moment we have a partial ordering of the |
60 | objects - A < B iff A is an ancestor of B. | 70 | objects - A < B iff A is an ancestor of B. |
61 | 71 | ||
62 | That ordering can change. However, the following is true: | 72 | That ordering can change. However, the following is true: |
63 | 73 | ||
@@ -77,32 +87,32 @@ objects - A < B iff A is an ancestor of B. | |||
77 | non-directory object, except renames, which take locks on source and | 87 | non-directory object, except renames, which take locks on source and |
78 | target in inode pointer order in the case they are not directories.) | 88 | target in inode pointer order in the case they are not directories.) |
79 | 89 | ||
80 | Now consider the minimal deadlock. Each process is blocked on | 90 | Now consider the minimal deadlock. Each process is blocked on |
81 | attempt to acquire some lock and already holds at least one lock. Let's | 91 | attempt to acquire some lock and already holds at least one lock. Let's |
82 | consider the set of contended locks. First of all, filesystem lock is | 92 | consider the set of contended locks. First of all, filesystem lock is |
83 | not contended, since any process blocked on it is not holding any locks. | 93 | not contended, since any process blocked on it is not holding any locks. |
84 | Thus all processes are blocked on ->i_rwsem. | 94 | Thus all processes are blocked on ->i_rwsem. |
85 | 95 | ||
86 | By (3), any process holding a non-directory lock can only be | 96 | By (3), any process holding a non-directory lock can only be |
87 | waiting on another non-directory lock with a larger address. Therefore | 97 | waiting on another non-directory lock with a larger address. Therefore |
88 | the process holding the "largest" such lock can always make progress, and | 98 | the process holding the "largest" such lock can always make progress, and |
89 | non-directory objects are not included in the set of contended locks. | 99 | non-directory objects are not included in the set of contended locks. |
90 | 100 | ||
91 | Thus link creation can't be a part of deadlock - it can't be | 101 | Thus link creation can't be a part of deadlock - it can't be |
92 | blocked on source and it means that it doesn't hold any locks. | 102 | blocked on source and it means that it doesn't hold any locks. |
93 | 103 | ||
94 | Any contended object is either held by cross-directory rename or | 104 | Any contended object is either held by cross-directory rename or |
95 | has a child that is also contended. Indeed, suppose that it is held by | 105 | has a child that is also contended. Indeed, suppose that it is held by |
96 | operation other than cross-directory rename. Then the lock this operation | 106 | operation other than cross-directory rename. Then the lock this operation |
97 | is blocked on belongs to child of that object due to (1). | 107 | is blocked on belongs to child of that object due to (1). |
98 | 108 | ||
99 | It means that one of the operations is cross-directory rename. | 109 | It means that one of the operations is cross-directory rename. |
100 | Otherwise the set of contended objects would be infinite - each of them | 110 | Otherwise the set of contended objects would be infinite - each of them |
101 | would have a contended child and we had assumed that no object is its | 111 | would have a contended child and we had assumed that no object is its |
102 | own descendent. Moreover, there is exactly one cross-directory rename | 112 | own descendent. Moreover, there is exactly one cross-directory rename |
103 | (see above). | 113 | (see above). |
104 | 114 | ||
105 | Consider the object blocking the cross-directory rename. One | 115 | Consider the object blocking the cross-directory rename. One |
106 | of its descendents is locked by cross-directory rename (otherwise we | 116 | of its descendents is locked by cross-directory rename (otherwise we |
107 | would again have an infinite set of contended objects). But that | 117 | would again have an infinite set of contended objects). But that |
108 | means that cross-directory rename is taking locks out of order. Due | 118 | means that cross-directory rename is taking locks out of order. Due |
@@ -112,7 +122,7 @@ try to acquire lock on descendent before the lock on ancestor. | |||
112 | Contradiction. I.e. deadlock is impossible. Q.E.D. | 122 | Contradiction. I.e. deadlock is impossible. Q.E.D. |
113 | 123 | ||
114 | 124 | ||
115 | These operations are guaranteed to avoid loop creation. Indeed, | 125 | These operations are guaranteed to avoid loop creation. Indeed, |
116 | the only operation that could introduce loops is cross-directory rename. | 126 | the only operation that could introduce loops is cross-directory rename. |
117 | Since the only new (parent, child) pair added by rename() is (new parent, | 127 | Since the only new (parent, child) pair added by rename() is (new parent, |
118 | source), such loop would have to contain these objects and the rest of it | 128 | source), such loop would have to contain these objects and the rest of it |
@@ -123,13 +133,13 @@ new parent had been equal to or a descendent of source since the moment when | |||
123 | we had acquired filesystem lock and rename() would fail with -ELOOP in that | 133 | we had acquired filesystem lock and rename() would fail with -ELOOP in that |
124 | case. | 134 | case. |
125 | 135 | ||
126 | While this locking scheme works for arbitrary DAGs, it relies on | 136 | While this locking scheme works for arbitrary DAGs, it relies on |
127 | ability to check that directory is a descendent of another object. Current | 137 | ability to check that directory is a descendent of another object. Current |
128 | implementation assumes that directory graph is a tree. This assumption is | 138 | implementation assumes that directory graph is a tree. This assumption is |
129 | also preserved by all operations (cross-directory rename on a tree that would | 139 | also preserved by all operations (cross-directory rename on a tree that would |
130 | not introduce a cycle will leave it a tree and link() fails for directories). | 140 | not introduce a cycle will leave it a tree and link() fails for directories). |
131 | 141 | ||
132 | Notice that "directory" in the above == "anything that might have | 142 | Notice that "directory" in the above == "anything that might have |
133 | children", so if we are going to introduce hybrid objects we will need | 143 | children", so if we are going to introduce hybrid objects we will need |
134 | either to make sure that link(2) doesn't work for them or to make changes | 144 | either to make sure that link(2) doesn't work for them or to make changes |
135 | in is_subdir() that would make it work even in presence of such beasts. | 145 | in is_subdir() that would make it work even in presence of such beasts. |
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 2de2fe2ab078..08320c35d03b 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst | |||
@@ -20,6 +20,8 @@ algorithms work. | |||
20 | path-lookup | 20 | path-lookup |
21 | api-summary | 21 | api-summary |
22 | splice | 22 | splice |
23 | locking | ||
24 | directory-locking | ||
23 | 25 | ||
24 | Filesystem support layers | 26 | Filesystem support layers |
25 | ========================= | 27 | ========================= |
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/locking.rst index 204dd3ea36bb..fc3a0704553c 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/locking.rst | |||
@@ -1,14 +1,22 @@ | |||
1 | The text below describes the locking rules for VFS-related methods. | 1 | ======= |
2 | Locking | ||
3 | ======= | ||
4 | |||
5 | The text below describes the locking rules for VFS-related methods. | ||
2 | It is (believed to be) up-to-date. *Please*, if you change anything in | 6 | It is (believed to be) up-to-date. *Please*, if you change anything in |
3 | prototypes or locking protocols - update this file. And update the relevant | 7 | prototypes or locking protocols - update this file. And update the relevant |
4 | instances in the tree, don't leave that to maintainers of filesystems/devices/ | 8 | instances in the tree, don't leave that to maintainers of filesystems/devices/ |
5 | etc. At the very least, put the list of dubious cases in the end of this file. | 9 | etc. At the very least, put the list of dubious cases in the end of this file. |
6 | Don't turn it into log - maintainers of out-of-the-tree code are supposed to | 10 | Don't turn it into log - maintainers of out-of-the-tree code are supposed to |
7 | be able to use diff(1). | 11 | be able to use diff(1). |
8 | Thing currently missing here: socket operations. Alexey? | ||
9 | 12 | ||
10 | --------------------------- dentry_operations -------------------------- | 13 | Thing currently missing here: socket operations. Alexey? |
11 | prototypes: | 14 | |
15 | dentry_operations | ||
16 | ================= | ||
17 | |||
18 | prototypes:: | ||
19 | |||
12 | int (*d_revalidate)(struct dentry *, unsigned int); | 20 | int (*d_revalidate)(struct dentry *, unsigned int); |
13 | int (*d_weak_revalidate)(struct dentry *, unsigned int); | 21 | int (*d_weak_revalidate)(struct dentry *, unsigned int); |
14 | int (*d_hash)(const struct dentry *, struct qstr *); | 22 | int (*d_hash)(const struct dentry *, struct qstr *); |
@@ -24,23 +32,30 @@ prototypes: | |||
24 | struct dentry *(*d_real)(struct dentry *, const struct inode *); | 32 | struct dentry *(*d_real)(struct dentry *, const struct inode *); |
25 | 33 | ||
26 | locking rules: | 34 | locking rules: |
27 | rename_lock ->d_lock may block rcu-walk | 35 | |
28 | d_revalidate: no no yes (ref-walk) maybe | 36 | ================== =========== ======== ============== ======== |
29 | d_weak_revalidate:no no yes no | 37 | ops rename_lock ->d_lock may block rcu-walk |
30 | d_hash no no no maybe | 38 | ================== =========== ======== ============== ======== |
31 | d_compare: yes no no maybe | 39 | d_revalidate: no no yes (ref-walk) maybe |
32 | d_delete: no yes no no | 40 | d_weak_revalidate: no no yes no |
33 | d_init: no no yes no | 41 | d_hash no no no maybe |
34 | d_release: no no yes no | 42 | d_compare: yes no no maybe |
35 | d_prune: no yes no no | 43 | d_delete: no yes no no |
36 | d_iput: no no yes no | 44 | d_init: no no yes no |
37 | d_dname: no no no no | 45 | d_release: no no yes no |
38 | d_automount: no no yes no | 46 | d_prune: no yes no no |
39 | d_manage: no no yes (ref-walk) maybe | 47 | d_iput: no no yes no |
40 | d_real no no yes no | 48 | d_dname: no no no no |
41 | 49 | d_automount: no no yes no | |
42 | --------------------------- inode_operations --------------------------- | 50 | d_manage: no no yes (ref-walk) maybe |
43 | prototypes: | 51 | d_real no no yes no |
52 | ================== =========== ======== ============== ======== | ||
53 | |||
54 | inode_operations | ||
55 | ================ | ||
56 | |||
57 | prototypes:: | ||
58 | |||
44 | int (*create) (struct inode *,struct dentry *,umode_t, bool); | 59 | int (*create) (struct inode *,struct dentry *,umode_t, bool); |
45 | struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); | 60 | struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); |
46 | int (*link) (struct dentry *,struct inode *,struct dentry *); | 61 | int (*link) (struct dentry *,struct inode *,struct dentry *); |
@@ -68,7 +83,10 @@ prototypes: | |||
68 | 83 | ||
69 | locking rules: | 84 | locking rules: |
70 | all may block | 85 | all may block |
71 | i_rwsem(inode) | 86 | |
87 | ============ ============================================= | ||
88 | ops i_rwsem(inode) | ||
89 | ============ ============================================= | ||
72 | lookup: shared | 90 | lookup: shared |
73 | create: exclusive | 91 | create: exclusive |
74 | link: exclusive (both) | 92 | link: exclusive (both) |
@@ -89,17 +107,21 @@ fiemap: no | |||
89 | update_time: no | 107 | update_time: no |
90 | atomic_open: exclusive | 108 | atomic_open: exclusive |
91 | tmpfile: no | 109 | tmpfile: no |
110 | ============ ============================================= | ||
92 | 111 | ||
93 | 112 | ||
94 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem | 113 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem |
95 | exclusive on victim. | 114 | exclusive on victim. |
96 | cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. | 115 | cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. |
97 | 116 | ||
98 | See Documentation/filesystems/directory-locking for more detailed discussion | 117 | See Documentation/filesystems/directory-locking.rst for more detailed discussion |
99 | of the locking scheme for directory operations. | 118 | of the locking scheme for directory operations. |
100 | 119 | ||
101 | ----------------------- xattr_handler operations ----------------------- | 120 | xattr_handler operations |
102 | prototypes: | 121 | ======================== |
122 | |||
123 | prototypes:: | ||
124 | |||
103 | bool (*list)(struct dentry *dentry); | 125 | bool (*list)(struct dentry *dentry); |
104 | int (*get)(const struct xattr_handler *handler, struct dentry *dentry, | 126 | int (*get)(const struct xattr_handler *handler, struct dentry *dentry, |
105 | struct inode *inode, const char *name, void *buffer, | 127 | struct inode *inode, const char *name, void *buffer, |
@@ -110,13 +132,20 @@ prototypes: | |||
110 | 132 | ||
111 | locking rules: | 133 | locking rules: |
112 | all may block | 134 | all may block |
113 | i_rwsem(inode) | 135 | |
136 | ===== ============== | ||
137 | ops i_rwsem(inode) | ||
138 | ===== ============== | ||
114 | list: no | 139 | list: no |
115 | get: no | 140 | get: no |
116 | set: exclusive | 141 | set: exclusive |
142 | ===== ============== | ||
143 | |||
144 | super_operations | ||
145 | ================ | ||
146 | |||
147 | prototypes:: | ||
117 | 148 | ||
118 | --------------------------- super_operations --------------------------- | ||
119 | prototypes: | ||
120 | struct inode *(*alloc_inode)(struct super_block *sb); | 149 | struct inode *(*alloc_inode)(struct super_block *sb); |
121 | void (*free_inode)(struct inode *); | 150 | void (*free_inode)(struct inode *); |
122 | void (*destroy_inode)(struct inode *); | 151 | void (*destroy_inode)(struct inode *); |
@@ -138,7 +167,10 @@ prototypes: | |||
138 | 167 | ||
139 | locking rules: | 168 | locking rules: |
140 | All may block [not true, see below] | 169 | All may block [not true, see below] |
141 | s_umount | 170 | |
171 | ====================== ============ ======================== | ||
172 | ops s_umount note | ||
173 | ====================== ============ ======================== | ||
142 | alloc_inode: | 174 | alloc_inode: |
143 | free_inode: called from RCU callback | 175 | free_inode: called from RCU callback |
144 | destroy_inode: | 176 | destroy_inode: |
@@ -157,6 +189,7 @@ show_options: no (namespace_sem) | |||
157 | quota_read: no (see below) | 189 | quota_read: no (see below) |
158 | quota_write: no (see below) | 190 | quota_write: no (see below) |
159 | bdev_try_to_free_page: no (see below) | 191 | bdev_try_to_free_page: no (see below) |
192 | ====================== ============ ======================== | ||
160 | 193 | ||
161 | ->statfs() has s_umount (shared) when called by ustat(2) (native or | 194 | ->statfs() has s_umount (shared) when called by ustat(2) (native or |
162 | compat), but that's an accident of bad API; s_umount is used to pin | 195 | compat), but that's an accident of bad API; s_umount is used to pin |
@@ -164,31 +197,44 @@ the superblock down when we only have dev_t given us by userland to | |||
164 | identify the superblock. Everything else (statfs(), fstatfs(), etc.) | 197 | identify the superblock. Everything else (statfs(), fstatfs(), etc.) |
165 | doesn't hold it when calling ->statfs() - superblock is pinned down | 198 | doesn't hold it when calling ->statfs() - superblock is pinned down |
166 | by resolving the pathname passed to syscall. | 199 | by resolving the pathname passed to syscall. |
200 | |||
167 | ->quota_read() and ->quota_write() functions are both guaranteed to | 201 | ->quota_read() and ->quota_write() functions are both guaranteed to |
168 | be the only ones operating on the quota file by the quota code (via | 202 | be the only ones operating on the quota file by the quota code (via |
169 | dqio_sem) (unless an admin really wants to screw up something and | 203 | dqio_sem) (unless an admin really wants to screw up something and |
170 | writes to quota files with quotas on). For other details about locking | 204 | writes to quota files with quotas on). For other details about locking |
171 | see also dquot_operations section. | 205 | see also dquot_operations section. |
206 | |||
172 | ->bdev_try_to_free_page is called from the ->releasepage handler of | 207 | ->bdev_try_to_free_page is called from the ->releasepage handler of |
173 | the block device inode. See there for more details. | 208 | the block device inode. See there for more details. |
174 | 209 | ||
175 | --------------------------- file_system_type --------------------------- | 210 | file_system_type |
176 | prototypes: | 211 | ================ |
212 | |||
213 | prototypes:: | ||
214 | |||
177 | struct dentry *(*mount) (struct file_system_type *, int, | 215 | struct dentry *(*mount) (struct file_system_type *, int, |
178 | const char *, void *); | 216 | const char *, void *); |
179 | void (*kill_sb) (struct super_block *); | 217 | void (*kill_sb) (struct super_block *); |
218 | |||
180 | locking rules: | 219 | locking rules: |
181 | may block | 220 | |
221 | ======= ========= | ||
222 | ops may block | ||
223 | ======= ========= | ||
182 | mount yes | 224 | mount yes |
183 | kill_sb yes | 225 | kill_sb yes |
226 | ======= ========= | ||
184 | 227 | ||
185 | ->mount() returns ERR_PTR or the root dentry; its superblock should be locked | 228 | ->mount() returns ERR_PTR or the root dentry; its superblock should be locked |
186 | on return. | 229 | on return. |
230 | |||
187 | ->kill_sb() takes a write-locked superblock, does all shutdown work on it, | 231 | ->kill_sb() takes a write-locked superblock, does all shutdown work on it, |
188 | unlocks and drops the reference. | 232 | unlocks and drops the reference. |
189 | 233 | ||
190 | --------------------------- address_space_operations -------------------------- | 234 | address_space_operations |
191 | prototypes: | 235 | ======================== |
236 | prototypes:: | ||
237 | |||
192 | int (*writepage)(struct page *page, struct writeback_control *wbc); | 238 | int (*writepage)(struct page *page, struct writeback_control *wbc); |
193 | int (*readpage)(struct file *, struct page *); | 239 | int (*readpage)(struct file *, struct page *); |
194 | int (*writepages)(struct address_space *, struct writeback_control *); | 240 | int (*writepages)(struct address_space *, struct writeback_control *); |
@@ -218,14 +264,16 @@ prototypes: | |||
218 | locking rules: | 264 | locking rules: |
219 | All except set_page_dirty and freepage may block | 265 | All except set_page_dirty and freepage may block |
220 | 266 | ||
221 | PageLocked(page) i_rwsem | 267 | ====================== ======================== ========= |
268 | ops PageLocked(page) i_rwsem | ||
269 | ====================== ======================== ========= | ||
222 | writepage: yes, unlocks (see below) | 270 | writepage: yes, unlocks (see below) |
223 | readpage: yes, unlocks | 271 | readpage: yes, unlocks |
224 | writepages: | 272 | writepages: |
225 | set_page_dirty no | 273 | set_page_dirty no |
226 | readpages: | 274 | readpages: |
227 | write_begin: locks the page exclusive | 275 | write_begin: locks the page exclusive |
228 | write_end: yes, unlocks exclusive | 276 | write_end: yes, unlocks exclusive |
229 | bmap: | 277 | bmap: |
230 | invalidatepage: yes | 278 | invalidatepage: yes |
231 | releasepage: yes | 279 | releasepage: yes |
@@ -239,17 +287,18 @@ is_partially_uptodate: yes | |||
239 | error_remove_page: yes | 287 | error_remove_page: yes |
240 | swap_activate: no | 288 | swap_activate: no |
241 | swap_deactivate: no | 289 | swap_deactivate: no |
290 | ====================== ======================== ========= | ||
242 | 291 | ||
243 | ->write_begin(), ->write_end() and ->readpage() may be called from | 292 | ->write_begin(), ->write_end() and ->readpage() may be called from |
244 | the request handler (/dev/loop). | 293 | the request handler (/dev/loop). |
245 | 294 | ||
246 | ->readpage() unlocks the page, either synchronously or via I/O | 295 | ->readpage() unlocks the page, either synchronously or via I/O |
247 | completion. | 296 | completion. |
248 | 297 | ||
249 | ->readpages() populates the pagecache with the passed pages and starts | 298 | ->readpages() populates the pagecache with the passed pages and starts |
250 | I/O against them. They come unlocked upon I/O completion. | 299 | I/O against them. They come unlocked upon I/O completion. |
251 | 300 | ||
252 | ->writepage() is used for two purposes: for "memory cleansing" and for | 301 | ->writepage() is used for two purposes: for "memory cleansing" and for |
253 | "sync". These are quite different operations and the behaviour may differ | 302 | "sync". These are quite different operations and the behaviour may differ |
254 | depending upon the mode. | 303 | depending upon the mode. |
255 | 304 | ||
@@ -297,70 +346,81 @@ will leave the page itself marked clean but it will be tagged as dirty in the | |||
297 | radix tree. This incoherency can lead to all sorts of hard-to-debug problems | 346 | radix tree. This incoherency can lead to all sorts of hard-to-debug problems |
298 | in the filesystem like having dirty inodes at umount and losing written data. | 347 | in the filesystem like having dirty inodes at umount and losing written data. |
299 | 348 | ||
300 | ->writepages() is used for periodic writeback and for syscall-initiated | 349 | ->writepages() is used for periodic writeback and for syscall-initiated |
301 | sync operations. The address_space should start I/O against at least | 350 | sync operations. The address_space should start I/O against at least |
302 | *nr_to_write pages. *nr_to_write must be decremented for each page which is | 351 | ``*nr_to_write`` pages. ``*nr_to_write`` must be decremented for each page |
303 | written. The address_space implementation may write more (or less) pages | 352 | which is written. The address_space implementation may write more (or less) |
304 | than *nr_to_write asks for, but it should try to be reasonably close. If | 353 | pages than ``*nr_to_write`` asks for, but it should try to be reasonably close. |
305 | nr_to_write is NULL, all dirty pages must be written. | 354 | If nr_to_write is NULL, all dirty pages must be written. |
306 | 355 | ||
307 | writepages should _only_ write pages which are present on | 356 | writepages should _only_ write pages which are present on |
308 | mapping->io_pages. | 357 | mapping->io_pages. |
309 | 358 | ||
310 | ->set_page_dirty() is called from various places in the kernel | 359 | ->set_page_dirty() is called from various places in the kernel |
311 | when the target page is marked as needing writeback. It may be called | 360 | when the target page is marked as needing writeback. It may be called |
312 | under spinlock (it cannot block) and is sometimes called with the page | 361 | under spinlock (it cannot block) and is sometimes called with the page |
313 | not locked. | 362 | not locked. |
314 | 363 | ||
315 | ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some | 364 | ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some |
316 | filesystems and by the swapper. The latter will eventually go away. Please, | 365 | filesystems and by the swapper. The latter will eventually go away. Please, |
317 | keep it that way and don't breed new callers. | 366 | keep it that way and don't breed new callers. |
318 | 367 | ||
319 | ->invalidatepage() is called when the filesystem must attempt to drop | 368 | ->invalidatepage() is called when the filesystem must attempt to drop |
320 | some or all of the buffers from the page when it is being truncated. It | 369 | some or all of the buffers from the page when it is being truncated. It |
321 | returns zero on success. If ->invalidatepage is zero, the kernel uses | 370 | returns zero on success. If ->invalidatepage is zero, the kernel uses |
322 | block_invalidatepage() instead. | 371 | block_invalidatepage() instead. |
323 | 372 | ||
324 | ->releasepage() is called when the kernel is about to try to drop the | 373 | ->releasepage() is called when the kernel is about to try to drop the |
325 | buffers from the page in preparation for freeing it. It returns zero to | 374 | buffers from the page in preparation for freeing it. It returns zero to |
326 | indicate that the buffers are (or may be) freeable. If ->releasepage is zero, | 375 | indicate that the buffers are (or may be) freeable. If ->releasepage is zero, |
327 | the kernel assumes that the fs has no private interest in the buffers. | 376 | the kernel assumes that the fs has no private interest in the buffers. |
328 | 377 | ||
329 | ->freepage() is called when the kernel is done dropping the page | 378 | ->freepage() is called when the kernel is done dropping the page |
330 | from the page cache. | 379 | from the page cache. |
331 | 380 | ||
332 | ->launder_page() may be called prior to releasing a page if | 381 | ->launder_page() may be called prior to releasing a page if |
333 | it is still found to be dirty. It returns zero if the page was successfully | 382 | it is still found to be dirty. It returns zero if the page was successfully |
334 | cleaned, or an error value if not. Note that in order to prevent the page | 383 | cleaned, or an error value if not. Note that in order to prevent the page |
335 | getting mapped back in and redirtied, it needs to be kept locked | 384 | getting mapped back in and redirtied, it needs to be kept locked |
336 | across the entire operation. | 385 | across the entire operation. |
337 | 386 | ||
338 | ->swap_activate will be called with a non-zero argument on | 387 | ->swap_activate will be called with a non-zero argument on |
339 | files backing (non block device backed) swapfiles. A return value | 388 | files backing (non block device backed) swapfiles. A return value |
340 | of zero indicates success, in which case this file can be used for | 389 | of zero indicates success, in which case this file can be used for |
341 | backing swapspace. The swapspace operations will be proxied to the | 390 | backing swapspace. The swapspace operations will be proxied to the |
342 | address space operations. | 391 | address space operations. |
343 | 392 | ||
344 | ->swap_deactivate() will be called in the sys_swapoff() | 393 | ->swap_deactivate() will be called in the sys_swapoff() |
345 | path after ->swap_activate() returned success. | 394 | path after ->swap_activate() returned success. |
346 | 395 | ||
347 | ----------------------- file_lock_operations ------------------------------ | 396 | file_lock_operations |
348 | prototypes: | 397 | ==================== |
398 | |||
399 | prototypes:: | ||
400 | |||
349 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | 401 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); |
350 | void (*fl_release_private)(struct file_lock *); | 402 | void (*fl_release_private)(struct file_lock *); |
351 | 403 | ||
352 | 404 | ||
353 | locking rules: | 405 | locking rules: |
354 | inode->i_lock may block | 406 | |
407 | =================== ============= ========= | ||
408 | ops inode->i_lock may block | ||
409 | =================== ============= ========= | ||
355 | fl_copy_lock: yes no | 410 | fl_copy_lock: yes no |
356 | fl_release_private: maybe maybe[1] | 411 | fl_release_private: maybe maybe[1]_ |
412 | =================== ============= ========= | ||
413 | |||
414 | .. [1]: | ||
415 | ->fl_release_private for flock or POSIX locks is currently allowed | ||
416 | to block. Leases however can still be freed while the i_lock is held and | ||
417 | so fl_release_private called on a lease should not block. | ||
357 | 418 | ||
358 | [1]: ->fl_release_private for flock or POSIX locks is currently allowed | 419 | lock_manager_operations |
359 | to block. Leases however can still be freed while the i_lock is held and | 420 | ======================= |
360 | so fl_release_private called on a lease should not block. | 421 | |
422 | prototypes:: | ||
361 | 423 | ||
362 | ----------------------- lock_manager_operations --------------------------- | ||
363 | prototypes: | ||
364 | void (*lm_notify)(struct file_lock *); /* unblock callback */ | 424 | void (*lm_notify)(struct file_lock *); /* unblock callback */ |
365 | int (*lm_grant)(struct file_lock *, struct file_lock *, int); | 425 | int (*lm_grant)(struct file_lock *, struct file_lock *, int); |
366 | void (*lm_break)(struct file_lock *); /* break_lease callback */ | 426 | void (*lm_break)(struct file_lock *); /* break_lease callback */ |
@@ -368,24 +428,33 @@ prototypes: | |||
368 | 428 | ||
369 | locking rules: | 429 | locking rules: |
370 | 430 | ||
371 | inode->i_lock blocked_lock_lock may block | 431 | ========== ============= ================= ========= |
432 | ops inode->i_lock blocked_lock_lock may block | ||
433 | ========== ============= ================= ========= | ||
372 | lm_notify: yes yes no | 434 | lm_notify: yes yes no |
373 | lm_grant: no no no | 435 | lm_grant: no no no |
374 | lm_break: yes no no | 436 | lm_break: yes no no |
375 | lm_change: yes no no | 437 | lm_change: yes no no |
438 | ========== ============= ================= ========= | ||
439 | |||
440 | buffer_head | ||
441 | =========== | ||
442 | |||
443 | prototypes:: | ||
376 | 444 | ||
377 | --------------------------- buffer_head ----------------------------------- | ||
378 | prototypes: | ||
379 | void (*b_end_io)(struct buffer_head *bh, int uptodate); | 445 | void (*b_end_io)(struct buffer_head *bh, int uptodate); |
380 | 446 | ||
381 | locking rules: | 447 | locking rules: |
382 | called from interrupts. In other words, extreme care is needed here. | 448 | |
449 | called from interrupts. In other words, extreme care is needed here. | ||
383 | bh is locked, but that's all warranties we have here. Currently only RAID1, | 450 | bh is locked, but that's all warranties we have here. Currently only RAID1, |
384 | highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices | 451 | highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices |
385 | call this method upon the IO completion. | 452 | call this method upon the IO completion. |
386 | 453 | ||
387 | --------------------------- block_device_operations ----------------------- | 454 | block_device_operations |
388 | prototypes: | 455 | ======================= |
456 | prototypes:: | ||
457 | |||
389 | int (*open) (struct block_device *, fmode_t); | 458 | int (*open) (struct block_device *, fmode_t); |
390 | int (*release) (struct gendisk *, fmode_t); | 459 | int (*release) (struct gendisk *, fmode_t); |
391 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 460 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
@@ -399,7 +468,10 @@ prototypes: | |||
399 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | 468 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); |
400 | 469 | ||
401 | locking rules: | 470 | locking rules: |
402 | bd_mutex | 471 | |
472 | ======================= =================== | ||
473 | ops bd_mutex | ||
474 | ======================= =================== | ||
403 | open: yes | 475 | open: yes |
404 | release: yes | 476 | release: yes |
405 | ioctl: no | 477 | ioctl: no |
@@ -410,6 +482,7 @@ unlock_native_capacity: no | |||
410 | revalidate_disk: no | 482 | revalidate_disk: no |
411 | getgeo: no | 483 | getgeo: no |
412 | swap_slot_free_notify: no (see below) | 484 | swap_slot_free_notify: no (see below) |
485 | ======================= =================== | ||
413 | 486 | ||
414 | media_changed, unlock_native_capacity and revalidate_disk are called only from | 487 | media_changed, unlock_native_capacity and revalidate_disk are called only from |
415 | check_disk_change(). | 488 | check_disk_change(). |
@@ -418,8 +491,11 @@ swap_slot_free_notify is called with swap_lock and sometimes the page lock | |||
418 | held. | 491 | held. |
419 | 492 | ||
420 | 493 | ||
421 | --------------------------- file_operations ------------------------------- | 494 | file_operations |
422 | prototypes: | 495 | =============== |
496 | |||
497 | prototypes:: | ||
498 | |||
423 | loff_t (*llseek) (struct file *, loff_t, int); | 499 | loff_t (*llseek) (struct file *, loff_t, int); |
424 | ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); | 500 | ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); |
425 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); | 501 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); |
@@ -455,7 +531,6 @@ prototypes: | |||
455 | size_t, unsigned int); | 531 | size_t, unsigned int); |
456 | int (*setlease)(struct file *, long, struct file_lock **, void **); | 532 | int (*setlease)(struct file *, long, struct file_lock **, void **); |
457 | long (*fallocate)(struct file *, int, loff_t, loff_t); | 533 | long (*fallocate)(struct file *, int, loff_t, loff_t); |
458 | }; | ||
459 | 534 | ||
460 | locking rules: | 535 | locking rules: |
461 | All may block. | 536 | All may block. |
@@ -490,8 +565,11 @@ in sys_read() and friends. | |||
490 | the lease within the individual filesystem to record the result of the | 565 | the lease within the individual filesystem to record the result of the |
491 | operation | 566 | operation |
492 | 567 | ||
493 | --------------------------- dquot_operations ------------------------------- | 568 | dquot_operations |
494 | prototypes: | 569 | ================ |
570 | |||
571 | prototypes:: | ||
572 | |||
495 | int (*write_dquot) (struct dquot *); | 573 | int (*write_dquot) (struct dquot *); |
496 | int (*acquire_dquot) (struct dquot *); | 574 | int (*acquire_dquot) (struct dquot *); |
497 | int (*release_dquot) (struct dquot *); | 575 | int (*release_dquot) (struct dquot *); |
@@ -503,20 +581,26 @@ a proper locking wrt the filesystem and call the generic quota operations. | |||
503 | 581 | ||
504 | What filesystem should expect from the generic quota functions: | 582 | What filesystem should expect from the generic quota functions: |
505 | 583 | ||
506 | FS recursion Held locks when called | 584 | ============== ============ ========================= |
585 | ops FS recursion Held locks when called | ||
586 | ============== ============ ========================= | ||
507 | write_dquot: yes dqonoff_sem or dqptr_sem | 587 | write_dquot: yes dqonoff_sem or dqptr_sem |
508 | acquire_dquot: yes dqonoff_sem or dqptr_sem | 588 | acquire_dquot: yes dqonoff_sem or dqptr_sem |
509 | release_dquot: yes dqonoff_sem or dqptr_sem | 589 | release_dquot: yes dqonoff_sem or dqptr_sem |
510 | mark_dirty: no - | 590 | mark_dirty: no - |
511 | write_info: yes dqonoff_sem | 591 | write_info: yes dqonoff_sem |
592 | ============== ============ ========================= | ||
512 | 593 | ||
513 | FS recursion means calling ->quota_read() and ->quota_write() from superblock | 594 | FS recursion means calling ->quota_read() and ->quota_write() from superblock |
514 | operations. | 595 | operations. |
515 | 596 | ||
516 | More details about quota locking can be found in fs/dquot.c. | 597 | More details about quota locking can be found in fs/dquot.c. |
517 | 598 | ||
518 | --------------------------- vm_operations_struct ----------------------------- | 599 | vm_operations_struct |
519 | prototypes: | 600 | ==================== |
601 | |||
602 | prototypes:: | ||
603 | |||
520 | void (*open)(struct vm_area_struct*); | 604 | void (*open)(struct vm_area_struct*); |
521 | void (*close)(struct vm_area_struct*); | 605 | void (*close)(struct vm_area_struct*); |
522 | vm_fault_t (*fault)(struct vm_area_struct*, struct vm_fault *); | 606 | vm_fault_t (*fault)(struct vm_area_struct*, struct vm_fault *); |
@@ -525,7 +609,10 @@ prototypes: | |||
525 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); | 609 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); |
526 | 610 | ||
527 | locking rules: | 611 | locking rules: |
528 | mmap_sem PageLocked(page) | 612 | |
613 | ============= ======== =========================== | ||
614 | ops mmap_sem PageLocked(page) | ||
615 | ============= ======== =========================== | ||
529 | open: yes | 616 | open: yes |
530 | close: yes | 617 | close: yes |
531 | fault: yes can return with page locked | 618 | fault: yes can return with page locked |
@@ -533,8 +620,9 @@ map_pages: yes | |||
533 | page_mkwrite: yes can return with page locked | 620 | page_mkwrite: yes can return with page locked |
534 | pfn_mkwrite: yes | 621 | pfn_mkwrite: yes |
535 | access: yes | 622 | access: yes |
623 | ============= ======== =========================== | ||
536 | 624 | ||
537 | ->fault() is called when a previously not present pte is about | 625 | ->fault() is called when a previously not present pte is about |
538 | to be faulted in. The filesystem must find and return the page associated | 626 | to be faulted in. The filesystem must find and return the page associated |
539 | with the passed in "pgoff" in the vm_fault structure. If it is possible that | 627 | with the passed in "pgoff" in the vm_fault structure. If it is possible that |
540 | the page may be truncated and/or invalidated, then the filesystem must lock | 628 | the page may be truncated and/or invalidated, then the filesystem must lock |
@@ -542,7 +630,7 @@ the page, then ensure it is not already truncated (the page lock will block | |||
542 | subsequent truncate), and then return with VM_FAULT_LOCKED, and the page | 630 | subsequent truncate), and then return with VM_FAULT_LOCKED, and the page |
543 | locked. The VM will unlock the page. | 631 | locked. The VM will unlock the page. |
544 | 632 | ||
545 | ->map_pages() is called when VM asks to map easily accessible pages. | 633 | ->map_pages() is called when VM asks to map easily accessible pages. |
546 | Filesystem should find and map pages associated with offsets from "start_pgoff" | 634 | Filesystem should find and map pages associated with offsets from "start_pgoff" |
547 | till "end_pgoff". ->map_pages() is called with page table locked and must | 635 | till "end_pgoff". ->map_pages() is called with page table locked and must |
548 | not block. If it's not possible to reach a page without blocking, | 636 | not block. If it's not possible to reach a page without blocking, |
@@ -551,25 +639,26 @@ page table entry. Pointer to entry associated with the page is passed in | |||
551 | "pte" field in vm_fault structure. Pointers to entries for other offsets | 639 | "pte" field in vm_fault structure. Pointers to entries for other offsets |
552 | should be calculated relative to "pte". | 640 | should be calculated relative to "pte". |
553 | 641 | ||
554 | ->page_mkwrite() is called when a previously read-only pte is | 642 | ->page_mkwrite() is called when a previously read-only pte is |
555 | about to become writeable. The filesystem again must ensure that there are | 643 | about to become writeable. The filesystem again must ensure that there are |
556 | no truncate/invalidate races, and then return with the page locked. If | 644 | no truncate/invalidate races, and then return with the page locked. If |
557 | the page has been truncated, the filesystem should not look up a new page | 645 | the page has been truncated, the filesystem should not look up a new page |
558 | like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which | 646 | like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which |
559 | will cause the VM to retry the fault. | 647 | will cause the VM to retry the fault. |
560 | 648 | ||
561 | ->pfn_mkwrite() is the same as page_mkwrite but when the pte is | 649 | ->pfn_mkwrite() is the same as page_mkwrite but when the pte is |
562 | VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is | 650 | VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is |
563 | VM_FAULT_NOPAGE. Or one of the VM_FAULT_ERROR types. The default behavior | 651 | VM_FAULT_NOPAGE. Or one of the VM_FAULT_ERROR types. The default behavior |
564 | after this call is to make the pte read-write, unless pfn_mkwrite returns | 652 | after this call is to make the pte read-write, unless pfn_mkwrite returns |
565 | an error. | 653 | an error. |
566 | 654 | ||
567 | ->access() is called when get_user_pages() fails in | 655 | ->access() is called when get_user_pages() fails in |
568 | access_process_vm(), typically used to debug a process through | 656 | access_process_vm(), typically used to debug a process through |
569 | /proc/pid/mem or ptrace. This function is needed only for | 657 | /proc/pid/mem or ptrace. This function is needed only for |
570 | VM_IO | VM_PFNMAP VMAs. | 658 | VM_IO | VM_PFNMAP VMAs. |
571 | 659 | ||
572 | ================================================================================ | 660 | -------------------------------------------------------------------------------- |
661 | |||
573 | Dubious stuff | 662 | Dubious stuff |
574 | 663 | ||
575 | (if you break something or notice that it is broken and do not fix it yourself | 664 | (if you break something or notice that it is broken and do not fix it yourself |
diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/exporting.rst index 63889149f532..33d588a01ace 100644 --- a/Documentation/filesystems/nfs/Exporting +++ b/Documentation/filesystems/nfs/exporting.rst | |||
@@ -1,3 +1,4 @@ | |||
1 | :orphan: | ||
1 | 2 | ||
2 | Making Filesystems Exportable | 3 | Making Filesystems Exportable |
3 | ============================= | 4 | ============================= |
@@ -42,9 +43,9 @@ filehandle fragment, there is no automatic creation of a path prefix | |||
42 | for the object. This leads to two related but distinct features of | 43 | for the object. This leads to two related but distinct features of |
43 | the dcache that are not needed for normal filesystem access. | 44 | the dcache that are not needed for normal filesystem access. |
44 | 45 | ||
45 | 1/ The dcache must sometimes contain objects that are not part of the | 46 | 1. The dcache must sometimes contain objects that are not part of the |
46 | proper prefix. i.e. that are not connected to the root. | 47 | proper prefix. i.e. that are not connected to the root. |
47 | 2/ The dcache must be prepared for a newly found (via ->lookup) directory | 48 | 2. The dcache must be prepared for a newly found (via ->lookup) directory |
48 | to already have a (non-connected) dentry, and must be able to move | 49 | to already have a (non-connected) dentry, and must be able to move |
49 | that dentry into place (based on the parent and name in the | 50 | that dentry into place (based on the parent and name in the |
50 | ->lookup). This is particularly needed for directories as | 51 | ->lookup). This is particularly needed for directories as |
@@ -52,7 +53,7 @@ the dcache that are not needed for normal filesystem access. | |||
52 | 53 | ||
53 | To implement these features, the dcache has: | 54 | To implement these features, the dcache has: |
54 | 55 | ||
55 | a/ A dentry flag DCACHE_DISCONNECTED which is set on | 56 | a. A dentry flag DCACHE_DISCONNECTED which is set on |
56 | any dentry that might not be part of the proper prefix. | 57 | any dentry that might not be part of the proper prefix. |
57 | This is set when anonymous dentries are created, and cleared when a | 58 | This is set when anonymous dentries are created, and cleared when a |
58 | dentry is noticed to be a child of a dentry which is in the proper | 59 | dentry is noticed to be a child of a dentry which is in the proper |
@@ -71,48 +72,52 @@ a/ A dentry flag DCACHE_DISCONNECTED which is set on | |||
71 | dentries. That guarantees that we won't need to hunt them down upon | 72 | dentries. That guarantees that we won't need to hunt them down upon |
72 | umount. | 73 | umount. |
73 | 74 | ||
74 | b/ A primitive for creation of secondary roots - d_obtain_root(inode). | 75 | b. A primitive for creation of secondary roots - d_obtain_root(inode). |
75 | Those do _not_ bear DCACHE_DISCONNECTED. They are placed on the | 76 | Those do _not_ bear DCACHE_DISCONNECTED. They are placed on the |
76 | per-superblock list (->s_roots), so they can be located at umount | 77 | per-superblock list (->s_roots), so they can be located at umount |
77 | time for eviction purposes. | 78 | time for eviction purposes. |
78 | 79 | ||
79 | c/ Helper routines to allocate anonymous dentries, and to help attach | 80 | c. Helper routines to allocate anonymous dentries, and to help attach |
80 | loose directory dentries at lookup time. They are: | 81 | loose directory dentries at lookup time. They are: |
82 | |||
81 | d_obtain_alias(inode) will return a dentry for the given inode. | 83 | d_obtain_alias(inode) will return a dentry for the given inode. |
82 | If the inode already has a dentry, one of those is returned. | 84 | If the inode already has a dentry, one of those is returned. |
85 | |||
83 | If it doesn't, a new anonymous (IS_ROOT and | 86 | If it doesn't, a new anonymous (IS_ROOT and |
84 | DCACHE_DISCONNECTED) dentry is allocated and attached. | 87 | DCACHE_DISCONNECTED) dentry is allocated and attached. |
88 | |||
85 | In the case of a directory, care is taken that only one dentry | 89 | In the case of a directory, care is taken that only one dentry |
86 | can ever be attached. | 90 | can ever be attached. |
91 | |||
87 | d_splice_alias(inode, dentry) will introduce a new dentry into the tree; | 92 | d_splice_alias(inode, dentry) will introduce a new dentry into the tree; |
88 | either the passed-in dentry or a preexisting alias for the given inode | 93 | either the passed-in dentry or a preexisting alias for the given inode |
89 | (such as an anonymous one created by d_obtain_alias), if appropriate. | 94 | (such as an anonymous one created by d_obtain_alias), if appropriate. |
90 | It returns NULL when the passed-in dentry is used, following the calling | 95 | It returns NULL when the passed-in dentry is used, following the calling |
91 | convention of ->lookup. | 96 | convention of ->lookup. |
92 | 97 | ||
93 | Filesystem Issues | 98 | Filesystem Issues |
94 | ----------------- | 99 | ----------------- |
95 | 100 | ||
96 | For a filesystem to be exportable it must: | 101 | For a filesystem to be exportable it must: |
97 | 102 | ||
98 | 1/ provide the filehandle fragment routines described below. | 103 | 1. provide the filehandle fragment routines described below. |
99 | 2/ make sure that d_splice_alias is used rather than d_add | 104 | 2. make sure that d_splice_alias is used rather than d_add |
100 | when ->lookup finds an inode for a given parent and name. | 105 | when ->lookup finds an inode for a given parent and name. |
101 | 106 | ||
102 | If inode is NULL, d_splice_alias(inode, dentry) is equivalent to | 107 | If inode is NULL, d_splice_alias(inode, dentry) is equivalent to:: |
103 | 108 | ||
104 | d_add(dentry, inode), NULL | 109 | d_add(dentry, inode), NULL |
105 | 110 | ||
106 | Similarly, d_splice_alias(ERR_PTR(err), dentry) = ERR_PTR(err) | 111 | Similarly, d_splice_alias(ERR_PTR(err), dentry) = ERR_PTR(err) |
107 | 112 | ||
108 | Typically the ->lookup routine will simply end with a: | 113 | Typically the ->lookup routine will simply end with a:: |
109 | 114 | ||
110 | return d_splice_alias(inode, dentry); | 115 | return d_splice_alias(inode, dentry); |
111 | } | 116 | } |
112 | 117 | ||
113 | 118 | ||
114 | 119 | ||
115 | A file system implementation declares that instances of the filesystem | 120 | A file system implementation declares that instances of the filesystem |
116 | are exportable by setting the s_export_op field in the struct | 121 | are exportable by setting the s_export_op field in the struct |
117 | super_block. This field must point to a "struct export_operations" | 122 | super_block. This field must point to a "struct export_operations" |
118 | struct which has the following members: | 123 | struct which has the following members: |
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index 0f85ab21c2ca..7d4d09dd5e6d 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst | |||
@@ -20,7 +20,7 @@ kernel which allows different filesystem implementations to coexist. | |||
20 | 20 | ||
21 | VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so on | 21 | VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so on |
22 | are called from a process context. Filesystem locking is described in | 22 | are called from a process context. Filesystem locking is described in |
23 | the document Documentation/filesystems/Locking. | 23 | the document Documentation/filesystems/locking.rst. |
24 | 24 | ||
25 | 25 | ||
26 | Directory Entry Cache (dcache) | 26 | Directory Entry Cache (dcache) |
diff --git a/fs/cifs/export.c b/fs/cifs/export.c index ce8b7f677c58..eb0bb8ca8e63 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c | |||
@@ -24,7 +24,7 @@ | |||
24 | */ | 24 | */ |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * See Documentation/filesystems/nfs/Exporting | 27 | * See Documentation/filesystems/nfs/exporting.rst |
28 | * and examples in fs/exportfs | 28 | * and examples in fs/exportfs |
29 | * | 29 | * |
30 | * Since cifs is a network file system, an "fsid" must be included for | 30 | * Since cifs is a network file system, an "fsid" must be included for |
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index f0e549783caf..09bc68708d28 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
@@ -7,7 +7,7 @@ | |||
7 | * and for mapping back from file handles to dentries. | 7 | * and for mapping back from file handles to dentries. |
8 | * | 8 | * |
9 | * For details on why we do all the strange and hairy things in here | 9 | * For details on why we do all the strange and hairy things in here |
10 | * take a look at Documentation/filesystems/nfs/Exporting. | 10 | * take a look at Documentation/filesystems/nfs/exporting.rst. |
11 | */ | 11 | */ |
12 | #include <linux/exportfs.h> | 12 | #include <linux/exportfs.h> |
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 85a9093769a9..35768a63fb1d 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * | 10 | * |
11 | * The following files are helpful: | 11 | * The following files are helpful: |
12 | * | 12 | * |
13 | * Documentation/filesystems/nfs/Exporting | 13 | * Documentation/filesystems/nfs/exporting.rst |
14 | * fs/exportfs/expfs.c. | 14 | * fs/exportfs/expfs.c. |
15 | */ | 15 | */ |
16 | 16 | ||
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 960f9a3c012d..a5612abc0936 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c | |||
@@ -555,7 +555,7 @@ static int orangefs_fsync(struct file *file, | |||
555 | * Change the file pointer position for an instance of an open file. | 555 | * Change the file pointer position for an instance of an open file. |
556 | * | 556 | * |
557 | * \note If .llseek is overridden, we must acquire lock as described in | 557 | * \note If .llseek is overridden, we must acquire lock as described in |
558 | * Documentation/filesystems/Locking. | 558 | * Documentation/filesystems/locking.rst. |
559 | * | 559 | * |
560 | * Future upgrade could support SEEK_DATA and SEEK_HOLE but would | 560 | * Future upgrade could support SEEK_DATA and SEEK_HOLE but would |
561 | * require much changes to the FS | 561 | * require much changes to the FS |
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 9451011ac014..10090f11ab95 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h | |||
@@ -151,7 +151,7 @@ struct dentry_operations { | |||
151 | 151 | ||
152 | /* | 152 | /* |
153 | * Locking rules for dentry_operations callbacks are to be found in | 153 | * Locking rules for dentry_operations callbacks are to be found in |
154 | * Documentation/filesystems/Locking. Keep it updated! | 154 | * Documentation/filesystems/locking.rst. Keep it updated! |
155 | * | 155 | * |
156 | * Further descriptions are found in Documentation/filesystems/vfs.rst. | 156 | * Further descriptions are found in Documentation/filesystems/vfs.rst. |
157 | * Keep it updated too! | 157 | * Keep it updated too! |
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 0d3037419bc7..cf6571fc9c01 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h | |||
@@ -139,7 +139,7 @@ struct fid { | |||
139 | * @get_parent: find the parent of a given directory | 139 | * @get_parent: find the parent of a given directory |
140 | * @commit_metadata: commit metadata changes to stable storage | 140 | * @commit_metadata: commit metadata changes to stable storage |
141 | * | 141 | * |
142 | * See Documentation/filesystems/nfs/Exporting for details on how to use | 142 | * See Documentation/filesystems/nfs/exporting.rst for details on how to use |
143 | * this interface correctly. | 143 | * this interface correctly. |
144 | * | 144 | * |
145 | * encode_fh: | 145 | * encode_fh: |