diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-10-28 11:26:12 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-10-28 11:26:12 -0400 |
commit | 7a9787e1eba95a166265e6a260cf30af04ef0a99 (patch) | |
tree | e730a4565e0318140d2fbd2f0415d18a339d7336 /Documentation/filesystems | |
parent | 41b9eb264c8407655db57b60b4457fe1b2ec9977 (diff) | |
parent | 0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff) |
Merge commit 'v2.6.28-rc2' into x86/pci-ioapic-boot-irq-quirks
Diffstat (limited to 'Documentation/filesystems')
23 files changed, 1516 insertions, 175 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8b22d7d8b991..8362860e21a7 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -144,8 +144,8 @@ prototypes: | |||
144 | void (*kill_sb) (struct super_block *); | 144 | void (*kill_sb) (struct super_block *); |
145 | locking rules: | 145 | locking rules: |
146 | may block BKL | 146 | may block BKL |
147 | get_sb yes yes | 147 | get_sb yes no |
148 | kill_sb yes yes | 148 | kill_sb yes no |
149 | 149 | ||
150 | ->get_sb() returns error or 0 with locked superblock attached to the vfsmount | 150 | ->get_sb() returns error or 0 with locked superblock attached to the vfsmount |
151 | (exclusive on ->s_umount). | 151 | (exclusive on ->s_umount). |
@@ -409,12 +409,12 @@ ioctl: yes (see below) | |||
409 | unlocked_ioctl: no (see below) | 409 | unlocked_ioctl: no (see below) |
410 | compat_ioctl: no | 410 | compat_ioctl: no |
411 | mmap: no | 411 | mmap: no |
412 | open: maybe (see below) | 412 | open: no |
413 | flush: no | 413 | flush: no |
414 | release: no | 414 | release: no |
415 | fsync: no (see below) | 415 | fsync: no (see below) |
416 | aio_fsync: no | 416 | aio_fsync: no |
417 | fasync: yes (see below) | 417 | fasync: no |
418 | lock: yes | 418 | lock: yes |
419 | readv: no | 419 | readv: no |
420 | writev: no | 420 | writev: no |
@@ -431,13 +431,6 @@ For many filesystems, it is probably safe to acquire the inode | |||
431 | semaphore. Note some filesystems (i.e. remote ones) provide no | 431 | semaphore. Note some filesystems (i.e. remote ones) provide no |
432 | protection for i_size so you will need to use the BKL. | 432 | protection for i_size so you will need to use the BKL. |
433 | 433 | ||
434 | ->open() locking is in-transit: big lock partially moved into the methods. | ||
435 | The only exception is ->open() in the instances of file_operations that never | ||
436 | end up in ->i_fop/->proc_fops, i.e. ones that belong to character devices | ||
437 | (chrdev_open() takes lock before replacing ->f_op and calling the secondary | ||
438 | method. As soon as we fix the handling of module reference counters all | ||
439 | instances of ->open() will be called without the BKL. | ||
440 | |||
441 | Note: ext2_release() was *the* source of contention on fs-intensive | 434 | Note: ext2_release() was *the* source of contention on fs-intensive |
442 | loads and dropping BKL on ->release() helps to get rid of that (we still | 435 | loads and dropping BKL on ->release() helps to get rid of that (we still |
443 | grab BKL for cases when we close a file that had been opened r/w, but that | 436 | grab BKL for cases when we close a file that had been opened r/w, but that |
@@ -510,6 +503,7 @@ prototypes: | |||
510 | void (*close)(struct vm_area_struct*); | 503 | void (*close)(struct vm_area_struct*); |
511 | int (*fault)(struct vm_area_struct*, struct vm_fault *); | 504 | int (*fault)(struct vm_area_struct*, struct vm_fault *); |
512 | int (*page_mkwrite)(struct vm_area_struct *, struct page *); | 505 | int (*page_mkwrite)(struct vm_area_struct *, struct page *); |
506 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); | ||
513 | 507 | ||
514 | locking rules: | 508 | locking rules: |
515 | BKL mmap_sem PageLocked(page) | 509 | BKL mmap_sem PageLocked(page) |
@@ -517,6 +511,7 @@ open: no yes | |||
517 | close: no yes | 511 | close: no yes |
518 | fault: no yes | 512 | fault: no yes |
519 | page_mkwrite: no yes no | 513 | page_mkwrite: no yes no |
514 | access: no yes | ||
520 | 515 | ||
521 | ->page_mkwrite() is called when a previously read-only page is | 516 | ->page_mkwrite() is called when a previously read-only page is |
522 | about to become writeable. The file system is responsible for | 517 | about to become writeable. The file system is responsible for |
@@ -525,6 +520,11 @@ taking to lock out truncate, the page range should be verified to be | |||
525 | within i_size. The page mapping should also be checked that it is not | 520 | within i_size. The page mapping should also be checked that it is not |
526 | NULL. | 521 | NULL. |
527 | 522 | ||
523 | ->access() is called when get_user_pages() fails in | ||
524 | access_process_vm(), typically used to debug a process through | ||
525 | /proc/pid/mem or ptrace. This function is needed only for | ||
526 | VM_IO | VM_PFNMAP VMAs. | ||
527 | |||
528 | ================================================================================ | 528 | ================================================================================ |
529 | Dubious stuff | 529 | Dubious stuff |
530 | 530 | ||
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs4-mount-control.txt new file mode 100644 index 000000000000..c6341745df37 --- /dev/null +++ b/Documentation/filesystems/autofs4-mount-control.txt | |||
@@ -0,0 +1,393 @@ | |||
1 | |||
2 | Miscellaneous Device control operations for the autofs4 kernel module | ||
3 | ==================================================================== | ||
4 | |||
5 | The problem | ||
6 | =========== | ||
7 | |||
8 | There is a problem with active restarts in autofs (that is to say | ||
9 | restarting autofs when there are busy mounts). | ||
10 | |||
11 | During normal operation autofs uses a file descriptor opened on the | ||
12 | directory that is being managed in order to be able to issue control | ||
13 | operations. Using a file descriptor gives ioctl operations access to | ||
14 | autofs specific information stored in the super block. The operations | ||
15 | are things such as setting an autofs mount catatonic, setting the | ||
16 | expire timeout and requesting expire checks. As is explained below, | ||
17 | certain types of autofs triggered mounts can end up covering an autofs | ||
18 | mount itself which prevents us being able to use open(2) to obtain a | ||
19 | file descriptor for these operations if we don't already have one open. | ||
20 | |||
21 | Currently autofs uses "umount -l" (lazy umount) to clear active mounts | ||
22 | at restart. While using lazy umount works for most cases, anything that | ||
23 | needs to walk back up the mount tree to construct a path, such as | ||
24 | getcwd(2) and the proc file system /proc/<pid>/cwd, no longer works | ||
25 | because the point from which the path is constructed has been detached | ||
26 | from the mount tree. | ||
27 | |||
28 | The actual problem with autofs is that it can't reconnect to existing | ||
29 | mounts. Immediately one thinks of just adding the ability to remount | ||
30 | autofs file systems would solve it, but alas, that can't work. This is | ||
31 | because autofs direct mounts and the implementation of "on demand mount | ||
32 | and expire" of nested mount trees have the file system mounted directly | ||
33 | on top of the mount trigger directory dentry. | ||
34 | |||
35 | For example, there are two types of automount maps, direct (in the kernel | ||
36 | module source you will see a third type called an offset, which is just | ||
37 | a direct mount in disguise) and indirect. | ||
38 | |||
39 | Here is a master map with direct and indirect map entries: | ||
40 | |||
41 | /- /etc/auto.direct | ||
42 | /test /etc/auto.indirect | ||
43 | |||
44 | and the corresponding map files: | ||
45 | |||
46 | /etc/auto.direct: | ||
47 | |||
48 | /automount/dparse/g6 budgie:/autofs/export1 | ||
49 | /automount/dparse/g1 shark:/autofs/export1 | ||
50 | and so on. | ||
51 | |||
52 | /etc/auto.indirect: | ||
53 | |||
54 | g1 shark:/autofs/export1 | ||
55 | g6 budgie:/autofs/export1 | ||
56 | and so on. | ||
57 | |||
58 | For the above indirect map an autofs file system is mounted on /test and | ||
59 | mounts are triggered for each sub-directory key by the inode lookup | ||
60 | operation. So we see a mount of shark:/autofs/export1 on /test/g1, for | ||
61 | example. | ||
62 | |||
63 | The way that direct mounts are handled is by making an autofs mount on | ||
64 | each full path, such as /automount/dparse/g1, and using it as a mount | ||
65 | trigger. So when we walk on the path we mount shark:/autofs/export1 "on | ||
66 | top of this mount point". Since these are always directories we can | ||
67 | use the follow_link inode operation to trigger the mount. | ||
68 | |||
69 | But, each entry in direct and indirect maps can have offsets (making | ||
70 | them multi-mount map entries). | ||
71 | |||
72 | For example, an indirect mount map entry could also be: | ||
73 | |||
74 | g1 \ | ||
75 | / shark:/autofs/export5/testing/test \ | ||
76 | /s1 shark:/autofs/export/testing/test/s1 \ | ||
77 | /s2 shark:/autofs/export5/testing/test/s2 \ | ||
78 | /s1/ss1 shark:/autofs/export1 \ | ||
79 | /s2/ss2 shark:/autofs/export2 | ||
80 | |||
81 | and similarly a direct mount map entry could also be: | ||
82 | |||
83 | /automount/dparse/g1 \ | ||
84 | / shark:/autofs/export5/testing/test \ | ||
85 | /s1 shark:/autofs/export/testing/test/s1 \ | ||
86 | /s2 shark:/autofs/export5/testing/test/s2 \ | ||
87 | /s1/ss1 shark:/autofs/export2 \ | ||
88 | /s2/ss2 shark:/autofs/export2 | ||
89 | |||
90 | One of the issues with version 4 of autofs was that, when mounting an | ||
91 | entry with a large number of offsets, possibly with nesting, we needed | ||
92 | to mount and umount all of the offsets as a single unit. Not really a | ||
93 | problem, except for people with a large number of offsets in map entries. | ||
94 | This mechanism is used for the well known "hosts" map and we have seen | ||
95 | cases (in 2.4) where the available number of mounts are exhausted or | ||
96 | where the number of privileged ports available is exhausted. | ||
97 | |||
98 | In version 5 we mount only as we go down the tree of offsets and | ||
99 | similarly for expiring them which resolves the above problem. There is | ||
100 | somewhat more detail to the implementation but it isn't needed for the | ||
101 | sake of the problem explanation. The one important detail is that these | ||
102 | offsets are implemented using the same mechanism as the direct mounts | ||
103 | above and so the mount points can be covered by a mount. | ||
104 | |||
105 | The current autofs implementation uses an ioctl file descriptor opened | ||
106 | on the mount point for control operations. The references held by the | ||
107 | descriptor are accounted for in checks made to determine if a mount is | ||
108 | in use and is also used to access autofs file system information held | ||
109 | in the mount super block. So the use of a file handle needs to be | ||
110 | retained. | ||
111 | |||
112 | |||
113 | The Solution | ||
114 | ============ | ||
115 | |||
116 | To be able to restart autofs leaving existing direct, indirect and | ||
117 | offset mounts in place we need to be able to obtain a file handle | ||
118 | for these potentially covered autofs mount points. Rather than just | ||
119 | implement an isolated operation it was decided to re-implement the | ||
120 | existing ioctl interface and add new operations to provide this | ||
121 | functionality. | ||
122 | |||
123 | In addition, to be able to reconstruct a mount tree that has busy mounts, | ||
124 | the uid and gid of the last user that triggered the mount needs to be | ||
125 | available because these can be used as macro substitution variables in | ||
126 | autofs maps. They are recorded at mount request time and an operation | ||
127 | has been added to retrieve them. | ||
128 | |||
129 | Since we're re-implementing the control interface, a couple of other | ||
130 | problems with the existing interface have been addressed. First, when | ||
131 | a mount or expire operation completes a status is returned to the | ||
132 | kernel by either a "send ready" or a "send fail" operation. The | ||
133 | "send fail" operation of the ioctl interface could only ever send | ||
134 | ENOENT so the re-implementation allows user space to send an actual | ||
135 | status. Another expensive operation in user space, for those using | ||
136 | very large maps, is discovering if a mount is present. Usually this | ||
137 | involves scanning /proc/mounts and since it needs to be done quite | ||
138 | often it can introduce significant overhead when there are many entries | ||
139 | in the mount table. An operation to lookup the mount status of a mount | ||
140 | point dentry (covered or not) has also been added. | ||
141 | |||
142 | Current kernel development policy recommends avoiding the use of the | ||
143 | ioctl mechanism in favor of systems such as Netlink. An implementation | ||
144 | using this system was attempted to evaluate its suitability and it was | ||
145 | found to be inadequate, in this case. The Generic Netlink system was | ||
146 | used for this as raw Netlink would lead to a significant increase in | ||
147 | complexity. There's no question that the Generic Netlink system is an | ||
148 | elegant solution for common case ioctl functions but it's not a complete | ||
149 | replacement probably because its primary purpose in life is to be a | ||
150 | message bus implementation rather than specifically an ioctl replacement. | ||
151 | While it would be possible to work around this there is one concern | ||
152 | that led to the decision to not use it. This is that the autofs | ||
153 | expire in the daemon has become far too complex because umount | ||
154 | candidates are enumerated, almost for no other reason than to "count" | ||
155 | the number of times to call the expire ioctl. This involves scanning | ||
156 | the mount table which has proved to be a big overhead for users with | ||
157 | large maps. The best way to improve this is try and get back to the | ||
158 | way the expire was done long ago. That is, when an expire request is | ||
159 | issued for a mount (file handle) we should continually call back to | ||
160 | the daemon until we can't umount any more mounts, then return the | ||
161 | appropriate status to the daemon. At the moment we just expire one | ||
162 | mount at a time. A Generic Netlink implementation would exclude this | ||
163 | possibility for future development due to the requirements of the | ||
164 | message bus architecture. | ||
165 | |||
166 | |||
167 | autofs4 Miscellaneous Device mount control interface | ||
168 | ==================================================== | ||
169 | |||
170 | The control interface is opening a device node, typically /dev/autofs. | ||
171 | |||
172 | All the ioctls use a common structure to pass the needed parameter | ||
173 | information and return operation results: | ||
174 | |||
175 | struct autofs_dev_ioctl { | ||
176 | __u32 ver_major; | ||
177 | __u32 ver_minor; | ||
178 | __u32 size; /* total size of data passed in | ||
179 | * including this struct */ | ||
180 | __s32 ioctlfd; /* automount command fd */ | ||
181 | |||
182 | __u32 arg1; /* Command parameters */ | ||
183 | __u32 arg2; | ||
184 | |||
185 | char path[0]; | ||
186 | }; | ||
187 | |||
188 | The ioctlfd field is a mount point file descriptor of an autofs mount | ||
189 | point. It is returned by the open call and is used by all calls except | ||
190 | the check for whether a given path is a mount point, where it may | ||
191 | optionally be used to check a specific mount corresponding to a given | ||
192 | mount point file descriptor, and when requesting the uid and gid of the | ||
193 | last successful mount on a directory within the autofs file system. | ||
194 | |||
195 | The fields arg1 and arg2 are used to communicate parameters and results of | ||
196 | calls made as described below. | ||
197 | |||
198 | The path field is used to pass a path where it is needed and the size field | ||
199 | is used to account for the increased structure length when translating the | ||
200 | structure sent from user space. | ||
201 | |||
202 | This structure can be initialized before setting specific fields by using | ||
203 | the void function call init_autofs_dev_ioctl(struct autofs_dev_ioctl *). | ||
204 | |||
205 | All of the ioctls perform a copy of this structure from user space to | ||
206 | kernel space and return -EINVAL if the size parameter is smaller than | ||
207 | the structure size itself, -ENOMEM if the kernel memory allocation fails | ||
208 | or -EFAULT if the copy itself fails. Other checks include a version check | ||
209 | of the compiled in user space version against the module version and a | ||
210 | mismatch results in a -EINVAL return. If the size field is greater than | ||
211 | the structure size then a path is assumed to be present and is checked to | ||
212 | ensure it begins with a "/" and is NULL terminated, otherwise -EINVAL is | ||
213 | returned. Following these checks, for all ioctl commands except | ||
214 | AUTOFS_DEV_IOCTL_VERSION_CMD, AUTOFS_DEV_IOCTL_OPENMOUNT_CMD and | ||
215 | AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD the ioctlfd is validated and if it is | ||
216 | not a valid descriptor or doesn't correspond to an autofs mount point | ||
217 | an error of -EBADF, -ENOTTY or -EINVAL (not an autofs descriptor) is | ||
218 | returned. | ||
219 | |||
220 | |||
221 | The ioctls | ||
222 | ========== | ||
223 | |||
224 | An example of an implementation which uses this interface can be seen | ||
225 | in autofs version 5.0.4 and later in file lib/dev-ioctl-lib.c of the | ||
226 | distribution tar available for download from kernel.org in directory | ||
227 | /pub/linux/daemons/autofs/v5. | ||
228 | |||
229 | The device node ioctl operations implemented by this interface are: | ||
230 | |||
231 | |||
232 | AUTOFS_DEV_IOCTL_VERSION | ||
233 | ------------------------ | ||
234 | |||
235 | Get the major and minor version of the autofs4 device ioctl kernel module | ||
236 | implementation. It requires an initialized struct autofs_dev_ioctl as an | ||
237 | input parameter and sets the version information in the passed in structure. | ||
238 | It returns 0 on success or the error -EINVAL if a version mismatch is | ||
239 | detected. | ||
240 | |||
241 | |||
242 | AUTOFS_DEV_IOCTL_PROTOVER_CMD and AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD | ||
243 | ------------------------------------------------------------------ | ||
244 | |||
245 | Get the major and minor version of the autofs4 protocol version understood | ||
246 | by loaded module. This call requires an initialized struct autofs_dev_ioctl | ||
247 | with the ioctlfd field set to a valid autofs mount point descriptor | ||
248 | and sets the requested version number in structure field arg1. These | ||
249 | commands return 0 on success or one of the negative error codes if | ||
250 | validation fails. | ||
251 | |||
252 | |||
253 | AUTOFS_DEV_IOCTL_OPENMOUNT and AUTOFS_DEV_IOCTL_CLOSEMOUNT | ||
254 | ---------------------------------------------------------- | ||
255 | |||
256 | Obtain and release a file descriptor for an autofs managed mount point | ||
257 | path. The open call requires an initialized struct autofs_dev_ioctl with | ||
258 | the path field set and the size field adjusted appropriately as well | ||
259 | as the arg1 field set to the device number of the autofs mount. The | ||
260 | device number can be obtained from the mount options shown in | ||
261 | /proc/mounts. The close call requires an initialized struct | ||
262 | autofs_dev_ioctl with the ioctlfd field set to the descriptor obtained | ||
263 | from the open call. The release of the file descriptor can also be done | ||
264 | with close(2) so any open descriptors will also be closed at process exit. | ||
265 | The close call is included in the implemented operations largely for | ||
266 | completeness and to provide for a consistent user space implementation. | ||
267 | |||
268 | |||
269 | AUTOFS_DEV_IOCTL_READY_CMD and AUTOFS_DEV_IOCTL_FAIL_CMD | ||
270 | -------------------------------------------------------- | ||
271 | |||
272 | Return mount and expire result status from user space to the kernel. | ||
273 | Both of these calls require an initialized struct autofs_dev_ioctl | ||
274 | with the ioctlfd field set to the descriptor obtained from the open | ||
275 | call and the arg1 field set to the wait queue token number, received | ||
276 | by user space in the foregoing mount or expire request. The arg2 field | ||
277 | is set to the status to be returned. For the ready call this is always | ||
278 | 0 and for the fail call it is set to the errno of the operation. | ||
279 | |||
280 | |||
281 | AUTOFS_DEV_IOCTL_SETPIPEFD_CMD | ||
282 | ------------------------------ | ||
283 | |||
284 | Set the pipe file descriptor used for kernel communication to the daemon. | ||
285 | Normally this is set at mount time using an option but when reconnecting | ||
286 | to an existing mount we need to use this to tell the autofs mount about | ||
287 | the new kernel pipe descriptor. In order to protect mounts against | ||
288 | incorrectly setting the pipe descriptor we also require that the autofs | ||
289 | mount be catatonic (see next call). | ||
290 | |||
291 | The call requires an initialized struct autofs_dev_ioctl with the | ||
292 | ioctlfd field set to the descriptor obtained from the open call and | ||
293 | the arg1 field set to descriptor of the pipe. On success the call | ||
294 | also sets the process group id used to identify the controlling process | ||
295 | (eg. the owning automount(8) daemon) to the process group of the caller. | ||
296 | |||
297 | |||
298 | AUTOFS_DEV_IOCTL_CATATONIC_CMD | ||
299 | ------------------------------ | ||
300 | |||
301 | Make the autofs mount point catatonic. The autofs mount will no longer | ||
302 | issue mount requests, the kernel communication pipe descriptor is released | ||
303 | and any remaining waits in the queue released. | ||
304 | |||
305 | The call requires an initialized struct autofs_dev_ioctl with the | ||
306 | ioctlfd field set to the descriptor obtained from the open call. | ||
307 | |||
308 | |||
309 | AUTOFS_DEV_IOCTL_TIMEOUT_CMD | ||
310 | ---------------------------- | ||
311 | |||
312 | Set the expire timeout for mounts within an autofs mount point. | ||
313 | |||
314 | The call requires an initialized struct autofs_dev_ioctl with the | ||
315 | ioctlfd field set to the descriptor obtained from the open call. | ||
316 | |||
317 | |||
318 | AUTOFS_DEV_IOCTL_REQUESTER_CMD | ||
319 | ------------------------------ | ||
320 | |||
321 | Return the uid and gid of the last process to successfully trigger a | ||
322 | mount on the given path dentry. | ||
323 | |||
324 | The call requires an initialized struct autofs_dev_ioctl with the path | ||
325 | field set to the mount point in question and the size field adjusted | ||
326 | appropriately as well as the arg1 field set to the device number of the | ||
327 | containing autofs mount. Upon return the struct field arg1 contains the | ||
328 | uid and arg2 the gid. | ||
329 | |||
330 | When reconstructing an autofs mount tree with active mounts we need to | ||
331 | re-connect to mounts that may have used the original process uid and | ||
332 | gid (or string variations of them) for mount lookups within the map entry. | ||
333 | This call provides the ability to obtain this uid and gid so they may be | ||
334 | used by user space for the mount map lookups. | ||
335 | |||
336 | |||
337 | AUTOFS_DEV_IOCTL_EXPIRE_CMD | ||
338 | --------------------------- | ||
339 | |||
340 | Issue an expire request to the kernel for an autofs mount. Typically | ||
341 | this ioctl is called until no further expire candidates are found. | ||
342 | |||
343 | The call requires an initialized struct autofs_dev_ioctl with the | ||
344 | ioctlfd field set to the descriptor obtained from the open call. In | ||
345 | addition an immediate expire, independent of the mount timeout, can be | ||
346 | requested by setting the arg1 field to 1. If no expire candidates can | ||
347 | be found the ioctl returns -1 with errno set to EAGAIN. | ||
348 | |||
349 | This call causes the kernel module to check the mount corresponding | ||
350 | to the given ioctlfd for mounts that can be expired, issues an expire | ||
351 | request back to the daemon and waits for completion. | ||
352 | |||
353 | AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD | ||
354 | ------------------------------ | ||
355 | |||
356 | Checks if an autofs mount point is in use. | ||
357 | |||
358 | The call requires an initialized struct autofs_dev_ioctl with the | ||
359 | ioctlfd field set to the descriptor obtained from the open call and | ||
360 | it returns the result in the arg1 field, 1 for busy and 0 otherwise. | ||
361 | |||
362 | |||
363 | AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD | ||
364 | --------------------------------- | ||
365 | |||
366 | Check if the given path is a mountpoint. | ||
367 | |||
368 | The call requires an initialized struct autofs_dev_ioctl. There are two | ||
369 | possible variations. Both use the path field set to the path of the mount | ||
370 | point to check and the size field adjusted appropriately. One uses the | ||
371 | ioctlfd field to identify a specific mount point to check while the other | ||
372 | variation uses the path and optionally arg1 set to an autofs mount type. | ||
373 | The call returns 1 if this is a mount point and sets arg1 to the device | ||
374 | number of the mount and field arg2 to the relevant super block magic | ||
375 | number (described below) or 0 if it isn't a mountpoint. In both cases | ||
376 | the device number (as returned by new_encode_dev()) is returned | ||
377 | in field arg1. | ||
378 | |||
379 | If supplied with a file descriptor we're looking for a specific mount, | ||
380 | not necessarily at the top of the mounted stack. In this case the path | ||
381 | the descriptor corresponds to is considered a mountpoint if it is itself | ||
382 | a mountpoint or contains a mount, such as a multi-mount without a root | ||
383 | mount. In this case we return 1 if the descriptor corresponds to a mount | ||
384 | point and also returns the super magic of the covering mount if there | ||
385 | is one or 0 if it isn't a mountpoint. | ||
386 | |||
387 | If a path is supplied (and the ioctlfd field is set to -1) then the path | ||
388 | is looked up and is checked to see if it is the root of a mount. If a | ||
389 | type is also given we are looking for a particular autofs mount and if | ||
390 | a match isn't found a fail is returned. If the located path is the | ||
391 | root of a mount 1 is returned along with the super magic of the mount | ||
392 | or 0 otherwise. | ||
393 | |||
diff --git a/Documentation/filesystems/bfs.txt b/Documentation/filesystems/bfs.txt index ea825e178e79..78043d5a8fc3 100644 --- a/Documentation/filesystems/bfs.txt +++ b/Documentation/filesystems/bfs.txt | |||
@@ -26,11 +26,11 @@ You can simplify mounting by just typing: | |||
26 | 26 | ||
27 | this will allocate the first available loopback device (and load loop.o | 27 | this will allocate the first available loopback device (and load loop.o |
28 | kernel module if necessary) automatically. If the loopback driver is not | 28 | kernel module if necessary) automatically. If the loopback driver is not |
29 | loaded automatically, make sure that your kernel is compiled with kmod | 29 | loaded automatically, make sure that you have compiled the module and |
30 | support (CONFIG_KMOD) enabled. Beware that umount will not | 30 | that modprobe is functioning. Beware that umount will not deallocate |
31 | deallocate /dev/loopN device if /etc/mtab file on your system is a | 31 | /dev/loopN device if /etc/mtab file on your system is a symbolic link to |
32 | symbolic link to /proc/mounts. You will need to do it manually using | 32 | /proc/mounts. You will need to do it manually using "-d" switch of |
33 | "-d" switch of losetup(8). Read losetup(8) manpage for more info. | 33 | losetup(8). Read losetup(8) manpage for more info. |
34 | 34 | ||
35 | To create the BFS image under UnixWare you need to find out first which | 35 | To create the BFS image under UnixWare you need to find out first which |
36 | slice contains it. The command prtvtoc(1M) is your friend: | 36 | slice contains it. The command prtvtoc(1M) is your friend: |
diff --git a/Documentation/filesystems/configfs/Makefile b/Documentation/filesystems/configfs/Makefile new file mode 100644 index 000000000000..be7ec5e67dbc --- /dev/null +++ b/Documentation/filesystems/configfs/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | ifneq ($(CONFIG_CONFIGFS_FS),) | ||
2 | obj-m += configfs_example_explicit.o configfs_example_macros.o | ||
3 | endif | ||
diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index 15838d706ea2..fabcb0e00f25 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt | |||
@@ -233,12 +233,10 @@ accomplished via the group operations specified on the group's | |||
233 | config_item_type. | 233 | config_item_type. |
234 | 234 | ||
235 | struct configfs_group_operations { | 235 | struct configfs_group_operations { |
236 | int (*make_item)(struct config_group *group, | 236 | struct config_item *(*make_item)(struct config_group *group, |
237 | const char *name, | 237 | const char *name); |
238 | struct config_item **new_item); | 238 | struct config_group *(*make_group)(struct config_group *group, |
239 | int (*make_group)(struct config_group *group, | 239 | const char *name); |
240 | const char *name, | ||
241 | struct config_group **new_group); | ||
242 | int (*commit_item)(struct config_item *item); | 240 | int (*commit_item)(struct config_item *item); |
243 | void (*disconnect_notify)(struct config_group *group, | 241 | void (*disconnect_notify)(struct config_group *group, |
244 | struct config_item *item); | 242 | struct config_item *item); |
@@ -313,9 +311,20 @@ the subsystem must be ready for it. | |||
313 | [An Example] | 311 | [An Example] |
314 | 312 | ||
315 | The best example of these basic concepts is the simple_children | 313 | The best example of these basic concepts is the simple_children |
316 | subsystem/group and the simple_child item in configfs_example.c It | 314 | subsystem/group and the simple_child item in configfs_example_explicit.c |
317 | shows a trivial object displaying and storing an attribute, and a simple | 315 | and configfs_example_macros.c. It shows a trivial object displaying and |
318 | group creating and destroying these children. | 316 | storing an attribute, and a simple group creating and destroying these |
317 | children. | ||
318 | |||
319 | The only difference between configfs_example_explicit.c and | ||
320 | configfs_example_macros.c is how the attributes of the childless item | ||
321 | are defined. The childless item has extended attributes, each with | ||
322 | their own show()/store() operation. This follows a convention commonly | ||
323 | used in sysfs. configfs_example_explicit.c creates these attributes | ||
324 | by explicitly defining the structures involved. Conversely | ||
325 | configfs_example_macros.c uses some convenience macros from configfs.h | ||
326 | to define the attributes. These macros are similar to their sysfs | ||
327 | counterparts. | ||
319 | 328 | ||
320 | [Hierarchy Navigation and the Subsystem Mutex] | 329 | [Hierarchy Navigation and the Subsystem Mutex] |
321 | 330 | ||
diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example_explicit.c index 0b422acd470c..d428cc9f07f3 100644 --- a/Documentation/filesystems/configfs/configfs_example.c +++ b/Documentation/filesystems/configfs/configfs_example_explicit.c | |||
@@ -1,8 +1,10 @@ | |||
1 | /* | 1 | /* |
2 | * vim: noexpandtab ts=8 sts=0 sw=8: | 2 | * vim: noexpandtab ts=8 sts=0 sw=8: |
3 | * | 3 | * |
4 | * configfs_example.c - This file is a demonstration module containing | 4 | * configfs_example_explicit.c - This file is a demonstration module |
5 | * a number of configfs subsystems. | 5 | * containing a number of configfs subsystems. It explicitly defines |
6 | * each structure without using the helper macros defined in | ||
7 | * configfs.h. | ||
6 | * | 8 | * |
7 | * This program is free software; you can redistribute it and/or | 9 | * This program is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU General Public | 10 | * modify it under the terms of the GNU General Public |
@@ -273,22 +275,20 @@ static inline struct simple_children *to_simple_children(struct config_item *ite | |||
273 | return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; | 275 | return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; |
274 | } | 276 | } |
275 | 277 | ||
276 | static int simple_children_make_item(struct config_group *group, const char *name, struct config_item **new_item) | 278 | static struct config_item *simple_children_make_item(struct config_group *group, const char *name) |
277 | { | 279 | { |
278 | struct simple_child *simple_child; | 280 | struct simple_child *simple_child; |
279 | 281 | ||
280 | simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); | 282 | simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); |
281 | if (!simple_child) | 283 | if (!simple_child) |
282 | return -ENOMEM; | 284 | return ERR_PTR(-ENOMEM); |
283 | |||
284 | 285 | ||
285 | config_item_init_type_name(&simple_child->item, name, | 286 | config_item_init_type_name(&simple_child->item, name, |
286 | &simple_child_type); | 287 | &simple_child_type); |
287 | 288 | ||
288 | simple_child->storeme = 0; | 289 | simple_child->storeme = 0; |
289 | 290 | ||
290 | *new_item = &simple_child->item; | 291 | return &simple_child->item; |
291 | return 0; | ||
292 | } | 292 | } |
293 | 293 | ||
294 | static struct configfs_attribute simple_children_attr_description = { | 294 | static struct configfs_attribute simple_children_attr_description = { |
@@ -303,8 +303,8 @@ static struct configfs_attribute *simple_children_attrs[] = { | |||
303 | }; | 303 | }; |
304 | 304 | ||
305 | static ssize_t simple_children_attr_show(struct config_item *item, | 305 | static ssize_t simple_children_attr_show(struct config_item *item, |
306 | struct configfs_attribute *attr, | 306 | struct configfs_attribute *attr, |
307 | char *page) | 307 | char *page) |
308 | { | 308 | { |
309 | return sprintf(page, | 309 | return sprintf(page, |
310 | "[02-simple-children]\n" | 310 | "[02-simple-children]\n" |
@@ -319,7 +319,7 @@ static void simple_children_release(struct config_item *item) | |||
319 | } | 319 | } |
320 | 320 | ||
321 | static struct configfs_item_operations simple_children_item_ops = { | 321 | static struct configfs_item_operations simple_children_item_ops = { |
322 | .release = simple_children_release, | 322 | .release = simple_children_release, |
323 | .show_attribute = simple_children_attr_show, | 323 | .show_attribute = simple_children_attr_show, |
324 | }; | 324 | }; |
325 | 325 | ||
@@ -360,21 +360,19 @@ static struct configfs_subsystem simple_children_subsys = { | |||
360 | * children of its own. | 360 | * children of its own. |
361 | */ | 361 | */ |
362 | 362 | ||
363 | static int group_children_make_group(struct config_group *group, const char *name, struct config_group **new_group) | 363 | static struct config_group *group_children_make_group(struct config_group *group, const char *name) |
364 | { | 364 | { |
365 | struct simple_children *simple_children; | 365 | struct simple_children *simple_children; |
366 | 366 | ||
367 | simple_children = kzalloc(sizeof(struct simple_children), | 367 | simple_children = kzalloc(sizeof(struct simple_children), |
368 | GFP_KERNEL); | 368 | GFP_KERNEL); |
369 | if (!simple_children) | 369 | if (!simple_children) |
370 | return -ENOMEM; | 370 | return ERR_PTR(-ENOMEM); |
371 | |||
372 | 371 | ||
373 | config_group_init_type_name(&simple_children->group, name, | 372 | config_group_init_type_name(&simple_children->group, name, |
374 | &simple_children_type); | 373 | &simple_children_type); |
375 | 374 | ||
376 | *new_group = &simple_children->group; | 375 | return &simple_children->group; |
377 | return 0; | ||
378 | } | 376 | } |
379 | 377 | ||
380 | static struct configfs_attribute group_children_attr_description = { | 378 | static struct configfs_attribute group_children_attr_description = { |
@@ -389,8 +387,8 @@ static struct configfs_attribute *group_children_attrs[] = { | |||
389 | }; | 387 | }; |
390 | 388 | ||
391 | static ssize_t group_children_attr_show(struct config_item *item, | 389 | static ssize_t group_children_attr_show(struct config_item *item, |
392 | struct configfs_attribute *attr, | 390 | struct configfs_attribute *attr, |
393 | char *page) | 391 | char *page) |
394 | { | 392 | { |
395 | return sprintf(page, | 393 | return sprintf(page, |
396 | "[03-group-children]\n" | 394 | "[03-group-children]\n" |
diff --git a/Documentation/filesystems/configfs/configfs_example_macros.c b/Documentation/filesystems/configfs/configfs_example_macros.c new file mode 100644 index 000000000000..d8e30a0378aa --- /dev/null +++ b/Documentation/filesystems/configfs/configfs_example_macros.c | |||
@@ -0,0 +1,448 @@ | |||
1 | /* | ||
2 | * vim: noexpandtab ts=8 sts=0 sw=8: | ||
3 | * | ||
4 | * configfs_example_macros.c - This file is a demonstration module | ||
5 | * containing a number of configfs subsystems. It uses the helper | ||
6 | * macros defined by configfs.h | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public | ||
19 | * License along with this program; if not, write to the | ||
20 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
20 | * Boston, MA 02111-1307, USA. | ||
22 | * | ||
23 | * Based on sysfs: | ||
24 | * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel | ||
25 | * | ||
26 | * configfs Copyright (C) 2005 Oracle. All rights reserved. | ||
27 | */ | ||
28 | |||
29 | #include <linux/init.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/slab.h> | ||
32 | |||
33 | #include <linux/configfs.h> | ||
34 | |||
35 | |||
36 | |||
37 | /* | ||
38 | * 01-childless | ||
39 | * | ||
40 | * This first example is a childless subsystem. It cannot create | ||
41 | * any config_items. It just has attributes. | ||
42 | * | ||
43 | * Note that we are enclosing the configfs_subsystem inside a container. | ||
44 | * This is not necessary if a subsystem has no attributes directly | ||
45 | * on the subsystem. See the next example, 02-simple-children, for | ||
46 | * such a subsystem. | ||
47 | */ | ||
48 | |||
49 | struct childless { | ||
50 | struct configfs_subsystem subsys; | ||
51 | int showme; | ||
52 | int storeme; | ||
53 | }; | ||
54 | |||
55 | static inline struct childless *to_childless(struct config_item *item) | ||
56 | { | ||
57 | return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; | ||
58 | } | ||
59 | |||
60 | CONFIGFS_ATTR_STRUCT(childless); | ||
61 | #define CHILDLESS_ATTR(_name, _mode, _show, _store) \ | ||
62 | struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR(_name, _mode, _show, _store) | ||
63 | #define CHILDLESS_ATTR_RO(_name, _show) \ | ||
64 | struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR_RO(_name, _show); | ||
65 | |||
66 | static ssize_t childless_showme_read(struct childless *childless, | ||
67 | char *page) | ||
68 | { | ||
69 | ssize_t pos; | ||
70 | |||
71 | pos = sprintf(page, "%d\n", childless->showme); | ||
72 | childless->showme++; | ||
73 | |||
74 | return pos; | ||
75 | } | ||
76 | |||
77 | static ssize_t childless_storeme_read(struct childless *childless, | ||
78 | char *page) | ||
79 | { | ||
80 | return sprintf(page, "%d\n", childless->storeme); | ||
81 | } | ||
82 | |||
83 | static ssize_t childless_storeme_write(struct childless *childless, | ||
84 | const char *page, | ||
85 | size_t count) | ||
86 | { | ||
87 | unsigned long tmp; | ||
88 | char *p = (char *) page; | ||
89 | |||
90 | tmp = simple_strtoul(p, &p, 10); | ||
91 | if (!p || (*p && (*p != '\n'))) | ||
92 | return -EINVAL; | ||
93 | |||
94 | if (tmp > INT_MAX) | ||
95 | return -ERANGE; | ||
96 | |||
97 | childless->storeme = tmp; | ||
98 | |||
99 | return count; | ||
100 | } | ||
101 | |||
102 | static ssize_t childless_description_read(struct childless *childless, | ||
103 | char *page) | ||
104 | { | ||
105 | return sprintf(page, | ||
106 | "[01-childless]\n" | ||
107 | "\n" | ||
108 | "The childless subsystem is the simplest possible subsystem in\n" | ||
109 | "configfs. It does not support the creation of child config_items.\n" | ||
110 | "It only has a few attributes. In fact, it isn't much different\n" | ||
111 | "than a directory in /proc.\n"); | ||
112 | } | ||
113 | |||
114 | CHILDLESS_ATTR_RO(showme, childless_showme_read); | ||
115 | CHILDLESS_ATTR(storeme, S_IRUGO | S_IWUSR, childless_storeme_read, | ||
116 | childless_storeme_write); | ||
117 | CHILDLESS_ATTR_RO(description, childless_description_read); | ||
118 | |||
119 | static struct configfs_attribute *childless_attrs[] = { | ||
120 | &childless_attr_showme.attr, | ||
121 | &childless_attr_storeme.attr, | ||
122 | &childless_attr_description.attr, | ||
123 | NULL, | ||
124 | }; | ||
125 | |||
126 | CONFIGFS_ATTR_OPS(childless); | ||
127 | static struct configfs_item_operations childless_item_ops = { | ||
128 | .show_attribute = childless_attr_show, | ||
129 | .store_attribute = childless_attr_store, | ||
130 | }; | ||
131 | |||
132 | static struct config_item_type childless_type = { | ||
133 | .ct_item_ops = &childless_item_ops, | ||
134 | .ct_attrs = childless_attrs, | ||
135 | .ct_owner = THIS_MODULE, | ||
136 | }; | ||
137 | |||
138 | static struct childless childless_subsys = { | ||
139 | .subsys = { | ||
140 | .su_group = { | ||
141 | .cg_item = { | ||
142 | .ci_namebuf = "01-childless", | ||
143 | .ci_type = &childless_type, | ||
144 | }, | ||
145 | }, | ||
146 | }, | ||
147 | }; | ||
148 | |||
149 | |||
150 | /* ----------------------------------------------------------------- */ | ||
151 | |||
152 | /* | ||
153 | * 02-simple-children | ||
154 | * | ||
155 | * This example merely has a simple one-attribute child. Note that | ||
156 | * there is no extra attribute structure, as the child's attribute is | ||
157 | * known from the get-go. Also, there is no container for the | ||
158 | * subsystem, as it has no attributes of its own. | ||
159 | */ | ||
160 | |||
161 | struct simple_child { | ||
162 | struct config_item item; | ||
163 | int storeme; | ||
164 | }; | ||
165 | |||
166 | static inline struct simple_child *to_simple_child(struct config_item *item) | ||
167 | { | ||
168 | return item ? container_of(item, struct simple_child, item) : NULL; | ||
169 | } | ||
170 | |||
171 | static struct configfs_attribute simple_child_attr_storeme = { | ||
172 | .ca_owner = THIS_MODULE, | ||
173 | .ca_name = "storeme", | ||
174 | .ca_mode = S_IRUGO | S_IWUSR, | ||
175 | }; | ||
176 | |||
177 | static struct configfs_attribute *simple_child_attrs[] = { | ||
178 | &simple_child_attr_storeme, | ||
179 | NULL, | ||
180 | }; | ||
181 | |||
182 | static ssize_t simple_child_attr_show(struct config_item *item, | ||
183 | struct configfs_attribute *attr, | ||
184 | char *page) | ||
185 | { | ||
186 | ssize_t count; | ||
187 | struct simple_child *simple_child = to_simple_child(item); | ||
188 | |||
189 | count = sprintf(page, "%d\n", simple_child->storeme); | ||
190 | |||
191 | return count; | ||
192 | } | ||
193 | |||
194 | static ssize_t simple_child_attr_store(struct config_item *item, | ||
195 | struct configfs_attribute *attr, | ||
196 | const char *page, size_t count) | ||
197 | { | ||
198 | struct simple_child *simple_child = to_simple_child(item); | ||
199 | unsigned long tmp; | ||
200 | char *p = (char *) page; | ||
201 | |||
202 | tmp = simple_strtoul(p, &p, 10); | ||
203 | if (!p || (*p && (*p != '\n'))) | ||
204 | return -EINVAL; | ||
205 | |||
206 | if (tmp > INT_MAX) | ||
207 | return -ERANGE; | ||
208 | |||
209 | simple_child->storeme = tmp; | ||
210 | |||
211 | return count; | ||
212 | } | ||
213 | |||
214 | static void simple_child_release(struct config_item *item) | ||
215 | { | ||
216 | kfree(to_simple_child(item)); | ||
217 | } | ||
218 | |||
219 | static struct configfs_item_operations simple_child_item_ops = { | ||
220 | .release = simple_child_release, | ||
221 | .show_attribute = simple_child_attr_show, | ||
222 | .store_attribute = simple_child_attr_store, | ||
223 | }; | ||
224 | |||
225 | static struct config_item_type simple_child_type = { | ||
226 | .ct_item_ops = &simple_child_item_ops, | ||
227 | .ct_attrs = simple_child_attrs, | ||
228 | .ct_owner = THIS_MODULE, | ||
229 | }; | ||
230 | |||
231 | |||
232 | struct simple_children { | ||
233 | struct config_group group; | ||
234 | }; | ||
235 | |||
236 | static inline struct simple_children *to_simple_children(struct config_item *item) | ||
237 | { | ||
238 | return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; | ||
239 | } | ||
240 | |||
241 | static struct config_item *simple_children_make_item(struct config_group *group, const char *name) | ||
242 | { | ||
243 | struct simple_child *simple_child; | ||
244 | |||
245 | simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); | ||
246 | if (!simple_child) | ||
247 | return ERR_PTR(-ENOMEM); | ||
248 | |||
249 | config_item_init_type_name(&simple_child->item, name, | ||
250 | &simple_child_type); | ||
251 | |||
252 | simple_child->storeme = 0; | ||
253 | |||
254 | return &simple_child->item; | ||
255 | } | ||
256 | |||
257 | static struct configfs_attribute simple_children_attr_description = { | ||
258 | .ca_owner = THIS_MODULE, | ||
259 | .ca_name = "description", | ||
260 | .ca_mode = S_IRUGO, | ||
261 | }; | ||
262 | |||
263 | static struct configfs_attribute *simple_children_attrs[] = { | ||
264 | &simple_children_attr_description, | ||
265 | NULL, | ||
266 | }; | ||
267 | |||
268 | static ssize_t simple_children_attr_show(struct config_item *item, | ||
269 | struct configfs_attribute *attr, | ||
270 | char *page) | ||
271 | { | ||
272 | return sprintf(page, | ||
273 | "[02-simple-children]\n" | ||
274 | "\n" | ||
275 | "This subsystem allows the creation of child config_items. These\n" | ||
276 | "items have only one attribute that is readable and writeable.\n"); | ||
277 | } | ||
278 | |||
279 | static void simple_children_release(struct config_item *item) | ||
280 | { | ||
281 | kfree(to_simple_children(item)); | ||
282 | } | ||
283 | |||
284 | static struct configfs_item_operations simple_children_item_ops = { | ||
285 | .release = simple_children_release, | ||
286 | .show_attribute = simple_children_attr_show, | ||
287 | }; | ||
288 | |||
289 | /* | ||
290 | * Note that, since no extra work is required on ->drop_item(), | ||
291 | * no ->drop_item() is provided. | ||
292 | */ | ||
293 | static struct configfs_group_operations simple_children_group_ops = { | ||
294 | .make_item = simple_children_make_item, | ||
295 | }; | ||
296 | |||
297 | static struct config_item_type simple_children_type = { | ||
298 | .ct_item_ops = &simple_children_item_ops, | ||
299 | .ct_group_ops = &simple_children_group_ops, | ||
300 | .ct_attrs = simple_children_attrs, | ||
301 | .ct_owner = THIS_MODULE, | ||
302 | }; | ||
303 | |||
304 | static struct configfs_subsystem simple_children_subsys = { | ||
305 | .su_group = { | ||
306 | .cg_item = { | ||
307 | .ci_namebuf = "02-simple-children", | ||
308 | .ci_type = &simple_children_type, | ||
309 | }, | ||
310 | }, | ||
311 | }; | ||
312 | |||
313 | |||
314 | /* ----------------------------------------------------------------- */ | ||
315 | |||
316 | /* | ||
317 | * 03-group-children | ||
318 | * | ||
319 | * This example reuses the simple_children group from above. However, | ||
320 | * the simple_children group is not the subsystem itself, it is a | ||
321 | * child of the subsystem. Creation of a group in the subsystem creates | ||
322 | * a new simple_children group. That group can then have simple_child | ||
323 | * children of its own. | ||
324 | */ | ||
325 | |||
326 | static struct config_group *group_children_make_group(struct config_group *group, const char *name) | ||
327 | { | ||
328 | struct simple_children *simple_children; | ||
329 | |||
330 | simple_children = kzalloc(sizeof(struct simple_children), | ||
331 | GFP_KERNEL); | ||
332 | if (!simple_children) | ||
333 | return ERR_PTR(-ENOMEM); | ||
334 | |||
335 | config_group_init_type_name(&simple_children->group, name, | ||
336 | &simple_children_type); | ||
337 | |||
338 | return &simple_children->group; | ||
339 | } | ||
340 | |||
341 | static struct configfs_attribute group_children_attr_description = { | ||
342 | .ca_owner = THIS_MODULE, | ||
343 | .ca_name = "description", | ||
344 | .ca_mode = S_IRUGO, | ||
345 | }; | ||
346 | |||
347 | static struct configfs_attribute *group_children_attrs[] = { | ||
348 | &group_children_attr_description, | ||
349 | NULL, | ||
350 | }; | ||
351 | |||
352 | static ssize_t group_children_attr_show(struct config_item *item, | ||
353 | struct configfs_attribute *attr, | ||
354 | char *page) | ||
355 | { | ||
356 | return sprintf(page, | ||
357 | "[03-group-children]\n" | ||
358 | "\n" | ||
359 | "This subsystem allows the creation of child config_groups. These\n" | ||
360 | "groups are like the subsystem simple-children.\n"); | ||
361 | } | ||
362 | |||
363 | static struct configfs_item_operations group_children_item_ops = { | ||
364 | .show_attribute = group_children_attr_show, | ||
365 | }; | ||
366 | |||
367 | /* | ||
368 | * Note that, since no extra work is required on ->drop_item(), | ||
369 | * no ->drop_item() is provided. | ||
370 | */ | ||
371 | static struct configfs_group_operations group_children_group_ops = { | ||
372 | .make_group = group_children_make_group, | ||
373 | }; | ||
374 | |||
375 | static struct config_item_type group_children_type = { | ||
376 | .ct_item_ops = &group_children_item_ops, | ||
377 | .ct_group_ops = &group_children_group_ops, | ||
378 | .ct_attrs = group_children_attrs, | ||
379 | .ct_owner = THIS_MODULE, | ||
380 | }; | ||
381 | |||
382 | static struct configfs_subsystem group_children_subsys = { | ||
383 | .su_group = { | ||
384 | .cg_item = { | ||
385 | .ci_namebuf = "03-group-children", | ||
386 | .ci_type = &group_children_type, | ||
387 | }, | ||
388 | }, | ||
389 | }; | ||
390 | |||
391 | /* ----------------------------------------------------------------- */ | ||
392 | |||
393 | /* | ||
394 | * We're now done with our subsystem definitions. | ||
395 | * For convenience in this module, here's a list of them all. It | ||
396 | * allows the init function to easily register them. Most modules | ||
397 | * will only have one subsystem, and will only call register_subsystem | ||
398 | * on it directly. | ||
399 | */ | ||
400 | static struct configfs_subsystem *example_subsys[] = { | ||
401 | &childless_subsys.subsys, | ||
402 | &simple_children_subsys, | ||
403 | &group_children_subsys, | ||
404 | NULL, | ||
405 | }; | ||
406 | |||
407 | static int __init configfs_example_init(void) | ||
408 | { | ||
409 | int ret; | ||
410 | int i; | ||
411 | struct configfs_subsystem *subsys; | ||
412 | |||
413 | for (i = 0; example_subsys[i]; i++) { | ||
414 | subsys = example_subsys[i]; | ||
415 | |||
416 | config_group_init(&subsys->su_group); | ||
417 | mutex_init(&subsys->su_mutex); | ||
418 | ret = configfs_register_subsystem(subsys); | ||
419 | if (ret) { | ||
420 | printk(KERN_ERR "Error %d while registering subsystem %s\n", | ||
421 | ret, | ||
422 | subsys->su_group.cg_item.ci_namebuf); | ||
423 | goto out_unregister; | ||
424 | } | ||
425 | } | ||
426 | |||
427 | return 0; | ||
428 | |||
429 | out_unregister: | ||
430 | for (; i >= 0; i--) { | ||
431 | configfs_unregister_subsystem(example_subsys[i]); | ||
432 | } | ||
433 | |||
434 | return ret; | ||
435 | } | ||
436 | |||
437 | static void __exit configfs_example_exit(void) | ||
438 | { | ||
439 | int i; | ||
440 | |||
441 | for (i = 0; example_subsys[i]; i++) { | ||
442 | configfs_unregister_subsystem(example_subsys[i]); | ||
443 | } | ||
444 | } | ||
445 | |||
446 | module_init(configfs_example_init); | ||
447 | module_exit(configfs_example_exit); | ||
448 | MODULE_LICENSE("GPL"); | ||
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index b45f3c1b8b43..9dd2a3bb2acc 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt | |||
@@ -96,6 +96,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error. | |||
96 | errors=continue Keep going on a filesystem error. | 96 | errors=continue Keep going on a filesystem error. |
97 | errors=panic Panic and halt the machine if an error occurs. | 97 | errors=panic Panic and halt the machine if an error occurs. |
98 | 98 | ||
99 | data_err=ignore(*) Just print an error message if an error occurs | ||
100 | in a file data buffer in ordered mode. | ||
101 | data_err=abort Abort the journal if an error occurs in a file | ||
102 | data buffer in ordered mode. | ||
103 | |||
99 | grpid Give objects the same group ID as their creator. | 104 | grpid Give objects the same group ID as their creator. |
100 | bsdgroups | 105 | bsdgroups |
101 | 106 | ||
@@ -193,6 +198,5 @@ kernel source: <file:fs/ext3/> | |||
193 | programs: http://e2fsprogs.sourceforge.net/ | 198 | programs: http://e2fsprogs.sourceforge.net/ |
194 | http://ext2resize.sourceforge.net | 199 | http://ext2resize.sourceforge.net |
195 | 200 | ||
196 | useful links: http://www.zip.com.au/~akpm/linux/ext3/ext3-usage.html | 201 | useful links: http://www-106.ibm.com/developerworks/linux/library/l-fs7/ |
197 | http://www-106.ibm.com/developerworks/linux/library/l-fs7/ | ||
198 | http://www-106.ibm.com/developerworks/linux/library/l-fs8/ | 202 | http://www-106.ibm.com/developerworks/linux/library/l-fs8/ |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 80e193d82e2e..174eaff7ded9 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -2,19 +2,24 @@ | |||
2 | Ext4 Filesystem | 2 | Ext4 Filesystem |
3 | =============== | 3 | =============== |
4 | 4 | ||
5 | This is a development version of the ext4 filesystem, an advanced level | 5 | Ext4 is an advanced level of the ext3 filesystem which incorporates |
6 | of the ext3 filesystem which incorporates scalability and reliability | 6 | scalability and reliability enhancements for supporting large filesystems |
7 | enhancements for supporting large filesystems (64 bit) in keeping with | 7 | (64 bit) in keeping with increasing disk capacities and state-of-the-art |
8 | increasing disk capacities and state-of-the-art feature requirements. | 8 | feature requirements. |
9 | 9 | ||
10 | Mailing list: linux-ext4@vger.kernel.org | 10 | Mailing list: linux-ext4@vger.kernel.org |
11 | Web site: http://ext4.wiki.kernel.org | ||
11 | 12 | ||
12 | 13 | ||
13 | 1. Quick usage instructions: | 14 | 1. Quick usage instructions: |
14 | =========================== | 15 | =========================== |
15 | 16 | ||
17 | Note: More extensive information for getting started with ext4 can be | ||
18 | found at the ext4 wiki site at the URL: | ||
19 | http://ext4.wiki.kernel.org/index.php/Ext4_Howto | ||
20 | |||
16 | - Compile and install the latest version of e2fsprogs (as of this | 21 | - Compile and install the latest version of e2fsprogs (as of this |
17 | writing version 1.41) from: | 22 | writing version 1.41.3) from: |
18 | 23 | ||
19 | http://sourceforge.net/project/showfiles.php?group_id=2406 | 24 | http://sourceforge.net/project/showfiles.php?group_id=2406 |
20 | 25 | ||
@@ -26,28 +31,32 @@ Mailing list: linux-ext4@vger.kernel.org | |||
26 | 31 | ||
27 | git://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git | 32 | git://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git |
28 | 33 | ||
29 | - Create a new filesystem using the ext4dev filesystem type: | 34 | - Note that it is highly important to install the mke2fs.conf file |
35 | that comes with the e2fsprogs 1.41.x sources in /etc/mke2fs.conf. If | ||
36 | you have edited the /etc/mke2fs.conf file installed on your system, | ||
37 | you will need to merge your changes with the version from e2fsprogs | ||
38 | 1.41.x. | ||
39 | |||
40 | - Create a new filesystem using the ext4 filesystem type: | ||
30 | 41 | ||
31 | # mke2fs -t ext4dev /dev/hda1 | 42 | # mke2fs -t ext4 /dev/hda1 |
32 | 43 | ||
33 | Or configure an existing ext3 filesystem to support extents and set | 44 | Or to configure an existing ext3 filesystem to support extents: |
34 | the test_fs flag to indicate that it's ok for an in-development | ||
35 | filesystem to touch this filesystem: | ||
36 | 45 | ||
37 | # tune2fs -O extents -E test_fs /dev/hda1 | 46 | # tune2fs -O extents /dev/hda1 |
38 | 47 | ||
39 | If the filesystem was created with 128 byte inodes, it can be | 48 | If the filesystem was created with 128 byte inodes, it can be |
40 | converted to use 256 byte for greater efficiency via: | 49 | converted to use 256 byte for greater efficiency via: |
41 | 50 | ||
42 | # tune2fs -I 256 /dev/hda1 | 51 | # tune2fs -I 256 /dev/hda1 |
43 | 52 | ||
44 | (Note: we currently do not have tools to convert an ext4dev | 53 | (Note: we currently do not have tools to convert an ext4 |
45 | filesystem back to ext3; so please do not try this on production | 54 | filesystem back to ext3; so please do not try this on production |
46 | filesystems.) | 55 | filesystems.) |
47 | 56 | ||
48 | - Mounting: | 57 | - Mounting: |
49 | 58 | ||
50 | # mount -t ext4dev /dev/hda1 /wherever | 59 | # mount -t ext4 /dev/hda1 /wherever |
51 | 60 | ||
52 | - When comparing performance with other filesystems, remember that | 61 | - When comparing performance with other filesystems, remember that |
53 | ext3/4 by default offers higher data integrity guarantees than most. | 62 | ext3/4 by default offers higher data integrity guarantees than most. |
@@ -98,8 +107,8 @@ exist yet so I'm not sure they're in the near-term roadmap. | |||
98 | The big performance win will come with mballoc, delalloc and flex_bg | 107 | The big performance win will come with mballoc, delalloc and flex_bg |
99 | grouping of bitmaps and inode tables. Some test results available here: | 108 | grouping of bitmaps and inode tables. Some test results available here: |
100 | 109 | ||
101 | - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html | 110 | - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-write-2.6.27-rc1.html |
102 | - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html | 111 | - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-readwrite-2.6.27-rc1.html |
103 | 112 | ||
104 | 3. Options | 113 | 3. Options |
105 | ========== | 114 | ========== |
@@ -171,6 +180,11 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in | |||
171 | your disks are battery-backed in one way or another, | 180 | your disks are battery-backed in one way or another, |
172 | disabling barriers may safely improve performance. | 181 | disabling barriers may safely improve performance. |
173 | 182 | ||
183 | inode_readahead=n This tuning parameter controls the maximum | ||
184 | number of inode table blocks that ext4's inode | ||
185 | table readahead algorithm will pre-read into | ||
186 | the buffer cache. The default value is 32 blocks. | ||
187 | |||
174 | orlov (*) This enables the new Orlov block allocator. It is | 188 | orlov (*) This enables the new Orlov block allocator. It is |
175 | enabled by default. | 189 | enabled by default. |
176 | 190 | ||
@@ -203,15 +217,17 @@ noreservation | |||
203 | bsddf (*) Make 'df' act like BSD. | 217 | bsddf (*) Make 'df' act like BSD. |
204 | minixdf Make 'df' act like Minix. | 218 | minixdf Make 'df' act like Minix. |
205 | 219 | ||
206 | check=none Don't do extra checking of bitmaps on mount. | ||
207 | nocheck | ||
208 | |||
209 | debug Extra debugging information is sent to syslog. | 220 | debug Extra debugging information is sent to syslog. |
210 | 221 | ||
211 | errors=remount-ro(*) Remount the filesystem read-only on an error. | 222 | errors=remount-ro(*) Remount the filesystem read-only on an error. |
212 | errors=continue Keep going on a filesystem error. | 223 | errors=continue Keep going on a filesystem error. |
213 | errors=panic Panic and halt the machine if an error occurs. | 224 | errors=panic Panic and halt the machine if an error occurs. |
214 | 225 | ||
226 | data_err=ignore(*) Just print an error message if an error occurs | ||
227 | in a file data buffer in ordered mode. | ||
228 | data_err=abort Abort the journal if an error occurs in a file | ||
229 | data buffer in ordered mode. | ||
230 | |||
215 | grpid Give objects the same group ID as their creator. | 231 | grpid Give objects the same group ID as their creator. |
216 | bsdgroups | 232 | bsdgroups |
217 | 233 | ||
@@ -237,8 +253,6 @@ nobh (a) cache disk block mapping information | |||
237 | "nobh" option tries to avoid associating buffer | 253 | "nobh" option tries to avoid associating buffer |
238 | heads (supported only for "writeback" mode). | 254 | heads (supported only for "writeback" mode). |
239 | 255 | ||
240 | mballoc (*) Use the multiple block allocator for block allocation | ||
241 | nomballoc disabled multiple block allocator for block allocation. | ||
242 | stripe=n Number of filesystem blocks that mballoc will try | 256 | stripe=n Number of filesystem blocks that mballoc will try |
243 | to use for allocation size and alignment. For RAID5/6 | 257 | to use for allocation size and alignment. For RAID5/6 |
244 | systems this should be the number of data | 258 | systems this should be the number of data |
@@ -246,6 +260,7 @@ stripe=n Number of filesystem blocks that mballoc will try | |||
246 | delalloc (*) Deferring block allocation until write-out time. | 260 | delalloc (*) Deferring block allocation until write-out time. |
247 | nodelalloc Disable delayed allocation. Blocks are allocated | 261 | nodelalloc Disable delayed allocation. Blocks are allocated |
248 | when data is copied from user to page cache. | 262 | when data is copied from user to page cache. |
263 | |||
249 | Data Mode | 264 | Data Mode |
250 | ========= | 265 | ========= |
251 | There are 3 different data modes: | 266 | There are 3 different data modes: |
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt new file mode 100644 index 000000000000..1e3defcfe50b --- /dev/null +++ b/Documentation/filesystems/fiemap.txt | |||
@@ -0,0 +1,228 @@ | |||
1 | ============ | ||
2 | Fiemap Ioctl | ||
3 | ============ | ||
4 | |||
5 | The fiemap ioctl is an efficient method for userspace to get file | ||
6 | extent mappings. Instead of block-by-block mapping (such as bmap), fiemap | ||
7 | returns a list of extents. | ||
8 | |||
9 | |||
10 | Request Basics | ||
11 | -------------- | ||
12 | |||
13 | A fiemap request is encoded within struct fiemap: | ||
14 | |||
15 | struct fiemap { | ||
16 | __u64 fm_start; /* logical offset (inclusive) at | ||
17 | * which to start mapping (in) */ | ||
18 | __u64 fm_length; /* logical length of mapping which | ||
19 | * userspace cares about (in) */ | ||
20 | __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ | ||
21 | __u32 fm_mapped_extents; /* number of extents that were | ||
22 | * mapped (out) */ | ||
23 | __u32 fm_extent_count; /* size of fm_extents array (in) */ | ||
24 | __u32 fm_reserved; | ||
25 | struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ | ||
26 | }; | ||
27 | |||
28 | |||
29 | fm_start and fm_length specify the logical range within the file | ||
30 | which the process would like mappings for. Extents returned mirror | ||
31 | those on disk - that is, the logical offset of the 1st returned extent | ||
32 | may start before fm_start, and the range covered by the last returned | ||
33 | extent may end after fm_length. All offsets and lengths are in bytes. | ||
34 | |||
35 | Certain flags to modify the way in which mappings are looked up can be | ||
36 | set in fm_flags. If the kernel doesn't understand some particular | ||
37 | flags, it will return EBADR and the contents of fm_flags will contain | ||
38 | the set of flags which caused the error. If the kernel is compatible | ||
39 | with all flags passed, the contents of fm_flags will be unmodified. | ||
40 | It is up to userspace to determine whether rejection of a particular | ||
41 | flag is fatal to its operation. This scheme is intended to allow the | ||
42 | fiemap interface to grow in the future but without losing | ||
43 | compatibility with old software. | ||
44 | |||
45 | fm_extent_count specifies the number of elements in the fm_extents[] array | ||
46 | that can be used to return extents. If fm_extent_count is zero, then the | ||
47 | fm_extents[] array is ignored (no extents will be returned), and the | ||
48 | fm_mapped_extents count will hold the number of extents needed in | ||
49 | fm_extents[] to hold the file's current mapping. Note that there is | ||
50 | nothing to prevent the file from changing between calls to FIEMAP. | ||
51 | |||
52 | The following flags can be set in fm_flags: | ||
53 | |||
54 | * FIEMAP_FLAG_SYNC | ||
55 | If this flag is set, the kernel will sync the file before mapping extents. | ||
56 | |||
57 | * FIEMAP_FLAG_XATTR | ||
58 | If this flag is set, the extents returned will describe the inode's | ||
59 | extended attribute lookup tree, instead of its data tree. | ||
60 | |||
61 | |||
62 | Extent Mapping | ||
63 | -------------- | ||
64 | |||
65 | Extent information is returned within the embedded fm_extents array | ||
66 | which userspace must allocate along with the fiemap structure. The | ||
67 | number of elements in the fiemap_extents[] array should be passed via | ||
68 | fm_extent_count. The number of extents mapped by kernel will be | ||
69 | returned via fm_mapped_extents. If the number of fiemap_extents | ||
70 | allocated is less than would be required to map the requested range, | ||
71 | the maximum number of extents that can be mapped in the fm_extents[] | ||
72 | array will be returned and fm_mapped_extents will be equal to | ||
73 | fm_extent_count. In that case, the last extent in the array will not | ||
74 | complete the requested range and will not have the FIEMAP_EXTENT_LAST | ||
75 | flag set (see the next section on extent flags). | ||
76 | |||
77 | Each extent is described by a single fiemap_extent structure as | ||
78 | returned in fm_extents. | ||
79 | |||
80 | struct fiemap_extent { | ||
81 | __u64 fe_logical; /* logical offset in bytes for the start of | ||
82 | * the extent */ | ||
83 | __u64 fe_physical; /* physical offset in bytes for the start | ||
84 | * of the extent */ | ||
85 | __u64 fe_length; /* length in bytes for the extent */ | ||
86 | __u64 fe_reserved64[2]; | ||
87 | __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ | ||
88 | __u32 fe_reserved[3]; | ||
89 | }; | ||
90 | |||
91 | All offsets and lengths are in bytes and mirror those on disk. It is valid | ||
92 | for an extents logical offset to start before the request or it's logical | ||
93 | length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is | ||
94 | returned, fe_logical, fe_physical, and fe_length will be aligned to the | ||
95 | block size of the file system. With the exception of extents flagged as | ||
96 | FIEMAP_EXTENT_MERGED, adjacent extents will not be merged. | ||
97 | |||
98 | The fe_flags field contains flags which describe the extent returned. | ||
99 | A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in | ||
100 | the file so that the process making fiemap calls can determine when no | ||
101 | more extents are available, without having to call the ioctl again. | ||
102 | |||
103 | Some flags are intentionally vague and will always be set in the | ||
104 | presence of other more specific flags. This way a program looking for | ||
105 | a general property does not have to know all existing and future flags | ||
106 | which imply that property. | ||
107 | |||
108 | For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL | ||
109 | are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking | ||
110 | for inline or tail-packed data can key on the specific flag. Software | ||
111 | which simply cares not to try operating on non-aligned extents | ||
112 | however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to | ||
113 | worry about all present and future flags which might imply unaligned | ||
114 | data. Note that the opposite is not true - it would be valid for | ||
115 | FIEMAP_EXTENT_NOT_ALIGNED to appear alone. | ||
116 | |||
117 | * FIEMAP_EXTENT_LAST | ||
118 | This is the last extent in the file. A mapping attempt past this | ||
119 | extent will return nothing. | ||
120 | |||
121 | * FIEMAP_EXTENT_UNKNOWN | ||
122 | The location of this extent is currently unknown. This may indicate | ||
123 | the data is stored on an inaccessible volume or that no storage has | ||
124 | been allocated for the file yet. | ||
125 | |||
126 | * FIEMAP_EXTENT_DELALLOC | ||
127 | - This will also set FIEMAP_EXTENT_UNKNOWN. | ||
128 | Delayed allocation - while there is data for this extent, its | ||
129 | physical location has not been allocated yet. | ||
130 | |||
131 | * FIEMAP_EXTENT_ENCODED | ||
132 | This extent does not consist of plain filesystem blocks but is | ||
133 | encoded (e.g. encrypted or compressed). Reading the data in this | ||
134 | extent via I/O to the block device will have undefined results. | ||
135 | |||
136 | Note that it is *always* undefined to try to update the data | ||
137 | in-place by writing to the indicated location without the | ||
138 | assistance of the filesystem, or to access the data using the | ||
139 | information returned by the FIEMAP interface while the filesystem | ||
140 | is mounted. In other words, user applications may only read the | ||
141 | extent data via I/O to the block device while the filesystem is | ||
142 | unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is | ||
143 | clear; user applications must not try reading or writing to the | ||
144 | filesystem via the block device under any other circumstances. | ||
145 | |||
146 | * FIEMAP_EXTENT_DATA_ENCRYPTED | ||
147 | - This will also set FIEMAP_EXTENT_ENCODED | ||
148 | The data in this extent has been encrypted by the file system. | ||
149 | |||
150 | * FIEMAP_EXTENT_NOT_ALIGNED | ||
151 | Extent offsets and length are not guaranteed to be block aligned. | ||
152 | |||
153 | * FIEMAP_EXTENT_DATA_INLINE | ||
154 | This will also set FIEMAP_EXTENT_NOT_ALIGNED | ||
155 | Data is located within a meta data block. | ||
156 | |||
157 | * FIEMAP_EXTENT_DATA_TAIL | ||
158 | This will also set FIEMAP_EXTENT_NOT_ALIGNED | ||
159 | Data is packed into a block with data from other files. | ||
160 | |||
161 | * FIEMAP_EXTENT_UNWRITTEN | ||
162 | Unwritten extent - the extent is allocated but its data has not been | ||
163 | initialized. This indicates the extent's data will be all zero if read | ||
164 | through the filesystem but the contents are undefined if read directly from | ||
165 | the device. | ||
166 | |||
167 | * FIEMAP_EXTENT_MERGED | ||
168 | This will be set when a file does not support extents, i.e., it uses a block | ||
169 | based addressing scheme. Since returning an extent for each block back to | ||
170 | userspace would be highly inefficient, the kernel will try to merge most | ||
171 | adjacent blocks into 'extents'. | ||
172 | |||
173 | |||
174 | VFS -> File System Implementation | ||
175 | --------------------------------- | ||
176 | |||
177 | File systems wishing to support fiemap must implement a ->fiemap callback on | ||
178 | their inode_operations structure. The fs ->fiemap call is responsible for | ||
179 | defining its set of supported fiemap flags, and calling a helper function on | ||
180 | each discovered extent: | ||
181 | |||
182 | struct inode_operations { | ||
183 | ... | ||
184 | |||
185 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, | ||
186 | u64 len); | ||
187 | |||
188 | ->fiemap is passed struct fiemap_extent_info which describes the | ||
189 | fiemap request: | ||
190 | |||
191 | struct fiemap_extent_info { | ||
192 | unsigned int fi_flags; /* Flags as passed from user */ | ||
193 | unsigned int fi_extents_mapped; /* Number of mapped extents */ | ||
194 | unsigned int fi_extents_max; /* Size of fiemap_extent array */ | ||
195 | struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */ | ||
196 | }; | ||
197 | |||
198 | It is intended that the file system should not need to access any of this | ||
199 | structure directly. | ||
200 | |||
201 | |||
202 | Flag checking should be done at the beginning of the ->fiemap callback via the | ||
203 | fiemap_check_flags() helper: | ||
204 | |||
205 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); | ||
206 | |||
207 | The struct fieinfo should be passed in as received from ioctl_fiemap(). The | ||
208 | set of fiemap flags which the fs understands should be passed via fs_flags. If | ||
209 | fiemap_check_flags finds invalid user flags, it will place the bad values in | ||
210 | fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR from | ||
211 | fiemap_check_flags(), it should immediately exit, returning that error back to | ||
212 | ioctl_fiemap(). | ||
213 | |||
214 | |||
215 | For each extent in the request range, the file system should call | ||
216 | the helper function, fiemap_fill_next_extent(): | ||
217 | |||
218 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, | ||
219 | u64 phys, u64 len, u32 flags, u32 dev); | ||
220 | |||
221 | fiemap_fill_next_extent() will use the passed values to populate the | ||
222 | next free extent in the fm_extents array. 'General' extent flags will | ||
223 | automatically be set from specific flags on behalf of the calling file | ||
224 | system so that the userspace API is not broken. | ||
225 | |||
226 | fiemap_fill_next_extent() returns 0 on success, and 1 when the | ||
227 | user-supplied fm_extents array is full. If an error is encountered | ||
228 | while copying the extent to user memory, -EFAULT will be returned. | ||
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt index d0ec45ae4e7d..44bd766f2e5d 100644 --- a/Documentation/filesystems/nfs-rdma.txt +++ b/Documentation/filesystems/nfs-rdma.txt | |||
@@ -5,7 +5,7 @@ | |||
5 | ################################################################################ | 5 | ################################################################################ |
6 | 6 | ||
7 | Author: NetApp and Open Grid Computing | 7 | Author: NetApp and Open Grid Computing |
8 | Date: April 15, 2008 | 8 | Date: May 29, 2008 |
9 | 9 | ||
10 | Table of Contents | 10 | Table of Contents |
11 | ~~~~~~~~~~~~~~~~~ | 11 | ~~~~~~~~~~~~~~~~~ |
@@ -60,16 +60,18 @@ Installation | |||
60 | The procedures described in this document have been tested with | 60 | The procedures described in this document have been tested with |
61 | distributions from Red Hat's Fedora Project (http://fedora.redhat.com/). | 61 | distributions from Red Hat's Fedora Project (http://fedora.redhat.com/). |
62 | 62 | ||
63 | - Install nfs-utils-1.1.1 or greater on the client | 63 | - Install nfs-utils-1.1.2 or greater on the client |
64 | 64 | ||
65 | An NFS/RDMA mount point can only be obtained by using the mount.nfs | 65 | An NFS/RDMA mount point can be obtained by using the mount.nfs command in |
66 | command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs | 66 | nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils |
67 | you are using, type: | 67 | version with support for NFS/RDMA mounts, but for various reasons we |
68 | recommend using nfs-utils-1.1.2 or greater). To see which version of | ||
69 | mount.nfs you are using, type: | ||
68 | 70 | ||
69 | > /sbin/mount.nfs -V | 71 | $ /sbin/mount.nfs -V |
70 | 72 | ||
71 | If the version is less than 1.1.1 or the command does not exist, | 73 | If the version is less than 1.1.2 or the command does not exist, |
72 | then you will need to install the latest version of nfs-utils. | 74 | you should install the latest version of nfs-utils. |
73 | 75 | ||
74 | Download the latest package from: | 76 | Download the latest package from: |
75 | 77 | ||
@@ -77,22 +79,33 @@ Installation | |||
77 | 79 | ||
78 | Uncompress the package and follow the installation instructions. | 80 | Uncompress the package and follow the installation instructions. |
79 | 81 | ||
80 | If you will not be using GSS and NFSv4, the installation process | 82 | If you will not need the idmapper and gssd executables (you do not need |
81 | can be simplified by disabling these features when running configure: | 83 | these to create an NFS/RDMA enabled mount command), the installation |
84 | process can be simplified by disabling these features when running | ||
85 | configure: | ||
82 | 86 | ||
83 | > ./configure --disable-gss --disable-nfsv4 | 87 | $ ./configure --disable-gss --disable-nfsv4 |
84 | 88 | ||
85 | For more information on this see the package's README and INSTALL files. | 89 | To build nfs-utils you will need the tcp_wrappers package installed. For |
90 | more information on this see the package's README and INSTALL files. | ||
86 | 91 | ||
87 | After building the nfs-utils package, there will be a mount.nfs binary in | 92 | After building the nfs-utils package, there will be a mount.nfs binary in |
88 | the utils/mount directory. This binary can be used to initiate NFS v2, v3, | 93 | the utils/mount directory. This binary can be used to initiate NFS v2, v3, |
89 | or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4. | 94 | or v4 mounts. To initiate a v4 mount, the binary must be called |
90 | The standard technique is to create a symlink called mount.nfs4 to mount.nfs. | 95 | mount.nfs4. The standard technique is to create a symlink called |
96 | mount.nfs4 to mount.nfs. | ||
91 | 97 | ||
92 | NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed | 98 | This mount.nfs binary should be installed at /sbin/mount.nfs as follows: |
99 | |||
100 | $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs | ||
101 | |||
102 | In this location, mount.nfs will be invoked automatically for NFS mounts | ||
103 | by the system mount command. | ||
104 | |||
105 | NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed | ||
93 | on the NFS client machine. You do not need this specific version of | 106 | on the NFS client machine. You do not need this specific version of |
94 | nfs-utils on the server. Furthermore, only the mount.nfs command from | 107 | nfs-utils on the server. Furthermore, only the mount.nfs command from |
95 | nfs-utils-1.1.1 is needed on the client. | 108 | nfs-utils-1.1.2 is needed on the client. |
96 | 109 | ||
97 | - Install a Linux kernel with NFS/RDMA | 110 | - Install a Linux kernel with NFS/RDMA |
98 | 111 | ||
@@ -156,8 +169,8 @@ Check RDMA and NFS Setup | |||
156 | this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel | 169 | this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel |
157 | card: | 170 | card: |
158 | 171 | ||
159 | > modprobe ib_mthca | 172 | $ modprobe ib_mthca |
160 | > modprobe ib_ipoib | 173 | $ modprobe ib_ipoib |
161 | 174 | ||
162 | If you are using InfiniBand, make sure there is a Subnet Manager (SM) | 175 | If you are using InfiniBand, make sure there is a Subnet Manager (SM) |
163 | running on the network. If your IB switch has an embedded SM, you can | 176 | running on the network. If your IB switch has an embedded SM, you can |
@@ -166,7 +179,7 @@ Check RDMA and NFS Setup | |||
166 | 179 | ||
167 | If an SM is running on your network, you should see the following: | 180 | If an SM is running on your network, you should see the following: |
168 | 181 | ||
169 | > cat /sys/class/infiniband/driverX/ports/1/state | 182 | $ cat /sys/class/infiniband/driverX/ports/1/state |
170 | 4: ACTIVE | 183 | 4: ACTIVE |
171 | 184 | ||
172 | where driverX is mthca0, ipath5, ehca3, etc. | 185 | where driverX is mthca0, ipath5, ehca3, etc. |
@@ -174,10 +187,10 @@ Check RDMA and NFS Setup | |||
174 | To further test the InfiniBand software stack, use IPoIB (this | 187 | To further test the InfiniBand software stack, use IPoIB (this |
175 | assumes you have two IB hosts named host1 and host2): | 188 | assumes you have two IB hosts named host1 and host2): |
176 | 189 | ||
177 | host1> ifconfig ib0 a.b.c.x | 190 | host1$ ifconfig ib0 a.b.c.x |
178 | host2> ifconfig ib0 a.b.c.y | 191 | host2$ ifconfig ib0 a.b.c.y |
179 | host1> ping a.b.c.y | 192 | host1$ ping a.b.c.y |
180 | host2> ping a.b.c.x | 193 | host2$ ping a.b.c.x |
181 | 194 | ||
182 | For other device types, follow the appropriate procedures. | 195 | For other device types, follow the appropriate procedures. |
183 | 196 | ||
@@ -202,11 +215,11 @@ NFS/RDMA Setup | |||
202 | /vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash) | 215 | /vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash) |
203 | /vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash) | 216 | /vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash) |
204 | 217 | ||
205 | The IP address(es) is(are) the client's IPoIB address for an InfiniBand HCA or the | 218 | The IP address(es) is(are) the client's IPoIB address for an InfiniBand |
206 | client's iWARP address(es) for an RNIC. | 219 | HCA or the client's iWARP address(es) for an RNIC. |
207 | 220 | ||
208 | NOTE: The "insecure" option must be used because the NFS/RDMA client does not | 221 | NOTE: The "insecure" option must be used because the NFS/RDMA client does |
209 | use a reserved port. | 222 | not use a reserved port. |
210 | 223 | ||
211 | Each time a machine boots: | 224 | Each time a machine boots: |
212 | 225 | ||
@@ -214,43 +227,45 @@ NFS/RDMA Setup | |||
214 | 227 | ||
215 | For InfiniBand using a Mellanox adapter: | 228 | For InfiniBand using a Mellanox adapter: |
216 | 229 | ||
217 | > modprobe ib_mthca | 230 | $ modprobe ib_mthca |
218 | > modprobe ib_ipoib | 231 | $ modprobe ib_ipoib |
219 | > ifconfig ib0 a.b.c.d | 232 | $ ifconfig ib0 a.b.c.d |
220 | 233 | ||
221 | NOTE: use unique addresses for the client and server | 234 | NOTE: use unique addresses for the client and server |
222 | 235 | ||
223 | - Start the NFS server | 236 | - Start the NFS server |
224 | 237 | ||
225 | If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), | 238 | If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in |
226 | load the RDMA transport module: | 239 | kernel config), load the RDMA transport module: |
227 | 240 | ||
228 | > modprobe svcrdma | 241 | $ modprobe svcrdma |
229 | 242 | ||
230 | Regardless of how the server was built (module or built-in), start the server: | 243 | Regardless of how the server was built (module or built-in), start the |
244 | server: | ||
231 | 245 | ||
232 | > /etc/init.d/nfs start | 246 | $ /etc/init.d/nfs start |
233 | 247 | ||
234 | or | 248 | or |
235 | 249 | ||
236 | > service nfs start | 250 | $ service nfs start |
237 | 251 | ||
238 | Instruct the server to listen on the RDMA transport: | 252 | Instruct the server to listen on the RDMA transport: |
239 | 253 | ||
240 | > echo rdma 2050 > /proc/fs/nfsd/portlist | 254 | $ echo rdma 2050 > /proc/fs/nfsd/portlist |
241 | 255 | ||
242 | - On the client system | 256 | - On the client system |
243 | 257 | ||
244 | If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), | 258 | If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in |
245 | load the RDMA client module: | 259 | kernel config), load the RDMA client module: |
246 | 260 | ||
247 | > modprobe xprtrdma.ko | 261 | $ modprobe xprtrdma.ko |
248 | 262 | ||
249 | Regardless of how the client was built (module or built-in), issue the mount.nfs command: | 263 | Regardless of how the client was built (module or built-in), use this |
264 | command to mount the NFS/RDMA server: | ||
250 | 265 | ||
251 | > /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050 | 266 | $ mount -o rdma,port=2050 <IPoIB-server-name-or-address>:/<export> /mnt |
252 | 267 | ||
253 | To verify that the mount is using RDMA, run "cat /proc/mounts" and check the | 268 | To verify that the mount is using RDMA, run "cat /proc/mounts" and check |
254 | "proto" field for the given mount. | 269 | the "proto" field for the given mount. |
255 | 270 | ||
256 | Congratulations! You're using NFS/RDMA! | 271 | Congratulations! You're using NFS/RDMA! |
diff --git a/Documentation/filesystems/nfsroot.txt b/Documentation/filesystems/nfsroot.txt index 31b329172343..68baddf3c3e0 100644 --- a/Documentation/filesystems/nfsroot.txt +++ b/Documentation/filesystems/nfsroot.txt | |||
@@ -169,7 +169,7 @@ They depend on various facilities being available: | |||
169 | 3.1) Booting from a floppy using syslinux | 169 | 3.1) Booting from a floppy using syslinux |
170 | 170 | ||
171 | When building kernels, an easy way to create a boot floppy that uses | 171 | When building kernels, an easy way to create a boot floppy that uses |
172 | syslinux is to use the zdisk or bzdisk make targets which use | 172 | syslinux is to use the zdisk or bzdisk make targets which use zimage |
173 | and bzimage images respectively. Both targets accept the | 173 | and bzimage images respectively. Both targets accept the |
174 | FDARGS parameter which can be used to set the kernel command line. | 174 | FDARGS parameter which can be used to set the kernel command line. |
175 | 175 | ||
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt index e79ee2db183a..ac2a261c5f7d 100644 --- a/Documentation/filesystems/ntfs.txt +++ b/Documentation/filesystems/ntfs.txt | |||
@@ -40,7 +40,7 @@ Web site | |||
40 | ======== | 40 | ======== |
41 | 41 | ||
42 | There is plenty of additional information on the linux-ntfs web site | 42 | There is plenty of additional information on the linux-ntfs web site |
43 | at http://linux-ntfs.sourceforge.net/ | 43 | at http://www.linux-ntfs.org/ |
44 | 44 | ||
45 | The web site has a lot of additional information, such as a comprehensive | 45 | The web site has a lot of additional information, such as a comprehensive |
46 | FAQ, documentation on the NTFS on-disk format, information on the Linux-NTFS | 46 | FAQ, documentation on the NTFS on-disk format, information on the Linux-NTFS |
@@ -272,7 +272,7 @@ And you would know that /dev/hda2 has a size of 37768814 - 4209030 + 1 = | |||
272 | For Win2k and later dynamic disks, you can for example use the ldminfo utility | 272 | For Win2k and later dynamic disks, you can for example use the ldminfo utility |
273 | which is part of the Linux LDM tools (the latest version at the time of | 273 | which is part of the Linux LDM tools (the latest version at the time of |
274 | writing is linux-ldm-0.0.8.tar.bz2). You can download it from: | 274 | writing is linux-ldm-0.0.8.tar.bz2). You can download it from: |
275 | http://linux-ntfs.sourceforge.net/downloads.html | 275 | http://www.linux-ntfs.org/ |
276 | Simply extract the downloaded archive (tar xvjf linux-ldm-0.0.8.tar.bz2), go | 276 | Simply extract the downloaded archive (tar xvjf linux-ldm-0.0.8.tar.bz2), go |
277 | into it (cd linux-ldm-0.0.8) and change to the test directory (cd test). You | 277 | into it (cd linux-ldm-0.0.8) and change to the test directory (cd test). You |
278 | will find the precompiled (i386) ldminfo utility there. NOTE: You will not be | 278 | will find the precompiled (i386) ldminfo utility there. NOTE: You will not be |
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index c318a8bbb1ef..4340cc825796 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
@@ -76,3 +76,9 @@ localalloc=8(*) Allows custom localalloc size in MB. If the value is too | |||
76 | large, the fs will silently revert it to the default. | 76 | large, the fs will silently revert it to the default. |
77 | Localalloc is not enabled for local mounts. | 77 | Localalloc is not enabled for local mounts. |
78 | localflocks This disables cluster aware flock. | 78 | localflocks This disables cluster aware flock. |
79 | inode64 Indicates that Ocfs2 is allowed to create inodes at | ||
80 | any location in the filesystem, including those which | ||
81 | will result in inode numbers occupying more than 32 | ||
82 | bits of significance. | ||
83 | user_xattr (*) Enables Extended User Attributes. | ||
84 | nouser_xattr Disables Extended User Attributes. | ||
diff --git a/Documentation/filesystems/omfs.txt b/Documentation/filesystems/omfs.txt new file mode 100644 index 000000000000..1d0d41ff5c65 --- /dev/null +++ b/Documentation/filesystems/omfs.txt | |||
@@ -0,0 +1,106 @@ | |||
1 | Optimized MPEG Filesystem (OMFS) | ||
2 | |||
3 | Overview | ||
4 | ======== | ||
5 | |||
6 | OMFS is a filesystem created by SonicBlue for use in the ReplayTV DVR | ||
7 | and Rio Karma MP3 player. The filesystem is extent-based, utilizing | ||
8 | block sizes from 2k to 8k, with hash-based directories. This | ||
9 | filesystem driver may be used to read and write disks from these | ||
10 | devices. | ||
11 | |||
12 | Note, it is not recommended that this FS be used in place of a general | ||
13 | filesystem for your own streaming media device. Native Linux filesystems | ||
14 | will likely perform better. | ||
15 | |||
16 | More information is available at: | ||
17 | |||
18 | http://linux-karma.sf.net/ | ||
19 | |||
20 | Various utilities, including mkomfs and omfsck, are included with | ||
21 | omfsprogs, available at: | ||
22 | |||
23 | http://bobcopeland.com/karma/ | ||
24 | |||
25 | Instructions are included in its README. | ||
26 | |||
27 | Options | ||
28 | ======= | ||
29 | |||
30 | OMFS supports the following mount-time options: | ||
31 | |||
32 | uid=n - make all files owned by specified user | ||
33 | gid=n - make all files owned by specified group | ||
34 | umask=xxx - set permission umask to xxx | ||
35 | fmask=xxx - set umask to xxx for files | ||
36 | dmask=xxx - set umask to xxx for directories | ||
37 | |||
38 | Disk format | ||
39 | =========== | ||
40 | |||
41 | OMFS discriminates between "sysblocks" and normal data blocks. The sysblock | ||
42 | group consists of super block information, file metadata, directory structures, | ||
43 | and extents. Each sysblock has a header containing CRCs of the entire | ||
44 | sysblock, and may be mirrored in successive blocks on the disk. A sysblock may | ||
45 | have a smaller size than a data block, but since they are both addressed by the | ||
46 | same 64-bit block number, any remaining space in the smaller sysblock is | ||
47 | unused. | ||
48 | |||
49 | Sysblock header information: | ||
50 | |||
51 | struct omfs_header { | ||
52 | __be64 h_self; /* FS block where this is located */ | ||
53 | __be32 h_body_size; /* size of useful data after header */ | ||
54 | __be16 h_crc; /* crc-ccitt of body_size bytes */ | ||
55 | char h_fill1[2]; | ||
56 | u8 h_version; /* version, always 1 */ | ||
57 | char h_type; /* OMFS_INODE_X */ | ||
58 | u8 h_magic; /* OMFS_IMAGIC */ | ||
59 | u8 h_check_xor; /* XOR of header bytes before this */ | ||
60 | __be32 h_fill2; | ||
61 | }; | ||
62 | |||
63 | Files and directories are both represented by omfs_inode: | ||
64 | |||
65 | struct omfs_inode { | ||
66 | struct omfs_header i_head; /* header */ | ||
67 | __be64 i_parent; /* parent containing this inode */ | ||
68 | __be64 i_sibling; /* next inode in hash bucket */ | ||
69 | __be64 i_ctime; /* ctime, in milliseconds */ | ||
70 | char i_fill1[35]; | ||
71 | char i_type; /* OMFS_[DIR,FILE] */ | ||
72 | __be32 i_fill2; | ||
73 | char i_fill3[64]; | ||
74 | char i_name[OMFS_NAMELEN]; /* filename */ | ||
75 | __be64 i_size; /* size of file, in bytes */ | ||
76 | }; | ||
77 | |||
78 | Directories in OMFS are implemented as a large hash table. Filenames are | ||
79 | hashed then prepended into the bucket list beginning at OMFS_DIR_START. | ||
80 | Lookup requires hashing the filename, then seeking across i_sibling pointers | ||
81 | until a match is found on i_name. Empty buckets are represented by block | ||
82 | pointers with all-1s (~0). | ||
83 | |||
84 | A file is an omfs_inode structure followed by an extent table beginning at | ||
85 | OMFS_EXTENT_START: | ||
86 | |||
87 | struct omfs_extent_entry { | ||
88 | __be64 e_cluster; /* start location of a set of blocks */ | ||
89 | __be64 e_blocks; /* number of blocks after e_cluster */ | ||
90 | }; | ||
91 | |||
92 | struct omfs_extent { | ||
93 | __be64 e_next; /* next extent table location */ | ||
94 | __be32 e_extent_count; /* total # extents in this table */ | ||
95 | __be32 e_fill; | ||
96 | struct omfs_extent_entry e_entry; /* start of extent entries */ | ||
97 | }; | ||
98 | |||
99 | Each extent holds the block offset followed by number of blocks allocated to | ||
100 | the extent. The final extent in each table is a terminator with e_cluster | ||
101 | being ~0 and e_blocks being ones'-complement of the total number of blocks | ||
102 | in the table. | ||
103 | |||
104 | If this table overflows, a continuation inode is written and pointed to by | ||
105 | e_next. These have a header but lack the rest of the inode structure. | ||
106 | |||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 7f268f327d75..bcceb99b81dd 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -296,6 +296,7 @@ Table 1-4: Kernel info in /proc | |||
296 | uptime System uptime | 296 | uptime System uptime |
297 | version Kernel version | 297 | version Kernel version |
298 | video bttv info of video resources (2.4) | 298 | video bttv info of video resources (2.4) |
299 | vmallocinfo Show vmalloced areas | ||
299 | .............................................................................. | 300 | .............................................................................. |
300 | 301 | ||
301 | You can, for example, check which interrupts are currently in use and what | 302 | You can, for example, check which interrupts are currently in use and what |
@@ -557,6 +558,49 @@ VmallocTotal: total size of vmalloc memory area | |||
557 | VmallocUsed: amount of vmalloc area which is used | 558 | VmallocUsed: amount of vmalloc area which is used |
558 | VmallocChunk: largest contiguous block of vmalloc area which is free | 559 | VmallocChunk: largest contiguous block of vmalloc area which is free |
559 | 560 | ||
561 | .............................................................................. | ||
562 | |||
563 | vmallocinfo: | ||
564 | |||
565 | Provides information about vmalloced/vmapped areas. One line per area, | ||
566 | containing the virtual address range of the area, size in bytes, | ||
567 | caller information of the creator, and optional information depending | ||
568 | on the kind of area: | ||
569 | |||
570 | pages=nr number of pages | ||
571 | phys=addr if a physical address was specified | ||
572 | ioremap I/O mapping (ioremap() and friends) | ||
573 | vmalloc vmalloc() area | ||
574 | vmap vmap()ed pages | ||
575 | user VM_USERMAP area | ||
576 | vpages buffer for pages pointers was vmalloced (huge area) | ||
577 | N<node>=nr (Only on NUMA kernels) | ||
578 | Number of pages allocated on memory node <node> | ||
579 | |||
580 | > cat /proc/vmallocinfo | ||
581 | 0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204 ... | ||
582 | /0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128 | ||
583 | 0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204 ... | ||
584 | /0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64 | ||
585 | 0xffffc20000302000-0xffffc20000304000 8192 acpi_tb_verify_table+0x21/0x4f... | ||
586 | phys=7fee8000 ioremap | ||
587 | 0xffffc20000304000-0xffffc20000307000 12288 acpi_tb_verify_table+0x21/0x4f... | ||
588 | phys=7fee7000 ioremap | ||
589 | 0xffffc2000031d000-0xffffc2000031f000 8192 init_vdso_vars+0x112/0x210 | ||
590 | 0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e ... | ||
591 | /0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3 | ||
592 | 0xffffc2000033a000-0xffffc2000033d000 12288 sys_swapon+0x640/0xac0 ... | ||
593 | pages=2 vmalloc N1=2 | ||
594 | 0xffffc20000347000-0xffffc2000034c000 20480 xt_alloc_table_info+0xfe ... | ||
595 | /0x130 [x_tables] pages=4 vmalloc N0=4 | ||
596 | 0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 ... | ||
597 | pages=14 vmalloc N2=14 | ||
598 | 0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 ... | ||
599 | pages=4 vmalloc N1=4 | ||
600 | 0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 ... | ||
601 | pages=2 vmalloc N1=2 | ||
602 | 0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ... | ||
603 | pages=10 vmalloc N0=10 | ||
560 | 604 | ||
561 | 1.3 IDE devices in /proc/ide | 605 | 1.3 IDE devices in /proc/ide |
562 | ---------------------------- | 606 | ---------------------------- |
@@ -879,45 +923,44 @@ CPUs. | |||
879 | The "procs_blocked" line gives the number of processes currently blocked, | 923 | The "procs_blocked" line gives the number of processes currently blocked, |
880 | waiting for I/O to complete. | 924 | waiting for I/O to complete. |
881 | 925 | ||
926 | |||
882 | 1.9 Ext4 file system parameters | 927 | 1.9 Ext4 file system parameters |
883 | ------------------------------ | 928 | ------------------------------ |
884 | Ext4 file system have one directory per partition under /proc/fs/ext4/ | ||
885 | # ls /proc/fs/ext4/hdc/ | ||
886 | group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req | ||
887 | stats stream_req | ||
888 | |||
889 | mb_groups: | ||
890 | This file gives the details of mutiblock allocator buddy cache of free blocks | ||
891 | |||
892 | mb_history: | ||
893 | Multiblock allocation history. | ||
894 | 929 | ||
895 | stats: | 930 | Information about mounted ext4 file systems can be found in |
896 | This file indicate whether the multiblock allocator should start collecting | 931 | /proc/fs/ext4. Each mounted filesystem will have a directory in |
897 | statistics. The statistics are shown during unmount | 932 | /proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or |
933 | /proc/fs/ext4/dm-0). The files in each per-device directory are shown | ||
934 | in Table 1-10, below. | ||
898 | 935 | ||
899 | group_prealloc: | 936 | Table 1-10: Files in /proc/fs/ext4/<devname> |
900 | The multiblock allocator normalize the block allocation request to | 937 | .............................................................................. |
901 | group_prealloc filesystem blocks if we don't have strip value set. | 938 | File Content |
902 | The stripe value can be specified at mount time or during mke2fs. | 939 | mb_groups details of multiblock allocator buddy cache of free blocks |
903 | 940 | mb_history multiblock allocation history | |
904 | max_to_scan: | 941 | stats controls whether the multiblock allocator should start |
905 | How long multiblock allocator can look for a best extent (in found extents) | 942 | collecting statistics, which are shown during the unmount |
906 | 943 | group_prealloc the multiblock allocator will round up allocation | |
907 | min_to_scan: | 944 | requests to a multiple of this tuning parameter if the |
908 | How long multiblock allocator must look for a best extent | 945 | stripe size is not set in the ext4 superblock |
909 | 946 | max_to_scan The maximum number of extents the multiblock allocator | |
910 | order2_req: | 947 | will search to find the best extent |
911 | Multiblock allocator use 2^N search using buddies only for requests greater | 948 | min_to_scan The minimum number of extents the multiblock allocator |
912 | than or equal to order2_req. The request size is specfied in file system | 949 | will search to find the best extent |
913 | blocks. A value of 2 indicate only if the requests are greater than or equal | 950 | order2_req Tuning parameter which controls the minimum size for |
914 | to 4 blocks. | 951 | requests (as a power of 2) where the buddy cache is |
952 | used | ||
953 | stream_req Files which have fewer blocks than this tunable | ||
954 | parameter will have their blocks allocated out of a | ||
955 | block group specific preallocation pool, so that small | ||
956 | files are packed closely together. Each large file | ||
957 | will have its blocks allocated out of its own unique | ||
958 | preallocation pool. | ||
959 | inode_readahead Tuning parameter which controls the maximum number of | ||
960 | inode table blocks that ext4's inode table readahead | ||
961 | algorithm will pre-read into the buffer cache | ||
962 | .............................................................................. | ||
915 | 963 | ||
916 | stream_req: | ||
917 | Files smaller than stream_req are served by the stream allocator, whose | ||
918 | purpose is to pack requests as close each to other as possible to | ||
919 | produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16 | ||
920 | filesystem block size will use group based preallocation. | ||
921 | 964 | ||
922 | ------------------------------------------------------------------------------ | 965 | ------------------------------------------------------------------------------ |
923 | Summary | 966 | Summary |
@@ -1278,6 +1321,18 @@ debugging information is displayed on console. | |||
1278 | NMI switch that most IA32 servers have fires unknown NMI up, for example. | 1321 | NMI switch that most IA32 servers have fires unknown NMI up, for example. |
1279 | If a system hangs up, try pressing the NMI switch. | 1322 | If a system hangs up, try pressing the NMI switch. |
1280 | 1323 | ||
1324 | panic_on_unrecovered_nmi | ||
1325 | ------------------------ | ||
1326 | |||
1327 | The default Linux behaviour on an NMI of either memory or unknown is to continue | ||
1328 | operation. For many environments such as scientific computing it is preferable | ||
1329 | that the box is taken out and the error dealt with than an uncorrected | ||
1330 | parity/ECC error get propagated. | ||
1331 | |||
1332 | A small number of systems do generate NMI's for bizarre random reasons such as | ||
1333 | power management so the default is off. That sysctl works like the existing | ||
1334 | panic controls already in that directory. | ||
1335 | |||
1281 | nmi_watchdog | 1336 | nmi_watchdog |
1282 | ------------ | 1337 | ------------ |
1283 | 1338 | ||
@@ -1288,12 +1343,24 @@ determine whether or not they are still functioning properly. | |||
1288 | Because the NMI watchdog shares registers with oprofile, by disabling the NMI | 1343 | Because the NMI watchdog shares registers with oprofile, by disabling the NMI |
1289 | watchdog, oprofile may have more registers to utilize. | 1344 | watchdog, oprofile may have more registers to utilize. |
1290 | 1345 | ||
1291 | maps_protect | 1346 | msgmni |
1292 | ------------ | 1347 | ------ |
1348 | |||
1349 | Maximum number of message queue ids on the system. | ||
1350 | This value scales to the amount of lowmem. It is automatically recomputed | ||
1351 | upon memory add/remove or ipc namespace creation/removal. | ||
1352 | When a value is written into this file, msgmni's value becomes fixed, i.e. it | ||
1353 | is not recomputed anymore when one of the above events occurs. | ||
1354 | Use auto_msgmni to change this behavior. | ||
1293 | 1355 | ||
1294 | Enables/Disables the protection of the per-process proc entries "maps" and | 1356 | auto_msgmni |
1295 | "smaps". When enabled, the contents of these files are visible only to | 1357 | ----------- |
1296 | readers that are allowed to ptrace() the given process. | 1358 | |
1359 | Enables/Disables automatic recomputing of msgmni upon memory add/remove or | ||
1360 | upon ipc namespace creation/removal (see the msgmni description above). | ||
1361 | Echoing "1" into this file enables msgmni automatic recomputing. | ||
1362 | Echoing "0" turns it off. | ||
1363 | auto_msgmni default value is 1. | ||
1297 | 1364 | ||
1298 | 1365 | ||
1299 | 2.4 /proc/sys/vm - The virtual memory subsystem | 1366 | 2.4 /proc/sys/vm - The virtual memory subsystem |
@@ -1317,15 +1384,18 @@ causes the kernel to prefer to reclaim dentries and inodes. | |||
1317 | dirty_background_ratio | 1384 | dirty_background_ratio |
1318 | ---------------------- | 1385 | ---------------------- |
1319 | 1386 | ||
1320 | Contains, as a percentage of total system memory, the number of pages at which | 1387 | Contains, as a percentage of the dirtyable system memory (free pages + mapped |
1321 | the pdflush background writeback daemon will start writing out dirty data. | 1388 | pages + file cache, not including locked pages and HugePages), the number of |
1389 | pages at which the pdflush background writeback daemon will start writing out | ||
1390 | dirty data. | ||
1322 | 1391 | ||
1323 | dirty_ratio | 1392 | dirty_ratio |
1324 | ----------------- | 1393 | ----------------- |
1325 | 1394 | ||
1326 | Contains, as a percentage of total system memory, the number of pages at which | 1395 | Contains, as a percentage of the dirtyable system memory (free pages + mapped |
1327 | a process which is generating disk writes will itself start writing out dirty | 1396 | pages + file cache, not including locked pages and HugePages), the number of |
1328 | data. | 1397 | pages at which a process which is generating disk writes will itself start |
1398 | writing out dirty data. | ||
1329 | 1399 | ||
1330 | dirty_writeback_centisecs | 1400 | dirty_writeback_centisecs |
1331 | ------------------------- | 1401 | ------------------------- |
@@ -1430,7 +1500,7 @@ used because pages_free(1355) is smaller than watermark + protection[2] | |||
1430 | normal page requirement. If requirement is DMA zone(index=0), protection[0] | 1500 | normal page requirement. If requirement is DMA zone(index=0), protection[0] |
1431 | (=0) is used. | 1501 | (=0) is used. |
1432 | 1502 | ||
1433 | zone[i]'s protection[j] is calculated by following exprssion. | 1503 | zone[i]'s protection[j] is calculated by following expression. |
1434 | 1504 | ||
1435 | (i < j): | 1505 | (i < j): |
1436 | zone[i]->protection[j] | 1506 | zone[i]->protection[j] |
@@ -2345,22 +2415,29 @@ will be dumped when the <pid> process is dumped. coredump_filter is a bitmask | |||
2345 | of memory types. If a bit of the bitmask is set, memory segments of the | 2415 | of memory types. If a bit of the bitmask is set, memory segments of the |
2346 | corresponding memory type are dumped, otherwise they are not dumped. | 2416 | corresponding memory type are dumped, otherwise they are not dumped. |
2347 | 2417 | ||
2348 | The following 4 memory types are supported: | 2418 | The following 7 memory types are supported: |
2349 | - (bit 0) anonymous private memory | 2419 | - (bit 0) anonymous private memory |
2350 | - (bit 1) anonymous shared memory | 2420 | - (bit 1) anonymous shared memory |
2351 | - (bit 2) file-backed private memory | 2421 | - (bit 2) file-backed private memory |
2352 | - (bit 3) file-backed shared memory | 2422 | - (bit 3) file-backed shared memory |
2423 | - (bit 4) ELF header pages in file-backed private memory areas (it is | ||
2424 | effective only if the bit 2 is cleared) | ||
2425 | - (bit 5) hugetlb private memory | ||
2426 | - (bit 6) hugetlb shared memory | ||
2353 | 2427 | ||
2354 | Note that MMIO pages such as frame buffer are never dumped and vDSO pages | 2428 | Note that MMIO pages such as frame buffer are never dumped and vDSO pages |
2355 | are always dumped regardless of the bitmask status. | 2429 | are always dumped regardless of the bitmask status. |
2356 | 2430 | ||
2357 | Default value of coredump_filter is 0x3; this means all anonymous memory | 2431 | Note that bits 0-4 don't affect any hugetlb memory. hugetlb memory is only |
2358 | segments are dumped. | 2432 | affected by bits 5-6. |
2433 | |||
2434 | Default value of coredump_filter is 0x23; this means all anonymous memory | ||
2435 | segments and hugetlb private memory are dumped. | ||
2359 | 2436 | ||
2360 | If you don't want to dump all shared memory segments attached to pid 1234, | 2437 | If you don't want to dump all shared memory segments attached to pid 1234, |
2361 | write 1 to the process's proc file. | 2438 | write 0x21 to the process's proc file. |
2362 | 2439 | ||
2363 | $ echo 0x1 > /proc/1234/coredump_filter | 2440 | $ echo 0x21 > /proc/1234/coredump_filter |
2364 | 2441 | ||
2365 | When a new process is created, the process inherits the bitmask status from its | 2442 | When a new process is created, the process inherits the bitmask status from its |
2366 | parent. It is useful to set up coredump_filter before the program runs. | 2443 | parent. It is useful to set up coredump_filter before the program runs. |
diff --git a/Documentation/filesystems/quota.txt b/Documentation/filesystems/quota.txt index a590c4093eff..5e8de25bf0f1 100644 --- a/Documentation/filesystems/quota.txt +++ b/Documentation/filesystems/quota.txt | |||
@@ -3,14 +3,14 @@ Quota subsystem | |||
3 | =============== | 3 | =============== |
4 | 4 | ||
5 | Quota subsystem allows system administrator to set limits on used space and | 5 | Quota subsystem allows system administrator to set limits on used space and |
6 | number of used inodes (inode is a filesystem structure which is associated | 6 | number of used inodes (inode is a filesystem structure which is associated with |
7 | with each file or directory) for users and/or groups. For both used space and | 7 | each file or directory) for users and/or groups. For both used space and number |
8 | number of used inodes there are actually two limits. The first one is called | 8 | of used inodes there are actually two limits. The first one is called softlimit |
9 | softlimit and the second one hardlimit. A user can never exceed a hardlimit | 9 | and the second one hardlimit. A user can never exceed a hardlimit for any |
10 | for any resource. User is allowed to exceed softlimit but only for limited | 10 | resource (unless he has CAP_SYS_RESOURCE capability). User is allowed to exceed |
11 | period of time. This period is called "grace period" or "grace time". When | 11 | softlimit but only for limited period of time. This period is called "grace |
12 | grace time is over, user is not able to allocate more space/inodes until he | 12 | period" or "grace time". When grace time is over, user is not able to allocate |
13 | frees enough of them to get below softlimit. | 13 | more space/inodes until he frees enough of them to get below softlimit. |
14 | 14 | ||
15 | Quota limits (and amount of grace time) are set independently for each | 15 | Quota limits (and amount of grace time) are set independently for each |
16 | filesystem. | 16 | filesystem. |
@@ -53,6 +53,12 @@ in parentheses): | |||
53 | QUOTA_NL_BSOFTLONGWARN - space (block) softlimit is exceeded | 53 | QUOTA_NL_BSOFTLONGWARN - space (block) softlimit is exceeded |
54 | longer than given grace period. | 54 | longer than given grace period. |
55 | QUOTA_NL_BSOFTWARN - space (block) softlimit | 55 | QUOTA_NL_BSOFTWARN - space (block) softlimit |
56 | - four warnings are also defined for the event when user stops | ||
57 | exceeding some limit: | ||
58 | QUOTA_NL_IHARDBELOW - inode hardlimit | ||
59 | QUOTA_NL_ISOFTBELOW - inode softlimit | ||
60 | QUOTA_NL_BHARDBELOW - space (block) hardlimit | ||
61 | QUOTA_NL_BSOFTBELOW - space (block) softlimit | ||
56 | QUOTA_NL_A_DEV_MAJOR (u32) | 62 | QUOTA_NL_A_DEV_MAJOR (u32) |
57 | - major number of a device with the affected filesystem | 63 | - major number of a device with the affected filesystem |
58 | QUOTA_NL_A_DEV_MINOR (u32) | 64 | QUOTA_NL_A_DEV_MINOR (u32) |
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt index 7be232b44ee4..62fe9b1e0890 100644 --- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt +++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt | |||
@@ -263,7 +263,7 @@ User Mode Linux, like so: | |||
263 | sleep(999999999); | 263 | sleep(999999999); |
264 | } | 264 | } |
265 | EOF | 265 | EOF |
266 | gcc -static hello2.c -o init | 266 | gcc -static hello.c -o init |
267 | echo init | cpio -o -H newc | gzip > test.cpio.gz | 267 | echo init | cpio -o -H newc | gzip > test.cpio.gz |
268 | # Testing external initramfs using the initrd loading mechanism. | 268 | # Testing external initramfs using the initrd loading mechanism. |
269 | qemu -kernel /boot/vmlinuz -initrd test.cpio.gz /dev/zero | 269 | qemu -kernel /boot/vmlinuz -initrd test.cpio.gz /dev/zero |
diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt index 094f2d2f38b1..510b722667ac 100644 --- a/Documentation/filesystems/relay.txt +++ b/Documentation/filesystems/relay.txt | |||
@@ -294,6 +294,16 @@ user-defined data with a channel, and is immediately available | |||
294 | (including in create_buf_file()) via chan->private_data or | 294 | (including in create_buf_file()) via chan->private_data or |
295 | buf->chan->private_data. | 295 | buf->chan->private_data. |
296 | 296 | ||
297 | Buffer-only channels | ||
298 | -------------------- | ||
299 | |||
300 | These channels have no files associated and can be created with | ||
301 | relay_open(NULL, NULL, ...). Such channels are useful in scenarios such | ||
302 | as when doing early tracing in the kernel, before the VFS is up. In these | ||
303 | cases, one may open a buffer-only channel and then call | ||
304 | relay_late_setup_files() when the kernel is ready to handle files, | ||
305 | to expose the buffered data to the userspace. | ||
306 | |||
297 | Channel 'modes' | 307 | Channel 'modes' |
298 | --------------- | 308 | --------------- |
299 | 309 | ||
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index 7f27b8f840d0..9e9c348275a9 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt | |||
@@ -248,6 +248,7 @@ The top level sysfs directory looks like: | |||
248 | block/ | 248 | block/ |
249 | bus/ | 249 | bus/ |
250 | class/ | 250 | class/ |
251 | dev/ | ||
251 | devices/ | 252 | devices/ |
252 | firmware/ | 253 | firmware/ |
253 | net/ | 254 | net/ |
@@ -274,6 +275,11 @@ fs/ contains a directory for some filesystems. Currently each | |||
274 | filesystem wanting to export attributes must create its own hierarchy | 275 | filesystem wanting to export attributes must create its own hierarchy |
275 | below fs/ (see ./fuse.txt for an example). | 276 | below fs/ (see ./fuse.txt for an example). |
276 | 277 | ||
278 | dev/ contains two directories char/ and block/. Inside these two | ||
279 | directories there are symlinks named <major>:<minor>. These symlinks | ||
280 | point to the sysfs directory for the given device. /sys/dev provides a | ||
281 | quick way to lookup the sysfs interface for a device from the result of | ||
282 | a stat(2) operation. | ||
277 | 283 | ||
278 | More information on driver-model specific features can be found in | 284 | More information on driver-model specific features can be found in |
279 | Documentation/driver-model/. | 285 | Documentation/driver-model/. |
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt index 540e9e7f59c5..dd84ea3c10da 100644 --- a/Documentation/filesystems/ubifs.txt +++ b/Documentation/filesystems/ubifs.txt | |||
@@ -57,7 +57,7 @@ Similarly to JFFS2, UBIFS supports on-the-flight compression which makes | |||
57 | it possible to fit quite a lot of data to the flash. | 57 | it possible to fit quite a lot of data to the flash. |
58 | 58 | ||
59 | Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts. | 59 | Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts. |
60 | It does not need stuff like ckfs.ext2. UBIFS automatically replays its | 60 | It does not need stuff like fsck.ext2. UBIFS automatically replays its |
61 | journal and recovers from crashes, ensuring that the on-flash data | 61 | journal and recovers from crashes, ensuring that the on-flash data |
62 | structures are consistent. | 62 | structures are consistent. |
63 | 63 | ||
@@ -86,6 +86,15 @@ norm_unmount (*) commit on unmount; the journal is committed | |||
86 | fast_unmount do not commit on unmount; this option makes | 86 | fast_unmount do not commit on unmount; this option makes |
87 | unmount faster, but the next mount slower | 87 | unmount faster, but the next mount slower |
88 | because of the need to replay the journal. | 88 | because of the need to replay the journal. |
89 | bulk_read read more in one go to take advantage of flash | ||
90 | media that read faster sequentially | ||
91 | no_bulk_read (*) do not bulk-read | ||
92 | no_chk_data_crc skip checking of CRCs on data nodes in order to | ||
93 | improve read performance. Use this option only | ||
94 | if the flash media is highly reliable. The effect | ||
95 | of this option is that corruption of the contents | ||
96 | of a file can go unnoticed. | ||
97 | chk_data_crc (*) do not skip checking CRCs on data nodes | ||
89 | 98 | ||
90 | 99 | ||
91 | Quick usage instructions | 100 | Quick usage instructions |
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index 2d5e1e582e13..bbac4f1d9056 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt | |||
@@ -96,6 +96,14 @@ shortname=lower|win95|winnt|mixed | |||
96 | emulate the Windows 95 rule for create. | 96 | emulate the Windows 95 rule for create. |
97 | Default setting is `lower'. | 97 | Default setting is `lower'. |
98 | 98 | ||
99 | tz=UTC -- Interpret timestamps as UTC rather than local time. | ||
100 | This option disables the conversion of timestamps | ||
101 | between local time (as used by Windows on FAT) and UTC | ||
102 | (which Linux uses internally). This is particularly | ||
103 | useful when mounting devices (like digital cameras) | ||
104 | that are set to UTC in order to avoid the pitfalls of | ||
105 | local time. | ||
106 | |||
99 | <bool>: 0,1,yes,no,true,false | 107 | <bool>: 0,1,yes,no,true,false |
100 | 108 | ||
101 | TODO | 109 | TODO |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index b7522c6cbae3..c4d348dabe94 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -143,7 +143,7 @@ struct file_system_type { | |||
143 | 143 | ||
144 | The get_sb() method has the following arguments: | 144 | The get_sb() method has the following arguments: |
145 | 145 | ||
146 | struct file_system_type *fs_type: decribes the filesystem, partly initialized | 146 | struct file_system_type *fs_type: describes the filesystem, partly initialized |
147 | by the specific filesystem code | 147 | by the specific filesystem code |
148 | 148 | ||
149 | int flags: mount flags | 149 | int flags: mount flags |
@@ -895,9 +895,9 @@ struct dentry_operations { | |||
895 | iput() yourself | 895 | iput() yourself |
896 | 896 | ||
897 | d_dname: called when the pathname of a dentry should be generated. | 897 | d_dname: called when the pathname of a dentry should be generated. |
898 | Usefull for some pseudo filesystems (sockfs, pipefs, ...) to delay | 898 | Useful for some pseudo filesystems (sockfs, pipefs, ...) to delay |
899 | pathname generation. (Instead of doing it when dentry is created, | 899 | pathname generation. (Instead of doing it when dentry is created, |
900 | its done only when the path is needed.). Real filesystems probably | 900 | it's done only when the path is needed.). Real filesystems probably |
901 | don't want to use it, because their dentries are present in global | 901 | don't want to use it, because their dentries are present in global |
902 | dcache hash, so their hash should be an invariant. As no lock is | 902 | dcache hash, so their hash should be an invariant. As no lock is |
903 | held, d_dname() should not try to modify the dentry itself, unless | 903 | held, d_dname() should not try to modify the dentry itself, unless |