diff options
author | Grant Likely <grant.likely@secretlab.ca> | 2010-12-30 00:20:30 -0500 |
---|---|---|
committer | Grant Likely <grant.likely@secretlab.ca> | 2010-12-30 00:21:47 -0500 |
commit | d392da5207352f09030e95d9ea335a4225667ec0 (patch) | |
tree | 7d6cd1932afcad0a5619a5c504a6d93ca318187c /Documentation/filesystems | |
parent | e39d5ef678045d61812c1401f04fe8edb14d6359 (diff) | |
parent | 387c31c7e5c9805b0aef8833d1731a5fe7bdea14 (diff) |
Merge v2.6.37-rc8 into powerpc/next
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/00-INDEX | 2 | ||||
-rw-r--r-- | Documentation/filesystems/9p.txt | 4 | ||||
-rw-r--r-- | Documentation/filesystems/Locking | 70 | ||||
-rw-r--r-- | Documentation/filesystems/configfs/configfs_example_explicit.c | 2 | ||||
-rw-r--r-- | Documentation/filesystems/ext4.txt | 14 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/00-INDEX | 4 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/idmapper.txt | 67 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/nfsroot.txt | 22 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/pnfs.txt | 48 | ||||
-rw-r--r-- | Documentation/filesystems/ocfs2.txt | 7 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 45 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 25 | ||||
-rw-r--r-- | Documentation/filesystems/sharedsubtree.txt | 4 | ||||
-rw-r--r-- | Documentation/filesystems/smbfs.txt | 8 | ||||
-rw-r--r-- | Documentation/filesystems/squashfs.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 22 | ||||
-rw-r--r-- | Documentation/filesystems/xfs-delayed-logging-design.txt | 11 |
17 files changed, 286 insertions, 71 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 4303614b5add..8c624a18f67d 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX | |||
@@ -96,8 +96,6 @@ seq_file.txt | |||
96 | - how to use the seq_file API | 96 | - how to use the seq_file API |
97 | sharedsubtree.txt | 97 | sharedsubtree.txt |
98 | - a description of shared subtrees for namespaces. | 98 | - a description of shared subtrees for namespaces. |
99 | smbfs.txt | ||
100 | - info on using filesystems with the SMB protocol (Win 3.11 and NT). | ||
101 | spufs.txt | 99 | spufs.txt |
102 | - info and mount options for the SPU filesystem used on Cell. | 100 | - info and mount options for the SPU filesystem used on Cell. |
103 | sysfs-pci.txt | 101 | sysfs-pci.txt |
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt index f9765e8cf086..b22abba78fed 100644 --- a/Documentation/filesystems/9p.txt +++ b/Documentation/filesystems/9p.txt | |||
@@ -111,7 +111,7 @@ OPTIONS | |||
111 | This can be used to share devices/named pipes/sockets between | 111 | This can be used to share devices/named pipes/sockets between |
112 | hosts. This functionality will be expanded in later versions. | 112 | hosts. This functionality will be expanded in later versions. |
113 | 113 | ||
114 | access there are three access modes. | 114 | access there are four access modes. |
115 | user = if a user tries to access a file on v9fs | 115 | user = if a user tries to access a file on v9fs |
116 | filesystem for the first time, v9fs sends an | 116 | filesystem for the first time, v9fs sends an |
117 | attach command (Tattach) for that user. | 117 | attach command (Tattach) for that user. |
@@ -120,6 +120,8 @@ OPTIONS | |||
120 | the files on the mounted filesystem | 120 | the files on the mounted filesystem |
121 | any = v9fs does single attach and performs all | 121 | any = v9fs does single attach and performs all |
122 | operations as one user | 122 | operations as one user |
123 | client = ACL based access check on the 9p client | ||
124 | side for access validation | ||
123 | 125 | ||
124 | cachetag cache tag to use the specified persistent cache. | 126 | cachetag cache tag to use the specified persistent cache. |
125 | cache tags for existing cache sessions can be listed at | 127 | cache tags for existing cache sessions can be listed at |
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 96d4293607ec..b6426f15b4ae 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -92,8 +92,8 @@ prototypes: | |||
92 | void (*destroy_inode)(struct inode *); | 92 | void (*destroy_inode)(struct inode *); |
93 | void (*dirty_inode) (struct inode *); | 93 | void (*dirty_inode) (struct inode *); |
94 | int (*write_inode) (struct inode *, int); | 94 | int (*write_inode) (struct inode *, int); |
95 | void (*drop_inode) (struct inode *); | 95 | int (*drop_inode) (struct inode *); |
96 | void (*delete_inode) (struct inode *); | 96 | void (*evict_inode) (struct inode *); |
97 | void (*put_super) (struct super_block *); | 97 | void (*put_super) (struct super_block *); |
98 | void (*write_super) (struct super_block *); | 98 | void (*write_super) (struct super_block *); |
99 | int (*sync_fs)(struct super_block *sb, int wait); | 99 | int (*sync_fs)(struct super_block *sb, int wait); |
@@ -101,14 +101,13 @@ prototypes: | |||
101 | int (*unfreeze_fs) (struct super_block *); | 101 | int (*unfreeze_fs) (struct super_block *); |
102 | int (*statfs) (struct dentry *, struct kstatfs *); | 102 | int (*statfs) (struct dentry *, struct kstatfs *); |
103 | int (*remount_fs) (struct super_block *, int *, char *); | 103 | int (*remount_fs) (struct super_block *, int *, char *); |
104 | void (*clear_inode) (struct inode *); | ||
105 | void (*umount_begin) (struct super_block *); | 104 | void (*umount_begin) (struct super_block *); |
106 | int (*show_options)(struct seq_file *, struct vfsmount *); | 105 | int (*show_options)(struct seq_file *, struct vfsmount *); |
107 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | 106 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); |
108 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 107 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
109 | 108 | ||
110 | locking rules: | 109 | locking rules: |
111 | All may block. | 110 | All may block [not true, see below] |
112 | None have BKL | 111 | None have BKL |
113 | s_umount | 112 | s_umount |
114 | alloc_inode: | 113 | alloc_inode: |
@@ -116,22 +115,25 @@ destroy_inode: | |||
116 | dirty_inode: (must not sleep) | 115 | dirty_inode: (must not sleep) |
117 | write_inode: | 116 | write_inode: |
118 | drop_inode: !!!inode_lock!!! | 117 | drop_inode: !!!inode_lock!!! |
119 | delete_inode: | 118 | evict_inode: |
120 | put_super: write | 119 | put_super: write |
121 | write_super: read | 120 | write_super: read |
122 | sync_fs: read | 121 | sync_fs: read |
123 | freeze_fs: read | 122 | freeze_fs: read |
124 | unfreeze_fs: read | 123 | unfreeze_fs: read |
125 | statfs: no | 124 | statfs: maybe(read) (see below) |
126 | remount_fs: maybe (see below) | 125 | remount_fs: write |
127 | clear_inode: | ||
128 | umount_begin: no | 126 | umount_begin: no |
129 | show_options: no (namespace_sem) | 127 | show_options: no (namespace_sem) |
130 | quota_read: no (see below) | 128 | quota_read: no (see below) |
131 | quota_write: no (see below) | 129 | quota_write: no (see below) |
132 | 130 | ||
133 | ->remount_fs() will have the s_umount exclusive lock if it's already mounted. | 131 | ->statfs() has s_umount (shared) when called by ustat(2) (native or |
134 | When called from get_sb_single, it does NOT have the s_umount lock. | 132 | compat), but that's an accident of bad API; s_umount is used to pin |
133 | the superblock down when we only have dev_t given us by userland to | ||
134 | identify the superblock. Everything else (statfs(), fstatfs(), etc.) | ||
135 | doesn't hold it when calling ->statfs() - superblock is pinned down | ||
136 | by resolving the pathname passed to syscall. | ||
135 | ->quota_read() and ->quota_write() functions are both guaranteed to | 137 | ->quota_read() and ->quota_write() functions are both guaranteed to |
136 | be the only ones operating on the quota file by the quota code (via | 138 | be the only ones operating on the quota file by the quota code (via |
137 | dqio_sem) (unless an admin really wants to screw up something and | 139 | dqio_sem) (unless an admin really wants to screw up something and |
@@ -171,12 +173,13 @@ prototypes: | |||
171 | sector_t (*bmap)(struct address_space *, sector_t); | 173 | sector_t (*bmap)(struct address_space *, sector_t); |
172 | int (*invalidatepage) (struct page *, unsigned long); | 174 | int (*invalidatepage) (struct page *, unsigned long); |
173 | int (*releasepage) (struct page *, int); | 175 | int (*releasepage) (struct page *, int); |
176 | void (*freepage)(struct page *); | ||
174 | int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, | 177 | int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, |
175 | loff_t offset, unsigned long nr_segs); | 178 | loff_t offset, unsigned long nr_segs); |
176 | int (*launder_page) (struct page *); | 179 | int (*launder_page) (struct page *); |
177 | 180 | ||
178 | locking rules: | 181 | locking rules: |
179 | All except set_page_dirty may block | 182 | All except set_page_dirty and freepage may block |
180 | 183 | ||
181 | BKL PageLocked(page) i_mutex | 184 | BKL PageLocked(page) i_mutex |
182 | writepage: no yes, unlocks (see below) | 185 | writepage: no yes, unlocks (see below) |
@@ -191,6 +194,7 @@ perform_write: no n/a yes | |||
191 | bmap: no | 194 | bmap: no |
192 | invalidatepage: no yes | 195 | invalidatepage: no yes |
193 | releasepage: no yes | 196 | releasepage: no yes |
197 | freepage: no yes | ||
194 | direct_IO: no | 198 | direct_IO: no |
195 | launder_page: no yes | 199 | launder_page: no yes |
196 | 200 | ||
@@ -286,6 +290,9 @@ buffers from the page in preparation for freeing it. It returns zero to | |||
286 | indicate that the buffers are (or may be) freeable. If ->releasepage is zero, | 290 | indicate that the buffers are (or may be) freeable. If ->releasepage is zero, |
287 | the kernel assumes that the fs has no private interest in the buffers. | 291 | the kernel assumes that the fs has no private interest in the buffers. |
288 | 292 | ||
293 | ->freepage() is called when the kernel is done dropping the page | ||
294 | from the page cache. | ||
295 | |||
289 | ->launder_page() may be called prior to releasing a page if | 296 | ->launder_page() may be called prior to releasing a page if |
290 | it is still found to be dirty. It returns zero if the page was successfully | 297 | it is still found to be dirty. It returns zero if the page was successfully |
291 | cleaned, or an error value if not. Note that in order to prevent the page | 298 | cleaned, or an error value if not. Note that in order to prevent the page |
@@ -320,7 +327,6 @@ fl_release_private: yes yes | |||
320 | prototypes: | 327 | prototypes: |
321 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); | 328 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); |
322 | void (*fl_notify)(struct file_lock *); /* unblock callback */ | 329 | void (*fl_notify)(struct file_lock *); /* unblock callback */ |
323 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | ||
324 | void (*fl_release_private)(struct file_lock *); | 330 | void (*fl_release_private)(struct file_lock *); |
325 | void (*fl_break)(struct file_lock *); /* break_lease callback */ | 331 | void (*fl_break)(struct file_lock *); /* break_lease callback */ |
326 | 332 | ||
@@ -328,7 +334,6 @@ locking rules: | |||
328 | BKL may block | 334 | BKL may block |
329 | fl_compare_owner: yes no | 335 | fl_compare_owner: yes no |
330 | fl_notify: yes no | 336 | fl_notify: yes no |
331 | fl_copy_lock: yes no | ||
332 | fl_release_private: yes yes | 337 | fl_release_private: yes yes |
333 | fl_break: yes no | 338 | fl_break: yes no |
334 | 339 | ||
@@ -347,21 +352,36 @@ call this method upon the IO completion. | |||
347 | 352 | ||
348 | --------------------------- block_device_operations ----------------------- | 353 | --------------------------- block_device_operations ----------------------- |
349 | prototypes: | 354 | prototypes: |
350 | int (*open) (struct inode *, struct file *); | 355 | int (*open) (struct block_device *, fmode_t); |
351 | int (*release) (struct inode *, struct file *); | 356 | int (*release) (struct gendisk *, fmode_t); |
352 | int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); | 357 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
358 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | ||
359 | int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); | ||
353 | int (*media_changed) (struct gendisk *); | 360 | int (*media_changed) (struct gendisk *); |
361 | void (*unlock_native_capacity) (struct gendisk *); | ||
354 | int (*revalidate_disk) (struct gendisk *); | 362 | int (*revalidate_disk) (struct gendisk *); |
363 | int (*getgeo)(struct block_device *, struct hd_geometry *); | ||
364 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | ||
355 | 365 | ||
356 | locking rules: | 366 | locking rules: |
357 | BKL bd_sem | 367 | BKL bd_mutex |
358 | open: yes yes | 368 | open: no yes |
359 | release: yes yes | 369 | release: no yes |
360 | ioctl: yes no | 370 | ioctl: no no |
371 | compat_ioctl: no no | ||
372 | direct_access: no no | ||
361 | media_changed: no no | 373 | media_changed: no no |
374 | unlock_native_capacity: no no | ||
362 | revalidate_disk: no no | 375 | revalidate_disk: no no |
376 | getgeo: no no | ||
377 | swap_slot_free_notify: no no (see below) | ||
378 | |||
379 | media_changed, unlock_native_capacity and revalidate_disk are called only from | ||
380 | check_disk_change(). | ||
381 | |||
382 | swap_slot_free_notify is called with swap_lock and sometimes the page lock | ||
383 | held. | ||
363 | 384 | ||
364 | The last two are called only from check_disk_change(). | ||
365 | 385 | ||
366 | --------------------------- file_operations ------------------------------- | 386 | --------------------------- file_operations ------------------------------- |
367 | prototypes: | 387 | prototypes: |
@@ -372,8 +392,6 @@ prototypes: | |||
372 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); | 392 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
373 | int (*readdir) (struct file *, void *, filldir_t); | 393 | int (*readdir) (struct file *, void *, filldir_t); |
374 | unsigned int (*poll) (struct file *, struct poll_table_struct *); | 394 | unsigned int (*poll) (struct file *, struct poll_table_struct *); |
375 | int (*ioctl) (struct inode *, struct file *, unsigned int, | ||
376 | unsigned long); | ||
377 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); | 395 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); |
378 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); | 396 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); |
379 | int (*mmap) (struct file *, struct vm_area_struct *); | 397 | int (*mmap) (struct file *, struct vm_area_struct *); |
@@ -407,8 +425,7 @@ write: no | |||
407 | aio_write: no | 425 | aio_write: no |
408 | readdir: no | 426 | readdir: no |
409 | poll: no | 427 | poll: no |
410 | ioctl: yes (see below) | 428 | unlocked_ioctl: no |
411 | unlocked_ioctl: no (see below) | ||
412 | compat_ioctl: no | 429 | compat_ioctl: no |
413 | mmap: no | 430 | mmap: no |
414 | open: no | 431 | open: no |
@@ -451,9 +468,6 @@ move ->readdir() to inode_operations and use a separate method for directory | |||
451 | anything that resembles union-mount we won't have a struct file for all | 468 | anything that resembles union-mount we won't have a struct file for all |
452 | components. And there are other reasons why the current interface is a mess... | 469 | components. And there are other reasons why the current interface is a mess... |
453 | 470 | ||
454 | ->ioctl() on regular files is superceded by the ->unlocked_ioctl() that | ||
455 | doesn't take the BKL. | ||
456 | |||
457 | ->read on directories probably must go away - we should just enforce -EISDIR | 471 | ->read on directories probably must go away - we should just enforce -EISDIR |
458 | in sys_read() and friends. | 472 | in sys_read() and friends. |
459 | 473 | ||
diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c index d428cc9f07f3..fd53869f5633 100644 --- a/Documentation/filesystems/configfs/configfs_example_explicit.c +++ b/Documentation/filesystems/configfs/configfs_example_explicit.c | |||
@@ -89,7 +89,7 @@ static ssize_t childless_storeme_write(struct childless *childless, | |||
89 | char *p = (char *) page; | 89 | char *p = (char *) page; |
90 | 90 | ||
91 | tmp = simple_strtoul(p, &p, 10); | 91 | tmp = simple_strtoul(p, &p, 10); |
92 | if (!p || (*p && (*p != '\n'))) | 92 | if ((*p != '\0') && (*p != '\n')) |
93 | return -EINVAL; | 93 | return -EINVAL; |
94 | 94 | ||
95 | if (tmp > INT_MAX) | 95 | if (tmp > INT_MAX) |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index e1def1786e50..6ab9442d7eeb 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -353,6 +353,20 @@ noauto_da_alloc replacing existing files via patterns such as | |||
353 | system crashes before the delayed allocation | 353 | system crashes before the delayed allocation |
354 | blocks are forced to disk. | 354 | blocks are forced to disk. |
355 | 355 | ||
356 | noinit_itable Do not initialize any uninitialized inode table | ||
357 | blocks in the background. This feature may be | ||
358 | used by installation CD's so that the install | ||
359 | process can complete as quickly as possible; the | ||
360 | inode table initialization process would then be | ||
361 | deferred until the next time the file system | ||
362 | is unmounted. | ||
363 | |||
364 | init_itable=n The lazy itable init code will wait n times the | ||
365 | number of milliseconds it took to zero out the | ||
366 | previous block group's inode table. This | ||
367 | minimizes the impact on the system performance | ||
368 | while file system's inode table is being initialized. | ||
369 | |||
356 | discard Controls whether ext4 should issue discard/TRIM | 370 | discard Controls whether ext4 should issue discard/TRIM |
357 | nodiscard(*) commands to the underlying block device when | 371 | nodiscard(*) commands to the underlying block device when |
358 | blocks are freed. This is useful for SSD devices | 372 | blocks are freed. This is useful for SSD devices |
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index 2f68cd688769..a57e12411d2a 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX | |||
@@ -12,5 +12,9 @@ nfs-rdma.txt | |||
12 | - how to install and setup the Linux NFS/RDMA client and server software | 12 | - how to install and setup the Linux NFS/RDMA client and server software |
13 | nfsroot.txt | 13 | nfsroot.txt |
14 | - short guide on setting up a diskless box with NFS root filesystem. | 14 | - short guide on setting up a diskless box with NFS root filesystem. |
15 | pnfs.txt | ||
16 | - short explanation of some of the internals of the pnfs client code | ||
15 | rpc-cache.txt | 17 | rpc-cache.txt |
16 | - introduction to the caching mechanisms in the sunrpc layer. | 18 | - introduction to the caching mechanisms in the sunrpc layer. |
19 | idmapper.txt | ||
20 | - information for configuring request-keys to be used by idmapper | ||
diff --git a/Documentation/filesystems/nfs/idmapper.txt b/Documentation/filesystems/nfs/idmapper.txt new file mode 100644 index 000000000000..b9b4192ea8b5 --- /dev/null +++ b/Documentation/filesystems/nfs/idmapper.txt | |||
@@ -0,0 +1,67 @@ | |||
1 | |||
2 | ========= | ||
3 | ID Mapper | ||
4 | ========= | ||
5 | Id mapper is used by NFS to translate user and group ids into names, and to | ||
6 | translate user and group names into ids. Part of this translation involves | ||
7 | performing an upcall to userspace to request the information. Id mapper will | ||
8 | use request-key to perform this upcall and cache the result. The program | ||
9 | /usr/sbin/nfs.idmap should be called by request-key, and will perform the | ||
10 | translation and initialize a key with the resulting information. | ||
11 | |||
12 | NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this | ||
13 | feature. | ||
14 | |||
15 | =========== | ||
16 | Configuring | ||
17 | =========== | ||
18 | The file /etc/request-key.conf will need to be modified so /sbin/request-key can | ||
19 | direct the upcall. The following line should be added: | ||
20 | |||
21 | #OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ... | ||
22 | #====== ======= =============== =============== =============================== | ||
23 | create id_resolver * * /usr/sbin/nfs.idmap %k %d 600 | ||
24 | |||
25 | This will direct all id_resolver requests to the program /usr/sbin/nfs.idmap. | ||
26 | The last parameter, 600, defines how many seconds into the future the key will | ||
27 | expire. This parameter is optional for /usr/sbin/nfs.idmap. When the timeout | ||
28 | is not specified, nfs.idmap will default to 600 seconds. | ||
29 | |||
30 | id mapper uses four key descriptions: | ||
31 | uid: Find the UID for the given user | ||
32 | gid: Find the GID for the given group | ||
33 | user: Find the user name for the given UID | ||
34 | group: Find the group name for the given GID | ||
35 | |||
36 | You can handle any of these individually, rather than using the generic upcall | ||
37 | program. If you would like to use your own program for a uid lookup then you | ||
38 | would edit your request-key.conf so it looks similar to this: | ||
39 | |||
40 | #OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ... | ||
41 | #====== ======= =============== =============== =============================== | ||
42 | create id_resolver uid:* * /some/other/program %k %d 600 | ||
43 | create id_resolver * * /usr/sbin/nfs.idmap %k %d 600 | ||
44 | |||
45 | Notice that the new line was added above the line for the generic program. | ||
46 | request-key will find the first matching line and corresponding program. In | ||
47 | this case, /some/other/program will handle all uid lookups and | ||
48 | /usr/sbin/nfs.idmap will handle gid, user, and group lookups. | ||
49 | |||
50 | See <file:Documentation/keys-request-key.txt> for more information about the | ||
51 | request-key function. | ||
52 | |||
53 | |||
54 | ========= | ||
55 | nfs.idmap | ||
56 | ========= | ||
57 | nfs.idmap is designed to be called by request-key, and should not be run "by | ||
58 | hand". This program takes two arguments, a serialized key and a key | ||
59 | description. The serialized key is first converted into a key_serial_t, and | ||
60 | then passed as an argument to keyctl_instantiate (both are part of keyutils.h). | ||
61 | |||
62 | The actual lookups are performed by functions found in nfsidmap.h. nfs.idmap | ||
63 | determines the correct function to call by looking at the first part of the | ||
64 | description string. For example, a uid lookup description will appear as | ||
65 | "uid:user@domain". | ||
66 | |||
67 | nfs.idmap will return 0 if the key was instantiated, and non-zero otherwise. | ||
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index f2430a7974e1..90c71c6f0d00 100644 --- a/Documentation/filesystems/nfs/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt | |||
@@ -159,6 +159,28 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf> | |||
159 | Default: any | 159 | Default: any |
160 | 160 | ||
161 | 161 | ||
162 | nfsrootdebug | ||
163 | |||
164 | This parameter enables debugging messages to appear in the kernel | ||
165 | log at boot time so that administrators can verify that the correct | ||
166 | NFS mount options, server address, and root path are passed to the | ||
167 | NFS client. | ||
168 | |||
169 | |||
170 | rdinit=<executable file> | ||
171 | |||
172 | To specify which file contains the program that starts system | ||
173 | initialization, administrators can use this command line parameter. | ||
174 | The default value of this parameter is "/init". If the specified | ||
175 | file exists and the kernel can execute it, root filesystem related | ||
176 | kernel command line parameters, including `nfsroot=', are ignored. | ||
177 | |||
178 | A description of the process of mounting the root file system can be | ||
179 | found in: | ||
180 | |||
181 | Documentation/early-userspace/README | ||
182 | |||
183 | |||
162 | 184 | ||
163 | 185 | ||
164 | 3.) Boot Loader | 186 | 3.) Boot Loader |
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt new file mode 100644 index 000000000000..bc0b9cfe095b --- /dev/null +++ b/Documentation/filesystems/nfs/pnfs.txt | |||
@@ -0,0 +1,48 @@ | |||
1 | Reference counting in pnfs: | ||
2 | ========================== | ||
3 | |||
4 | There are several inter-related caches. We have layouts which can | ||
5 | reference multiple devices, each of which can reference multiple data servers. | ||
6 | Each data server can be referenced by multiple devices. Each device | ||
7 | can be referenced by multiple layouts. To keep all of this straight, | ||
8 | we need to reference count. | ||
9 | |||
10 | |||
11 | struct pnfs_layout_hdr | ||
12 | ---------------------- | ||
13 | The on-the-wire command LAYOUTGET corresponds to struct | ||
14 | pnfs_layout_segment, usually referred to by the variable name lseg. | ||
15 | Each nfs_inode may hold a pointer to a cache of these layout | ||
16 | segments in nfsi->layout, of type struct pnfs_layout_hdr. | ||
17 | |||
18 | We reference the header for the inode pointing to it, across each | ||
19 | outstanding RPC call that references it (LAYOUTGET, LAYOUTRETURN, | ||
20 | LAYOUTCOMMIT), and for each lseg held within. | ||
21 | |||
22 | Each header is also (when non-empty) put on a list associated with | ||
23 | struct nfs_client (cl_layouts). Being put on this list does not bump | ||
24 | the reference count, as the layout is kept around by the lseg that | ||
25 | keeps it in the list. | ||
26 | |||
27 | deviceid_cache | ||
28 | -------------- | ||
29 | lsegs reference device ids, which are resolved per nfs_client and | ||
30 | layout driver type. The device ids are held in a RCU cache (struct | ||
31 | nfs4_deviceid_cache). The cache itself is referenced across each | ||
32 | mount. The entries (struct nfs4_deviceid) themselves are held across | ||
33 | the lifetime of each lseg referencing them. | ||
34 | |||
35 | RCU is used because the deviceid is basically a write once, read many | ||
36 | data structure. The hlist size of 32 buckets needs better | ||
37 | justification, but seems reasonable given that we can have multiple | ||
38 | deviceid's per filesystem, and multiple filesystems per nfs_client. | ||
39 | |||
40 | The hash code is copied from the nfsd code base. A discussion of | ||
41 | hashing and variations of this algorithm can be found at: | ||
42 | http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809 | ||
43 | |||
44 | data server cache | ||
45 | ----------------- | ||
46 | file driver devices refer to data servers, which are kept in a module | ||
47 | level cache. Its reference is held over the lifetime of the deviceid | ||
48 | pointing to it. | ||
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 1f7ae144f6d8..5393e6611691 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
@@ -87,3 +87,10 @@ dir_resv_level= (*) By default, directory reservations will scale with file | |||
87 | reservations - users should rarely need to change this | 87 | reservations - users should rarely need to change this |
88 | value. If allocation reservations are turned off, this | 88 | value. If allocation reservations are turned off, this |
89 | option will have no effect. | 89 | option will have no effect. |
90 | coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode | ||
91 | lock will be taken to force other nodes drop cache, | ||
92 | therefore full cluster coherency is guaranteed even | ||
93 | for O_DIRECT writes. | ||
94 | coherency=buffered Allow concurrent O_DIRECT writes without EX lock among | ||
95 | nodes, which gains high performance at risk of getting | ||
96 | stale data on other nodes. | ||
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index a7e9746ee7ea..b12c89538680 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -273,3 +273,48 @@ it's safe to remove it. If you don't need it, remove it. | |||
273 | deliberate; as soon as struct block_device * is propagated in a reasonable | 273 | deliberate; as soon as struct block_device * is propagated in a reasonable |
274 | way by that code fixing will become trivial; until then nothing can be | 274 | way by that code fixing will become trivial; until then nothing can be |
275 | done. | 275 | done. |
276 | |||
277 | [mandatory] | ||
278 | |||
279 | block truncation on error exit from ->write_begin, and ->direct_IO | ||
280 | moved from generic methods (block_write_begin, cont_write_begin, | ||
281 | nobh_write_begin, blockdev_direct_IO*) to callers. Take a look at | ||
282 | ext2_write_failed and callers for an example. | ||
283 | |||
284 | [mandatory] | ||
285 | |||
286 | ->truncate is going away. The whole truncate sequence needs to be | ||
287 | implemented in ->setattr, which is now mandatory for filesystems | ||
288 | implementing on-disk size changes. Start with a copy of the old inode_setattr | ||
289 | and vmtruncate, and then reorder the vmtruncate + foofs_vmtruncate sequence to | ||
290 | be in order of zeroing blocks using block_truncate_page or similar helpers, | ||
291 | size update and finally on-disk truncation which should not fail. | ||
292 | inode_change_ok now includes the size checks for ATTR_SIZE and must be called | ||
293 | in the beginning of ->setattr unconditionally. | ||
294 | |||
295 | [mandatory] | ||
296 | |||
297 | ->clear_inode() and ->delete_inode() are gone; ->evict_inode() should | ||
298 | be used instead. It gets called whenever the inode is evicted, whether it has | ||
299 | remaining links or not. Caller does *not* evict the pagecache or inode-associated | ||
300 | metadata buffers; getting rid of those is responsibility of method, as it had | ||
301 | been for ->delete_inode(). | ||
302 | ->drop_inode() returns int now; it's called on final iput() with inode_lock | ||
303 | held and it returns true if the filesystem wants the inode to be dropped. As before, | ||
304 | generic_drop_inode() is still the default and it's been updated appropriately. | ||
305 | generic_delete_inode() is also alive and it consists simply of return 1. Note that | ||
306 | all actual eviction work is done by caller after ->drop_inode() returns. | ||
307 | clear_inode() is gone; use end_writeback() instead. As before, it must | ||
308 | be called exactly once on each call of ->evict_inode() (as it used to be for | ||
309 | each call of ->delete_inode()). Unlike before, if you are using inode-associated | ||
310 | metadata buffers (i.e. mark_buffer_dirty_inode()), it's your responsibility to | ||
311 | call invalidate_inode_buffers() before end_writeback(). | ||
312 | No async writeback (and thus no calls of ->write_inode()) will happen | ||
313 | after end_writeback() returns, so actions that should not overlap with ->write_inode() | ||
314 | (e.g. freeing on-disk inode if i_nlink is 0) ought to be done after that call. | ||
315 | |||
316 | NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out | ||
317 | if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput() | ||
318 | may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly | ||
319 | free the on-disk inode, you may end up doing that while ->write_inode() is writing | ||
320 | to it. | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a6aca8740883..e73df2722ff3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -136,6 +136,7 @@ Table 1-1: Process specific entries in /proc | |||
136 | statm Process memory status information | 136 | statm Process memory status information |
137 | status Process status in human readable form | 137 | status Process status in human readable form |
138 | wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan | 138 | wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan |
139 | pagemap Page table | ||
139 | stack Report full stack trace, enable via CONFIG_STACKTRACE | 140 | stack Report full stack trace, enable via CONFIG_STACKTRACE |
140 | smaps an extension based on maps, showing the memory consumption of | 141 | smaps an extension based on maps, showing the memory consumption of |
141 | each mapping | 142 | each mapping |
@@ -370,17 +371,24 @@ Shared_Dirty: 0 kB | |||
370 | Private_Clean: 0 kB | 371 | Private_Clean: 0 kB |
371 | Private_Dirty: 0 kB | 372 | Private_Dirty: 0 kB |
372 | Referenced: 892 kB | 373 | Referenced: 892 kB |
374 | Anonymous: 0 kB | ||
373 | Swap: 0 kB | 375 | Swap: 0 kB |
374 | KernelPageSize: 4 kB | 376 | KernelPageSize: 4 kB |
375 | MMUPageSize: 4 kB | 377 | MMUPageSize: 4 kB |
376 | 378 | ||
377 | The first of these lines shows the same information as is displayed for the | 379 | The first of these lines shows the same information as is displayed for the |
378 | mapping in /proc/PID/maps. The remaining lines show the size of the mapping, | 380 | mapping in /proc/PID/maps. The remaining lines show the size of the mapping |
379 | the amount of the mapping that is currently resident in RAM, the "proportional | 381 | (size), the amount of the mapping that is currently resident in RAM (RSS), the |
380 | set size” (divide each shared page by the number of processes sharing it), the | 382 | process' proportional share of this mapping (PSS), the number of clean and |
381 | number of clean and dirty shared pages in the mapping, and the number of clean | 383 | dirty private pages in the mapping. Note that even a page which is part of a |
382 | and dirty private pages in the mapping. The "Referenced" indicates the amount | 384 | MAP_SHARED mapping, but has only a single pte mapped, i.e. is currently used |
383 | of memory currently marked as referenced or accessed. | 385 | by only one process, is accounted as private and not as shared. "Referenced" |
386 | indicates the amount of memory currently marked as referenced or accessed. | ||
387 | "Anonymous" shows the amount of memory that does not belong to any file. Even | ||
388 | a mapping associated with a file may contain anonymous pages: when the mapping is MAP_PRIVATE | ||
389 | and a page is modified, the file page is replaced by a private anonymous copy. | ||
390 | "Swap" shows how much would-be-anonymous memory is also used, but out on | ||
391 | swap. | ||
384 | 392 | ||
385 | This file is only present if the CONFIG_MMU kernel configuration option is | 393 | This file is only present if the CONFIG_MMU kernel configuration option is |
386 | enabled. | 394 | enabled. |
@@ -397,6 +405,9 @@ To clear the bits for the file mapped pages associated with the process | |||
397 | > echo 3 > /proc/PID/clear_refs | 405 | > echo 3 > /proc/PID/clear_refs |
398 | Any other value written to /proc/PID/clear_refs will have no effect. | 406 | Any other value written to /proc/PID/clear_refs will have no effect. |
399 | 407 | ||
408 | The /proc/PID/pagemap file gives the PFN, which can be used to find the pageflags | ||
409 | using /proc/kpageflags and the number of times a page is mapped using | ||
410 | /proc/kpagecount. For a detailed explanation, see Documentation/vm/pagemap.txt. | ||
400 | 411 | ||
401 | 1.2 Kernel data | 412 | 1.2 Kernel data |
402 | --------------- | 413 | --------------- |
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index fc0e39af43c3..4ede421c9687 100644 --- a/Documentation/filesystems/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt | |||
@@ -62,10 +62,10 @@ replicas continue to be exactly same. | |||
62 | # mount /dev/sd0 /tmp/a | 62 | # mount /dev/sd0 /tmp/a |
63 | 63 | ||
64 | #ls /tmp/a | 64 | #ls /tmp/a |
65 | t1 t2 t2 | 65 | t1 t2 t3 |
66 | 66 | ||
67 | #ls /mnt/a | 67 | #ls /mnt/a |
68 | t1 t2 t2 | 68 | t1 t2 t3 |
69 | 69 | ||
70 | Note that the mount has propagated to the mount at /mnt as well. | 70 | Note that the mount has propagated to the mount at /mnt as well. |
71 | 71 | ||
diff --git a/Documentation/filesystems/smbfs.txt b/Documentation/filesystems/smbfs.txt deleted file mode 100644 index 194fb0decd2c..000000000000 --- a/Documentation/filesystems/smbfs.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | Smbfs is a filesystem that implements the SMB protocol, which is the | ||
2 | protocol used by Windows for Workgroups, Windows 95 and Windows NT. | ||
3 | Smbfs was inspired by Samba, the program written by Andrew Tridgell | ||
4 | that turns any Unix host into a file server for DOS or Windows clients. | ||
5 | |||
6 | Smbfs is a SMB client, but uses parts of samba for its operation. For | ||
7 | more info on samba, including documentation, please go to | ||
8 | http://www.samba.org/ and then on to your nearest mirror. | ||
diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt index 203f7202cc9e..66699afd66ca 100644 --- a/Documentation/filesystems/squashfs.txt +++ b/Documentation/filesystems/squashfs.txt | |||
@@ -2,7 +2,7 @@ SQUASHFS 4.0 FILESYSTEM | |||
2 | ======================= | 2 | ======================= |
3 | 3 | ||
4 | Squashfs is a compressed read-only filesystem for Linux. | 4 | Squashfs is a compressed read-only filesystem for Linux. |
5 | It uses zlib compression to compress files, inodes and directories. | 5 | It uses zlib/lzo compression to compress files, inodes and directories. |
6 | Inodes in the system are very small and all blocks are packed to minimise | 6 | Inodes in the system are very small and all blocks are packed to minimise |
7 | data overhead. Block sizes greater than 4K are supported up to a maximum | 7 | data overhead. Block sizes greater than 4K are supported up to a maximum |
8 | of 1Mbytes (default block size 128K). | 8 | of 1Mbytes (default block size 128K). |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 94677e7dcb13..20899e095e7e 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -534,6 +534,7 @@ struct address_space_operations { | |||
534 | sector_t (*bmap)(struct address_space *, sector_t); | 534 | sector_t (*bmap)(struct address_space *, sector_t); |
535 | int (*invalidatepage) (struct page *, unsigned long); | 535 | int (*invalidatepage) (struct page *, unsigned long); |
536 | int (*releasepage) (struct page *, int); | 536 | int (*releasepage) (struct page *, int); |
537 | void (*freepage)(struct page *); | ||
537 | ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, | 538 | ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, |
538 | loff_t offset, unsigned long nr_segs); | 539 | loff_t offset, unsigned long nr_segs); |
539 | struct page* (*get_xip_page)(struct address_space *, sector_t, | 540 | struct page* (*get_xip_page)(struct address_space *, sector_t, |
@@ -660,11 +661,10 @@ struct address_space_operations { | |||
660 | releasepage: releasepage is called on PagePrivate pages to indicate | 661 | releasepage: releasepage is called on PagePrivate pages to indicate |
661 | that the page should be freed if possible. ->releasepage | 662 | that the page should be freed if possible. ->releasepage |
662 | should remove any private data from the page and clear the | 663 | should remove any private data from the page and clear the |
663 | PagePrivate flag. It may also remove the page from the | 664 | PagePrivate flag. If releasepage() fails for some reason, it must |
664 | address_space. If this fails for some reason, it may indicate | 665 | indicate failure with a 0 return value. |
665 | failure with a 0 return value. | 666 | releasepage() is used in two distinct though related cases. The |
666 | This is used in two distinct though related cases. The first | 667 | first is when the VM finds a clean page with no active users and |
667 | is when the VM finds a clean page with no active users and | ||
668 | wants to make it a free page. If ->releasepage succeeds, the | 668 | wants to make it a free page. If ->releasepage succeeds, the |
669 | page will be removed from the address_space and become free. | 669 | page will be removed from the address_space and become free. |
670 | 670 | ||
@@ -679,6 +679,12 @@ struct address_space_operations { | |||
679 | need to ensure this. Possibly it can clear the PageUptodate | 679 | need to ensure this. Possibly it can clear the PageUptodate |
680 | bit if it cannot free private data yet. | 680 | bit if it cannot free private data yet. |
681 | 681 | ||
682 | freepage: freepage is called once the page is no longer visible in | ||
683 | the page cache in order to allow the cleanup of any private | ||
684 | data. Since it may be called by the memory reclaimer, it | ||
685 | should not assume that the original address_space mapping still | ||
686 | exists, and it should not block. | ||
687 | |||
682 | direct_IO: called by the generic read/write routines to perform | 688 | direct_IO: called by the generic read/write routines to perform |
683 | direct_IO - that is IO requests which bypass the page cache | 689 | direct_IO - that is IO requests which bypass the page cache |
684 | and transfer data directly between the storage and the | 690 | and transfer data directly between the storage and the |
@@ -727,7 +733,6 @@ struct file_operations { | |||
727 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); | 733 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
728 | int (*readdir) (struct file *, void *, filldir_t); | 734 | int (*readdir) (struct file *, void *, filldir_t); |
729 | unsigned int (*poll) (struct file *, struct poll_table_struct *); | 735 | unsigned int (*poll) (struct file *, struct poll_table_struct *); |
730 | int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); | ||
731 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); | 736 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); |
732 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); | 737 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); |
733 | int (*mmap) (struct file *, struct vm_area_struct *); | 738 | int (*mmap) (struct file *, struct vm_area_struct *); |
@@ -768,10 +773,7 @@ otherwise noted. | |||
768 | activity on this file and (optionally) go to sleep until there | 773 | activity on this file and (optionally) go to sleep until there |
769 | is activity. Called by the select(2) and poll(2) system calls | 774 | is activity. Called by the select(2) and poll(2) system calls |
770 | 775 | ||
771 | ioctl: called by the ioctl(2) system call | 776 | unlocked_ioctl: called by the ioctl(2) system call. |
772 | |||
773 | unlocked_ioctl: called by the ioctl(2) system call. Filesystems that do not | ||
774 | require the BKL should use this method instead of the ioctl() above. | ||
775 | 777 | ||
776 | compat_ioctl: called by the ioctl(2) system call when 32 bit system calls | 778 | compat_ioctl: called by the ioctl(2) system call when 32 bit system calls |
777 | are used on 64 bit kernels. | 779 | are used on 64 bit kernels. |
diff --git a/Documentation/filesystems/xfs-delayed-logging-design.txt b/Documentation/filesystems/xfs-delayed-logging-design.txt index 96d0df28bed3..7445bf335dae 100644 --- a/Documentation/filesystems/xfs-delayed-logging-design.txt +++ b/Documentation/filesystems/xfs-delayed-logging-design.txt | |||
@@ -794,17 +794,6 @@ designed. | |||
794 | 794 | ||
795 | Roadmap: | 795 | Roadmap: |
796 | 796 | ||
797 | 2.6.37 Remove experimental tag from mount option | ||
798 | => should be roughly 6 months after initial merge | ||
799 | => enough time to: | ||
800 | => gain confidence and fix problems reported by early | ||
801 | adopters (a.k.a. guinea pigs) | ||
802 | => address worst performance regressions and undesired | ||
803 | behaviours | ||
804 | => start tuning/optimising code for parallelism | ||
805 | => start tuning/optimising algorithms consuming | ||
806 | excessive CPU time | ||
807 | |||
808 | 2.6.39 Switch default mount option to use delayed logging | 797 | 2.6.39 Switch default mount option to use delayed logging |
809 | => should be roughly 12 months after initial merge | 798 | => should be roughly 12 months after initial merge |
810 | => enough time to shake out remaining problems before next round of | 799 | => enough time to shake out remaining problems before next round of |