aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/filesystems
diff options
context:
space:
mode:
authorGrant Likely <grant.likely@secretlab.ca>2010-12-30 00:20:30 -0500
committerGrant Likely <grant.likely@secretlab.ca>2010-12-30 00:21:47 -0500
commitd392da5207352f09030e95d9ea335a4225667ec0 (patch)
tree7d6cd1932afcad0a5619a5c504a6d93ca318187c /Documentation/filesystems
parente39d5ef678045d61812c1401f04fe8edb14d6359 (diff)
parent387c31c7e5c9805b0aef8833d1731a5fe7bdea14 (diff)
Merge v2.6.37-rc8 into powerpc/next
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/00-INDEX2
-rw-r--r--Documentation/filesystems/9p.txt4
-rw-r--r--Documentation/filesystems/Locking70
-rw-r--r--Documentation/filesystems/configfs/configfs_example_explicit.c2
-rw-r--r--Documentation/filesystems/ext4.txt14
-rw-r--r--Documentation/filesystems/nfs/00-INDEX4
-rw-r--r--Documentation/filesystems/nfs/idmapper.txt67
-rw-r--r--Documentation/filesystems/nfs/nfsroot.txt22
-rw-r--r--Documentation/filesystems/nfs/pnfs.txt48
-rw-r--r--Documentation/filesystems/ocfs2.txt7
-rw-r--r--Documentation/filesystems/porting45
-rw-r--r--Documentation/filesystems/proc.txt25
-rw-r--r--Documentation/filesystems/sharedsubtree.txt4
-rw-r--r--Documentation/filesystems/smbfs.txt8
-rw-r--r--Documentation/filesystems/squashfs.txt2
-rw-r--r--Documentation/filesystems/vfs.txt22
-rw-r--r--Documentation/filesystems/xfs-delayed-logging-design.txt11
17 files changed, 286 insertions, 71 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 4303614b5add..8c624a18f67d 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -96,8 +96,6 @@ seq_file.txt
96 - how to use the seq_file API 96 - how to use the seq_file API
97sharedsubtree.txt 97sharedsubtree.txt
98 - a description of shared subtrees for namespaces. 98 - a description of shared subtrees for namespaces.
99smbfs.txt
100 - info on using filesystems with the SMB protocol (Win 3.11 and NT).
101spufs.txt 99spufs.txt
102 - info and mount options for the SPU filesystem used on Cell. 100 - info and mount options for the SPU filesystem used on Cell.
103sysfs-pci.txt 101sysfs-pci.txt
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt
index f9765e8cf086..b22abba78fed 100644
--- a/Documentation/filesystems/9p.txt
+++ b/Documentation/filesystems/9p.txt
@@ -111,7 +111,7 @@ OPTIONS
111 This can be used to share devices/named pipes/sockets between 111 This can be used to share devices/named pipes/sockets between
112 hosts. This functionality will be expanded in later versions. 112 hosts. This functionality will be expanded in later versions.
113 113
114 access there are three access modes. 114 access there are four access modes.
115 user = if a user tries to access a file on v9fs 115 user = if a user tries to access a file on v9fs
116 filesystem for the first time, v9fs sends an 116 filesystem for the first time, v9fs sends an
117 attach command (Tattach) for that user. 117 attach command (Tattach) for that user.
@@ -120,6 +120,8 @@ OPTIONS
120 the files on the mounted filesystem 120 the files on the mounted filesystem
121 any = v9fs does single attach and performs all 121 any = v9fs does single attach and performs all
122 operations as one user 122 operations as one user
123 client = ACL based access check on the 9p client
124 side for access validation
123 125
124 cachetag cache tag to use the specified persistent cache. 126 cachetag cache tag to use the specified persistent cache.
125 cache tags for existing cache sessions can be listed at 127 cache tags for existing cache sessions can be listed at
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 96d4293607ec..b6426f15b4ae 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -92,8 +92,8 @@ prototypes:
92 void (*destroy_inode)(struct inode *); 92 void (*destroy_inode)(struct inode *);
93 void (*dirty_inode) (struct inode *); 93 void (*dirty_inode) (struct inode *);
94 int (*write_inode) (struct inode *, int); 94 int (*write_inode) (struct inode *, int);
95 void (*drop_inode) (struct inode *); 95 int (*drop_inode) (struct inode *);
96 void (*delete_inode) (struct inode *); 96 void (*evict_inode) (struct inode *);
97 void (*put_super) (struct super_block *); 97 void (*put_super) (struct super_block *);
98 void (*write_super) (struct super_block *); 98 void (*write_super) (struct super_block *);
99 int (*sync_fs)(struct super_block *sb, int wait); 99 int (*sync_fs)(struct super_block *sb, int wait);
@@ -101,14 +101,13 @@ prototypes:
101 int (*unfreeze_fs) (struct super_block *); 101 int (*unfreeze_fs) (struct super_block *);
102 int (*statfs) (struct dentry *, struct kstatfs *); 102 int (*statfs) (struct dentry *, struct kstatfs *);
103 int (*remount_fs) (struct super_block *, int *, char *); 103 int (*remount_fs) (struct super_block *, int *, char *);
104 void (*clear_inode) (struct inode *);
105 void (*umount_begin) (struct super_block *); 104 void (*umount_begin) (struct super_block *);
106 int (*show_options)(struct seq_file *, struct vfsmount *); 105 int (*show_options)(struct seq_file *, struct vfsmount *);
107 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 106 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
108 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 107 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
109 108
110locking rules: 109locking rules:
111 All may block. 110 All may block [not true, see below]
112 None have BKL 111 None have BKL
113 s_umount 112 s_umount
114alloc_inode: 113alloc_inode:
@@ -116,22 +115,25 @@ destroy_inode:
116dirty_inode: (must not sleep) 115dirty_inode: (must not sleep)
117write_inode: 116write_inode:
118drop_inode: !!!inode_lock!!! 117drop_inode: !!!inode_lock!!!
119delete_inode: 118evict_inode:
120put_super: write 119put_super: write
121write_super: read 120write_super: read
122sync_fs: read 121sync_fs: read
123freeze_fs: read 122freeze_fs: read
124unfreeze_fs: read 123unfreeze_fs: read
125statfs: no 124statfs: maybe(read) (see below)
126remount_fs: maybe (see below) 125remount_fs: write
127clear_inode:
128umount_begin: no 126umount_begin: no
129show_options: no (namespace_sem) 127show_options: no (namespace_sem)
130quota_read: no (see below) 128quota_read: no (see below)
131quota_write: no (see below) 129quota_write: no (see below)
132 130
133->remount_fs() will have the s_umount exclusive lock if it's already mounted. 131->statfs() has s_umount (shared) when called by ustat(2) (native or
134When called from get_sb_single, it does NOT have the s_umount lock. 132compat), but that's an accident of bad API; s_umount is used to pin
133the superblock down when we only have dev_t given us by userland to
134identify the superblock. Everything else (statfs(), fstatfs(), etc.)
135doesn't hold it when calling ->statfs() - superblock is pinned down
136by resolving the pathname passed to syscall.
135->quota_read() and ->quota_write() functions are both guaranteed to 137->quota_read() and ->quota_write() functions are both guaranteed to
136be the only ones operating on the quota file by the quota code (via 138be the only ones operating on the quota file by the quota code (via
137dqio_sem) (unless an admin really wants to screw up something and 139dqio_sem) (unless an admin really wants to screw up something and
@@ -171,12 +173,13 @@ prototypes:
171 sector_t (*bmap)(struct address_space *, sector_t); 173 sector_t (*bmap)(struct address_space *, sector_t);
172 int (*invalidatepage) (struct page *, unsigned long); 174 int (*invalidatepage) (struct page *, unsigned long);
173 int (*releasepage) (struct page *, int); 175 int (*releasepage) (struct page *, int);
176 void (*freepage)(struct page *);
174 int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 177 int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
175 loff_t offset, unsigned long nr_segs); 178 loff_t offset, unsigned long nr_segs);
176 int (*launder_page) (struct page *); 179 int (*launder_page) (struct page *);
177 180
178locking rules: 181locking rules:
179 All except set_page_dirty may block 182 All except set_page_dirty and freepage may block
180 183
181 BKL PageLocked(page) i_mutex 184 BKL PageLocked(page) i_mutex
182writepage: no yes, unlocks (see below) 185writepage: no yes, unlocks (see below)
@@ -191,6 +194,7 @@ perform_write: no n/a yes
191bmap: no 194bmap: no
192invalidatepage: no yes 195invalidatepage: no yes
193releasepage: no yes 196releasepage: no yes
197freepage: no yes
194direct_IO: no 198direct_IO: no
195launder_page: no yes 199launder_page: no yes
196 200
@@ -286,6 +290,9 @@ buffers from the page in preparation for freeing it. It returns zero to
286indicate that the buffers are (or may be) freeable. If ->releasepage is zero, 290indicate that the buffers are (or may be) freeable. If ->releasepage is zero,
287the kernel assumes that the fs has no private interest in the buffers. 291the kernel assumes that the fs has no private interest in the buffers.
288 292
293 ->freepage() is called when the kernel is done dropping the page
294from the page cache.
295
289 ->launder_page() may be called prior to releasing a page if 296 ->launder_page() may be called prior to releasing a page if
290it is still found to be dirty. It returns zero if the page was successfully 297it is still found to be dirty. It returns zero if the page was successfully
291cleaned, or an error value if not. Note that in order to prevent the page 298cleaned, or an error value if not. Note that in order to prevent the page
@@ -320,7 +327,6 @@ fl_release_private: yes yes
320prototypes: 327prototypes:
321 int (*fl_compare_owner)(struct file_lock *, struct file_lock *); 328 int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
322 void (*fl_notify)(struct file_lock *); /* unblock callback */ 329 void (*fl_notify)(struct file_lock *); /* unblock callback */
323 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
324 void (*fl_release_private)(struct file_lock *); 330 void (*fl_release_private)(struct file_lock *);
325 void (*fl_break)(struct file_lock *); /* break_lease callback */ 331 void (*fl_break)(struct file_lock *); /* break_lease callback */
326 332
@@ -328,7 +334,6 @@ locking rules:
328 BKL may block 334 BKL may block
329fl_compare_owner: yes no 335fl_compare_owner: yes no
330fl_notify: yes no 336fl_notify: yes no
331fl_copy_lock: yes no
332fl_release_private: yes yes 337fl_release_private: yes yes
333fl_break: yes no 338fl_break: yes no
334 339
@@ -347,21 +352,36 @@ call this method upon the IO completion.
347 352
348--------------------------- block_device_operations ----------------------- 353--------------------------- block_device_operations -----------------------
349prototypes: 354prototypes:
350 int (*open) (struct inode *, struct file *); 355 int (*open) (struct block_device *, fmode_t);
351 int (*release) (struct inode *, struct file *); 356 int (*release) (struct gendisk *, fmode_t);
352 int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); 357 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
358 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
359 int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *);
353 int (*media_changed) (struct gendisk *); 360 int (*media_changed) (struct gendisk *);
361 void (*unlock_native_capacity) (struct gendisk *);
354 int (*revalidate_disk) (struct gendisk *); 362 int (*revalidate_disk) (struct gendisk *);
363 int (*getgeo)(struct block_device *, struct hd_geometry *);
364 void (*swap_slot_free_notify) (struct block_device *, unsigned long);
355 365
356locking rules: 366locking rules:
357 BKL bd_sem 367 BKL bd_mutex
358open: yes yes 368open: no yes
359release: yes yes 369release: no yes
360ioctl: yes no 370ioctl: no no
371compat_ioctl: no no
372direct_access: no no
361media_changed: no no 373media_changed: no no
374unlock_native_capacity: no no
362revalidate_disk: no no 375revalidate_disk: no no
376getgeo: no no
377swap_slot_free_notify: no no (see below)
378
379media_changed, unlock_native_capacity and revalidate_disk are called only from
380check_disk_change().
381
382swap_slot_free_notify is called with swap_lock and sometimes the page lock
383held.
363 384
364The last two are called only from check_disk_change().
365 385
366--------------------------- file_operations ------------------------------- 386--------------------------- file_operations -------------------------------
367prototypes: 387prototypes:
@@ -372,8 +392,6 @@ prototypes:
372 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 392 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
373 int (*readdir) (struct file *, void *, filldir_t); 393 int (*readdir) (struct file *, void *, filldir_t);
374 unsigned int (*poll) (struct file *, struct poll_table_struct *); 394 unsigned int (*poll) (struct file *, struct poll_table_struct *);
375 int (*ioctl) (struct inode *, struct file *, unsigned int,
376 unsigned long);
377 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); 395 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
378 long (*compat_ioctl) (struct file *, unsigned int, unsigned long); 396 long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
379 int (*mmap) (struct file *, struct vm_area_struct *); 397 int (*mmap) (struct file *, struct vm_area_struct *);
@@ -407,8 +425,7 @@ write: no
407aio_write: no 425aio_write: no
408readdir: no 426readdir: no
409poll: no 427poll: no
410ioctl: yes (see below) 428unlocked_ioctl: no
411unlocked_ioctl: no (see below)
412compat_ioctl: no 429compat_ioctl: no
413mmap: no 430mmap: no
414open: no 431open: no
@@ -451,9 +468,6 @@ move ->readdir() to inode_operations and use a separate method for directory
451anything that resembles union-mount we won't have a struct file for all 468anything that resembles union-mount we won't have a struct file for all
452components. And there are other reasons why the current interface is a mess... 469components. And there are other reasons why the current interface is a mess...
453 470
454->ioctl() on regular files is superceded by the ->unlocked_ioctl() that
455doesn't take the BKL.
456
457->read on directories probably must go away - we should just enforce -EISDIR 471->read on directories probably must go away - we should just enforce -EISDIR
458in sys_read() and friends. 472in sys_read() and friends.
459 473
diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c
index d428cc9f07f3..fd53869f5633 100644
--- a/Documentation/filesystems/configfs/configfs_example_explicit.c
+++ b/Documentation/filesystems/configfs/configfs_example_explicit.c
@@ -89,7 +89,7 @@ static ssize_t childless_storeme_write(struct childless *childless,
89 char *p = (char *) page; 89 char *p = (char *) page;
90 90
91 tmp = simple_strtoul(p, &p, 10); 91 tmp = simple_strtoul(p, &p, 10);
92 if (!p || (*p && (*p != '\n'))) 92 if ((*p != '\0') && (*p != '\n'))
93 return -EINVAL; 93 return -EINVAL;
94 94
95 if (tmp > INT_MAX) 95 if (tmp > INT_MAX)
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index e1def1786e50..6ab9442d7eeb 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -353,6 +353,20 @@ noauto_da_alloc replacing existing files via patterns such as
353 system crashes before the delayed allocation 353 system crashes before the delayed allocation
354 blocks are forced to disk. 354 blocks are forced to disk.
355 355
356noinit_itable Do not initialize any uninitialized inode table
357 blocks in the background. This feature may be
358 used by installation CD's so that the install
359 process can complete as quickly as possible; the
360 inode table initialization process would then be
361 deferred until the next time the file system
362 is unmounted.
363
364init_itable=n The lazy itable init code will wait n times the
365 number of milliseconds it took to zero out the
366 previous block group's inode table. This
367 minimizes the impact on the systme performance
368 while file system's inode table is being initialized.
369
356discard Controls whether ext4 should issue discard/TRIM 370discard Controls whether ext4 should issue discard/TRIM
357nodiscard(*) commands to the underlying block device when 371nodiscard(*) commands to the underlying block device when
358 blocks are freed. This is useful for SSD devices 372 blocks are freed. This is useful for SSD devices
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX
index 2f68cd688769..a57e12411d2a 100644
--- a/Documentation/filesystems/nfs/00-INDEX
+++ b/Documentation/filesystems/nfs/00-INDEX
@@ -12,5 +12,9 @@ nfs-rdma.txt
12 - how to install and setup the Linux NFS/RDMA client and server software 12 - how to install and setup the Linux NFS/RDMA client and server software
13nfsroot.txt 13nfsroot.txt
14 - short guide on setting up a diskless box with NFS root filesystem. 14 - short guide on setting up a diskless box with NFS root filesystem.
15pnfs.txt
16 - short explanation of some of the internals of the pnfs client code
15rpc-cache.txt 17rpc-cache.txt
16 - introduction to the caching mechanisms in the sunrpc layer. 18 - introduction to the caching mechanisms in the sunrpc layer.
19idmapper.txt
20 - information for configuring request-keys to be used by idmapper
diff --git a/Documentation/filesystems/nfs/idmapper.txt b/Documentation/filesystems/nfs/idmapper.txt
new file mode 100644
index 000000000000..b9b4192ea8b5
--- /dev/null
+++ b/Documentation/filesystems/nfs/idmapper.txt
@@ -0,0 +1,67 @@
1
2=========
3ID Mapper
4=========
5Id mapper is used by NFS to translate user and group ids into names, and to
6translate user and group names into ids. Part of this translation involves
7performing an upcall to userspace to request the information. Id mapper will
8user request-key to perform this upcall and cache the result. The program
9/usr/sbin/nfs.idmap should be called by request-key, and will perform the
10translation and initialize a key with the resulting information.
11
12 NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this
13 feature.
14
15===========
16Configuring
17===========
18The file /etc/request-key.conf will need to be modified so /sbin/request-key can
19direct the upcall. The following line should be added:
20
21#OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ...
22#====== ======= =============== =============== ===============================
23create id_resolver * * /usr/sbin/nfs.idmap %k %d 600
24
25This will direct all id_resolver requests to the program /usr/sbin/nfs.idmap.
26The last parameter, 600, defines how many seconds into the future the key will
27expire. This parameter is optional for /usr/sbin/nfs.idmap. When the timeout
28is not specified, nfs.idmap will default to 600 seconds.
29
30id mapper uses for key descriptions:
31 uid: Find the UID for the given user
32 gid: Find the GID for the given group
33 user: Find the user name for the given UID
34 group: Find the group name for the given GID
35
36You can handle any of these individually, rather than using the generic upcall
37program. If you would like to use your own program for a uid lookup then you
38would edit your request-key.conf so it look similar to this:
39
40#OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ...
41#====== ======= =============== =============== ===============================
42create id_resolver uid:* * /some/other/program %k %d 600
43create id_resolver * * /usr/sbin/nfs.idmap %k %d 600
44
45Notice that the new line was added above the line for the generic program.
46request-key will find the first matching line and corresponding program. In
47this case, /some/other/program will handle all uid lookups and
48/usr/sbin/nfs.idmap will handle gid, user, and group lookups.
49
50See <file:Documentation/keys-request-keys.txt> for more information about the
51request-key function.
52
53
54=========
55nfs.idmap
56=========
57nfs.idmap is designed to be called by request-key, and should not be run "by
58hand". This program takes two arguments, a serialized key and a key
59description. The serialized key is first converted into a key_serial_t, and
60then passed as an argument to keyctl_instantiate (both are part of keyutils.h).
61
62The actual lookups are performed by functions found in nfsidmap.h. nfs.idmap
63determines the correct function to call by looking at the first part of the
64description string. For example, a uid lookup description will appear as
65"uid:user@domain".
66
67nfs.idmap will return 0 if the key was instantiated, and non-zero otherwise.
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
index f2430a7974e1..90c71c6f0d00 100644
--- a/Documentation/filesystems/nfs/nfsroot.txt
+++ b/Documentation/filesystems/nfs/nfsroot.txt
@@ -159,6 +159,28 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
159 Default: any 159 Default: any
160 160
161 161
162nfsrootdebug
163
164 This parameter enables debugging messages to appear in the kernel
165 log at boot time so that administrators can verify that the correct
166 NFS mount options, server address, and root path are passed to the
167 NFS client.
168
169
170rdinit=<executable file>
171
172 To specify which file contains the program that starts system
173 initialization, administrators can use this command line parameter.
174 The default value of this parameter is "/init". If the specified
175 file exists and the kernel can execute it, root filesystem related
176 kernel command line parameters, including `nfsroot=', are ignored.
177
178 A description of the process of mounting the root file system can be
179 found in:
180
181 Documentation/early-userspace/README
182
183
162 184
163 185
1643.) Boot Loader 1863.) Boot Loader
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt
new file mode 100644
index 000000000000..bc0b9cfe095b
--- /dev/null
+++ b/Documentation/filesystems/nfs/pnfs.txt
@@ -0,0 +1,48 @@
1Reference counting in pnfs:
2==========================
3
4The are several inter-related caches. We have layouts which can
5reference multiple devices, each of which can reference multiple data servers.
6Each data server can be referenced by multiple devices. Each device
7can be referenced by multiple layouts. To keep all of this straight,
8we need to reference count.
9
10
11struct pnfs_layout_hdr
12----------------------
13The on-the-wire command LAYOUTGET corresponds to struct
14pnfs_layout_segment, usually referred to by the variable name lseg.
15Each nfs_inode may hold a pointer to a cache of of these layout
16segments in nfsi->layout, of type struct pnfs_layout_hdr.
17
18We reference the header for the inode pointing to it, across each
19outstanding RPC call that references it (LAYOUTGET, LAYOUTRETURN,
20LAYOUTCOMMIT), and for each lseg held within.
21
22Each header is also (when non-empty) put on a list associated with
23struct nfs_client (cl_layouts). Being put on this list does not bump
24the reference count, as the layout is kept around by the lseg that
25keeps it in the list.
26
27deviceid_cache
28--------------
29lsegs reference device ids, which are resolved per nfs_client and
30layout driver type. The device ids are held in a RCU cache (struct
31nfs4_deviceid_cache). The cache itself is referenced across each
32mount. The entries (struct nfs4_deviceid) themselves are held across
33the lifetime of each lseg referencing them.
34
35RCU is used because the deviceid is basically a write once, read many
36data structure. The hlist size of 32 buckets needs better
37justification, but seems reasonable given that we can have multiple
38deviceid's per filesystem, and multiple filesystems per nfs_client.
39
40The hash code is copied from the nfsd code base. A discussion of
41hashing and variations of this algorithm can be found at:
42http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809
43
44data server cache
45-----------------
46file driver devices refer to data servers, which are kept in a module
47level cache. Its reference is held over the lifetime of the deviceid
48pointing to it.
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
index 1f7ae144f6d8..5393e6611691 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -87,3 +87,10 @@ dir_resv_level= (*) By default, directory reservations will scale with file
87 reservations - users should rarely need to change this 87 reservations - users should rarely need to change this
88 value. If allocation reservations are turned off, this 88 value. If allocation reservations are turned off, this
89 option will have no effect. 89 option will have no effect.
90coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode
91 lock will be taken to force other nodes drop cache,
92 therefore full cluster coherency is guaranteed even
93 for O_DIRECT writes.
94coherency=buffered Allow concurrent O_DIRECT writes without EX lock among
95 nodes, which gains high performance at risk of getting
96 stale data on other nodes.
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index a7e9746ee7ea..b12c89538680 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -273,3 +273,48 @@ it's safe to remove it. If you don't need it, remove it.
273deliberate; as soon as struct block_device * is propagated in a reasonable 273deliberate; as soon as struct block_device * is propagated in a reasonable
274way by that code fixing will become trivial; until then nothing can be 274way by that code fixing will become trivial; until then nothing can be
275done. 275done.
276
277[mandatory]
278
279 block truncatation on error exit from ->write_begin, and ->direct_IO
280moved from generic methods (block_write_begin, cont_write_begin,
281nobh_write_begin, blockdev_direct_IO*) to callers. Take a look at
282ext2_write_failed and callers for an example.
283
284[mandatory]
285
286 ->truncate is going away. The whole truncate sequence needs to be
287implemented in ->setattr, which is now mandatory for filesystems
288implementing on-disk size changes. Start with a copy of the old inode_setattr
289and vmtruncate, and the reorder the vmtruncate + foofs_vmtruncate sequence to
290be in order of zeroing blocks using block_truncate_page or similar helpers,
291size update and on finally on-disk truncation which should not fail.
292inode_change_ok now includes the size checks for ATTR_SIZE and must be called
293in the beginning of ->setattr unconditionally.
294
295[mandatory]
296
297 ->clear_inode() and ->delete_inode() are gone; ->evict_inode() should
298be used instead. It gets called whenever the inode is evicted, whether it has
299remaining links or not. Caller does *not* evict the pagecache or inode-associated
300metadata buffers; getting rid of those is responsibility of method, as it had
301been for ->delete_inode().
302 ->drop_inode() returns int now; it's called on final iput() with inode_lock
303held and it returns true if filesystems wants the inode to be dropped. As before,
304generic_drop_inode() is still the default and it's been updated appropriately.
305generic_delete_inode() is also alive and it consists simply of return 1. Note that
306all actual eviction work is done by caller after ->drop_inode() returns.
307 clear_inode() is gone; use end_writeback() instead. As before, it must
308be called exactly once on each call of ->evict_inode() (as it used to be for
309each call of ->delete_inode()). Unlike before, if you are using inode-associated
310metadata buffers (i.e. mark_buffer_dirty_inode()), it's your responsibility to
311call invalidate_inode_buffers() before end_writeback().
312 No async writeback (and thus no calls of ->write_inode()) will happen
313after end_writeback() returns, so actions that should not overlap with ->write_inode()
314(e.g. freeing on-disk inode if i_nlink is 0) ought to be done after that call.
315
316 NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out
317if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput()
318may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly
319free the on-disk inode, you may end up doing that while ->write_inode() is writing
320to it.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index a6aca8740883..e73df2722ff3 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -136,6 +136,7 @@ Table 1-1: Process specific entries in /proc
136 statm Process memory status information 136 statm Process memory status information
137 status Process status in human readable form 137 status Process status in human readable form
138 wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan 138 wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan
139 pagemap Page table
139 stack Report full stack trace, enable via CONFIG_STACKTRACE 140 stack Report full stack trace, enable via CONFIG_STACKTRACE
140 smaps a extension based on maps, showing the memory consumption of 141 smaps a extension based on maps, showing the memory consumption of
141 each mapping 142 each mapping
@@ -370,17 +371,24 @@ Shared_Dirty: 0 kB
370Private_Clean: 0 kB 371Private_Clean: 0 kB
371Private_Dirty: 0 kB 372Private_Dirty: 0 kB
372Referenced: 892 kB 373Referenced: 892 kB
374Anonymous: 0 kB
373Swap: 0 kB 375Swap: 0 kB
374KernelPageSize: 4 kB 376KernelPageSize: 4 kB
375MMUPageSize: 4 kB 377MMUPageSize: 4 kB
376 378
377The first of these lines shows the same information as is displayed for the 379The first of these lines shows the same information as is displayed for the
378mapping in /proc/PID/maps. The remaining lines show the size of the mapping, 380mapping in /proc/PID/maps. The remaining lines show the size of the mapping
379the amount of the mapping that is currently resident in RAM, the "proportional 381(size), the amount of the mapping that is currently resident in RAM (RSS), the
380set size” (divide each shared page by the number of processes sharing it), the 382process' proportional share of this mapping (PSS), the number of clean and
381number of clean and dirty shared pages in the mapping, and the number of clean 383dirty private pages in the mapping. Note that even a page which is part of a
382and dirty private pages in the mapping. The "Referenced" indicates the amount 384MAP_SHARED mapping, but has only a single pte mapped, i.e. is currently used
383of memory currently marked as referenced or accessed. 385by only one process, is accounted as private and not as shared. "Referenced"
386indicates the amount of memory currently marked as referenced or accessed.
387"Anonymous" shows the amount of memory that does not belong to any file. Even
388a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE
389and a page is modified, the file page is replaced by a private anonymous copy.
390"Swap" shows how much would-be-anonymous memory is also used, but out on
391swap.
384 392
385This file is only present if the CONFIG_MMU kernel configuration option is 393This file is only present if the CONFIG_MMU kernel configuration option is
386enabled. 394enabled.
@@ -397,6 +405,9 @@ To clear the bits for the file mapped pages associated with the process
397 > echo 3 > /proc/PID/clear_refs 405 > echo 3 > /proc/PID/clear_refs
398Any other value written to /proc/PID/clear_refs will have no effect. 406Any other value written to /proc/PID/clear_refs will have no effect.
399 407
408The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags
409using /proc/kpageflags and number of times a page is mapped using
410/proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt.
400 411
4011.2 Kernel data 4121.2 Kernel data
402--------------- 413---------------
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt
index fc0e39af43c3..4ede421c9687 100644
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -62,10 +62,10 @@ replicas continue to be exactly same.
62 # mount /dev/sd0 /tmp/a 62 # mount /dev/sd0 /tmp/a
63 63
64 #ls /tmp/a 64 #ls /tmp/a
65 t1 t2 t2 65 t1 t2 t3
66 66
67 #ls /mnt/a 67 #ls /mnt/a
68 t1 t2 t2 68 t1 t2 t3
69 69
70 Note that the mount has propagated to the mount at /mnt as well. 70 Note that the mount has propagated to the mount at /mnt as well.
71 71
diff --git a/Documentation/filesystems/smbfs.txt b/Documentation/filesystems/smbfs.txt
deleted file mode 100644
index 194fb0decd2c..000000000000
--- a/Documentation/filesystems/smbfs.txt
+++ /dev/null
@@ -1,8 +0,0 @@
1Smbfs is a filesystem that implements the SMB protocol, which is the
2protocol used by Windows for Workgroups, Windows 95 and Windows NT.
3Smbfs was inspired by Samba, the program written by Andrew Tridgell
4that turns any Unix host into a file server for DOS or Windows clients.
5
6Smbfs is a SMB client, but uses parts of samba for its operation. For
7more info on samba, including documentation, please go to
8http://www.samba.org/ and then on to your nearest mirror.
diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt
index 203f7202cc9e..66699afd66ca 100644
--- a/Documentation/filesystems/squashfs.txt
+++ b/Documentation/filesystems/squashfs.txt
@@ -2,7 +2,7 @@ SQUASHFS 4.0 FILESYSTEM
2======================= 2=======================
3 3
4Squashfs is a compressed read-only filesystem for Linux. 4Squashfs is a compressed read-only filesystem for Linux.
5It uses zlib compression to compress files, inodes and directories. 5It uses zlib/lzo compression to compress files, inodes and directories.
6Inodes in the system are very small and all blocks are packed to minimise 6Inodes in the system are very small and all blocks are packed to minimise
7data overhead. Block sizes greater than 4K are supported up to a maximum 7data overhead. Block sizes greater than 4K are supported up to a maximum
8of 1Mbytes (default block size 128K). 8of 1Mbytes (default block size 128K).
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 94677e7dcb13..20899e095e7e 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -534,6 +534,7 @@ struct address_space_operations {
534 sector_t (*bmap)(struct address_space *, sector_t); 534 sector_t (*bmap)(struct address_space *, sector_t);
535 int (*invalidatepage) (struct page *, unsigned long); 535 int (*invalidatepage) (struct page *, unsigned long);
536 int (*releasepage) (struct page *, int); 536 int (*releasepage) (struct page *, int);
537 void (*freepage)(struct page *);
537 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 538 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
538 loff_t offset, unsigned long nr_segs); 539 loff_t offset, unsigned long nr_segs);
539 struct page* (*get_xip_page)(struct address_space *, sector_t, 540 struct page* (*get_xip_page)(struct address_space *, sector_t,
@@ -660,11 +661,10 @@ struct address_space_operations {
660 releasepage: releasepage is called on PagePrivate pages to indicate 661 releasepage: releasepage is called on PagePrivate pages to indicate
661 that the page should be freed if possible. ->releasepage 662 that the page should be freed if possible. ->releasepage
662 should remove any private data from the page and clear the 663 should remove any private data from the page and clear the
663 PagePrivate flag. It may also remove the page from the 664 PagePrivate flag. If releasepage() fails for some reason, it must
664 address_space. If this fails for some reason, it may indicate 665 indicate failure with a 0 return value.
665 failure with a 0 return value. 666 releasepage() is used in two distinct though related cases. The
666 This is used in two distinct though related cases. The first 667 first is when the VM finds a clean page with no active users and
667 is when the VM finds a clean page with no active users and
668 wants to make it a free page. If ->releasepage succeeds, the 668 wants to make it a free page. If ->releasepage succeeds, the
669 page will be removed from the address_space and become free. 669 page will be removed from the address_space and become free.
670 670
@@ -679,6 +679,12 @@ struct address_space_operations {
679 need to ensure this. Possibly it can clear the PageUptodate 679 need to ensure this. Possibly it can clear the PageUptodate
680 bit if it cannot free private data yet. 680 bit if it cannot free private data yet.
681 681
682 freepage: freepage is called once the page is no longer visible in
683 the page cache in order to allow the cleanup of any private
684 data. Since it may be called by the memory reclaimer, it
685 should not assume that the original address_space mapping still
686 exists, and it should not block.
687
682 direct_IO: called by the generic read/write routines to perform 688 direct_IO: called by the generic read/write routines to perform
683 direct_IO - that is IO requests which bypass the page cache 689 direct_IO - that is IO requests which bypass the page cache
684 and transfer data directly between the storage and the 690 and transfer data directly between the storage and the
@@ -727,7 +733,6 @@ struct file_operations {
727 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 733 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
728 int (*readdir) (struct file *, void *, filldir_t); 734 int (*readdir) (struct file *, void *, filldir_t);
729 unsigned int (*poll) (struct file *, struct poll_table_struct *); 735 unsigned int (*poll) (struct file *, struct poll_table_struct *);
730 int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
731 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); 736 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
732 long (*compat_ioctl) (struct file *, unsigned int, unsigned long); 737 long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
733 int (*mmap) (struct file *, struct vm_area_struct *); 738 int (*mmap) (struct file *, struct vm_area_struct *);
@@ -768,10 +773,7 @@ otherwise noted.
768 activity on this file and (optionally) go to sleep until there 773 activity on this file and (optionally) go to sleep until there
769 is activity. Called by the select(2) and poll(2) system calls 774 is activity. Called by the select(2) and poll(2) system calls
770 775
771 ioctl: called by the ioctl(2) system call 776 unlocked_ioctl: called by the ioctl(2) system call.
772
773 unlocked_ioctl: called by the ioctl(2) system call. Filesystems that do not
774 require the BKL should use this method instead of the ioctl() above.
775 777
776 compat_ioctl: called by the ioctl(2) system call when 32 bit system calls 778 compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
777 are used on 64 bit kernels. 779 are used on 64 bit kernels.
diff --git a/Documentation/filesystems/xfs-delayed-logging-design.txt b/Documentation/filesystems/xfs-delayed-logging-design.txt
index 96d0df28bed3..7445bf335dae 100644
--- a/Documentation/filesystems/xfs-delayed-logging-design.txt
+++ b/Documentation/filesystems/xfs-delayed-logging-design.txt
@@ -794,17 +794,6 @@ designed.
794 794
795Roadmap: 795Roadmap:
796 796
7972.6.37 Remove experimental tag from mount option
798 => should be roughly 6 months after initial merge
799 => enough time to:
800 => gain confidence and fix problems reported by early
801 adopters (a.k.a. guinea pigs)
802 => address worst performance regressions and undesired
803 behaviours
804 => start tuning/optimising code for parallelism
805 => start tuning/optimising algorithms consuming
806 excessive CPU time
807
8082.6.39 Switch default mount option to use delayed logging 7972.6.39 Switch default mount option to use delayed logging
809 => should be roughly 12 months after initial merge 798 => should be roughly 12 months after initial merge
810 => enough time to shake out remaining problems before next round of 799 => enough time to shake out remaining problems before next round of