aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig30
-rw-r--r--fs/Makefile1
-rw-r--r--fs/afs/dir.c4
-rw-r--r--fs/aio.c4
-rw-r--r--fs/befs/befs_fs_types.h2
-rw-r--r--fs/binfmt_som.c18
-rw-r--r--fs/cifs/README6
-rw-r--r--fs/cifs/connect.c22
-rw-r--r--fs/cifs/sess.c6
-rw-r--r--fs/compat.c20
-rw-r--r--fs/configfs/file.c4
-rw-r--r--fs/dcache.c9
-rw-r--r--fs/debugfs/inode.c2
-rw-r--r--fs/dnotify.c2
-rw-r--r--fs/ecryptfs/Makefile7
-rw-r--r--fs/ecryptfs/crypto.c1659
-rw-r--r--fs/ecryptfs/debug.c123
-rw-r--r--fs/ecryptfs/dentry.c87
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h482
-rw-r--r--fs/ecryptfs/file.c440
-rw-r--r--fs/ecryptfs/inode.c1079
-rw-r--r--fs/ecryptfs/keystore.c1061
-rw-r--r--fs/ecryptfs/main.c831
-rw-r--r--fs/ecryptfs/mmap.c788
-rw-r--r--fs/ecryptfs/super.c198
-rw-r--r--fs/eventpoll.c5
-rw-r--r--fs/exec.c2
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/fat/dir.c2
-rw-r--r--fs/fcntl.c79
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/hfsplus/part_tbl.c2
-rw-r--r--fs/inode.c14
-rw-r--r--fs/jbd/commit.c2
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jfs/acl.c8
-rw-r--r--fs/jfs/endian24.h2
-rw-r--r--fs/jfs/file.c8
-rw-r--r--fs/jfs/inode.c8
-rw-r--r--fs/jfs/jfs_acl.h8
-rw-r--r--fs/jfs/jfs_btree.h6
-rw-r--r--fs/jfs/jfs_debug.c6
-rw-r--r--fs/jfs/jfs_dinode.h8
-rw-r--r--fs/jfs/jfs_dmap.c180
-rw-r--r--fs/jfs/jfs_dmap.h28
-rw-r--r--fs/jfs/jfs_dtree.c18
-rw-r--r--fs/jfs/jfs_dtree.h10
-rw-r--r--fs/jfs/jfs_extent.c42
-rw-r--r--fs/jfs/jfs_extent.h8
-rw-r--r--fs/jfs/jfs_filsys.h24
-rw-r--r--fs/jfs/jfs_imap.c222
-rw-r--r--fs/jfs/jfs_imap.h14
-rw-r--r--fs/jfs/jfs_incore.h8
-rw-r--r--fs/jfs/jfs_inode.c15
-rw-r--r--fs/jfs/jfs_inode.h6
-rw-r--r--fs/jfs/jfs_lock.h10
-rw-r--r--fs/jfs/jfs_logmgr.c38
-rw-r--r--fs/jfs/jfs_logmgr.h76
-rw-r--r--fs/jfs/jfs_metapage.c12
-rw-r--r--fs/jfs/jfs_metapage.h8
-rw-r--r--fs/jfs/jfs_mount.c34
-rw-r--r--fs/jfs/jfs_superblock.h22
-rw-r--r--fs/jfs/jfs_txnmgr.c8
-rw-r--r--fs/jfs/jfs_txnmgr.h12
-rw-r--r--fs/jfs/jfs_umount.c24
-rw-r--r--fs/jfs/jfs_unicode.c12
-rw-r--r--fs/jfs/jfs_unicode.h10
-rw-r--r--fs/jfs/jfs_uniupr.c8
-rw-r--r--fs/jfs/jfs_xattr.h2
-rw-r--r--fs/jfs/jfs_xtree.c12
-rw-r--r--fs/jfs/jfs_xtree.h8
-rw-r--r--fs/jfs/namei.c67
-rw-r--r--fs/jfs/resize.c6
-rw-r--r--fs/jfs/super.c12
-rw-r--r--fs/jfs/symlink.c6
-rw-r--r--fs/jfs/xattr.c38
-rw-r--r--fs/lockd/clntlock.c56
-rw-r--r--fs/lockd/clntproc.c21
-rw-r--r--fs/lockd/host.c325
-rw-r--r--fs/lockd/mon.c67
-rw-r--r--fs/lockd/svc.c93
-rw-r--r--fs/lockd/svc4proc.c29
-rw-r--r--fs/lockd/svclock.c199
-rw-r--r--fs/lockd/svcproc.c27
-rw-r--r--fs/lockd/svcshare.c20
-rw-r--r--fs/lockd/svcsubs.c164
-rw-r--r--fs/lockd/xdr.c2
-rw-r--r--fs/locks.c2
-rw-r--r--fs/namespace.c22
-rw-r--r--fs/nfs/callback.c7
-rw-r--r--fs/nfs/client.c3
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfsd/export.c161
-rw-r--r--fs/nfsd/nfs2acl.c5
-rw-r--r--fs/nfsd/nfs3acl.c3
-rw-r--r--fs/nfsd/nfs3proc.c18
-rw-r--r--fs/nfsd/nfs3xdr.c56
-rw-r--r--fs/nfsd/nfs4acl.c711
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4proc.c34
-rw-r--r--fs/nfsd/nfs4recover.c2
-rw-r--r--fs/nfsd/nfs4xdr.c234
-rw-r--r--fs/nfsd/nfsctl.c189
-rw-r--r--fs/nfsd/nfsproc.c12
-rw-r--r--fs/nfsd/nfssvc.c335
-rw-r--r--fs/nfsd/nfsxdr.c45
-rw-r--r--fs/nfsd/vfs.c94
-rw-r--r--fs/nls/nls_ascii.c2
-rw-r--r--fs/nls/nls_base.c4
-rw-r--r--fs/nls/nls_cp1250.c2
-rw-r--r--fs/nls/nls_cp1251.c2
-rw-r--r--fs/nls/nls_cp1255.c2
-rw-r--r--fs/nls/nls_cp437.c2
-rw-r--r--fs/nls/nls_cp737.c2
-rw-r--r--fs/nls/nls_cp775.c2
-rw-r--r--fs/nls/nls_cp850.c2
-rw-r--r--fs/nls/nls_cp852.c2
-rw-r--r--fs/nls/nls_cp855.c2
-rw-r--r--fs/nls/nls_cp857.c2
-rw-r--r--fs/nls/nls_cp860.c2
-rw-r--r--fs/nls/nls_cp861.c2
-rw-r--r--fs/nls/nls_cp862.c2
-rw-r--r--fs/nls/nls_cp863.c2
-rw-r--r--fs/nls/nls_cp864.c2
-rw-r--r--fs/nls/nls_cp865.c2
-rw-r--r--fs/nls/nls_cp866.c2
-rw-r--r--fs/nls/nls_cp869.c2
-rw-r--r--fs/nls/nls_cp874.c2
-rw-r--r--fs/nls/nls_cp932.c2
-rw-r--r--fs/nls/nls_cp936.c2
-rw-r--r--fs/nls/nls_cp949.c2
-rw-r--r--fs/nls/nls_cp950.c2
-rw-r--r--fs/nls/nls_euc-jp.c2
-rw-r--r--fs/nls/nls_iso8859-1.c2
-rw-r--r--fs/nls/nls_iso8859-13.c2
-rw-r--r--fs/nls/nls_iso8859-14.c2
-rw-r--r--fs/nls/nls_iso8859-15.c2
-rw-r--r--fs/nls/nls_iso8859-2.c2
-rw-r--r--fs/nls/nls_iso8859-3.c2
-rw-r--r--fs/nls/nls_iso8859-4.c2
-rw-r--r--fs/nls/nls_iso8859-5.c2
-rw-r--r--fs/nls/nls_iso8859-6.c2
-rw-r--r--fs/nls/nls_iso8859-7.c2
-rw-r--r--fs/nls/nls_iso8859-9.c2
-rw-r--r--fs/nls/nls_koi8-r.c2
-rw-r--r--fs/nls/nls_koi8-ru.c2
-rw-r--r--fs/nls/nls_koi8-u.c2
-rw-r--r--fs/proc/array.c85
-rw-r--r--fs/proc/base.c1759
-rw-r--r--fs/proc/proc_misc.c3
-rw-r--r--fs/proc/root.c12
-rw-r--r--fs/readdir.c18
-rw-r--r--fs/reiserfs/file.c6
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/item_ops.c12
-rw-r--r--fs/reiserfs/journal.c49
-rw-r--r--fs/reiserfs/namei.c9
-rw-r--r--fs/reiserfs/stree.c4
-rw-r--r--fs/reiserfs/xattr.c6
-rw-r--r--fs/stat.c6
-rw-r--r--fs/sysfs/file.c4
-rw-r--r--fs/xfs/support/debug.c6
162 files changed, 10244 insertions, 2809 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index fa64867d6ed6..599de54451af 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -74,11 +74,11 @@ config EXT3_FS
74 tristate "Ext3 journalling file system support" 74 tristate "Ext3 journalling file system support"
75 select JBD 75 select JBD
76 help 76 help
77 This is the journaling version of the Second extended file system 77 This is the journalling version of the Second extended file system
78 (often called ext3), the de facto standard Linux file system 78 (often called ext3), the de facto standard Linux file system
79 (method to organize files on a storage device) for hard disks. 79 (method to organize files on a storage device) for hard disks.
80 80
81 The journaling code included in this driver means you do not have 81 The journalling code included in this driver means you do not have
82 to run e2fsck (file system checker) on your file systems after a 82 to run e2fsck (file system checker) on your file systems after a
83 crash. The journal keeps track of any changes that were being made 83 crash. The journal keeps track of any changes that were being made
84 at the time the system crashed, and can ensure that your file system 84 at the time the system crashed, and can ensure that your file system
@@ -143,7 +143,7 @@ config EXT3_FS_SECURITY
143config JBD 143config JBD
144 tristate 144 tristate
145 help 145 help
146 This is a generic journaling layer for block devices. It is 146 This is a generic journalling layer for block devices. It is
147 currently used by the ext3 and OCFS2 file systems, but it could 147 currently used by the ext3 and OCFS2 file systems, but it could
148 also be used to add journal support to other file systems or block 148 also be used to add journal support to other file systems or block
149 devices such as RAID or LVM. 149 devices such as RAID or LVM.
@@ -183,7 +183,7 @@ config REISERFS_FS
183 tristate "Reiserfs support" 183 tristate "Reiserfs support"
184 help 184 help
185 Stores not just filenames but the files themselves in a balanced 185 Stores not just filenames but the files themselves in a balanced
186 tree. Uses journaling. 186 tree. Uses journalling.
187 187
188 Balanced trees are more efficient than traditional file system 188 Balanced trees are more efficient than traditional file system
189 architectural foundations. 189 architectural foundations.
@@ -996,6 +996,18 @@ config AFFS_FS
996 To compile this file system support as a module, choose M here: the 996 To compile this file system support as a module, choose M here: the
997 module will be called affs. If unsure, say N. 997 module will be called affs. If unsure, say N.
998 998
999config ECRYPT_FS
1000 tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
1001 depends on EXPERIMENTAL && KEYS && CRYPTO
1002 help
1003 Encrypted filesystem that operates on the VFS layer. See
1004 <file:Documentation/ecryptfs.txt> to learn more about
1005 eCryptfs. Userspace components are required and can be
1006 obtained from <http://ecryptfs.sf.net>.
1007
1008 To compile this file system support as a module, choose M here: the
1009 module will be called ecryptfs.
1010
999config HFS_FS 1011config HFS_FS
1000 tristate "Apple Macintosh file system support (EXPERIMENTAL)" 1012 tristate "Apple Macintosh file system support (EXPERIMENTAL)"
1001 depends on BLOCK && EXPERIMENTAL 1013 depends on BLOCK && EXPERIMENTAL
@@ -1033,7 +1045,7 @@ config BEFS_FS
1033 on files and directories, and database-like indeces on selected 1045 on files and directories, and database-like indeces on selected
1034 attributes. (Also note that this driver doesn't make those features 1046 attributes. (Also note that this driver doesn't make those features
1035 available at this time). It is a 64 bit filesystem, so it supports 1047 available at this time). It is a 64 bit filesystem, so it supports
1036 extremly large volumes and files. 1048 extremely large volumes and files.
1037 1049
1038 If you use this filesystem, you should also say Y to at least one 1050 If you use this filesystem, you should also say Y to at least one
1039 of the NLS (native language support) options below. 1051 of the NLS (native language support) options below.
@@ -1091,7 +1103,7 @@ config JFFS_FS
1091 tristate "Journalling Flash File System (JFFS) support" 1103 tristate "Journalling Flash File System (JFFS) support"
1092 depends on MTD && BLOCK 1104 depends on MTD && BLOCK
1093 help 1105 help
1094 JFFS is the Journaling Flash File System developed by Axis 1106 JFFS is the Journalling Flash File System developed by Axis
1095 Communications in Sweden, aimed at providing a crash/powerdown-safe 1107 Communications in Sweden, aimed at providing a crash/powerdown-safe
1096 file system for disk-less embedded devices. Further information is 1108 file system for disk-less embedded devices. Further information is
1097 available at (<http://developer.axis.com/software/jffs/>). 1109 available at (<http://developer.axis.com/software/jffs/>).
@@ -1261,7 +1273,7 @@ config JFFS2_CMODE_NONE
1261config JFFS2_CMODE_PRIORITY 1273config JFFS2_CMODE_PRIORITY
1262 bool "priority" 1274 bool "priority"
1263 help 1275 help
1264 Tries the compressors in a predefinied order and chooses the first 1276 Tries the compressors in a predefined order and chooses the first
1265 successful one. 1277 successful one.
1266 1278
1267config JFFS2_CMODE_SIZE 1279config JFFS2_CMODE_SIZE
@@ -1366,7 +1378,7 @@ config SYSV_FS
1366 1378
1367 If you have floppies or hard disk partitions like that, it is likely 1379 If you have floppies or hard disk partitions like that, it is likely
1368 that they contain binaries from those other Unix systems; in order 1380 that they contain binaries from those other Unix systems; in order
1369 to run these binaries, you will want to install linux-abi which is a 1381 to run these binaries, you will want to install linux-abi which is
1370 a set of kernel modules that lets you run SCO, Xenix, Wyse, 1382 a set of kernel modules that lets you run SCO, Xenix, Wyse,
1371 UnixWare, Dell Unix and System V programs under Linux. It is 1383 UnixWare, Dell Unix and System V programs under Linux. It is
1372 available via FTP (user: ftp) from 1384 available via FTP (user: ftp) from
@@ -1951,7 +1963,7 @@ config AFS_FS
1951 If you say Y here, you will get an experimental Andrew File System 1963 If you say Y here, you will get an experimental Andrew File System
1952 driver. It currently only supports unsecured read-only AFS access. 1964 driver. It currently only supports unsecured read-only AFS access.
1953 1965
1954 See <file:Documentation/filesystems/afs.txt> for more intormation. 1966 See <file:Documentation/filesystems/afs.txt> for more information.
1955 1967
1956 If unsure, say N. 1968 If unsure, say N.
1957 1969
diff --git a/fs/Makefile b/fs/Makefile
index 215f70378177..df614eacee86 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_BFS_FS) += bfs/
76obj-$(CONFIG_ISO9660_FS) += isofs/ 76obj-$(CONFIG_ISO9660_FS) += isofs/
77obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ 77obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
78obj-$(CONFIG_HFS_FS) += hfs/ 78obj-$(CONFIG_HFS_FS) += hfs/
79obj-$(CONFIG_ECRYPT_FS) += ecryptfs/
79obj-$(CONFIG_VXFS_FS) += freevxfs/ 80obj-$(CONFIG_VXFS_FS) += freevxfs/
80obj-$(CONFIG_NFS_FS) += nfs/ 81obj-$(CONFIG_NFS_FS) += nfs/
81obj-$(CONFIG_EXPORTFS) += exportfs/ 82obj-$(CONFIG_EXPORTFS) += exportfs/
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 2fc99877cb0d..cf8a2cb28505 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -30,7 +30,7 @@ static int afs_dir_readdir(struct file *file, void *dirent, filldir_t filldir);
30static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); 30static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
31static int afs_d_delete(struct dentry *dentry); 31static int afs_d_delete(struct dentry *dentry);
32static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, 32static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
33 loff_t fpos, ino_t ino, unsigned dtype); 33 loff_t fpos, u64 ino, unsigned dtype);
34 34
35const struct file_operations afs_dir_file_operations = { 35const struct file_operations afs_dir_file_operations = {
36 .open = afs_dir_open, 36 .open = afs_dir_open,
@@ -409,7 +409,7 @@ static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir)
409 * uniquifier through dtype 409 * uniquifier through dtype
410 */ 410 */
411static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, 411static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
412 loff_t fpos, ino_t ino, unsigned dtype) 412 loff_t fpos, u64 ino, unsigned dtype)
413{ 413{
414 struct afs_dir_lookup_cookie *cookie = _cookie; 414 struct afs_dir_lookup_cookie *cookie = _cookie;
415 415
diff --git a/fs/aio.c b/fs/aio.c
index 2e0d1505ee36..94766599db00 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -675,7 +675,7 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
675 } 675 }
676 676
677 if (!(iocb->ki_retried & 0xff)) { 677 if (!(iocb->ki_retried & 0xff)) {
678 pr_debug("%ld retry: %d of %d\n", iocb->ki_retried, 678 pr_debug("%ld retry: %zd of %zd\n", iocb->ki_retried,
679 iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); 679 iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
680 } 680 }
681 681
@@ -1008,7 +1008,7 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
1008 1008
1009 pr_debug("added to ring %p at [%lu]\n", iocb, tail); 1009 pr_debug("added to ring %p at [%lu]\n", iocb, tail);
1010 1010
1011 pr_debug("%ld retries: %d of %d\n", iocb->ki_retried, 1011 pr_debug("%ld retries: %zd of %zd\n", iocb->ki_retried,
1012 iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); 1012 iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
1013put_rq: 1013put_rq:
1014 /* everything turned out well, dispose of the aiocb. */ 1014 /* everything turned out well, dispose of the aiocb. */
diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h
index 9095518e918d..63ef1e18fb84 100644
--- a/fs/befs/befs_fs_types.h
+++ b/fs/befs/befs_fs_types.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * include/linux/befs_fs_types.h 2 * fs/befs/befs_fs_types.h
3 * 3 *
4 * Copyright (C) 2001 Will Dyson (will@cs.earlham.edu) 4 * Copyright (C) 2001 Will Dyson (will@cs.earlham.edu)
5 * 5 *
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 32b5d625ce9c..5bcdaaf4eae0 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -29,6 +29,7 @@
29#include <linux/personality.h> 29#include <linux/personality.h>
30#include <linux/init.h> 30#include <linux/init.h>
31 31
32#include <asm/a.out.h>
32#include <asm/uaccess.h> 33#include <asm/uaccess.h>
33#include <asm/pgtable.h> 34#include <asm/pgtable.h>
34 35
@@ -194,6 +195,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
194 unsigned long som_entry; 195 unsigned long som_entry;
195 struct som_hdr *som_ex; 196 struct som_hdr *som_ex;
196 struct som_exec_auxhdr *hpuxhdr; 197 struct som_exec_auxhdr *hpuxhdr;
198 struct files_struct *files;
197 199
198 /* Get the exec-header */ 200 /* Get the exec-header */
199 som_ex = (struct som_hdr *) bprm->buf; 201 som_ex = (struct som_hdr *) bprm->buf;
@@ -208,15 +210,27 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
208 size = som_ex->aux_header_size; 210 size = som_ex->aux_header_size;
209 if (size > SOM_PAGESIZE) 211 if (size > SOM_PAGESIZE)
210 goto out; 212 goto out;
211 hpuxhdr = (struct som_exec_auxhdr *) kmalloc(size, GFP_KERNEL); 213 hpuxhdr = kmalloc(size, GFP_KERNEL);
212 if (!hpuxhdr) 214 if (!hpuxhdr)
213 goto out; 215 goto out;
214 216
215 retval = kernel_read(bprm->file, som_ex->aux_header_location, 217 retval = kernel_read(bprm->file, som_ex->aux_header_location,
216 (char *) hpuxhdr, size); 218 (char *) hpuxhdr, size);
219 if (retval != size) {
220 if (retval >= 0)
221 retval = -EIO;
222 goto out_free;
223 }
224
225 files = current->files; /* Refcounted so ok */
226 retval = unshare_files();
217 if (retval < 0) 227 if (retval < 0)
218 goto out_free; 228 goto out_free;
219#error "Fix security hole before enabling me" 229 if (files == current->files) {
230 put_files_struct(files);
231 files = NULL;
232 }
233
220 retval = get_unused_fd(); 234 retval = get_unused_fd();
221 if (retval < 0) 235 if (retval < 0)
222 goto out_free; 236 goto out_free;
diff --git a/fs/cifs/README b/fs/cifs/README
index 5f0e1bd64fee..432e515431c4 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -269,7 +269,7 @@ A partial list of the supported mount options follows:
269 (gid) mount option is specified. For the uid (gid) of newly 269 (gid) mount option is specified. For the uid (gid) of newly
270 created files and directories, ie files created since 270 created files and directories, ie files created since
271 the last mount of the server share, the expected uid 271 the last mount of the server share, the expected uid
272 (gid) is cached as as long as the inode remains in 272 (gid) is cached as long as the inode remains in
273 memory on the client. Also note that permission 273 memory on the client. Also note that permission
274 checks (authorization checks) on accesses to a file occur 274 checks (authorization checks) on accesses to a file occur
275 at the server, but there are cases in which an administrator 275 at the server, but there are cases in which an administrator
@@ -375,7 +375,7 @@ A partial list of the supported mount options follows:
375 the local process on newly created files, directories, and 375 the local process on newly created files, directories, and
376 devices (create, mkdir, mknod). If the CIFS Unix Extensions 376 devices (create, mkdir, mknod). If the CIFS Unix Extensions
377 are not negotiated, for newly created files and directories 377 are not negotiated, for newly created files and directories
378 instead of using the default uid and gid specified on the 378 instead of using the default uid and gid specified on
379 the mount, cache the new file's uid and gid locally which means 379 the mount, cache the new file's uid and gid locally which means
380 that the uid for the file can change when the inode is 380 that the uid for the file can change when the inode is
381 reloaded (or the user remounts the share). 381 reloaded (or the user remounts the share).
@@ -440,7 +440,7 @@ A partial list of the supported mount options follows:
440 create device files and fifos in a format compatible with 440 create device files and fifos in a format compatible with
441 Services for Unix (SFU). In addition retrieve bits 10-12 441 Services for Unix (SFU). In addition retrieve bits 10-12
442 of the mode via the SETFILEBITS extended attribute (as 442 of the mode via the SETFILEBITS extended attribute (as
443 SFU does). In the future the bottom 9 bits of the mode 443 SFU does). In the future the bottom 9 bits of the
444 mode also will be emulated using queries of the security 444 mode also will be emulated using queries of the security
445 descriptor (ACL). 445 descriptor (ACL).
446 sign Must use packet signing (helps avoid unwanted data modification 446 sign Must use packet signing (helps avoid unwanted data modification
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0e9ba0b9d71e..c78762051da4 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -772,12 +772,12 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
772 separator[1] = 0; 772 separator[1] = 0;
773 773
774 memset(vol->source_rfc1001_name,0x20,15); 774 memset(vol->source_rfc1001_name,0x20,15);
775 for(i=0;i < strnlen(system_utsname.nodename,15);i++) { 775 for(i=0;i < strnlen(utsname()->nodename,15);i++) {
776 /* does not have to be a perfect mapping since the field is 776 /* does not have to be a perfect mapping since the field is
777 informational, only used for servers that do not support 777 informational, only used for servers that do not support
778 port 445 and it can be overridden at mount time */ 778 port 445 and it can be overridden at mount time */
779 vol->source_rfc1001_name[i] = 779 vol->source_rfc1001_name[i] =
780 toupper(system_utsname.nodename[i]); 780 toupper(utsname()->nodename[i]);
781 } 781 }
782 vol->source_rfc1001_name[15] = 0; 782 vol->source_rfc1001_name[15] = 0;
783 /* null target name indicates to use *SMBSERVR default called name 783 /* null target name indicates to use *SMBSERVR default called name
@@ -2153,7 +2153,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2153 32, nls_codepage); 2153 32, nls_codepage);
2154 bcc_ptr += 2 * bytes_returned; 2154 bcc_ptr += 2 * bytes_returned;
2155 bytes_returned = 2155 bytes_returned =
2156 cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 2156 cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release,
2157 32, nls_codepage); 2157 32, nls_codepage);
2158 bcc_ptr += 2 * bytes_returned; 2158 bcc_ptr += 2 * bytes_returned;
2159 bcc_ptr += 2; 2159 bcc_ptr += 2;
@@ -2180,8 +2180,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2180 } 2180 }
2181 strcpy(bcc_ptr, "Linux version "); 2181 strcpy(bcc_ptr, "Linux version ");
2182 bcc_ptr += strlen("Linux version "); 2182 bcc_ptr += strlen("Linux version ");
2183 strcpy(bcc_ptr, system_utsname.release); 2183 strcpy(bcc_ptr, utsname()->release);
2184 bcc_ptr += strlen(system_utsname.release) + 1; 2184 bcc_ptr += strlen(utsname()->release) + 1;
2185 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); 2185 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
2186 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; 2186 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
2187 } 2187 }
@@ -2445,7 +2445,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2445 32, nls_codepage); 2445 32, nls_codepage);
2446 bcc_ptr += 2 * bytes_returned; 2446 bcc_ptr += 2 * bytes_returned;
2447 bytes_returned = 2447 bytes_returned =
2448 cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32, 2448 cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32,
2449 nls_codepage); 2449 nls_codepage);
2450 bcc_ptr += 2 * bytes_returned; 2450 bcc_ptr += 2 * bytes_returned;
2451 bcc_ptr += 2; /* null terminate Linux version */ 2451 bcc_ptr += 2; /* null terminate Linux version */
@@ -2462,8 +2462,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2462 } else { /* ASCII */ 2462 } else { /* ASCII */
2463 strcpy(bcc_ptr, "Linux version "); 2463 strcpy(bcc_ptr, "Linux version ");
2464 bcc_ptr += strlen("Linux version "); 2464 bcc_ptr += strlen("Linux version ");
2465 strcpy(bcc_ptr, system_utsname.release); 2465 strcpy(bcc_ptr, utsname()->release);
2466 bcc_ptr += strlen(system_utsname.release) + 1; 2466 bcc_ptr += strlen(utsname()->release) + 1;
2467 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); 2467 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
2468 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; 2468 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
2469 bcc_ptr++; /* empty domain field */ 2469 bcc_ptr++; /* empty domain field */
@@ -2836,7 +2836,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2836 32, nls_codepage); 2836 32, nls_codepage);
2837 bcc_ptr += 2 * bytes_returned; 2837 bcc_ptr += 2 * bytes_returned;
2838 bytes_returned = 2838 bytes_returned =
2839 cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32, 2839 cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32,
2840 nls_codepage); 2840 nls_codepage);
2841 bcc_ptr += 2 * bytes_returned; 2841 bcc_ptr += 2 * bytes_returned;
2842 bcc_ptr += 2; /* null term version string */ 2842 bcc_ptr += 2; /* null term version string */
@@ -2888,8 +2888,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2888 2888
2889 strcpy(bcc_ptr, "Linux version "); 2889 strcpy(bcc_ptr, "Linux version ");
2890 bcc_ptr += strlen("Linux version "); 2890 bcc_ptr += strlen("Linux version ");
2891 strcpy(bcc_ptr, system_utsname.release); 2891 strcpy(bcc_ptr, utsname()->release);
2892 bcc_ptr += strlen(system_utsname.release) + 1; 2892 bcc_ptr += strlen(utsname()->release) + 1;
2893 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); 2893 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
2894 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; 2894 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
2895 bcc_ptr++; /* null domain */ 2895 bcc_ptr++; /* null domain */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d1705ab8136e..22b4c35dcfe3 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -111,7 +111,7 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
111 bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32, 111 bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
112 nls_cp); 112 nls_cp);
113 bcc_ptr += 2 * bytes_ret; 113 bcc_ptr += 2 * bytes_ret;
114 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 114 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
115 32, nls_cp); 115 32, nls_cp);
116 bcc_ptr += 2 * bytes_ret; 116 bcc_ptr += 2 * bytes_ret;
117 bcc_ptr += 2; /* trailing null */ 117 bcc_ptr += 2; /* trailing null */
@@ -158,8 +158,8 @@ static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
158 158
159 strcpy(bcc_ptr, "Linux version "); 159 strcpy(bcc_ptr, "Linux version ");
160 bcc_ptr += strlen("Linux version "); 160 bcc_ptr += strlen("Linux version ");
161 strcpy(bcc_ptr, system_utsname.release); 161 strcpy(bcc_ptr, init_utsname()->release);
162 bcc_ptr += strlen(system_utsname.release) + 1; 162 bcc_ptr += strlen(init_utsname()->release) + 1;
163 163
164 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); 164 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
165 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; 165 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
diff --git a/fs/compat.c b/fs/compat.c
index 13fb08d096c4..4d3fbcb2ddb1 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -56,8 +56,6 @@
56 56
57int compat_log = 1; 57int compat_log = 1;
58 58
59extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
60
61int compat_printk(const char *fmt, ...) 59int compat_printk(const char *fmt, ...)
62{ 60{
63 va_list ap; 61 va_list ap;
@@ -916,20 +914,24 @@ struct compat_readdir_callback {
916}; 914};
917 915
918static int compat_fillonedir(void *__buf, const char *name, int namlen, 916static int compat_fillonedir(void *__buf, const char *name, int namlen,
919 loff_t offset, ino_t ino, unsigned int d_type) 917 loff_t offset, u64 ino, unsigned int d_type)
920{ 918{
921 struct compat_readdir_callback *buf = __buf; 919 struct compat_readdir_callback *buf = __buf;
922 struct compat_old_linux_dirent __user *dirent; 920 struct compat_old_linux_dirent __user *dirent;
921 compat_ulong_t d_ino;
923 922
924 if (buf->result) 923 if (buf->result)
925 return -EINVAL; 924 return -EINVAL;
925 d_ino = ino;
926 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
927 return -EOVERFLOW;
926 buf->result++; 928 buf->result++;
927 dirent = buf->dirent; 929 dirent = buf->dirent;
928 if (!access_ok(VERIFY_WRITE, dirent, 930 if (!access_ok(VERIFY_WRITE, dirent,
929 (unsigned long)(dirent->d_name + namlen + 1) - 931 (unsigned long)(dirent->d_name + namlen + 1) -
930 (unsigned long)dirent)) 932 (unsigned long)dirent))
931 goto efault; 933 goto efault;
932 if ( __put_user(ino, &dirent->d_ino) || 934 if ( __put_user(d_ino, &dirent->d_ino) ||
933 __put_user(offset, &dirent->d_offset) || 935 __put_user(offset, &dirent->d_offset) ||
934 __put_user(namlen, &dirent->d_namlen) || 936 __put_user(namlen, &dirent->d_namlen) ||
935 __copy_to_user(dirent->d_name, name, namlen) || 937 __copy_to_user(dirent->d_name, name, namlen) ||
@@ -980,22 +982,26 @@ struct compat_getdents_callback {
980}; 982};
981 983
982static int compat_filldir(void *__buf, const char *name, int namlen, 984static int compat_filldir(void *__buf, const char *name, int namlen,
983 loff_t offset, ino_t ino, unsigned int d_type) 985 loff_t offset, u64 ino, unsigned int d_type)
984{ 986{
985 struct compat_linux_dirent __user * dirent; 987 struct compat_linux_dirent __user * dirent;
986 struct compat_getdents_callback *buf = __buf; 988 struct compat_getdents_callback *buf = __buf;
989 compat_ulong_t d_ino;
987 int reclen = COMPAT_ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); 990 int reclen = COMPAT_ROUND_UP(NAME_OFFSET(dirent) + namlen + 2);
988 991
989 buf->error = -EINVAL; /* only used if we fail.. */ 992 buf->error = -EINVAL; /* only used if we fail.. */
990 if (reclen > buf->count) 993 if (reclen > buf->count)
991 return -EINVAL; 994 return -EINVAL;
995 d_ino = ino;
996 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
997 return -EOVERFLOW;
992 dirent = buf->previous; 998 dirent = buf->previous;
993 if (dirent) { 999 if (dirent) {
994 if (__put_user(offset, &dirent->d_off)) 1000 if (__put_user(offset, &dirent->d_off))
995 goto efault; 1001 goto efault;
996 } 1002 }
997 dirent = buf->current_dir; 1003 dirent = buf->current_dir;
998 if (__put_user(ino, &dirent->d_ino)) 1004 if (__put_user(d_ino, &dirent->d_ino))
999 goto efault; 1005 goto efault;
1000 if (__put_user(reclen, &dirent->d_reclen)) 1006 if (__put_user(reclen, &dirent->d_reclen))
1001 goto efault; 1007 goto efault;
@@ -1066,7 +1072,7 @@ struct compat_getdents_callback64 {
1066}; 1072};
1067 1073
1068static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset, 1074static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset,
1069 ino_t ino, unsigned int d_type) 1075 u64 ino, unsigned int d_type)
1070{ 1076{
1071 struct linux_dirent64 __user *dirent; 1077 struct linux_dirent64 __user *dirent;
1072 struct compat_getdents_callback64 *buf = __buf; 1078 struct compat_getdents_callback64 *buf = __buf;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 85105e50f7db..e6d5754a715e 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -137,8 +137,8 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
137 if ((retval = fill_read_buffer(file->f_dentry,buffer))) 137 if ((retval = fill_read_buffer(file->f_dentry,buffer)))
138 goto out; 138 goto out;
139 } 139 }
140 pr_debug("%s: count = %d, ppos = %lld, buf = %s\n", 140 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
141 __FUNCTION__,count,*ppos,buffer->page); 141 __FUNCTION__, count, *ppos, buffer->page);
142 retval = flush_read_buffer(buffer,buf,count,ppos); 142 retval = flush_read_buffer(buffer,buf,count,ppos);
143out: 143out:
144 up(&buffer->sem); 144 up(&buffer->sem);
diff --git a/fs/dcache.c b/fs/dcache.c
index fc2faa44f8d1..2355bddad8de 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -291,9 +291,9 @@ struct dentry * dget_locked(struct dentry *dentry)
291 * it can be unhashed only if it has no children, or if it is the root 291 * it can be unhashed only if it has no children, or if it is the root
292 * of a filesystem. 292 * of a filesystem.
293 * 293 *
294 * If the inode has a DCACHE_DISCONNECTED alias, then prefer 294 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
295 * any other hashed alias over that one unless @want_discon is set, 295 * any other hashed alias over that one unless @want_discon is set,
296 * in which case only return a DCACHE_DISCONNECTED alias. 296 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
297 */ 297 */
298 298
299static struct dentry * __d_find_alias(struct inode *inode, int want_discon) 299static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
@@ -309,7 +309,8 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
309 prefetch(next); 309 prefetch(next);
310 alias = list_entry(tmp, struct dentry, d_alias); 310 alias = list_entry(tmp, struct dentry, d_alias);
311 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 311 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
312 if (alias->d_flags & DCACHE_DISCONNECTED) 312 if (IS_ROOT(alias) &&
313 (alias->d_flags & DCACHE_DISCONNECTED))
313 discon_alias = alias; 314 discon_alias = alias;
314 else if (!want_discon) { 315 else if (!want_discon) {
315 __dget_locked(alias); 316 __dget_locked(alias);
@@ -1004,7 +1005,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1004{ 1005{
1005 struct dentry *new = NULL; 1006 struct dentry *new = NULL;
1006 1007
1007 if (inode) { 1008 if (inode && S_ISDIR(inode->i_mode)) {
1008 spin_lock(&dcache_lock); 1009 spin_lock(&dcache_lock);
1009 new = __d_find_alias(inode, 1); 1010 new = __d_find_alias(inode, 1);
1010 if (new) { 1011 if (new) {
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index ecf3da9edf21..e77676df6713 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -252,7 +252,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
252 * 252 *
253 * This function removes a file or directory in debugfs that was previously 253 * This function removes a file or directory in debugfs that was previously
254 * created with a call to another debugfs function (like 254 * created with a call to another debugfs function (like
255 * debufs_create_file() or variants thereof.) 255 * debugfs_create_file() or variants thereof.)
256 * 256 *
257 * This function is required to be called in order for the file to be 257 * This function is required to be called in order for the file to be
258 * removed, no automatic cleanup of files will happen when a module is 258 * removed, no automatic cleanup of files will happen when a module is
diff --git a/fs/dnotify.c b/fs/dnotify.c
index f932591df5a4..2b0442db67e0 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -92,7 +92,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
92 prev = &odn->dn_next; 92 prev = &odn->dn_next;
93 } 93 }
94 94
95 error = f_setown(filp, current->pid, 0); 95 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
96 if (error) 96 if (error)
97 goto out_free; 97 goto out_free;
98 98
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
new file mode 100644
index 000000000000..ca6562451eeb
--- /dev/null
+++ b/fs/ecryptfs/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the Linux 2.6 eCryptfs
3#
4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
new file mode 100644
index 000000000000..ed35a9712fa1
--- /dev/null
+++ b/fs/ecryptfs/crypto.c
@@ -0,0 +1,1659 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2004 Erez Zadok
5 * Copyright (C) 2001-2004 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#include <linux/fs.h>
27#include <linux/mount.h>
28#include <linux/pagemap.h>
29#include <linux/random.h>
30#include <linux/compiler.h>
31#include <linux/key.h>
32#include <linux/namei.h>
33#include <linux/crypto.h>
34#include <linux/file.h>
35#include <linux/scatterlist.h>
36#include "ecryptfs_kernel.h"
37
/*
 * Forward declarations: both page-offset helpers are defined further
 * down in this file, after the page-level routines that call them.
 */
static int ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
					struct page *dst_page, int dst_offset,
					struct page *src_page, int src_offset,
					int size, unsigned char *iv);
static int ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
					struct page *dst_page, int dst_offset,
					struct page *src_page, int src_offset,
					int size, unsigned char *iv);
48
/**
 * ecryptfs_to_hex
 * @dst: Buffer to take hex character representation of contents of
 *       src; must be at least of size (src_size * 2) + 1, because each
 *       sprintf() call also stores a terminating NUL right after the
 *       two digits it emits
 * @src: Buffer to be converted to a hex string representation
 * @src_size: number of bytes to convert
 *
 * Every source byte becomes two lower-case hex digits.  Nothing at all
 * is written to @dst when @src_size is zero.
 */
void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
{
	/* Same type as src_size so the loop bound is never compared
	 * against a signed index. */
	size_t x;

	for (x = 0; x < src_size; x++)
		sprintf(&dst[x * 2], "%.2x", (unsigned char)src[x]);
}
63
/**
 * ecryptfs_from_hex
 * @dst: Receives the decoded bytes; must hold at least @dst_size bytes
 * @src: Hex string to decode; two characters are read per output byte
 * @dst_size: number of bytes to produce, i.e. number of hex character
 *            pairs consumed from @src
 *
 * Each pair of characters is copied into a small NUL-terminated
 * scratch string and parsed base-16 with simple_strtol().
 */
void ecryptfs_from_hex(char *dst, char *src, int dst_size)
{
	char pair[3];
	int i;

	pair[2] = '\0';
	for (i = 0; i < dst_size; i++) {
		pair[0] = src[2 * i];
		pair[1] = src[2 * i + 1];
		dst[i] = (unsigned char)simple_strtol(pair, NULL, 16);
	}
}
82
83/**
84 * ecryptfs_calculate_md5 - calculates the md5 of @src
85 * @dst: Pointer to 16 bytes of allocated memory
86 * @crypt_stat: Pointer to crypt_stat struct for the current inode
87 * @src: Data to be md5'd
88 * @len: Length of @src
89 *
90 * Uses the allocated crypto context that crypt_stat references to
91 * generate the MD5 sum of the contents of src.
92 */
93static int ecryptfs_calculate_md5(char *dst,
94 struct ecryptfs_crypt_stat *crypt_stat,
95 char *src, int len)
96{
97 int rc = 0;
98 struct scatterlist sg;
99
100 mutex_lock(&crypt_stat->cs_md5_tfm_mutex);
101 sg_init_one(&sg, (u8 *)src, len);
102 if (!crypt_stat->md5_tfm) {
103 crypt_stat->md5_tfm =
104 crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
105 if (!crypt_stat->md5_tfm) {
106 rc = -ENOMEM;
107 ecryptfs_printk(KERN_ERR, "Error attempting to "
108 "allocate crypto context\n");
109 goto out;
110 }
111 }
112 crypto_digest_init(crypt_stat->md5_tfm);
113 crypto_digest_update(crypt_stat->md5_tfm, &sg, 1);
114 crypto_digest_final(crypt_stat->md5_tfm, dst);
115 mutex_unlock(&crypt_stat->cs_md5_tfm_mutex);
116out:
117 return rc;
118}
119
120/**
121 * ecryptfs_derive_iv
122 * @iv: destination for the derived iv vale
123 * @crypt_stat: Pointer to crypt_stat struct for the current inode
124 * @offset: Offset of the page whose's iv we are to derive
125 *
126 * Generate the initialization vector from the given root IV and page
127 * offset.
128 *
129 * Returns zero on success; non-zero on error.
130 */
131static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
132 pgoff_t offset)
133{
134 int rc = 0;
135 char dst[MD5_DIGEST_SIZE];
136 char src[ECRYPTFS_MAX_IV_BYTES + 16];
137
138 if (unlikely(ecryptfs_verbosity > 0)) {
139 ecryptfs_printk(KERN_DEBUG, "root iv:\n");
140 ecryptfs_dump_hex(crypt_stat->root_iv, crypt_stat->iv_bytes);
141 }
142 /* TODO: It is probably secure to just cast the least
143 * significant bits of the root IV into an unsigned long and
144 * add the offset to that rather than go through all this
145 * hashing business. -Halcrow */
146 memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes);
147 memset((src + crypt_stat->iv_bytes), 0, 16);
148 snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset);
149 if (unlikely(ecryptfs_verbosity > 0)) {
150 ecryptfs_printk(KERN_DEBUG, "source:\n");
151 ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16));
152 }
153 rc = ecryptfs_calculate_md5(dst, crypt_stat, src,
154 (crypt_stat->iv_bytes + 16));
155 if (rc) {
156 ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
157 "MD5 while generating IV for a page\n");
158 goto out;
159 }
160 memcpy(iv, dst, crypt_stat->iv_bytes);
161 if (unlikely(ecryptfs_verbosity > 0)) {
162 ecryptfs_printk(KERN_DEBUG, "derived iv:\n");
163 ecryptfs_dump_hex(iv, crypt_stat->iv_bytes);
164 }
165out:
166 return rc;
167}
168
169/**
170 * ecryptfs_init_crypt_stat
171 * @crypt_stat: Pointer to the crypt_stat struct to initialize.
172 *
173 * Initialize the crypt_stat structure.
174 */
175void
176ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
177{
178 memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
179 mutex_init(&crypt_stat->cs_mutex);
180 mutex_init(&crypt_stat->cs_tfm_mutex);
181 mutex_init(&crypt_stat->cs_md5_tfm_mutex);
182 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED);
183}
184
185/**
186 * ecryptfs_destruct_crypt_stat
187 * @crypt_stat: Pointer to the crypt_stat struct to initialize.
188 *
189 * Releases all memory associated with a crypt_stat struct.
190 */
191void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
192{
193 if (crypt_stat->tfm)
194 crypto_free_tfm(crypt_stat->tfm);
195 if (crypt_stat->md5_tfm)
196 crypto_free_tfm(crypt_stat->md5_tfm);
197 memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
198}
199
200void ecryptfs_destruct_mount_crypt_stat(
201 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
202{
203 if (mount_crypt_stat->global_auth_tok_key)
204 key_put(mount_crypt_stat->global_auth_tok_key);
205 if (mount_crypt_stat->global_key_tfm)
206 crypto_free_tfm(mount_crypt_stat->global_key_tfm);
207 memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat));
208}
209
210/**
211 * virt_to_scatterlist
212 * @addr: Virtual address
213 * @size: Size of data; should be an even multiple of the block size
214 * @sg: Pointer to scatterlist array; set to NULL to obtain only
215 * the number of scatterlist structs required in array
216 * @sg_size: Max array size
217 *
218 * Fills in a scatterlist array with page references for a passed
219 * virtual address.
220 *
221 * Returns the number of scatterlist structs in array used
222 */
223int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
224 int sg_size)
225{
226 int i = 0;
227 struct page *pg;
228 int offset;
229 int remainder_of_page;
230
231 while (size > 0 && i < sg_size) {
232 pg = virt_to_page(addr);
233 offset = offset_in_page(addr);
234 if (sg) {
235 sg[i].page = pg;
236 sg[i].offset = offset;
237 }
238 remainder_of_page = PAGE_CACHE_SIZE - offset;
239 if (size >= remainder_of_page) {
240 if (sg)
241 sg[i].length = remainder_of_page;
242 addr += remainder_of_page;
243 size -= remainder_of_page;
244 } else {
245 if (sg)
246 sg[i].length = size;
247 addr += size;
248 size = 0;
249 }
250 i++;
251 }
252 if (size > 0)
253 return -ENOMEM;
254 return i;
255}
256
257/**
258 * encrypt_scatterlist
259 * @crypt_stat: Pointer to the crypt_stat struct to initialize.
260 * @dest_sg: Destination of encrypted data
261 * @src_sg: Data to be encrypted
262 * @size: Length of data to be encrypted
263 * @iv: iv to use during encryption
264 *
265 * Returns the number of bytes encrypted; negative value on error
266 */
267static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
268 struct scatterlist *dest_sg,
269 struct scatterlist *src_sg, int size,
270 unsigned char *iv)
271{
272 int rc = 0;
273
274 BUG_ON(!crypt_stat || !crypt_stat->tfm
275 || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
276 ECRYPTFS_STRUCT_INITIALIZED));
277 if (unlikely(ecryptfs_verbosity > 0)) {
278 ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n",
279 crypt_stat->key_size);
280 ecryptfs_dump_hex(crypt_stat->key,
281 crypt_stat->key_size);
282 }
283 /* Consider doing this once, when the file is opened */
284 mutex_lock(&crypt_stat->cs_tfm_mutex);
285 rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
286 crypt_stat->key_size);
287 if (rc) {
288 ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
289 rc);
290 mutex_unlock(&crypt_stat->cs_tfm_mutex);
291 rc = -EINVAL;
292 goto out;
293 }
294 ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size);
295 crypto_cipher_encrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, iv);
296 mutex_unlock(&crypt_stat->cs_tfm_mutex);
297out:
298 return rc;
299}
300
301static void
302ecryptfs_extent_to_lwr_pg_idx_and_offset(unsigned long *lower_page_idx,
303 int *byte_offset,
304 struct ecryptfs_crypt_stat *crypt_stat,
305 unsigned long extent_num)
306{
307 unsigned long lower_extent_num;
308 int extents_occupied_by_headers_at_front;
309 int bytes_occupied_by_headers_at_front;
310 int extent_offset;
311 int extents_per_page;
312
313 bytes_occupied_by_headers_at_front =
314 ( crypt_stat->header_extent_size
315 * crypt_stat->num_header_extents_at_front );
316 extents_occupied_by_headers_at_front =
317 ( bytes_occupied_by_headers_at_front
318 / crypt_stat->extent_size );
319 lower_extent_num = extents_occupied_by_headers_at_front + extent_num;
320 extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
321 (*lower_page_idx) = lower_extent_num / extents_per_page;
322 extent_offset = lower_extent_num % extents_per_page;
323 (*byte_offset) = extent_offset * crypt_stat->extent_size;
324 ecryptfs_printk(KERN_DEBUG, " * crypt_stat->header_extent_size = "
325 "[%d]\n", crypt_stat->header_extent_size);
326 ecryptfs_printk(KERN_DEBUG, " * crypt_stat->"
327 "num_header_extents_at_front = [%d]\n",
328 crypt_stat->num_header_extents_at_front);
329 ecryptfs_printk(KERN_DEBUG, " * extents_occupied_by_headers_at_"
330 "front = [%d]\n", extents_occupied_by_headers_at_front);
331 ecryptfs_printk(KERN_DEBUG, " * lower_extent_num = [0x%.16x]\n",
332 lower_extent_num);
333 ecryptfs_printk(KERN_DEBUG, " * extents_per_page = [%d]\n",
334 extents_per_page);
335 ecryptfs_printk(KERN_DEBUG, " * (*lower_page_idx) = [0x%.16x]\n",
336 (*lower_page_idx));
337 ecryptfs_printk(KERN_DEBUG, " * extent_offset = [%d]\n",
338 extent_offset);
339 ecryptfs_printk(KERN_DEBUG, " * (*byte_offset) = [%d]\n",
340 (*byte_offset));
341}
342
343static int ecryptfs_write_out_page(struct ecryptfs_page_crypt_context *ctx,
344 struct page *lower_page,
345 struct inode *lower_inode,
346 int byte_offset_in_page, int bytes_to_write)
347{
348 int rc = 0;
349
350 if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) {
351 rc = ecryptfs_commit_lower_page(lower_page, lower_inode,
352 ctx->param.lower_file,
353 byte_offset_in_page,
354 bytes_to_write);
355 if (rc) {
356 ecryptfs_printk(KERN_ERR, "Error calling lower "
357 "commit; rc = [%d]\n", rc);
358 goto out;
359 }
360 } else {
361 rc = ecryptfs_writepage_and_release_lower_page(lower_page,
362 lower_inode,
363 ctx->param.wbc);
364 if (rc) {
365 ecryptfs_printk(KERN_ERR, "Error calling lower "
366 "writepage(); rc = [%d]\n", rc);
367 goto out;
368 }
369 }
370out:
371 return rc;
372}
373
374static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx,
375 struct page **lower_page,
376 struct inode *lower_inode,
377 unsigned long lower_page_idx,
378 int byte_offset_in_page)
379{
380 int rc = 0;
381
382 if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) {
383 /* TODO: Limit this to only the data extents that are
384 * needed */
385 rc = ecryptfs_get_lower_page(lower_page, lower_inode,
386 ctx->param.lower_file,
387 lower_page_idx,
388 byte_offset_in_page,
389 (PAGE_CACHE_SIZE
390 - byte_offset_in_page));
391 if (rc) {
392 ecryptfs_printk(
393 KERN_ERR, "Error attempting to grab, map, "
394 "and prepare_write lower page with index "
395 "[0x%.16x]; rc = [%d]\n", lower_page_idx, rc);
396 goto out;
397 }
398 } else {
399 rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL,
400 lower_inode,
401 lower_page_idx);
402 if (rc) {
403 ecryptfs_printk(
404 KERN_ERR, "Error attempting to grab and map "
405 "lower page with index [0x%.16x]; rc = [%d]\n",
406 lower_page_idx, rc);
407 goto out;
408 }
409 }
410out:
411 return rc;
412}
413
414/**
415 * ecryptfs_encrypt_page
416 * @ctx: The context of the page
417 *
418 * Encrypt an eCryptfs page. This is done on a per-extent basis. Note
419 * that eCryptfs pages may straddle the lower pages -- for instance,
420 * if the file was created on a machine with an 8K page size
421 * (resulting in an 8K header), and then the file is copied onto a
422 * host with a 32K page size, then when reading page 0 of the eCryptfs
423 * file, 24K of page 0 of the lower file will be read and decrypted,
424 * and then 8K of page 1 of the lower file will be read and decrypted.
425 *
426 * The actual operations performed on each page depends on the
427 * contents of the ecryptfs_page_crypt_context struct.
428 *
429 * Returns zero on success; negative on error
430 */
431int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx)
432{
433 char extent_iv[ECRYPTFS_MAX_IV_BYTES];
434 unsigned long base_extent;
435 unsigned long extent_offset = 0;
436 unsigned long lower_page_idx = 0;
437 unsigned long prior_lower_page_idx = 0;
438 struct page *lower_page;
439 struct inode *lower_inode;
440 struct ecryptfs_inode_info *inode_info;
441 struct ecryptfs_crypt_stat *crypt_stat;
442 int rc = 0;
443 int lower_byte_offset = 0;
444 int orig_byte_offset = 0;
445 int num_extents_per_page;
446#define ECRYPTFS_PAGE_STATE_UNREAD 0
447#define ECRYPTFS_PAGE_STATE_READ 1
448#define ECRYPTFS_PAGE_STATE_MODIFIED 2
449#define ECRYPTFS_PAGE_STATE_WRITTEN 3
450 int page_state;
451
452 lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host);
453 inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host);
454 crypt_stat = &inode_info->crypt_stat;
455 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) {
456 rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode,
457 ctx->param.lower_file);
458 if (rc)
459 ecryptfs_printk(KERN_ERR, "Error attempting to copy "
460 "page at index [0x%.16x]\n",
461 ctx->page->index);
462 goto out;
463 }
464 num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
465 base_extent = (ctx->page->index * num_extents_per_page);
466 page_state = ECRYPTFS_PAGE_STATE_UNREAD;
467 while (extent_offset < num_extents_per_page) {
468 ecryptfs_extent_to_lwr_pg_idx_and_offset(
469 &lower_page_idx, &lower_byte_offset, crypt_stat,
470 (base_extent + extent_offset));
471 if (prior_lower_page_idx != lower_page_idx
472 && page_state == ECRYPTFS_PAGE_STATE_MODIFIED) {
473 rc = ecryptfs_write_out_page(ctx, lower_page,
474 lower_inode,
475 orig_byte_offset,
476 (PAGE_CACHE_SIZE
477 - orig_byte_offset));
478 if (rc) {
479 ecryptfs_printk(KERN_ERR, "Error attempting "
480 "to write out page; rc = [%d]"
481 "\n", rc);
482 goto out;
483 }
484 page_state = ECRYPTFS_PAGE_STATE_WRITTEN;
485 }
486 if (page_state == ECRYPTFS_PAGE_STATE_UNREAD
487 || page_state == ECRYPTFS_PAGE_STATE_WRITTEN) {
488 rc = ecryptfs_read_in_page(ctx, &lower_page,
489 lower_inode, lower_page_idx,
490 lower_byte_offset);
491 if (rc) {
492 ecryptfs_printk(KERN_ERR, "Error attempting "
493 "to read in lower page with "
494 "index [0x%.16x]; rc = [%d]\n",
495 lower_page_idx, rc);
496 goto out;
497 }
498 orig_byte_offset = lower_byte_offset;
499 prior_lower_page_idx = lower_page_idx;
500 page_state = ECRYPTFS_PAGE_STATE_READ;
501 }
502 BUG_ON(!(page_state == ECRYPTFS_PAGE_STATE_MODIFIED
503 || page_state == ECRYPTFS_PAGE_STATE_READ));
504 rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
505 (base_extent + extent_offset));
506 if (rc) {
507 ecryptfs_printk(KERN_ERR, "Error attempting to "
508 "derive IV for extent [0x%.16x]; "
509 "rc = [%d]\n",
510 (base_extent + extent_offset), rc);
511 goto out;
512 }
513 if (unlikely(ecryptfs_verbosity > 0)) {
514 ecryptfs_printk(KERN_DEBUG, "Encrypting extent "
515 "with iv:\n");
516 ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
517 ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
518 "encryption:\n");
519 ecryptfs_dump_hex((char *)
520 (page_address(ctx->page)
521 + (extent_offset
522 * crypt_stat->extent_size)), 8);
523 }
524 rc = ecryptfs_encrypt_page_offset(
525 crypt_stat, lower_page, lower_byte_offset, ctx->page,
526 (extent_offset * crypt_stat->extent_size),
527 crypt_stat->extent_size, extent_iv);
528 ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; "
529 "rc = [%d]\n",
530 (base_extent + extent_offset), rc);
531 if (unlikely(ecryptfs_verbosity > 0)) {
532 ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
533 "encryption:\n");
534 ecryptfs_dump_hex((char *)(page_address(lower_page)
535 + lower_byte_offset), 8);
536 }
537 page_state = ECRYPTFS_PAGE_STATE_MODIFIED;
538 extent_offset++;
539 }
540 BUG_ON(orig_byte_offset != 0);
541 rc = ecryptfs_write_out_page(ctx, lower_page, lower_inode, 0,
542 (lower_byte_offset
543 + crypt_stat->extent_size));
544 if (rc) {
545 ecryptfs_printk(KERN_ERR, "Error attempting to write out "
546 "page; rc = [%d]\n", rc);
547 goto out;
548 }
549out:
550 return rc;
551}
552
553/**
554 * ecryptfs_decrypt_page
555 * @file: The ecryptfs file
556 * @page: The page in ecryptfs to decrypt
557 *
558 * Decrypt an eCryptfs page. This is done on a per-extent basis. Note
559 * that eCryptfs pages may straddle the lower pages -- for instance,
560 * if the file was created on a machine with an 8K page size
561 * (resulting in an 8K header), and then the file is copied onto a
562 * host with a 32K page size, then when reading page 0 of the eCryptfs
563 * file, 24K of page 0 of the lower file will be read and decrypted,
564 * and then 8K of page 1 of the lower file will be read and decrypted.
565 *
566 * Returns zero on success; negative on error
567 */
568int ecryptfs_decrypt_page(struct file *file, struct page *page)
569{
570 char extent_iv[ECRYPTFS_MAX_IV_BYTES];
571 unsigned long base_extent;
572 unsigned long extent_offset = 0;
573 unsigned long lower_page_idx = 0;
574 unsigned long prior_lower_page_idx = 0;
575 struct page *lower_page;
576 char *lower_page_virt = NULL;
577 struct inode *lower_inode;
578 struct ecryptfs_crypt_stat *crypt_stat;
579 int rc = 0;
580 int byte_offset;
581 int num_extents_per_page;
582 int page_state;
583
584 crypt_stat = &(ecryptfs_inode_to_private(
585 page->mapping->host)->crypt_stat);
586 lower_inode = ecryptfs_inode_to_lower(page->mapping->host);
587 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) {
588 rc = ecryptfs_do_readpage(file, page, page->index);
589 if (rc)
590 ecryptfs_printk(KERN_ERR, "Error attempting to copy "
591 "page at index [0x%.16x]\n",
592 page->index);
593 goto out;
594 }
595 num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
596 base_extent = (page->index * num_extents_per_page);
597 lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache,
598 SLAB_KERNEL);
599 if (!lower_page_virt) {
600 rc = -ENOMEM;
601 ecryptfs_printk(KERN_ERR, "Error getting page for encrypted "
602 "lower page(s)\n");
603 goto out;
604 }
605 lower_page = virt_to_page(lower_page_virt);
606 page_state = ECRYPTFS_PAGE_STATE_UNREAD;
607 while (extent_offset < num_extents_per_page) {
608 ecryptfs_extent_to_lwr_pg_idx_and_offset(
609 &lower_page_idx, &byte_offset, crypt_stat,
610 (base_extent + extent_offset));
611 if (prior_lower_page_idx != lower_page_idx
612 || page_state == ECRYPTFS_PAGE_STATE_UNREAD) {
613 rc = ecryptfs_do_readpage(file, lower_page,
614 lower_page_idx);
615 if (rc) {
616 ecryptfs_printk(KERN_ERR, "Error reading "
617 "lower encrypted page; rc = "
618 "[%d]\n", rc);
619 goto out;
620 }
621 prior_lower_page_idx = lower_page_idx;
622 page_state = ECRYPTFS_PAGE_STATE_READ;
623 }
624 rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
625 (base_extent + extent_offset));
626 if (rc) {
627 ecryptfs_printk(KERN_ERR, "Error attempting to "
628 "derive IV for extent [0x%.16x]; rc = "
629 "[%d]\n",
630 (base_extent + extent_offset), rc);
631 goto out;
632 }
633 if (unlikely(ecryptfs_verbosity > 0)) {
634 ecryptfs_printk(KERN_DEBUG, "Decrypting extent "
635 "with iv:\n");
636 ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
637 ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
638 "decryption:\n");
639 ecryptfs_dump_hex((lower_page_virt + byte_offset), 8);
640 }
641 rc = ecryptfs_decrypt_page_offset(crypt_stat, page,
642 (extent_offset
643 * crypt_stat->extent_size),
644 lower_page, byte_offset,
645 crypt_stat->extent_size,
646 extent_iv);
647 if (rc != crypt_stat->extent_size) {
648 ecryptfs_printk(KERN_ERR, "Error attempting to "
649 "decrypt extent [0x%.16x]\n",
650 (base_extent + extent_offset));
651 goto out;
652 }
653 rc = 0;
654 if (unlikely(ecryptfs_verbosity > 0)) {
655 ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
656 "decryption:\n");
657 ecryptfs_dump_hex((char *)(page_address(page)
658 + byte_offset), 8);
659 }
660 extent_offset++;
661 }
662out:
663 if (lower_page_virt)
664 kmem_cache_free(ecryptfs_lower_page_cache, lower_page_virt);
665 return rc;
666}
667
668/**
669 * decrypt_scatterlist
670 *
671 * Returns the number of bytes decrypted; negative value on error
672 */
673static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
674 struct scatterlist *dest_sg,
675 struct scatterlist *src_sg, int size,
676 unsigned char *iv)
677{
678 int rc = 0;
679
680 /* Consider doing this once, when the file is opened */
681 mutex_lock(&crypt_stat->cs_tfm_mutex);
682 rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
683 crypt_stat->key_size);
684 if (rc) {
685 ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
686 rc);
687 mutex_unlock(&crypt_stat->cs_tfm_mutex);
688 rc = -EINVAL;
689 goto out;
690 }
691 ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size);
692 rc = crypto_cipher_decrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size,
693 iv);
694 mutex_unlock(&crypt_stat->cs_tfm_mutex);
695 if (rc) {
696 ecryptfs_printk(KERN_ERR, "Error decrypting; rc = [%d]\n",
697 rc);
698 goto out;
699 }
700 rc = size;
701out:
702 return rc;
703}
704
705/**
706 * ecryptfs_encrypt_page_offset
707 *
708 * Returns the number of bytes encrypted
709 */
710static int
711ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
712 struct page *dst_page, int dst_offset,
713 struct page *src_page, int src_offset, int size,
714 unsigned char *iv)
715{
716 struct scatterlist src_sg, dst_sg;
717
718 src_sg.page = src_page;
719 src_sg.offset = src_offset;
720 src_sg.length = size;
721 dst_sg.page = dst_page;
722 dst_sg.offset = dst_offset;
723 dst_sg.length = size;
724 return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
725}
726
727/**
728 * ecryptfs_decrypt_page_offset
729 *
730 * Returns the number of bytes decrypted
731 */
732static int
733ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
734 struct page *dst_page, int dst_offset,
735 struct page *src_page, int src_offset, int size,
736 unsigned char *iv)
737{
738 struct scatterlist src_sg, dst_sg;
739
740 src_sg.page = src_page;
741 src_sg.offset = src_offset;
742 src_sg.length = size;
743 dst_sg.page = dst_page;
744 dst_sg.offset = dst_offset;
745 dst_sg.length = size;
746 return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
747}
748
749#define ECRYPTFS_MAX_SCATTERLIST_LEN 4
750
751/**
752 * ecryptfs_init_crypt_ctx
753 * @crypt_stat: Uninitilized crypt stats structure
754 *
755 * Initialize the crypto context.
756 *
757 * TODO: Performance: Keep a cache of initialized cipher contexts;
758 * only init if needed
759 */
760int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
761{
762 int rc = -EINVAL;
763
764 if (!crypt_stat->cipher) {
765 ecryptfs_printk(KERN_ERR, "No cipher specified\n");
766 goto out;
767 }
768 ecryptfs_printk(KERN_DEBUG,
769 "Initializing cipher [%s]; strlen = [%d]; "
770 "key_size_bits = [%d]\n",
771 crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
772 crypt_stat->key_size << 3);
773 if (crypt_stat->tfm) {
774 rc = 0;
775 goto out;
776 }
777 mutex_lock(&crypt_stat->cs_tfm_mutex);
778 crypt_stat->tfm = crypto_alloc_tfm(crypt_stat->cipher,
779 ECRYPTFS_DEFAULT_CHAINING_MODE
780 | CRYPTO_TFM_REQ_WEAK_KEY);
781 mutex_unlock(&crypt_stat->cs_tfm_mutex);
782 if (!crypt_stat->tfm) {
783 ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
784 "Error initializing cipher [%s]\n",
785 crypt_stat->cipher);
786 goto out;
787 }
788 rc = 0;
789out:
790 return rc;
791}
792
793static void set_extent_mask_and_shift(struct ecryptfs_crypt_stat *crypt_stat)
794{
795 int extent_size_tmp;
796
797 crypt_stat->extent_mask = 0xFFFFFFFF;
798 crypt_stat->extent_shift = 0;
799 if (crypt_stat->extent_size == 0)
800 return;
801 extent_size_tmp = crypt_stat->extent_size;
802 while ((extent_size_tmp & 0x01) == 0) {
803 extent_size_tmp >>= 1;
804 crypt_stat->extent_mask <<= 1;
805 crypt_stat->extent_shift++;
806 }
807}
808
809void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
810{
811 /* Default values; may be overwritten as we are parsing the
812 * packets. */
813 crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
814 set_extent_mask_and_shift(crypt_stat);
815 crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
816 if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
817 crypt_stat->header_extent_size =
818 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
819 } else
820 crypt_stat->header_extent_size = PAGE_CACHE_SIZE;
821 crypt_stat->num_header_extents_at_front = 1;
822}
823
824/**
825 * ecryptfs_compute_root_iv
826 * @crypt_stats
827 *
828 * On error, sets the root IV to all 0's.
829 */
830int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
831{
832 int rc = 0;
833 char dst[MD5_DIGEST_SIZE];
834
835 BUG_ON(crypt_stat->iv_bytes > MD5_DIGEST_SIZE);
836 BUG_ON(crypt_stat->iv_bytes <= 0);
837 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID)) {
838 rc = -EINVAL;
839 ecryptfs_printk(KERN_WARNING, "Session key not valid; "
840 "cannot generate root IV\n");
841 goto out;
842 }
843 rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key,
844 crypt_stat->key_size);
845 if (rc) {
846 ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
847 "MD5 while generating root IV\n");
848 goto out;
849 }
850 memcpy(crypt_stat->root_iv, dst, crypt_stat->iv_bytes);
851out:
852 if (rc) {
853 memset(crypt_stat->root_iv, 0, crypt_stat->iv_bytes);
854 ECRYPTFS_SET_FLAG(crypt_stat->flags,
855 ECRYPTFS_SECURITY_WARNING);
856 }
857 return rc;
858}
859
860static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
861{
862 get_random_bytes(crypt_stat->key, crypt_stat->key_size);
863 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
864 ecryptfs_compute_root_iv(crypt_stat);
865 if (unlikely(ecryptfs_verbosity > 0)) {
866 ecryptfs_printk(KERN_DEBUG, "Generated new session key:\n");
867 ecryptfs_dump_hex(crypt_stat->key,
868 crypt_stat->key_size);
869 }
870}
871
872/**
873 * ecryptfs_set_default_crypt_stat_vals
874 * @crypt_stat
875 *
876 * Default values in the event that policy does not override them.
877 */
878static void ecryptfs_set_default_crypt_stat_vals(
879 struct ecryptfs_crypt_stat *crypt_stat,
880 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
881{
882 ecryptfs_set_default_sizes(crypt_stat);
883 strcpy(crypt_stat->cipher, ECRYPTFS_DEFAULT_CIPHER);
884 crypt_stat->key_size = ECRYPTFS_DEFAULT_KEY_BYTES;
885 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
886 crypt_stat->file_version = ECRYPTFS_FILE_VERSION;
887 crypt_stat->mount_crypt_stat = mount_crypt_stat;
888}
889
890/**
891 * ecryptfs_new_file_context
892 * @ecryptfs_dentry
893 *
894 * If the crypto context for the file has not yet been established,
895 * this is where we do that. Establishing a new crypto context
896 * involves the following decisions:
897 * - What cipher to use?
898 * - What set of authentication tokens to use?
899 * Here we just worry about getting enough information into the
900 * authentication tokens so that we know that they are available.
901 * We associate the available authentication tokens with the new file
902 * via the set of signatures in the crypt_stat struct. Later, when
903 * the headers are actually written out, we may again defer to
904 * userspace to perform the encryption of the session key; for the
905 * foreseeable future, this will be the case with public key packets.
906 *
907 * Returns zero on success; non-zero otherwise
908 */
909/* Associate an authentication token(s) with the file */
910int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
911{
912 int rc = 0;
913 struct ecryptfs_crypt_stat *crypt_stat =
914 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
915 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
916 &ecryptfs_superblock_to_private(
917 ecryptfs_dentry->d_sb)->mount_crypt_stat;
918 int cipher_name_len;
919
920 ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat);
921 /* See if there are mount crypt options */
922 if (mount_crypt_stat->global_auth_tok) {
923 ecryptfs_printk(KERN_DEBUG, "Initializing context for new "
924 "file using mount_crypt_stat\n");
925 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
926 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
927 memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++],
928 mount_crypt_stat->global_auth_tok_sig,
929 ECRYPTFS_SIG_SIZE_HEX);
930 cipher_name_len =
931 strlen(mount_crypt_stat->global_default_cipher_name);
932 memcpy(crypt_stat->cipher,
933 mount_crypt_stat->global_default_cipher_name,
934 cipher_name_len);
935 crypt_stat->cipher[cipher_name_len] = '\0';
936 crypt_stat->key_size =
937 mount_crypt_stat->global_default_cipher_key_size;
938 ecryptfs_generate_new_key(crypt_stat);
939 } else
940 /* We should not encounter this scenario since we
941 * should detect lack of global_auth_tok at mount time
942 * TODO: Applies to 0.1 release only; remove in future
943 * release */
944 BUG();
945 rc = ecryptfs_init_crypt_ctx(crypt_stat);
946 if (rc)
947 ecryptfs_printk(KERN_ERR, "Error initializing cryptographic "
948 "context for cipher [%s]: rc = [%d]\n",
949 crypt_stat->cipher, rc);
950 return rc;
951}
952
953/**
954 * contains_ecryptfs_marker - check for the ecryptfs marker
955 * @data: The data block in which to check
956 *
957 * Returns one if marker found; zero if not found
958 */
959int contains_ecryptfs_marker(char *data)
960{
961 u32 m_1, m_2;
962
963 memcpy(&m_1, data, 4);
964 m_1 = be32_to_cpu(m_1);
965 memcpy(&m_2, (data + 4), 4);
966 m_2 = be32_to_cpu(m_2);
967 if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
968 return 1;
969 ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
970 "MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2,
971 MAGIC_ECRYPTFS_MARKER);
972 ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = "
973 "[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER));
974 return 0;
975}
976
977struct ecryptfs_flag_map_elem {
978 u32 file_flag;
979 u32 local_flag;
980};
981
982/* Add support for additional flags by adding elements here. */
983static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
984 {0x00000001, ECRYPTFS_ENABLE_HMAC},
985 {0x00000002, ECRYPTFS_ENCRYPTED}
986};
987
988/**
989 * ecryptfs_process_flags
990 * @crypt_stat
991 * @page_virt: Source data to be parsed
992 * @bytes_read: Updated with the number of bytes read
993 *
994 * Returns zero on success; non-zero if the flag set is invalid
995 */
996static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
997 char *page_virt, int *bytes_read)
998{
999 int rc = 0;
1000 int i;
1001 u32 flags;
1002
1003 memcpy(&flags, page_virt, 4);
1004 flags = be32_to_cpu(flags);
1005 for (i = 0; i < ((sizeof(ecryptfs_flag_map)
1006 / sizeof(struct ecryptfs_flag_map_elem))); i++)
1007 if (flags & ecryptfs_flag_map[i].file_flag) {
1008 ECRYPTFS_SET_FLAG(crypt_stat->flags,
1009 ecryptfs_flag_map[i].local_flag);
1010 } else
1011 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
1012 ecryptfs_flag_map[i].local_flag);
1013 /* Version is in top 8 bits of the 32-bit flag vector */
1014 crypt_stat->file_version = ((flags >> 24) & 0xFF);
1015 (*bytes_read) = 4;
1016 return rc;
1017}
1018
1019/**
1020 * write_ecryptfs_marker
1021 * @page_virt: The pointer to in a page to begin writing the marker
1022 * @written: Number of bytes written
1023 *
1024 * Marker = 0x3c81b7f5
1025 */
1026static void write_ecryptfs_marker(char *page_virt, size_t *written)
1027{
1028 u32 m_1, m_2;
1029
1030 get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1031 m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
1032 m_1 = cpu_to_be32(m_1);
1033 memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1034 m_2 = cpu_to_be32(m_2);
1035 memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
1036 (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1037 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1038}
1039
1040static void
1041write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
1042 size_t *written)
1043{
1044 u32 flags = 0;
1045 int i;
1046
1047 for (i = 0; i < ((sizeof(ecryptfs_flag_map)
1048 / sizeof(struct ecryptfs_flag_map_elem))); i++)
1049 if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
1050 ecryptfs_flag_map[i].local_flag))
1051 flags |= ecryptfs_flag_map[i].file_flag;
1052 /* Version is in top 8 bits of the 32-bit flag vector */
1053 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
1054 flags = cpu_to_be32(flags);
1055 memcpy(page_virt, &flags, 4);
1056 (*written) = 4;
1057}
1058
1059struct ecryptfs_cipher_code_str_map_elem {
1060 char cipher_str[16];
1061 u16 cipher_code;
1062};
1063
1064/* Add support for additional ciphers by adding elements here. The
1065 * cipher_code is whatever OpenPGP applicatoins use to identify the
1066 * ciphers. List in order of probability. */
1067static struct ecryptfs_cipher_code_str_map_elem
1068ecryptfs_cipher_code_str_map[] = {
1069 {"aes",RFC2440_CIPHER_AES_128 },
1070 {"blowfish", RFC2440_CIPHER_BLOWFISH},
1071 {"des3_ede", RFC2440_CIPHER_DES3_EDE},
1072 {"cast5", RFC2440_CIPHER_CAST_5},
1073 {"twofish", RFC2440_CIPHER_TWOFISH},
1074 {"cast6", RFC2440_CIPHER_CAST_6},
1075 {"aes", RFC2440_CIPHER_AES_192},
1076 {"aes", RFC2440_CIPHER_AES_256}
1077};
1078
1079/**
1080 * ecryptfs_code_for_cipher_string
1081 * @str: The string representing the cipher name
1082 *
1083 * Returns zero on no match, or the cipher code on match
1084 */
1085u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat)
1086{
1087 int i;
1088 u16 code = 0;
1089 struct ecryptfs_cipher_code_str_map_elem *map =
1090 ecryptfs_cipher_code_str_map;
1091
1092 if (strcmp(crypt_stat->cipher, "aes") == 0) {
1093 switch (crypt_stat->key_size) {
1094 case 16:
1095 code = RFC2440_CIPHER_AES_128;
1096 break;
1097 case 24:
1098 code = RFC2440_CIPHER_AES_192;
1099 break;
1100 case 32:
1101 code = RFC2440_CIPHER_AES_256;
1102 }
1103 } else {
1104 for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
1105 if (strcmp(crypt_stat->cipher, map[i].cipher_str) == 0){
1106 code = map[i].cipher_code;
1107 break;
1108 }
1109 }
1110 return code;
1111}
1112
1113/**
1114 * ecryptfs_cipher_code_to_string
1115 * @str: Destination to write out the cipher name
1116 * @cipher_code: The code to convert to cipher name string
1117 *
1118 * Returns zero on success
1119 */
1120int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
1121{
1122 int rc = 0;
1123 int i;
1124
1125 str[0] = '\0';
1126 for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
1127 if (cipher_code == ecryptfs_cipher_code_str_map[i].cipher_code)
1128 strcpy(str, ecryptfs_cipher_code_str_map[i].cipher_str);
1129 if (str[0] == '\0') {
1130 ecryptfs_printk(KERN_WARNING, "Cipher code not recognized: "
1131 "[%d]\n", cipher_code);
1132 rc = -EINVAL;
1133 }
1134 return rc;
1135}
1136
1137/**
1138 * ecryptfs_read_header_region
1139 * @data
1140 * @dentry
1141 * @nd
1142 *
1143 * Returns zero on success; non-zero otherwise
1144 */
1145int ecryptfs_read_header_region(char *data, struct dentry *dentry,
1146 struct vfsmount *mnt)
1147{
1148 struct file *file;
1149 mm_segment_t oldfs;
1150 int rc;
1151
1152 mnt = mntget(mnt);
1153 file = dentry_open(dentry, mnt, O_RDONLY);
1154 if (IS_ERR(file)) {
1155 ecryptfs_printk(KERN_DEBUG, "Error opening file to "
1156 "read header region\n");
1157 mntput(mnt);
1158 rc = PTR_ERR(file);
1159 goto out;
1160 }
1161 file->f_pos = 0;
1162 oldfs = get_fs();
1163 set_fs(get_ds());
1164 /* For releases 0.1 and 0.2, all of the header information
1165 * fits in the first data extent-sized region. */
1166 rc = file->f_op->read(file, (char __user *)data,
1167 ECRYPTFS_DEFAULT_EXTENT_SIZE, &file->f_pos);
1168 set_fs(oldfs);
1169 fput(file);
1170 rc = 0;
1171out:
1172 return rc;
1173}
1174
1175static void
1176write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat,
1177 size_t *written)
1178{
1179 u32 header_extent_size;
1180 u16 num_header_extents_at_front;
1181
1182 header_extent_size = (u32)crypt_stat->header_extent_size;
1183 num_header_extents_at_front =
1184 (u16)crypt_stat->num_header_extents_at_front;
1185 header_extent_size = cpu_to_be32(header_extent_size);
1186 memcpy(virt, &header_extent_size, 4);
1187 virt += 4;
1188 num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front);
1189 memcpy(virt, &num_header_extents_at_front, 2);
1190 (*written) = 6;
1191}
1192
1193struct kmem_cache *ecryptfs_header_cache_0;
1194struct kmem_cache *ecryptfs_header_cache_1;
1195struct kmem_cache *ecryptfs_header_cache_2;
1196
1197/**
1198 * ecryptfs_write_headers_virt
1199 * @page_virt
1200 * @crypt_stat
1201 * @ecryptfs_dentry
1202 *
1203 * Format version: 1
1204 *
1205 * Header Extent:
1206 * Octets 0-7: Unencrypted file size (big-endian)
1207 * Octets 8-15: eCryptfs special marker
1208 * Octets 16-19: Flags
1209 * Octet 16: File format version number (between 0 and 255)
1210 * Octets 17-18: Reserved
1211 * Octet 19: Bit 1 (lsb): Reserved
1212 * Bit 2: Encrypted?
1213 * Bits 3-8: Reserved
1214 * Octets 20-23: Header extent size (big-endian)
1215 * Octets 24-25: Number of header extents at front of file
1216 * (big-endian)
1217 * Octet 26: Begin RFC 2440 authentication token packet set
1218 * Data Extent 0:
1219 * Lower data (CBC encrypted)
1220 * Data Extent 1:
1221 * Lower data (CBC encrypted)
1222 * ...
1223 *
1224 * Returns zero on success
1225 */
1226int ecryptfs_write_headers_virt(char *page_virt,
1227 struct ecryptfs_crypt_stat *crypt_stat,
1228 struct dentry *ecryptfs_dentry)
1229{
1230 int rc;
1231 size_t written;
1232 size_t offset;
1233
1234 offset = ECRYPTFS_FILE_SIZE_BYTES;
1235 write_ecryptfs_marker((page_virt + offset), &written);
1236 offset += written;
1237 write_ecryptfs_flags((page_virt + offset), crypt_stat, &written);
1238 offset += written;
1239 write_header_metadata((page_virt + offset), crypt_stat, &written);
1240 offset += written;
1241 rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat,
1242 ecryptfs_dentry, &written,
1243 PAGE_CACHE_SIZE - offset);
1244 if (rc)
1245 ecryptfs_printk(KERN_WARNING, "Error generating key packet "
1246 "set; rc = [%d]\n", rc);
1247 return rc;
1248}
1249
1250/**
1251 * ecryptfs_write_headers
1252 * @lower_file: The lower file struct, which was returned from dentry_open
1253 *
1254 * Write the file headers out. This will likely involve a userspace
1255 * callout, in which the session key is encrypted with one or more
1256 * public keys and/or the passphrase necessary to do the encryption is
1257 * retrieved via a prompt. Exactly what happens at this point should
1258 * be policy-dependent.
1259 *
1260 * Returns zero on success; non-zero on error
1261 */
1262int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
1263 struct file *lower_file)
1264{
1265 mm_segment_t oldfs;
1266 struct ecryptfs_crypt_stat *crypt_stat;
1267 char *page_virt;
1268 int current_header_page;
1269 int header_pages;
1270 int rc = 0;
1271
1272 crypt_stat = &ecryptfs_inode_to_private(
1273 ecryptfs_dentry->d_inode)->crypt_stat;
1274 if (likely(ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
1275 ECRYPTFS_ENCRYPTED))) {
1276 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
1277 ECRYPTFS_KEY_VALID)) {
1278 ecryptfs_printk(KERN_DEBUG, "Key is "
1279 "invalid; bailing out\n");
1280 rc = -EINVAL;
1281 goto out;
1282 }
1283 } else {
1284 rc = -EINVAL;
1285 ecryptfs_printk(KERN_WARNING,
1286 "Called with crypt_stat->encrypted == 0\n");
1287 goto out;
1288 }
1289 /* Released in this function */
1290 page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, SLAB_USER);
1291 if (!page_virt) {
1292 ecryptfs_printk(KERN_ERR, "Out of memory\n");
1293 rc = -ENOMEM;
1294 goto out;
1295 }
1296 memset(page_virt, 0, PAGE_CACHE_SIZE);
1297 rc = ecryptfs_write_headers_virt(page_virt, crypt_stat,
1298 ecryptfs_dentry);
1299 if (unlikely(rc)) {
1300 ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n");
1301 memset(page_virt, 0, PAGE_CACHE_SIZE);
1302 goto out_free;
1303 }
1304 ecryptfs_printk(KERN_DEBUG,
1305 "Writing key packet set to underlying file\n");
1306 lower_file->f_pos = 0;
1307 oldfs = get_fs();
1308 set_fs(get_ds());
1309 ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
1310 "write() w/ header page; lower_file->f_pos = "
1311 "[0x%.16x]\n", lower_file->f_pos);
1312 lower_file->f_op->write(lower_file, (char __user *)page_virt,
1313 PAGE_CACHE_SIZE, &lower_file->f_pos);
1314 header_pages = ((crypt_stat->header_extent_size
1315 * crypt_stat->num_header_extents_at_front)
1316 / PAGE_CACHE_SIZE);
1317 memset(page_virt, 0, PAGE_CACHE_SIZE);
1318 current_header_page = 1;
1319 while (current_header_page < header_pages) {
1320 ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
1321 "write() w/ zero'd page; lower_file->f_pos = "
1322 "[0x%.16x]\n", lower_file->f_pos);
1323 lower_file->f_op->write(lower_file, (char __user *)page_virt,
1324 PAGE_CACHE_SIZE, &lower_file->f_pos);
1325 current_header_page++;
1326 }
1327 set_fs(oldfs);
1328 ecryptfs_printk(KERN_DEBUG,
1329 "Done writing key packet set to underlying file.\n");
1330out_free:
1331 kmem_cache_free(ecryptfs_header_cache_0, page_virt);
1332out:
1333 return rc;
1334}
1335
1336static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1337 char *virt, int *bytes_read)
1338{
1339 int rc = 0;
1340 u32 header_extent_size;
1341 u16 num_header_extents_at_front;
1342
1343 memcpy(&header_extent_size, virt, 4);
1344 header_extent_size = be32_to_cpu(header_extent_size);
1345 virt += 4;
1346 memcpy(&num_header_extents_at_front, virt, 2);
1347 num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
1348 crypt_stat->header_extent_size = (int)header_extent_size;
1349 crypt_stat->num_header_extents_at_front =
1350 (int)num_header_extents_at_front;
1351 (*bytes_read) = 6;
1352 if ((crypt_stat->header_extent_size
1353 * crypt_stat->num_header_extents_at_front)
1354 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
1355 rc = -EINVAL;
1356 ecryptfs_printk(KERN_WARNING, "Invalid header extent size: "
1357 "[%d]\n", crypt_stat->header_extent_size);
1358 }
1359 return rc;
1360}
1361
1362/**
1363 * set_default_header_data
1364 *
1365 * For version 0 file format; this function is only for backwards
1366 * compatibility for files created with the prior versions of
1367 * eCryptfs.
1368 */
1369static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
1370{
1371 crypt_stat->header_extent_size = 4096;
1372 crypt_stat->num_header_extents_at_front = 1;
1373}
1374
1375/**
1376 * ecryptfs_read_headers_virt
1377 *
1378 * Read/parse the header data. The header format is detailed in the
1379 * comment block for the ecryptfs_write_headers_virt() function.
1380 *
1381 * Returns zero on success
1382 */
1383static int ecryptfs_read_headers_virt(char *page_virt,
1384 struct ecryptfs_crypt_stat *crypt_stat,
1385 struct dentry *ecryptfs_dentry)
1386{
1387 int rc = 0;
1388 int offset;
1389 int bytes_read;
1390
1391 ecryptfs_set_default_sizes(crypt_stat);
1392 crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private(
1393 ecryptfs_dentry->d_sb)->mount_crypt_stat;
1394 offset = ECRYPTFS_FILE_SIZE_BYTES;
1395 rc = contains_ecryptfs_marker(page_virt + offset);
1396 if (rc == 0) {
1397 rc = -EINVAL;
1398 goto out;
1399 }
1400 offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1401 rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
1402 &bytes_read);
1403 if (rc) {
1404 ecryptfs_printk(KERN_WARNING, "Error processing flags\n");
1405 goto out;
1406 }
1407 if (crypt_stat->file_version > ECRYPTFS_SUPPORTED_FILE_VERSION) {
1408 ecryptfs_printk(KERN_WARNING, "File version is [%d]; only "
1409 "file version [%d] is supported by this "
1410 "version of eCryptfs\n",
1411 crypt_stat->file_version,
1412 ECRYPTFS_SUPPORTED_FILE_VERSION);
1413 rc = -EINVAL;
1414 goto out;
1415 }
1416 offset += bytes_read;
1417 if (crypt_stat->file_version >= 1) {
1418 rc = parse_header_metadata(crypt_stat, (page_virt + offset),
1419 &bytes_read);
1420 if (rc) {
1421 ecryptfs_printk(KERN_WARNING, "Error reading header "
1422 "metadata; rc = [%d]\n", rc);
1423 }
1424 offset += bytes_read;
1425 } else
1426 set_default_header_data(crypt_stat);
1427 rc = ecryptfs_parse_packet_set(crypt_stat, (page_virt + offset),
1428 ecryptfs_dentry);
1429out:
1430 return rc;
1431}
1432
1433/**
1434 * ecryptfs_read_headers
1435 *
1436 * Returns zero if valid headers found and parsed; non-zero otherwise
1437 */
1438int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
1439 struct file *lower_file)
1440{
1441 int rc = 0;
1442 char *page_virt = NULL;
1443 mm_segment_t oldfs;
1444 ssize_t bytes_read;
1445 struct ecryptfs_crypt_stat *crypt_stat =
1446 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
1447
1448 /* Read the first page from the underlying file */
1449 page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, SLAB_USER);
1450 if (!page_virt) {
1451 rc = -ENOMEM;
1452 ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n");
1453 goto out;
1454 }
1455 lower_file->f_pos = 0;
1456 oldfs = get_fs();
1457 set_fs(get_ds());
1458 bytes_read = lower_file->f_op->read(lower_file,
1459 (char __user *)page_virt,
1460 ECRYPTFS_DEFAULT_EXTENT_SIZE,
1461 &lower_file->f_pos);
1462 set_fs(oldfs);
1463 if (bytes_read != ECRYPTFS_DEFAULT_EXTENT_SIZE) {
1464 rc = -EINVAL;
1465 goto out;
1466 }
1467 rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
1468 ecryptfs_dentry);
1469 if (rc) {
1470 ecryptfs_printk(KERN_DEBUG, "Valid eCryptfs headers not "
1471 "found\n");
1472 rc = -EINVAL;
1473 }
1474out:
1475 if (page_virt) {
1476 memset(page_virt, 0, PAGE_CACHE_SIZE);
1477 kmem_cache_free(ecryptfs_header_cache_1, page_virt);
1478 }
1479 return rc;
1480}
1481
1482/**
1483 * ecryptfs_encode_filename - converts a plaintext file name to cipher text
1484 * @crypt_stat: The crypt_stat struct associated with the file anem to encode
1485 * @name: The plaintext name
1486 * @length: The length of the plaintext
1487 * @encoded_name: The encypted name
1488 *
1489 * Encrypts and encodes a filename into something that constitutes a
1490 * valid filename for a filesystem, with printable characters.
1491 *
1492 * We assume that we have a properly initialized crypto context,
1493 * pointed to by crypt_stat->tfm.
1494 *
1495 * TODO: Implement filename decoding and decryption here, in place of
1496 * memcpy. We are keeping the framework around for now to (1)
1497 * facilitate testing of the components needed to implement filename
1498 * encryption and (2) to provide a code base from which other
1499 * developers in the community can easily implement this feature.
1500 *
1501 * Returns the length of encoded filename; negative if error
1502 */
1503int
1504ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
1505 const char *name, int length, char **encoded_name)
1506{
1507 int error = 0;
1508
1509 (*encoded_name) = kmalloc(length + 2, GFP_KERNEL);
1510 if (!(*encoded_name)) {
1511 error = -ENOMEM;
1512 goto out;
1513 }
1514 /* TODO: Filename encryption is a scheduled feature for a
1515 * future version of eCryptfs. This function is here only for
1516 * the purpose of providing a framework for other developers
1517 * to easily implement filename encryption. Hint: Replace this
1518 * memcpy() with a call to encrypt and encode the
1519 * filename, the set the length accordingly. */
1520 memcpy((void *)(*encoded_name), (void *)name, length);
1521 (*encoded_name)[length] = '\0';
1522 error = length + 1;
1523out:
1524 return error;
1525}
1526
1527/**
1528 * ecryptfs_decode_filename - converts the cipher text name to plaintext
1529 * @crypt_stat: The crypt_stat struct associated with the file
1530 * @name: The filename in cipher text
1531 * @length: The length of the cipher text name
1532 * @decrypted_name: The plaintext name
1533 *
1534 * Decodes and decrypts the filename.
1535 *
1536 * We assume that we have a properly initialized crypto context,
1537 * pointed to by crypt_stat->tfm.
1538 *
1539 * TODO: Implement filename decoding and decryption here, in place of
1540 * memcpy. We are keeping the framework around for now to (1)
1541 * facilitate testing of the components needed to implement filename
1542 * encryption and (2) to provide a code base from which other
1543 * developers in the community can easily implement this feature.
1544 *
1545 * Returns the length of decoded filename; negative if error
1546 */
1547int
1548ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
1549 const char *name, int length, char **decrypted_name)
1550{
1551 int error = 0;
1552
1553 (*decrypted_name) = kmalloc(length + 2, GFP_KERNEL);
1554 if (!(*decrypted_name)) {
1555 error = -ENOMEM;
1556 goto out;
1557 }
1558 /* TODO: Filename encryption is a scheduled feature for a
1559 * future version of eCryptfs. This function is here only for
1560 * the purpose of providing a framework for other developers
1561 * to easily implement filename encryption. Hint: Replace this
1562 * memcpy() with a call to decode and decrypt the
1563 * filename, the set the length accordingly. */
1564 memcpy((void *)(*decrypted_name), (void *)name, length);
1565 (*decrypted_name)[length + 1] = '\0'; /* Only for convenience
1566 * in printing out the
1567 * string in debug
1568 * messages */
1569 error = length;
1570out:
1571 return error;
1572}
1573
1574/**
1575 * ecryptfs_process_cipher - Perform cipher initialization.
1576 * @tfm: Crypto context set by this function
1577 * @key_tfm: Crypto context for key material, set by this function
1578 * @cipher_name: Name of the cipher.
1579 * @key_size: Size of the key in bytes.
1580 *
1581 * Returns zero on success. Any crypto_tfm structs allocated here
1582 * should be released by other functions, such as on a superblock put
1583 * event, regardless of whether this function succeeds for fails.
1584 */
1585int
1586ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
1587 char *cipher_name, size_t key_size)
1588{
1589 char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
1590 int rc;
1591
1592 *tfm = *key_tfm = NULL;
1593 if (key_size > ECRYPTFS_MAX_KEY_BYTES) {
1594 rc = -EINVAL;
1595 printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum "
1596 "allowable is [%d]\n", key_size, ECRYPTFS_MAX_KEY_BYTES);
1597 goto out;
1598 }
1599 *tfm = crypto_alloc_tfm(cipher_name, (ECRYPTFS_DEFAULT_CHAINING_MODE
1600 | CRYPTO_TFM_REQ_WEAK_KEY));
1601 if (!(*tfm)) {
1602 rc = -EINVAL;
1603 printk(KERN_ERR "Unable to allocate crypto cipher with name "
1604 "[%s]\n", cipher_name);
1605 goto out;
1606 }
1607 *key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY);
1608 if (!(*key_tfm)) {
1609 rc = -EINVAL;
1610 printk(KERN_ERR "Unable to allocate crypto cipher with name "
1611 "[%s]\n", cipher_name);
1612 goto out;
1613 }
1614 if (key_size < crypto_tfm_alg_min_keysize(*tfm)) {
1615 rc = -EINVAL;
1616 printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
1617 "supported by cipher [%s] is [%d]\n", key_size,
1618 cipher_name, crypto_tfm_alg_min_keysize(*tfm));
1619 goto out;
1620 }
1621 if (key_size < crypto_tfm_alg_min_keysize(*key_tfm)) {
1622 rc = -EINVAL;
1623 printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
1624 "supported by cipher [%s] is [%d]\n", key_size,
1625 cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
1626 goto out;
1627 }
1628 if (key_size > crypto_tfm_alg_max_keysize(*tfm)) {
1629 rc = -EINVAL;
1630 printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
1631 "supported by cipher [%s] is [%d]\n", key_size,
1632 cipher_name, crypto_tfm_alg_min_keysize(*tfm));
1633 goto out;
1634 }
1635 if (key_size > crypto_tfm_alg_max_keysize(*key_tfm)) {
1636 rc = -EINVAL;
1637 printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
1638 "supported by cipher [%s] is [%d]\n", key_size,
1639 cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
1640 goto out;
1641 }
1642 get_random_bytes(dummy_key, key_size);
1643 rc = crypto_cipher_setkey(*tfm, dummy_key, key_size);
1644 if (rc) {
1645 printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
1646 "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
1647 rc = -EINVAL;
1648 goto out;
1649 }
1650 rc = crypto_cipher_setkey(*key_tfm, dummy_key, key_size);
1651 if (rc) {
1652 printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
1653 "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
1654 rc = -EINVAL;
1655 goto out;
1656 }
1657out:
1658 return rc;
1659}
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
new file mode 100644
index 000000000000..61f8e894284f
--- /dev/null
+++ b/fs/ecryptfs/debug.c
@@ -0,0 +1,123 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 * Functions only useful for debugging.
4 *
5 * Copyright (C) 2006 International Business Machines Corp.
6 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 * 02111-1307, USA.
22 */
23
24#include "ecryptfs_kernel.h"
25
26/**
27 * ecryptfs_dump_auth_tok - debug function to print auth toks
28 *
29 * This function will print the contents of an ecryptfs authentication
30 * token.
31 */
32void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
33{
34 char salt[ECRYPTFS_SALT_SIZE * 2 + 1];
35 char sig[ECRYPTFS_SIG_SIZE_HEX + 1];
36
37 ecryptfs_printk(KERN_DEBUG, "Auth tok at mem loc [%p]:\n",
38 auth_tok);
39 if (ECRYPTFS_CHECK_FLAG(auth_tok->flags, ECRYPTFS_PRIVATE_KEY)) {
40 ecryptfs_printk(KERN_DEBUG, " * private key type\n");
41 ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT "
42 "IN ECRYPTFS VERSION 0.1)\n");
43 } else {
44 ecryptfs_printk(KERN_DEBUG, " * passphrase type\n");
45 ecryptfs_to_hex(salt, auth_tok->token.password.salt,
46 ECRYPTFS_SALT_SIZE);
47 salt[ECRYPTFS_SALT_SIZE * 2] = '\0';
48 ecryptfs_printk(KERN_DEBUG, " * salt = [%s]\n", salt);
49 if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags,
50 ECRYPTFS_PERSISTENT_PASSWORD)) {
51 ecryptfs_printk(KERN_DEBUG, " * persistent\n");
52 }
53 memcpy(sig, auth_tok->token.password.signature,
54 ECRYPTFS_SIG_SIZE_HEX);
55 sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
56 ecryptfs_printk(KERN_DEBUG, " * signature = [%s]\n", sig);
57 }
58 ecryptfs_printk(KERN_DEBUG, " * session_key.flags = [0x%x]\n",
59 auth_tok->session_key.flags);
60 if (auth_tok->session_key.flags
61 & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT)
62 ecryptfs_printk(KERN_DEBUG,
63 " * Userspace decrypt request set\n");
64 if (auth_tok->session_key.flags
65 & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT)
66 ecryptfs_printk(KERN_DEBUG,
67 " * Userspace encrypt request set\n");
68 if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_DECRYPTED_KEY) {
69 ecryptfs_printk(KERN_DEBUG, " * Contains decrypted key\n");
70 ecryptfs_printk(KERN_DEBUG,
71 " * session_key.decrypted_key_size = [0x%x]\n",
72 auth_tok->session_key.decrypted_key_size);
73 ecryptfs_printk(KERN_DEBUG, " * Decrypted session key "
74 "dump:\n");
75 if (ecryptfs_verbosity > 0)
76 ecryptfs_dump_hex(auth_tok->session_key.decrypted_key,
77 ECRYPTFS_DEFAULT_KEY_BYTES);
78 }
79 if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_ENCRYPTED_KEY) {
80 ecryptfs_printk(KERN_DEBUG, " * Contains encrypted key\n");
81 ecryptfs_printk(KERN_DEBUG,
82 " * session_key.encrypted_key_size = [0x%x]\n",
83 auth_tok->session_key.encrypted_key_size);
84 ecryptfs_printk(KERN_DEBUG, " * Encrypted session key "
85 "dump:\n");
86 if (ecryptfs_verbosity > 0)
87 ecryptfs_dump_hex(auth_tok->session_key.encrypted_key,
88 auth_tok->session_key.
89 encrypted_key_size);
90 }
91}
92
93/**
94 * ecryptfs_dump_hex - debug hex printer
95 * @data: string of bytes to be printed
96 * @bytes: number of bytes to print
97 *
98 * Dump hexadecimal representation of char array
99 */
100void ecryptfs_dump_hex(char *data, int bytes)
101{
102 int i = 0;
103 int add_newline = 1;
104
105 if (ecryptfs_verbosity < 1)
106 return;
107 if (bytes != 0) {
108 printk(KERN_DEBUG "0x%.2x.", (unsigned char)data[i]);
109 i++;
110 }
111 while (i < bytes) {
112 printk("0x%.2x.", (unsigned char)data[i]);
113 i++;
114 if (i % 16 == 0) {
115 printk("\n");
116 add_newline = 0;
117 } else
118 add_newline = 1;
119 }
120 if (add_newline)
121 printk("\n");
122}
123
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
new file mode 100644
index 000000000000..f0d2a433242b
--- /dev/null
+++ b/fs/ecryptfs/dentry.c
@@ -0,0 +1,87 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2003 Erez Zadok
5 * Copyright (C) 2001-2003 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 * 02111-1307, USA.
23 */
24
25#include <linux/dcache.h>
26#include <linux/namei.h>
27#include "ecryptfs_kernel.h"
28
29/**
30 * ecryptfs_d_revalidate - revalidate an ecryptfs dentry
31 * @dentry: The ecryptfs dentry
32 * @nd: The associated nameidata
33 *
34 * Called when the VFS needs to revalidate a dentry. This
35 * is called whenever a name lookup finds a dentry in the
36 * dcache. Most filesystems leave this as NULL, because all their
37 * dentries in the dcache are valid.
38 *
39 * Returns 1 if valid, 0 otherwise.
40 *
41 */
42static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
43{
44 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
45 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
46 struct dentry *dentry_save;
47 struct vfsmount *vfsmount_save;
48 int rc = 1;
49
50 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
51 goto out;
52 dentry_save = nd->dentry;
53 vfsmount_save = nd->mnt;
54 nd->dentry = lower_dentry;
55 nd->mnt = lower_mnt;
56 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
57 nd->dentry = dentry_save;
58 nd->mnt = vfsmount_save;
59out:
60 return rc;
61}
62
63struct kmem_cache *ecryptfs_dentry_info_cache;
64
65/**
66 * ecryptfs_d_release
67 * @dentry: The ecryptfs dentry
68 *
69 * Called when a dentry is really deallocated.
70 */
71static void ecryptfs_d_release(struct dentry *dentry)
72{
73 struct dentry *lower_dentry;
74
75 lower_dentry = ecryptfs_dentry_to_lower(dentry);
76 if (ecryptfs_dentry_to_private(dentry))
77 kmem_cache_free(ecryptfs_dentry_info_cache,
78 ecryptfs_dentry_to_private(dentry));
79 if (lower_dentry)
80 dput(lower_dentry);
81 return;
82}
83
84struct dentry_operations ecryptfs_dops = {
85 .d_revalidate = ecryptfs_d_revalidate,
86 .d_release = ecryptfs_d_release,
87};
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
new file mode 100644
index 000000000000..872c9958531a
--- /dev/null
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -0,0 +1,482 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 * Kernel declarations.
4 *
5 * Copyright (C) 1997-2003 Erez Zadok
6 * Copyright (C) 2001-2003 Stony Brook University
7 * Copyright (C) 2004-2006 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#ifndef ECRYPTFS_KERNEL_H
27#define ECRYPTFS_KERNEL_H
28
29#include <keys/user-type.h>
30#include <linux/fs.h>
31#include <linux/scatterlist.h>
32
/* Version verification for shared data structures w/ userspace */
#define ECRYPTFS_VERSION_MAJOR			0x00
#define ECRYPTFS_VERSION_MINOR			0x04
#define ECRYPTFS_SUPPORTED_FILE_VERSION		0x01

/* Feature bits. Each flag names one feature; ECRYPTFS_VERSIONING_MASK
 * is the set supported by this kernel module. Userspace tools such
 * as the mount helper read the mask from a sysfs handle in order to
 * determine how to behave. */
#define ECRYPTFS_VERSIONING_PASSPHRASE			0x00000001
#define ECRYPTFS_VERSIONING_PUBKEY			0x00000002
#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH	0x00000004
#define ECRYPTFS_VERSIONING_POLICY			0x00000008
#define ECRYPTFS_VERSIONING_MASK \
	(ECRYPTFS_VERSIONING_PASSPHRASE \
	 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH)
47
/* Passphrase and salt limits (bytes) */
#define ECRYPTFS_MAX_PASSWORD_LENGTH		64
#define ECRYPTFS_MAX_PASSPHRASE_BYTES		ECRYPTFS_MAX_PASSWORD_LENGTH
#define ECRYPTFS_SALT_SIZE			8
#define ECRYPTFS_SALT_SIZE_HEX			(ECRYPTFS_SALT_SIZE * 2)

/* The original signature size is only for what is stored on disk;
 * all in-memory representations are expanded hex, so they are better
 * adapted to being passed around or referenced on the command
 * line. */
#define ECRYPTFS_SIG_SIZE			8
#define ECRYPTFS_SIG_SIZE_HEX			(ECRYPTFS_SIG_SIZE * 2)
#define ECRYPTFS_PASSWORD_SIG_SIZE		ECRYPTFS_SIG_SIZE_HEX

/* Key, IV, file-version and extent sizing */
#define ECRYPTFS_MAX_KEY_BYTES			64
#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES	512
#define ECRYPTFS_DEFAULT_IV_BYTES		16
#define ECRYPTFS_FILE_VERSION			0x01
#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE	8192
#define ECRYPTFS_DEFAULT_EXTENT_SIZE		4096
#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE	8192
65
/* Numeric cipher codes, named after the RFC 2440 cipher identifiers */
#define RFC2440_CIPHER_DES3_EDE		0x02
#define RFC2440_CIPHER_CAST_5		0x03
#define RFC2440_CIPHER_BLOWFISH		0x04
#define RFC2440_CIPHER_AES_128		0x07
#define RFC2440_CIPHER_AES_192		0x08
#define RFC2440_CIPHER_AES_256		0x09
#define RFC2440_CIPHER_TWOFISH		0x0a
#define RFC2440_CIPHER_CAST_6		0x0b
74
/* Flag helpers for bit-vector fields. Both macro arguments are
 * parenthesized so that an expression argument (e.g. "a | b") is
 * treated as a unit rather than being re-associated by operator
 * precedence. For SET and CLEAR, flag_bit_vector must be an lvalue. */
#define ECRYPTFS_SET_FLAG(flag_bit_vector, flag) \
	((flag_bit_vector) |= (flag))
#define ECRYPTFS_CLEAR_FLAG(flag_bit_vector, flag) \
	((flag_bit_vector) &= ~(flag))
#define ECRYPTFS_CHECK_FLAG(flag_bit_vector, flag) \
	((flag_bit_vector) & (flag))
78
79/**
80 * For convenience, we may need to pass around the encrypted session
81 * key between kernel and userspace because the authentication token
82 * may not be extractable. For example, the TPM may not release the
83 * private key, instead requiring the encrypted data and returning the
84 * decrypted data.
85 */
86struct ecryptfs_session_key {
87#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
88#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
89#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
90#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
91 u32 flags;
92 u32 encrypted_key_size;
93 u32 decrypted_key_size;
94 u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
95 u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
96};
97
98struct ecryptfs_password {
99 u32 password_bytes;
100 s32 hash_algo;
101 u32 hash_iterations;
102 u32 session_key_encryption_key_bytes;
103#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
104#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
105 u32 flags;
106 /* Iterated-hash concatenation of salt and passphrase */
107 u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
108 u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
109 /* Always in expanded hex */
110 u8 salt[ECRYPTFS_SALT_SIZE];
111};
112
/* Kinds of authentication token; compared against
 * ecryptfs_auth_tok.token_type (presumably -- the users are outside
 * this header). */
enum ecryptfs_token_types {
	ECRYPTFS_PASSWORD,
	ECRYPTFS_PRIVATE_KEY
};
114
115/* May be a password or a private key */
116struct ecryptfs_auth_tok {
117 u16 version; /* 8-bit major and 8-bit minor */
118 u16 token_type;
119 u32 flags;
120 struct ecryptfs_session_key session_key;
121 u8 reserved[32];
122 union {
123 struct ecryptfs_password password;
124 /* Private key is in future eCryptfs releases */
125 } token;
126} __attribute__ ((packed));
127
128void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
129extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
130extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
131
132struct ecryptfs_key_record {
133 unsigned char type;
134 size_t enc_key_size;
135 unsigned char sig[ECRYPTFS_SIG_SIZE];
136 unsigned char enc_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
137};
138
139struct ecryptfs_auth_tok_list {
140 struct ecryptfs_auth_tok *auth_tok;
141 struct list_head list;
142};
143
144struct ecryptfs_crypt_stat;
145struct ecryptfs_mount_crypt_stat;
146
/* Values for ecryptfs_page_crypt_context.mode */
#define ECRYPTFS_PREPARE_COMMIT_MODE 0
#define ECRYPTFS_WRITEPAGE_MODE 1

/* Argument bundle passed to ecryptfs_encrypt_page(). */
struct ecryptfs_page_crypt_context {
	struct page *page;
	unsigned int mode;
	/* NOTE(review): presumably lower_file goes with
	 * ECRYPTFS_PREPARE_COMMIT_MODE and wbc with
	 * ECRYPTFS_WRITEPAGE_MODE -- confirm against the users */
	union {
		struct file *lower_file;
		struct writeback_control *wbc;
	} param;
};
157
158static inline struct ecryptfs_auth_tok *
159ecryptfs_get_key_payload_data(struct key *key)
160{
161 return (struct ecryptfs_auth_tok *)
162 (((struct user_key_payload*)key->payload.data)->data);
163}
164
/* Filesystem identification and capacity limits */
#define ECRYPTFS_SUPER_MAGIC			0xf15f
#define ECRYPTFS_MAX_KEYSET_SIZE		1024
#define ECRYPTFS_MAX_CIPHER_NAME_SIZE		32
#define ECRYPTFS_MAX_NUM_ENC_KEYS		64
#define ECRYPTFS_MAX_NUM_KEYSIGS		2 /* TODO: Make this a linked list */
#define ECRYPTFS_MAX_IV_BYTES			16 /* 128 bits */
#define ECRYPTFS_SALT_BYTES			2

/* Marker and size field widths */
#define MAGIC_ECRYPTFS_MARKER			0x3c81b7f5
#define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES	8 /* 4*2 */
#define ECRYPTFS_FILE_SIZE_BYTES		8

/* Cipher defaults */
#define ECRYPTFS_DEFAULT_CIPHER			"aes"
#define ECRYPTFS_DEFAULT_KEY_BYTES		16
#define ECRYPTFS_DEFAULT_CHAINING_MODE		CRYPTO_TFM_MODE_CBC

/* Packet type bytes */
#define ECRYPTFS_TAG_3_PACKET_TYPE		0x8C
#define ECRYPTFS_TAG_11_PACKET_TYPE		0xED

#define MD5_DIGEST_SIZE				16
181
182/**
183 * This is the primary struct associated with each encrypted file.
184 *
185 * TODO: cache align/pack?
186 */
187struct ecryptfs_crypt_stat {
188#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001
189#define ECRYPTFS_POLICY_APPLIED 0x00000002
190#define ECRYPTFS_NEW_FILE 0x00000004
191#define ECRYPTFS_ENCRYPTED 0x00000008
192#define ECRYPTFS_SECURITY_WARNING 0x00000010
193#define ECRYPTFS_ENABLE_HMAC 0x00000020
194#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040
195#define ECRYPTFS_KEY_VALID 0x00000080
196 u32 flags;
197 unsigned int file_version;
198 size_t iv_bytes;
199 size_t num_keysigs;
200 size_t header_extent_size;
201 size_t num_header_extents_at_front;
202 size_t extent_size; /* Data extent size; default is 4096 */
203 size_t key_size;
204 size_t extent_shift;
205 unsigned int extent_mask;
206 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
207 struct crypto_tfm *tfm;
208 struct crypto_tfm *md5_tfm; /* Crypto context for generating
209 * the initialization vectors */
210 unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
211 unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
212 unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
213 unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX];
214 struct mutex cs_tfm_mutex;
215 struct mutex cs_md5_tfm_mutex;
216 struct mutex cs_mutex;
217};
218
219/* inode private data. */
220struct ecryptfs_inode_info {
221 struct inode vfs_inode;
222 struct inode *wii_inode;
223 struct ecryptfs_crypt_stat crypt_stat;
224};
225
/* dentry private data, stored in dentry->d_fsdata. Each dentry must
 * keep track of a lower vfsmount too. */
struct ecryptfs_dentry_info {
	/* Lower dentry; see ecryptfs_dentry_to_lower() */
	struct dentry *wdi_dentry;
	struct vfsmount *lower_mnt;
	struct ecryptfs_crypt_stat *crypt_stat;
};
233
234/**
235 * This struct is to enable a mount-wide passphrase/salt combo. This
236 * is more or less a stopgap to provide similar functionality to other
237 * crypto filesystems like EncFS or CFS until full policy support is
238 * implemented in eCryptfs.
239 */
240struct ecryptfs_mount_crypt_stat {
241 /* Pointers to memory we do not own, do not free these */
242#define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001
243 u32 flags;
244 struct ecryptfs_auth_tok *global_auth_tok;
245 struct key *global_auth_tok_key;
246 size_t global_default_cipher_key_size;
247 struct crypto_tfm *global_key_tfm;
248 struct mutex global_key_tfm_mutex;
249 unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
250 + 1];
251 unsigned char global_auth_tok_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
252};
253
254/* superblock private data. */
255struct ecryptfs_sb_info {
256 struct super_block *wsi_sb;
257 struct ecryptfs_mount_crypt_stat mount_crypt_stat;
258};
259
/* file private data, stored in file->private_data. */
struct ecryptfs_file_info {
	/* Lower file; see ecryptfs_file_to_lower() */
	struct file *wfi_file;
	struct ecryptfs_crypt_stat *crypt_stat;
};
265
266/* auth_tok <=> encrypted_session_key mappings */
267struct ecryptfs_auth_tok_list_item {
268 unsigned char encrypted_session_key[ECRYPTFS_MAX_KEY_BYTES];
269 struct list_head list;
270 struct ecryptfs_auth_tok auth_tok;
271};
272
273static inline struct ecryptfs_file_info *
274ecryptfs_file_to_private(struct file *file)
275{
276 return (struct ecryptfs_file_info *)file->private_data;
277}
278
279static inline void
280ecryptfs_set_file_private(struct file *file,
281 struct ecryptfs_file_info *file_info)
282{
283 file->private_data = file_info;
284}
285
286static inline struct file *ecryptfs_file_to_lower(struct file *file)
287{
288 return ((struct ecryptfs_file_info *)file->private_data)->wfi_file;
289}
290
291static inline void
292ecryptfs_set_file_lower(struct file *file, struct file *lower_file)
293{
294 ((struct ecryptfs_file_info *)file->private_data)->wfi_file =
295 lower_file;
296}
297
298static inline struct ecryptfs_inode_info *
299ecryptfs_inode_to_private(struct inode *inode)
300{
301 return container_of(inode, struct ecryptfs_inode_info, vfs_inode);
302}
303
304static inline struct inode *ecryptfs_inode_to_lower(struct inode *inode)
305{
306 return ecryptfs_inode_to_private(inode)->wii_inode;
307}
308
309static inline void
310ecryptfs_set_inode_lower(struct inode *inode, struct inode *lower_inode)
311{
312 ecryptfs_inode_to_private(inode)->wii_inode = lower_inode;
313}
314
315static inline struct ecryptfs_sb_info *
316ecryptfs_superblock_to_private(struct super_block *sb)
317{
318 return (struct ecryptfs_sb_info *)sb->s_fs_info;
319}
320
321static inline void
322ecryptfs_set_superblock_private(struct super_block *sb,
323 struct ecryptfs_sb_info *sb_info)
324{
325 sb->s_fs_info = sb_info;
326}
327
328static inline struct super_block *
329ecryptfs_superblock_to_lower(struct super_block *sb)
330{
331 return ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb;
332}
333
334static inline void
335ecryptfs_set_superblock_lower(struct super_block *sb,
336 struct super_block *lower_sb)
337{
338 ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb = lower_sb;
339}
340
341static inline struct ecryptfs_dentry_info *
342ecryptfs_dentry_to_private(struct dentry *dentry)
343{
344 return (struct ecryptfs_dentry_info *)dentry->d_fsdata;
345}
346
347static inline void
348ecryptfs_set_dentry_private(struct dentry *dentry,
349 struct ecryptfs_dentry_info *dentry_info)
350{
351 dentry->d_fsdata = dentry_info;
352}
353
354static inline struct dentry *
355ecryptfs_dentry_to_lower(struct dentry *dentry)
356{
357 return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry;
358}
359
360static inline void
361ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry)
362{
363 ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry =
364 lower_dentry;
365}
366
367static inline struct vfsmount *
368ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
369{
370 return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt;
371}
372
373static inline void
374ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
375{
376 ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt =
377 lower_mnt;
378}
379
/* Logging wrapper: prefixes the message with the calling function's
 * name and forwards to __ecryptfs_printk(). @type is a string literal
 * (e.g. KERN_DEBUG) that is pasted in front of the format string.
 *
 * NOTE(review): the expansion ends in its own ';', so the macro cannot
 * be used as the unbraced body of an "if" that has an "else". */
#define ecryptfs_printk(type, fmt, arg...) \
	__ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg);
void __ecryptfs_printk(const char *fmt, ...);
383
384extern const struct file_operations ecryptfs_main_fops;
385extern const struct file_operations ecryptfs_dir_fops;
386extern struct inode_operations ecryptfs_main_iops;
387extern struct inode_operations ecryptfs_dir_iops;
388extern struct inode_operations ecryptfs_symlink_iops;
389extern struct super_operations ecryptfs_sops;
390extern struct dentry_operations ecryptfs_dops;
391extern struct address_space_operations ecryptfs_aops;
392extern int ecryptfs_verbosity;
393
394extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
395extern struct kmem_cache *ecryptfs_file_info_cache;
396extern struct kmem_cache *ecryptfs_dentry_info_cache;
397extern struct kmem_cache *ecryptfs_inode_info_cache;
398extern struct kmem_cache *ecryptfs_sb_info_cache;
399extern struct kmem_cache *ecryptfs_header_cache_0;
400extern struct kmem_cache *ecryptfs_header_cache_1;
401extern struct kmem_cache *ecryptfs_header_cache_2;
402extern struct kmem_cache *ecryptfs_lower_page_cache;
403
404int ecryptfs_interpose(struct dentry *hidden_dentry,
405 struct dentry *this_dentry, struct super_block *sb,
406 int flag);
407int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
408int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
409 const char *name, int length,
410 char **decrypted_name);
411int ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
412 const char *name, int length,
413 char **encoded_name);
414struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
415void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src);
416void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src);
417void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src);
418void ecryptfs_dump_hex(char *data, int bytes);
419int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
420 int sg_size);
421int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat);
422void ecryptfs_rotate_iv(unsigned char *iv);
423void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
424void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
425void ecryptfs_destruct_mount_crypt_stat(
426 struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
427int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
428int ecryptfs_write_inode_size_to_header(struct file *lower_file,
429 struct inode *lower_inode,
430 struct inode *inode);
431int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
432 struct file *lower_file,
433 unsigned long lower_page_index, int byte_offset,
434 int region_bytes);
435int
436ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
437 struct file *lower_file, int byte_offset,
438 int region_size);
439int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
440 struct file *lower_file);
441int ecryptfs_do_readpage(struct file *file, struct page *page,
442 pgoff_t lower_page_index);
443int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
444 char **lower_virt,
445 struct inode *lower_inode,
446 unsigned long lower_page_index);
447int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
448 struct inode *lower_inode,
449 struct writeback_control *wbc);
450int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx);
451int ecryptfs_decrypt_page(struct file *file, struct page *page);
452int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
453 struct file *lower_file);
454int ecryptfs_write_headers_virt(char *page_virt,
455 struct ecryptfs_crypt_stat *crypt_stat,
456 struct dentry *ecryptfs_dentry);
457int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
458 struct file *lower_file);
459int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
460int contains_ecryptfs_marker(char *data);
461int ecryptfs_read_header_region(char *data, struct dentry *dentry,
462 struct vfsmount *mnt);
463u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat);
464int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code);
465void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat);
466int ecryptfs_generate_key_packet_set(char *dest_base,
467 struct ecryptfs_crypt_stat *crypt_stat,
468 struct dentry *ecryptfs_dentry,
469 size_t *len, size_t max);
470int process_request_key_err(long err_code);
471int
472ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
473 unsigned char *src, struct dentry *ecryptfs_dentry);
474int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
475int
476ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
477 char *cipher_name, size_t key_size);
478int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
479int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
480void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
481
482#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
new file mode 100644
index 000000000000..c8550c9f9cd2
--- /dev/null
+++ b/fs/ecryptfs/file.c
@@ -0,0 +1,440 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2004 Erez Zadok
5 * Copyright (C) 2001-2004 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#include <linux/file.h>
27#include <linux/poll.h>
28#include <linux/mount.h>
29#include <linux/pagemap.h>
30#include <linux/security.h>
31#include <linux/smp_lock.h>
32#include <linux/compat.h>
33#include "ecryptfs_kernel.h"
34
35/**
36 * ecryptfs_llseek
37 * @file: File we are seeking in
38 * @offset: The offset to seek to
39 * @origin: 2 - offset from i_size; 1 - offset from f_pos
40 *
41 * Returns the position we have seeked to, or negative on error
42 */
43static loff_t ecryptfs_llseek(struct file *file, loff_t offset, int origin)
44{
45 loff_t rv;
46 loff_t new_end_pos;
47 int rc;
48 int expanding_file = 0;
49 struct inode *inode = file->f_mapping->host;
50
51 /* If our offset is past the end of our file, we're going to
52 * need to grow it so we have a valid length of 0's */
53 new_end_pos = offset;
54 switch (origin) {
55 case 2:
56 new_end_pos += i_size_read(inode);
57 expanding_file = 1;
58 break;
59 case 1:
60 new_end_pos += file->f_pos;
61 if (new_end_pos > i_size_read(inode)) {
62 ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) "
63 "> i_size_read(inode)(=[0x%.16x])\n",
64 new_end_pos, i_size_read(inode));
65 expanding_file = 1;
66 }
67 break;
68 default:
69 if (new_end_pos > i_size_read(inode)) {
70 ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) "
71 "> i_size_read(inode)(=[0x%.16x])\n",
72 new_end_pos, i_size_read(inode));
73 expanding_file = 1;
74 }
75 }
76 ecryptfs_printk(KERN_DEBUG, "new_end_pos = [0x%.16x]\n", new_end_pos);
77 if (expanding_file) {
78 rc = ecryptfs_truncate(file->f_dentry, new_end_pos);
79 if (rc) {
80 rv = rc;
81 ecryptfs_printk(KERN_ERR, "Error on attempt to "
82 "truncate to (higher) offset [0x%.16x];"
83 " rc = [%d]\n", new_end_pos, rc);
84 goto out;
85 }
86 }
87 rv = generic_file_llseek(file, offset, origin);
88out:
89 return rv;
90}
91
92/**
93 * ecryptfs_read_update_atime
94 *
95 * generic_file_read updates the atime of upper layer inode. But, it
96 * doesn't give us a chance to update the atime of the lower layer
97 * inode. This function is a wrapper to generic_file_read. It
98 * updates the atime of the lower level inode if generic_file_read
99 * returns without any errors. This is to be used only for file reads.
100 * The function to be used for directory reads is ecryptfs_read.
101 */
102static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
103 const struct iovec *iov,
104 unsigned long nr_segs, loff_t pos)
105{
106 int rc;
107 struct dentry *lower_dentry;
108 struct vfsmount *lower_vfsmount;
109 struct file *file = iocb->ki_filp;
110
111 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
112 /*
113 * Even though this is a async interface, we need to wait
114 * for IO to finish to update atime
115 */
116 if (-EIOCBQUEUED == rc)
117 rc = wait_on_sync_kiocb(iocb);
118 if (rc >= 0) {
119 lower_dentry = ecryptfs_dentry_to_lower(file->f_dentry);
120 lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_dentry);
121 touch_atime(lower_vfsmount, lower_dentry);
122 }
123 return rc;
124}
125
126struct ecryptfs_getdents_callback {
127 void *dirent;
128 struct dentry *dentry;
129 filldir_t filldir;
130 int err;
131 int filldir_called;
132 int entries_written;
133};
134
135/* Inspired by generic filldir in fs/readir.c */
136static int
137ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset,
138 u64 ino, unsigned int d_type)
139{
140 struct ecryptfs_crypt_stat *crypt_stat;
141 struct ecryptfs_getdents_callback *buf =
142 (struct ecryptfs_getdents_callback *)dirent;
143 int rc;
144 int decoded_length;
145 char *decoded_name;
146
147 crypt_stat = ecryptfs_dentry_to_private(buf->dentry)->crypt_stat;
148 buf->filldir_called++;
149 decoded_length = ecryptfs_decode_filename(crypt_stat, name, namelen,
150 &decoded_name);
151 if (decoded_length < 0) {
152 rc = decoded_length;
153 goto out;
154 }
155 rc = buf->filldir(buf->dirent, decoded_name, decoded_length, offset,
156 ino, d_type);
157 kfree(decoded_name);
158 if (rc >= 0)
159 buf->entries_written++;
160out:
161 return rc;
162}
163
164/**
165 * ecryptfs_readdir
166 * @file: The ecryptfs file struct
167 * @dirent: Directory entry
168 * @filldir: The filldir callback function
169 */
170static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
171{
172 int rc;
173 struct file *lower_file;
174 struct inode *inode;
175 struct ecryptfs_getdents_callback buf;
176
177 lower_file = ecryptfs_file_to_lower(file);
178 lower_file->f_pos = file->f_pos;
179 inode = file->f_dentry->d_inode;
180 memset(&buf, 0, sizeof(buf));
181 buf.dirent = dirent;
182 buf.dentry = file->f_dentry;
183 buf.filldir = filldir;
184retry:
185 buf.filldir_called = 0;
186 buf.entries_written = 0;
187 buf.err = 0;
188 rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf);
189 if (buf.err)
190 rc = buf.err;
191 if (buf.filldir_called && !buf.entries_written)
192 goto retry;
193 file->f_pos = lower_file->f_pos;
194 if (rc >= 0)
195 ecryptfs_copy_attr_atime(inode, lower_file->f_dentry->d_inode);
196 return rc;
197}
198
/* Slab cache for the per-open struct ecryptfs_file_info; allocated
 * from in ecryptfs_open() and freed to in ecryptfs_release(). */
struct kmem_cache *ecryptfs_file_info_cache;
200
201/**
202 * ecryptfs_open
203 * @inode: inode speciying file to open
204 * @file: Structure to return filled in
205 *
206 * Opens the file specified by inode.
207 *
208 * Returns zero on success; non-zero otherwise
209 */
210static int ecryptfs_open(struct inode *inode, struct file *file)
211{
212 int rc = 0;
213 struct ecryptfs_crypt_stat *crypt_stat = NULL;
214 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
215 struct dentry *ecryptfs_dentry = file->f_dentry;
216 /* Private value of ecryptfs_dentry allocated in
217 * ecryptfs_lookup() */
218 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
219 struct inode *lower_inode = NULL;
220 struct file *lower_file = NULL;
221 struct vfsmount *lower_mnt;
222 struct ecryptfs_file_info *file_info;
223 int lower_flags;
224
225 /* Released in ecryptfs_release or end of function if failure */
226 file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL);
227 ecryptfs_set_file_private(file, file_info);
228 if (!file_info) {
229 ecryptfs_printk(KERN_ERR,
230 "Error attempting to allocate memory\n");
231 rc = -ENOMEM;
232 goto out;
233 }
234 memset(file_info, 0, sizeof(*file_info));
235 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
236 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
237 mount_crypt_stat = &ecryptfs_superblock_to_private(
238 ecryptfs_dentry->d_sb)->mount_crypt_stat;
239 mutex_lock(&crypt_stat->cs_mutex);
240 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) {
241 ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n");
242 /* Policy code enabled in future release */
243 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED);
244 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
245 }
246 mutex_unlock(&crypt_stat->cs_mutex);
247 /* This mntget & dget is undone via fput when the file is released */
248 dget(lower_dentry);
249 lower_flags = file->f_flags;
250 if ((lower_flags & O_ACCMODE) == O_WRONLY)
251 lower_flags = (lower_flags & O_ACCMODE) | O_RDWR;
252 if (file->f_flags & O_APPEND)
253 lower_flags &= ~O_APPEND;
254 lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
255 mntget(lower_mnt);
256 /* Corresponding fput() in ecryptfs_release() */
257 lower_file = dentry_open(lower_dentry, lower_mnt, lower_flags);
258 if (IS_ERR(lower_file)) {
259 rc = PTR_ERR(lower_file);
260 ecryptfs_printk(KERN_ERR, "Error opening lower file\n");
261 goto out_puts;
262 }
263 ecryptfs_set_file_lower(file, lower_file);
264 /* Isn't this check the same as the one in lookup? */
265 lower_inode = lower_dentry->d_inode;
266 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
267 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
268 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
269 rc = 0;
270 goto out;
271 }
272 mutex_lock(&crypt_stat->cs_mutex);
273 if (i_size_read(lower_inode) < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
274 if (!(mount_crypt_stat->flags
275 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
276 rc = -EIO;
277 printk(KERN_WARNING "Attempt to read file that is "
278 "not in a valid eCryptfs format, and plaintext "
279 "passthrough mode is not enabled; returning "
280 "-EIO\n");
281 mutex_unlock(&crypt_stat->cs_mutex);
282 goto out_puts;
283 }
284 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
285 rc = 0;
286 mutex_unlock(&crypt_stat->cs_mutex);
287 goto out;
288 } else if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
289 ECRYPTFS_POLICY_APPLIED)
290 || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
291 ECRYPTFS_KEY_VALID)) {
292 rc = ecryptfs_read_headers(ecryptfs_dentry, lower_file);
293 if (rc) {
294 ecryptfs_printk(KERN_DEBUG,
295 "Valid headers not found\n");
296 if (!(mount_crypt_stat->flags
297 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
298 rc = -EIO;
299 printk(KERN_WARNING "Attempt to read file that "
300 "is not in a valid eCryptfs format, "
301 "and plaintext passthrough mode is not "
302 "enabled; returning -EIO\n");
303 mutex_unlock(&crypt_stat->cs_mutex);
304 goto out_puts;
305 }
306 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
307 ECRYPTFS_ENCRYPTED);
308 rc = 0;
309 mutex_unlock(&crypt_stat->cs_mutex);
310 goto out;
311 }
312 }
313 mutex_unlock(&crypt_stat->cs_mutex);
314 ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] "
315 "size: [0x%.16x]\n", inode, inode->i_ino,
316 i_size_read(inode));
317 ecryptfs_set_file_lower(file, lower_file);
318 goto out;
319out_puts:
320 mntput(lower_mnt);
321 dput(lower_dentry);
322 kmem_cache_free(ecryptfs_file_info_cache,
323 ecryptfs_file_to_private(file));
324out:
325 return rc;
326}
327
328static int ecryptfs_flush(struct file *file, fl_owner_t td)
329{
330 int rc = 0;
331 struct file *lower_file = NULL;
332
333 lower_file = ecryptfs_file_to_lower(file);
334 if (lower_file->f_op && lower_file->f_op->flush)
335 rc = lower_file->f_op->flush(lower_file, td);
336 return rc;
337}
338
339static int ecryptfs_release(struct inode *inode, struct file *file)
340{
341 struct file *lower_file = ecryptfs_file_to_lower(file);
342 struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file);
343 struct inode *lower_inode = ecryptfs_inode_to_lower(inode);
344
345 fput(lower_file);
346 inode->i_blocks = lower_inode->i_blocks;
347 kmem_cache_free(ecryptfs_file_info_cache, file_info);
348 return 0;
349}
350
351static int
352ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync)
353{
354 struct file *lower_file = ecryptfs_file_to_lower(file);
355 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
356 struct inode *lower_inode = lower_dentry->d_inode;
357 int rc = -EINVAL;
358
359 if (lower_inode->i_fop->fsync) {
360 mutex_lock(&lower_inode->i_mutex);
361 rc = lower_inode->i_fop->fsync(lower_file, lower_dentry,
362 datasync);
363 mutex_unlock(&lower_inode->i_mutex);
364 }
365 return rc;
366}
367
368static int ecryptfs_fasync(int fd, struct file *file, int flag)
369{
370 int rc = 0;
371 struct file *lower_file = NULL;
372
373 lower_file = ecryptfs_file_to_lower(file);
374 if (lower_file->f_op && lower_file->f_op->fasync)
375 rc = lower_file->f_op->fasync(fd, lower_file, flag);
376 return rc;
377}
378
379static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos,
380 size_t count, read_actor_t actor, void *target)
381{
382 struct file *lower_file = NULL;
383 int rc = -EINVAL;
384
385 lower_file = ecryptfs_file_to_lower(file);
386 if (lower_file->f_op && lower_file->f_op->sendfile)
387 rc = lower_file->f_op->sendfile(lower_file, ppos, count,
388 actor, target);
389
390 return rc;
391}
392
393static int ecryptfs_ioctl(struct inode *inode, struct file *file,
394 unsigned int cmd, unsigned long arg);
395
396const struct file_operations ecryptfs_dir_fops = {
397 .readdir = ecryptfs_readdir,
398 .ioctl = ecryptfs_ioctl,
399 .mmap = generic_file_mmap,
400 .open = ecryptfs_open,
401 .flush = ecryptfs_flush,
402 .release = ecryptfs_release,
403 .fsync = ecryptfs_fsync,
404 .fasync = ecryptfs_fasync,
405 .sendfile = ecryptfs_sendfile,
406};
407
408const struct file_operations ecryptfs_main_fops = {
409 .llseek = ecryptfs_llseek,
410 .read = do_sync_read,
411 .aio_read = ecryptfs_read_update_atime,
412 .write = do_sync_write,
413 .aio_write = generic_file_aio_write,
414 .readdir = ecryptfs_readdir,
415 .ioctl = ecryptfs_ioctl,
416 .mmap = generic_file_mmap,
417 .open = ecryptfs_open,
418 .flush = ecryptfs_flush,
419 .release = ecryptfs_release,
420 .fsync = ecryptfs_fsync,
421 .fasync = ecryptfs_fasync,
422 .sendfile = ecryptfs_sendfile,
423};
424
425static int
426ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
427 unsigned long arg)
428{
429 int rc = 0;
430 struct file *lower_file = NULL;
431
432 if (ecryptfs_file_to_private(file))
433 lower_file = ecryptfs_file_to_lower(file);
434 if (lower_file && lower_file->f_op && lower_file->f_op->ioctl)
435 rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode),
436 lower_file, cmd, arg);
437 else
438 rc = -ENOTTY;
439 return rc;
440}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
new file mode 100644
index 000000000000..efdd2b7b62d7
--- /dev/null
+++ b/fs/ecryptfs/inode.c
@@ -0,0 +1,1079 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2004 Erez Zadok
5 * Copyright (C) 2001-2004 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#include <linux/file.h>
27#include <linux/vmalloc.h>
28#include <linux/pagemap.h>
29#include <linux/dcache.h>
30#include <linux/namei.h>
31#include <linux/mount.h>
32#include <linux/crypto.h>
33#include "ecryptfs_kernel.h"
34
35static struct dentry *lock_parent(struct dentry *dentry)
36{
37 struct dentry *dir;
38
39 dir = dget(dentry->d_parent);
40 mutex_lock(&(dir->d_inode->i_mutex));
41 return dir;
42}
43
44static void unlock_parent(struct dentry *dentry)
45{
46 mutex_unlock(&(dentry->d_parent->d_inode->i_mutex));
47 dput(dentry->d_parent);
48}
49
50static void unlock_dir(struct dentry *dir)
51{
52 mutex_unlock(&dir->d_inode->i_mutex);
53 dput(dir);
54}
55
56void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src)
57{
58 i_size_write(dst, i_size_read((struct inode *)src));
59 dst->i_blocks = src->i_blocks;
60}
61
62void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src)
63{
64 dest->i_atime = src->i_atime;
65}
66
67static void ecryptfs_copy_attr_times(struct inode *dest,
68 const struct inode *src)
69{
70 dest->i_atime = src->i_atime;
71 dest->i_mtime = src->i_mtime;
72 dest->i_ctime = src->i_ctime;
73}
74
/* Copy the three timestamps plus size and block count from @src to @dest. */
static void ecryptfs_copy_attr_timesizes(struct inode *dest,
					 const struct inode *src)
{
	ecryptfs_copy_attr_times(dest, src);
	ecryptfs_copy_inode_size(dest, src);
}
83
84void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src)
85{
86 dest->i_mode = src->i_mode;
87 dest->i_nlink = src->i_nlink;
88 dest->i_uid = src->i_uid;
89 dest->i_gid = src->i_gid;
90 dest->i_rdev = src->i_rdev;
91 dest->i_atime = src->i_atime;
92 dest->i_mtime = src->i_mtime;
93 dest->i_ctime = src->i_ctime;
94 dest->i_blkbits = src->i_blkbits;
95 dest->i_flags = src->i_flags;
96}
97
98/**
99 * ecryptfs_create_underlying_file
100 * @lower_dir_inode: inode of the parent in the lower fs of the new file
101 * @lower_dentry: New file's dentry in the lower fs
102 * @ecryptfs_dentry: New file's dentry in ecryptfs
103 * @mode: The mode of the new file
104 * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
105 *
106 * Creates the file in the lower file system.
107 *
108 * Returns zero on success; non-zero on error condition
109 */
110static int
111ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
112 struct dentry *dentry, int mode,
113 struct nameidata *nd)
114{
115 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
116 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
117 struct dentry *dentry_save;
118 struct vfsmount *vfsmount_save;
119 int rc;
120
121 dentry_save = nd->dentry;
122 vfsmount_save = nd->mnt;
123 nd->dentry = lower_dentry;
124 nd->mnt = lower_mnt;
125 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
126 nd->dentry = dentry_save;
127 nd->mnt = vfsmount_save;
128 return rc;
129}
130
131/**
132 * ecryptfs_do_create
133 * @directory_inode: inode of the new file's dentry's parent in ecryptfs
134 * @ecryptfs_dentry: New file's dentry in ecryptfs
135 * @mode: The mode of the new file
136 * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
137 *
138 * Creates the underlying file and the eCryptfs inode which will link to
139 * it. It will also update the eCryptfs directory inode to mimic the
140 * stat of the lower directory inode.
141 *
142 * Returns zero on success; non-zero on error condition
143 */
144static int
145ecryptfs_do_create(struct inode *directory_inode,
146 struct dentry *ecryptfs_dentry, int mode,
147 struct nameidata *nd)
148{
149 int rc;
150 struct dentry *lower_dentry;
151 struct dentry *lower_dir_dentry;
152
153 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
154 lower_dir_dentry = lock_parent(lower_dentry);
155 if (unlikely(IS_ERR(lower_dir_dentry))) {
156 ecryptfs_printk(KERN_ERR, "Error locking directory of "
157 "dentry\n");
158 rc = PTR_ERR(lower_dir_dentry);
159 goto out;
160 }
161 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
162 ecryptfs_dentry, mode, nd);
163 if (unlikely(rc)) {
164 ecryptfs_printk(KERN_ERR,
165 "Failure to create underlying file\n");
166 goto out_lock;
167 }
168 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
169 directory_inode->i_sb, 0);
170 if (rc) {
171 ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n");
172 goto out_lock;
173 }
174 ecryptfs_copy_attr_timesizes(directory_inode,
175 lower_dir_dentry->d_inode);
176out_lock:
177 unlock_dir(lower_dir_dentry);
178out:
179 return rc;
180}
181
182/**
183 * grow_file
184 * @ecryptfs_dentry: the ecryptfs dentry
185 * @lower_file: The lower file
186 * @inode: The ecryptfs inode
187 * @lower_inode: The lower inode
188 *
189 * This is the code which will grow the file to its correct size.
190 */
191static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file,
192 struct inode *inode, struct inode *lower_inode)
193{
194 int rc = 0;
195 struct file fake_file;
196 struct ecryptfs_file_info tmp_file_info;
197
198 memset(&fake_file, 0, sizeof(fake_file));
199 fake_file.f_dentry = ecryptfs_dentry;
200 memset(&tmp_file_info, 0, sizeof(tmp_file_info));
201 ecryptfs_set_file_private(&fake_file, &tmp_file_info);
202 ecryptfs_set_file_lower(&fake_file, lower_file);
203 rc = ecryptfs_fill_zeros(&fake_file, 1);
204 if (rc) {
205 ECRYPTFS_SET_FLAG(
206 ecryptfs_inode_to_private(inode)->crypt_stat.flags,
207 ECRYPTFS_SECURITY_WARNING);
208 ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros "
209 "in file; rc = [%d]\n", rc);
210 goto out;
211 }
212 i_size_write(inode, 0);
213 ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode);
214 ECRYPTFS_SET_FLAG(ecryptfs_inode_to_private(inode)->crypt_stat.flags,
215 ECRYPTFS_NEW_FILE);
216out:
217 return rc;
218}
219
220/**
221 * ecryptfs_initialize_file
222 *
223 * Cause the file to be changed from a basic empty file to an ecryptfs
224 * file with a header and first data page.
225 *
226 * Returns zero on success
227 */
228static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
229{
230 int rc = 0;
231 int lower_flags;
232 struct ecryptfs_crypt_stat *crypt_stat;
233 struct dentry *lower_dentry;
234 struct dentry *tlower_dentry = NULL;
235 struct file *lower_file;
236 struct inode *inode, *lower_inode;
237 struct vfsmount *lower_mnt;
238
239 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
240 ecryptfs_printk(KERN_DEBUG, "lower_dentry->d_name.name = [%s]\n",
241 lower_dentry->d_name.name);
242 inode = ecryptfs_dentry->d_inode;
243 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
244 tlower_dentry = dget(lower_dentry);
245 if (!tlower_dentry) {
246 rc = -ENOMEM;
247 ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry\n");
248 goto out;
249 }
250 lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR;
251#if BITS_PER_LONG != 32
252 lower_flags |= O_LARGEFILE;
253#endif
254 lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
255 mntget(lower_mnt);
256 /* Corresponding fput() at end of this function */
257 lower_file = dentry_open(tlower_dentry, lower_mnt, lower_flags);
258 if (IS_ERR(lower_file)) {
259 rc = PTR_ERR(lower_file);
260 ecryptfs_printk(KERN_ERR,
261 "Error opening dentry; rc = [%i]\n", rc);
262 goto out;
263 }
264 /* fput(lower_file) should handle the puts if we do this */
265 lower_file->f_dentry = tlower_dentry;
266 lower_file->f_vfsmnt = lower_mnt;
267 lower_inode = tlower_dentry->d_inode;
268 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
269 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
270 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
271 goto out_fput;
272 }
273 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
274 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
275 rc = ecryptfs_new_file_context(ecryptfs_dentry);
276 if (rc) {
277 ecryptfs_printk(KERN_DEBUG, "Error creating new file "
278 "context\n");
279 goto out_fput;
280 }
281 rc = ecryptfs_write_headers(ecryptfs_dentry, lower_file);
282 if (rc) {
283 ecryptfs_printk(KERN_DEBUG, "Error writing headers\n");
284 goto out_fput;
285 }
286 rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode);
287out_fput:
288 fput(lower_file);
289out:
290 return rc;
291}
292
293/**
294 * ecryptfs_create
295 * @dir: The inode of the directory in which to create the file.
296 * @dentry: The eCryptfs dentry
297 * @mode: The mode of the new file.
298 * @nd: nameidata
299 *
300 * Creates a new file.
301 *
302 * Returns zero on success; non-zero on error condition
303 */
304static int
305ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
306 int mode, struct nameidata *nd)
307{
308 int rc;
309
310 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd);
311 if (unlikely(rc)) {
312 ecryptfs_printk(KERN_WARNING, "Failed to create file in"
313 "lower filesystem\n");
314 goto out;
315 }
316 /* At this point, a file exists on "disk"; we need to make sure
317 * that this on disk file is prepared to be an ecryptfs file */
318 rc = ecryptfs_initialize_file(ecryptfs_dentry);
319out:
320 return rc;
321}
322
323/**
324 * ecryptfs_lookup
325 * @dir: inode
326 * @dentry: The dentry
327 * @nd: nameidata, may be NULL
328 *
329 * Find a file on disk. If the file does not exist, then we'll add it to the
330 * dentry cache and continue on to read it from the disk.
331 */
332static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
333 struct nameidata *nd)
334{
335 int rc = 0;
336 struct dentry *lower_dir_dentry;
337 struct dentry *lower_dentry;
338 struct vfsmount *lower_mnt;
339 struct dentry *tlower_dentry = NULL;
340 char *encoded_name;
341 unsigned int encoded_namelen;
342 struct ecryptfs_crypt_stat *crypt_stat = NULL;
343 char *page_virt = NULL;
344 struct inode *lower_inode;
345 u64 file_size;
346
347 lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
348 dentry->d_op = &ecryptfs_dops;
349 if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, "."))
350 || (dentry->d_name.len == 2 && !strcmp(dentry->d_name.name, "..")))
351 goto out_drop;
352 encoded_namelen = ecryptfs_encode_filename(crypt_stat,
353 dentry->d_name.name,
354 dentry->d_name.len,
355 &encoded_name);
356 if (encoded_namelen < 0) {
357 rc = encoded_namelen;
358 goto out_drop;
359 }
360 ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
361 "= [%d]\n", encoded_name, encoded_namelen);
362 lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
363 encoded_namelen - 1);
364 kfree(encoded_name);
365 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
366 if (IS_ERR(lower_dentry)) {
367 ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
368 rc = PTR_ERR(lower_dentry);
369 goto out_drop;
370 }
371 ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->"
372 "d_name.name = [%s]\n", lower_dentry,
373 lower_dentry->d_name.name);
374 lower_inode = lower_dentry->d_inode;
375 ecryptfs_copy_attr_atime(dir, lower_dir_dentry->d_inode);
376 BUG_ON(!atomic_read(&lower_dentry->d_count));
377 ecryptfs_set_dentry_private(dentry,
378 kmem_cache_alloc(ecryptfs_dentry_info_cache,
379 SLAB_KERNEL));
380 if (!ecryptfs_dentry_to_private(dentry)) {
381 rc = -ENOMEM;
382 ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting "
383 "to allocate ecryptfs_dentry_info struct\n");
384 goto out_dput;
385 }
386 ecryptfs_set_dentry_lower(dentry, lower_dentry);
387 ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
388 if (!lower_dentry->d_inode) {
389 /* We want to add because we couldn't find in lower */
390 d_add(dentry, NULL);
391 goto out;
392 }
393 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1);
394 if (rc) {
395 ecryptfs_printk(KERN_ERR, "Error interposing\n");
396 goto out_dput;
397 }
398 if (S_ISDIR(lower_inode->i_mode)) {
399 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
400 goto out;
401 }
402 if (S_ISLNK(lower_inode->i_mode)) {
403 ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n");
404 goto out;
405 }
406 if (!nd) {
407 ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave"
408 "as we *think* we are about to unlink\n");
409 goto out;
410 }
411 tlower_dentry = dget(lower_dentry);
412 if (!tlower_dentry || IS_ERR(tlower_dentry)) {
413 rc = -ENOMEM;
414 ecryptfs_printk(KERN_ERR, "Cannot dget lower_dentry\n");
415 goto out_dput;
416 }
417 /* Released in this function */
418 page_virt =
419 (char *)kmem_cache_alloc(ecryptfs_header_cache_2,
420 SLAB_USER);
421 if (!page_virt) {
422 rc = -ENOMEM;
423 ecryptfs_printk(KERN_ERR,
424 "Cannot ecryptfs_kmalloc a page\n");
425 goto out_dput;
426 }
427 memset(page_virt, 0, PAGE_CACHE_SIZE);
428 rc = ecryptfs_read_header_region(page_virt, tlower_dentry, nd->mnt);
429 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
430 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED))
431 ecryptfs_set_default_sizes(crypt_stat);
432 if (rc) {
433 rc = 0;
434 ecryptfs_printk(KERN_WARNING, "Error reading header region;"
435 " assuming unencrypted\n");
436 } else {
437 if (!contains_ecryptfs_marker(page_virt
438 + ECRYPTFS_FILE_SIZE_BYTES)) {
439 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
440 goto out;
441 }
442 memcpy(&file_size, page_virt, sizeof(file_size));
443 file_size = be64_to_cpu(file_size);
444 i_size_write(dentry->d_inode, (loff_t)file_size);
445 }
446 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
447 goto out;
448
449out_dput:
450 dput(lower_dentry);
451 if (tlower_dentry)
452 dput(tlower_dentry);
453out_drop:
454 d_drop(dentry);
455out:
456 return ERR_PTR(rc);
457}
458
459static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
460 struct dentry *new_dentry)
461{
462 struct dentry *lower_old_dentry;
463 struct dentry *lower_new_dentry;
464 struct dentry *lower_dir_dentry;
465 u64 file_size_save;
466 int rc;
467
468 file_size_save = i_size_read(old_dentry->d_inode);
469 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
470 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
471 dget(lower_old_dentry);
472 dget(lower_new_dentry);
473 lower_dir_dentry = lock_parent(lower_new_dentry);
474 rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
475 lower_new_dentry);
476 if (rc || !lower_new_dentry->d_inode)
477 goto out_lock;
478 rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0);
479 if (rc)
480 goto out_lock;
481 ecryptfs_copy_attr_timesizes(dir, lower_new_dentry->d_inode);
482 old_dentry->d_inode->i_nlink =
483 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink;
484 i_size_write(new_dentry->d_inode, file_size_save);
485out_lock:
486 unlock_dir(lower_dir_dentry);
487 dput(lower_new_dentry);
488 dput(lower_old_dentry);
489 if (!new_dentry->d_inode)
490 d_drop(new_dentry);
491 return rc;
492}
493
494static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
495{
496 int rc = 0;
497 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
498 struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
499
500 lock_parent(lower_dentry);
501 rc = vfs_unlink(lower_dir_inode, lower_dentry);
502 if (rc) {
503 ecryptfs_printk(KERN_ERR, "Error in vfs_unlink\n");
504 goto out_unlock;
505 }
506 ecryptfs_copy_attr_times(dir, lower_dir_inode);
507 dentry->d_inode->i_nlink =
508 ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink;
509 dentry->d_inode->i_ctime = dir->i_ctime;
510out_unlock:
511 unlock_parent(lower_dentry);
512 return rc;
513}
514
515static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
516 const char *symname)
517{
518 int rc;
519 struct dentry *lower_dentry;
520 struct dentry *lower_dir_dentry;
521 umode_t mode;
522 char *encoded_symname;
523 unsigned int encoded_symlen;
524 struct ecryptfs_crypt_stat *crypt_stat = NULL;
525
526 lower_dentry = ecryptfs_dentry_to_lower(dentry);
527 dget(lower_dentry);
528 lower_dir_dentry = lock_parent(lower_dentry);
529 mode = S_IALLUGO;
530 encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname,
531 strlen(symname),
532 &encoded_symname);
533 if (encoded_symlen < 0) {
534 rc = encoded_symlen;
535 goto out_lock;
536 }
537 rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
538 encoded_symname, mode);
539 kfree(encoded_symname);
540 if (rc || !lower_dentry->d_inode)
541 goto out_lock;
542 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
543 if (rc)
544 goto out_lock;
545 ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
546out_lock:
547 unlock_dir(lower_dir_dentry);
548 dput(lower_dentry);
549 if (!dentry->d_inode)
550 d_drop(dentry);
551 return rc;
552}
553
554static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
555{
556 int rc;
557 struct dentry *lower_dentry;
558 struct dentry *lower_dir_dentry;
559
560 lower_dentry = ecryptfs_dentry_to_lower(dentry);
561 lower_dir_dentry = lock_parent(lower_dentry);
562 rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode);
563 if (rc || !lower_dentry->d_inode)
564 goto out;
565 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
566 if (rc)
567 goto out;
568 ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
569 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
570out:
571 unlock_dir(lower_dir_dentry);
572 if (!dentry->d_inode)
573 d_drop(dentry);
574 return rc;
575}
576
577static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
578{
579 int rc = 0;
580 struct dentry *tdentry = NULL;
581 struct dentry *lower_dentry;
582 struct dentry *tlower_dentry = NULL;
583 struct dentry *lower_dir_dentry;
584
585 lower_dentry = ecryptfs_dentry_to_lower(dentry);
586 if (!(tdentry = dget(dentry))) {
587 rc = -EINVAL;
588 ecryptfs_printk(KERN_ERR, "Error dget'ing dentry [%p]\n",
589 dentry);
590 goto out;
591 }
592 lower_dir_dentry = lock_parent(lower_dentry);
593 if (!(tlower_dentry = dget(lower_dentry))) {
594 rc = -EINVAL;
595 ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry "
596 "[%p]\n", lower_dentry);
597 goto out;
598 }
599 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
600 if (!rc) {
601 d_delete(tlower_dentry);
602 tlower_dentry = NULL;
603 }
604 ecryptfs_copy_attr_times(dir, lower_dir_dentry->d_inode);
605 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
606 unlock_dir(lower_dir_dentry);
607 if (!rc)
608 d_drop(dentry);
609out:
610 if (tdentry)
611 dput(tdentry);
612 if (tlower_dentry)
613 dput(tlower_dentry);
614 return rc;
615}
616
617static int
618ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
619{
620 int rc;
621 struct dentry *lower_dentry;
622 struct dentry *lower_dir_dentry;
623
624 lower_dentry = ecryptfs_dentry_to_lower(dentry);
625 lower_dir_dentry = lock_parent(lower_dentry);
626 rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev);
627 if (rc || !lower_dentry->d_inode)
628 goto out;
629 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
630 if (rc)
631 goto out;
632 ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
633out:
634 unlock_dir(lower_dir_dentry);
635 if (!dentry->d_inode)
636 d_drop(dentry);
637 return rc;
638}
639
640static int
641ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
642 struct inode *new_dir, struct dentry *new_dentry)
643{
644 int rc;
645 struct dentry *lower_old_dentry;
646 struct dentry *lower_new_dentry;
647 struct dentry *lower_old_dir_dentry;
648 struct dentry *lower_new_dir_dentry;
649
650 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
651 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
652 dget(lower_old_dentry);
653 dget(lower_new_dentry);
654 lower_old_dir_dentry = dget_parent(lower_old_dentry);
655 lower_new_dir_dentry = dget_parent(lower_new_dentry);
656 lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
657 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
658 lower_new_dir_dentry->d_inode, lower_new_dentry);
659 if (rc)
660 goto out_lock;
661 ecryptfs_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
662 if (new_dir != old_dir)
663 ecryptfs_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
664out_lock:
665 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
666 dput(lower_new_dentry);
667 dput(lower_old_dentry);
668 return rc;
669}
670
671static int
672ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz)
673{
674 int rc;
675 struct dentry *lower_dentry;
676 char *decoded_name;
677 char *lower_buf;
678 mm_segment_t old_fs;
679 struct ecryptfs_crypt_stat *crypt_stat;
680
681 lower_dentry = ecryptfs_dentry_to_lower(dentry);
682 if (!lower_dentry->d_inode->i_op ||
683 !lower_dentry->d_inode->i_op->readlink) {
684 rc = -EINVAL;
685 goto out;
686 }
687 /* Released in this function */
688 lower_buf = kmalloc(bufsiz, GFP_KERNEL);
689 if (lower_buf == NULL) {
690 ecryptfs_printk(KERN_ERR, "Out of memory\n");
691 rc = -ENOMEM;
692 goto out;
693 }
694 old_fs = get_fs();
695 set_fs(get_ds());
696 ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
697 "lower_dentry->d_name.name = [%s]\n",
698 lower_dentry->d_name.name);
699 rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
700 (char __user *)lower_buf,
701 bufsiz);
702 set_fs(old_fs);
703 if (rc >= 0) {
704 crypt_stat = NULL;
705 rc = ecryptfs_decode_filename(crypt_stat, lower_buf, rc,
706 &decoded_name);
707 if (rc == -ENOMEM)
708 goto out_free_lower_buf;
709 if (rc > 0) {
710 ecryptfs_printk(KERN_DEBUG, "Copying [%d] bytes "
711 "to userspace: [%*s]\n", rc,
712 decoded_name);
713 if (copy_to_user(buf, decoded_name, rc))
714 rc = -EFAULT;
715 }
716 kfree(decoded_name);
717 ecryptfs_copy_attr_atime(dentry->d_inode,
718 lower_dentry->d_inode);
719 }
720out_free_lower_buf:
721 kfree(lower_buf);
722out:
723 return rc;
724}
725
726static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
727{
728 char *buf;
729 int len = PAGE_SIZE, rc;
730 mm_segment_t old_fs;
731
732 /* Released in ecryptfs_put_link(); only release here on error */
733 buf = kmalloc(len, GFP_KERNEL);
734 if (!buf) {
735 rc = -ENOMEM;
736 goto out;
737 }
738 old_fs = get_fs();
739 set_fs(get_ds());
740 ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
741 "dentry->d_name.name = [%s]\n", dentry->d_name.name);
742 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
743 buf[rc] = '\0';
744 set_fs(old_fs);
745 if (rc < 0)
746 goto out_free;
747 rc = 0;
748 nd_set_link(nd, buf);
749 goto out;
750out_free:
751 kfree(buf);
752out:
753 return ERR_PTR(rc);
754}
755
/*
 * Free the link string that was registered on @nd (the buffer handed to
 * nd_set_link() by ecryptfs_follow_link()). @dentry and @ptr are unused.
 */
static void
ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
{
	char *link = nd_get_link(nd);

	kfree(link);
}
762
763/**
764 * upper_size_to_lower_size
765 * @crypt_stat: Crypt_stat associated with file
766 * @upper_size: Size of the upper file
767 *
768 * Calculate the requried size of the lower file based on the
769 * specified size of the upper file. This calculation is based on the
770 * number of headers in the underlying file and the extent size.
771 *
772 * Returns Calculated size of the lower file.
773 */
774static loff_t
775upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
776 loff_t upper_size)
777{
778 loff_t lower_size;
779
780 lower_size = ( crypt_stat->header_extent_size
781 * crypt_stat->num_header_extents_at_front );
782 if (upper_size != 0) {
783 loff_t num_extents;
784
785 num_extents = upper_size >> crypt_stat->extent_shift;
786 if (upper_size & ~crypt_stat->extent_mask)
787 num_extents++;
788 lower_size += (num_extents * crypt_stat->extent_size);
789 }
790 return lower_size;
791}
792
793/**
794 * ecryptfs_truncate
795 * @dentry: The ecryptfs layer dentry
796 * @new_length: The length to expand the file to
797 *
798 * Function to handle truncations modifying the size of the file. Note
799 * that the file sizes are interpolated. When expanding, we are simply
800 * writing strings of 0's out. When truncating, we need to modify the
801 * underlying file size according to the page index interpolations.
802 *
803 * Returns zero on success; non-zero otherwise
804 */
805int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
806{
807 int rc = 0;
808 struct inode *inode = dentry->d_inode;
809 struct dentry *lower_dentry;
810 struct vfsmount *lower_mnt;
811 struct file fake_ecryptfs_file, *lower_file = NULL;
812 struct ecryptfs_crypt_stat *crypt_stat;
813 loff_t i_size = i_size_read(inode);
814 loff_t lower_size_before_truncate;
815 loff_t lower_size_after_truncate;
816
817 if (unlikely((new_length == i_size)))
818 goto out;
819 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
820 /* Set up a fake ecryptfs file, this is used to interface with
821 * the file in the underlying filesystem so that the
822 * truncation has an effect there as well. */
823 memset(&fake_ecryptfs_file, 0, sizeof(fake_ecryptfs_file));
824 fake_ecryptfs_file.f_dentry = dentry;
825 /* Released at out_free: label */
826 ecryptfs_set_file_private(&fake_ecryptfs_file,
827 kmem_cache_alloc(ecryptfs_file_info_cache,
828 SLAB_KERNEL));
829 if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) {
830 rc = -ENOMEM;
831 goto out;
832 }
833 lower_dentry = ecryptfs_dentry_to_lower(dentry);
834 /* This dget & mntget is released through fput at out_fput: */
835 dget(lower_dentry);
836 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
837 mntget(lower_mnt);
838 lower_file = dentry_open(lower_dentry, lower_mnt, O_RDWR);
839 if (unlikely(IS_ERR(lower_file))) {
840 rc = PTR_ERR(lower_file);
841 goto out_free;
842 }
843 ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file);
844 /* Switch on growing or shrinking file */
845 if (new_length > i_size) {
846 rc = ecryptfs_fill_zeros(&fake_ecryptfs_file, new_length);
847 if (rc) {
848 ecryptfs_printk(KERN_ERR,
849 "Problem with fill_zeros\n");
850 goto out_fput;
851 }
852 i_size_write(inode, new_length);
853 rc = ecryptfs_write_inode_size_to_header(lower_file,
854 lower_dentry->d_inode,
855 inode);
856 if (rc) {
857 ecryptfs_printk(KERN_ERR,
858 "Problem with ecryptfs_write"
859 "_inode_size\n");
860 goto out_fput;
861 }
862 } else { /* new_length < i_size_read(inode) */
863 vmtruncate(inode, new_length);
864 ecryptfs_write_inode_size_to_header(lower_file,
865 lower_dentry->d_inode,
866 inode);
867 /* We are reducing the size of the ecryptfs file, and need to
868 * know if we need to reduce the size of the lower file. */
869 lower_size_before_truncate =
870 upper_size_to_lower_size(crypt_stat, i_size);
871 lower_size_after_truncate =
872 upper_size_to_lower_size(crypt_stat, new_length);
873 if (lower_size_after_truncate < lower_size_before_truncate)
874 vmtruncate(lower_dentry->d_inode,
875 lower_size_after_truncate);
876 }
877 /* Update the access times */
878 lower_dentry->d_inode->i_mtime = lower_dentry->d_inode->i_ctime
879 = CURRENT_TIME;
880 mark_inode_dirty_sync(inode);
881out_fput:
882 fput(lower_file);
883out_free:
884 if (ecryptfs_file_to_private(&fake_ecryptfs_file))
885 kmem_cache_free(ecryptfs_file_info_cache,
886 ecryptfs_file_to_private(&fake_ecryptfs_file));
887out:
888 return rc;
889}
890
891static int
892ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd)
893{
894 int rc;
895
896 if (nd) {
897 struct vfsmount *vfsmnt_save = nd->mnt;
898 struct dentry *dentry_save = nd->dentry;
899
900 nd->mnt = ecryptfs_dentry_to_lower_mnt(nd->dentry);
901 nd->dentry = ecryptfs_dentry_to_lower(nd->dentry);
902 rc = permission(ecryptfs_inode_to_lower(inode), mask, nd);
903 nd->mnt = vfsmnt_save;
904 nd->dentry = dentry_save;
905 } else
906 rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL);
907 return rc;
908}
909
910/**
911 * ecryptfs_setattr
912 * @dentry: dentry handle to the inode to modify
913 * @ia: Structure with flags of what to change and values
914 *
915 * Updates the metadata of an inode. If the update is to the size
916 * i.e. truncation, then ecryptfs_truncate will handle the size modification
917 * of both the ecryptfs inode and the lower inode.
918 *
919 * All other metadata changes will be passed right to the lower filesystem,
920 * and we will just update our inode to look like the lower.
921 */
922static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
923{
924 int rc = 0;
925 struct dentry *lower_dentry;
926 struct inode *inode;
927 struct inode *lower_inode;
928 struct ecryptfs_crypt_stat *crypt_stat;
929
930 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
931 lower_dentry = ecryptfs_dentry_to_lower(dentry);
932 inode = dentry->d_inode;
933 lower_inode = ecryptfs_inode_to_lower(inode);
934 if (ia->ia_valid & ATTR_SIZE) {
935 ecryptfs_printk(KERN_DEBUG,
936 "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
937 ia->ia_valid, ATTR_SIZE);
938 rc = ecryptfs_truncate(dentry, ia->ia_size);
939 /* ecryptfs_truncate handles resizing of the lower file */
940 ia->ia_valid &= ~ATTR_SIZE;
941 ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n",
942 ia->ia_valid);
943 if (rc < 0)
944 goto out;
945 }
946 rc = notify_change(lower_dentry, ia);
947out:
948 ecryptfs_copy_attr_all(inode, lower_inode);
949 return rc;
950}
951
952static int
953ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
954 size_t size, int flags)
955{
956 int rc = 0;
957 struct dentry *lower_dentry;
958
959 lower_dentry = ecryptfs_dentry_to_lower(dentry);
960 if (!lower_dentry->d_inode->i_op->setxattr) {
961 rc = -ENOSYS;
962 goto out;
963 }
964 mutex_lock(&lower_dentry->d_inode->i_mutex);
965 rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value,
966 size, flags);
967 mutex_unlock(&lower_dentry->d_inode->i_mutex);
968out:
969 return rc;
970}
971
972static ssize_t
973ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
974 size_t size)
975{
976 int rc = 0;
977 struct dentry *lower_dentry;
978
979 lower_dentry = ecryptfs_dentry_to_lower(dentry);
980 if (!lower_dentry->d_inode->i_op->getxattr) {
981 rc = -ENOSYS;
982 goto out;
983 }
984 mutex_lock(&lower_dentry->d_inode->i_mutex);
985 rc = lower_dentry->d_inode->i_op->getxattr(lower_dentry, name, value,
986 size);
987 mutex_unlock(&lower_dentry->d_inode->i_mutex);
988out:
989 return rc;
990}
991
992static ssize_t
993ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
994{
995 int rc = 0;
996 struct dentry *lower_dentry;
997
998 lower_dentry = ecryptfs_dentry_to_lower(dentry);
999 if (!lower_dentry->d_inode->i_op->listxattr) {
1000 rc = -ENOSYS;
1001 goto out;
1002 }
1003 mutex_lock(&lower_dentry->d_inode->i_mutex);
1004 rc = lower_dentry->d_inode->i_op->listxattr(lower_dentry, list, size);
1005 mutex_unlock(&lower_dentry->d_inode->i_mutex);
1006out:
1007 return rc;
1008}
1009
1010static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
1011{
1012 int rc = 0;
1013 struct dentry *lower_dentry;
1014
1015 lower_dentry = ecryptfs_dentry_to_lower(dentry);
1016 if (!lower_dentry->d_inode->i_op->removexattr) {
1017 rc = -ENOSYS;
1018 goto out;
1019 }
1020 mutex_lock(&lower_dentry->d_inode->i_mutex);
1021 rc = lower_dentry->d_inode->i_op->removexattr(lower_dentry, name);
1022 mutex_unlock(&lower_dentry->d_inode->i_mutex);
1023out:
1024 return rc;
1025}
1026
/*
 * Returns 1 if the lower inode of @inode is @candidate_lower_inode,
 * 0 otherwise.
 */
int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode)
{
	struct inode *candidate = (struct inode *)candidate_lower_inode;

	return ecryptfs_inode_to_lower(inode) == candidate ? 1 : 0;
}
1035
/*
 * Initialise @inode from @lower_inode via ecryptfs_init_inode().
 * Always returns 0.
 */
int ecryptfs_inode_set(struct inode *inode, void *lower_inode)
{
	struct inode *lower = (struct inode *)lower_inode;

	ecryptfs_init_inode(inode, lower);
	return 0;
}
1041
1042struct inode_operations ecryptfs_symlink_iops = {
1043 .readlink = ecryptfs_readlink,
1044 .follow_link = ecryptfs_follow_link,
1045 .put_link = ecryptfs_put_link,
1046 .permission = ecryptfs_permission,
1047 .setattr = ecryptfs_setattr,
1048 .setxattr = ecryptfs_setxattr,
1049 .getxattr = ecryptfs_getxattr,
1050 .listxattr = ecryptfs_listxattr,
1051 .removexattr = ecryptfs_removexattr
1052};
1053
1054struct inode_operations ecryptfs_dir_iops = {
1055 .create = ecryptfs_create,
1056 .lookup = ecryptfs_lookup,
1057 .link = ecryptfs_link,
1058 .unlink = ecryptfs_unlink,
1059 .symlink = ecryptfs_symlink,
1060 .mkdir = ecryptfs_mkdir,
1061 .rmdir = ecryptfs_rmdir,
1062 .mknod = ecryptfs_mknod,
1063 .rename = ecryptfs_rename,
1064 .permission = ecryptfs_permission,
1065 .setattr = ecryptfs_setattr,
1066 .setxattr = ecryptfs_setxattr,
1067 .getxattr = ecryptfs_getxattr,
1068 .listxattr = ecryptfs_listxattr,
1069 .removexattr = ecryptfs_removexattr
1070};
1071
1072struct inode_operations ecryptfs_main_iops = {
1073 .permission = ecryptfs_permission,
1074 .setattr = ecryptfs_setattr,
1075 .setxattr = ecryptfs_setxattr,
1076 .getxattr = ecryptfs_getxattr,
1077 .listxattr = ecryptfs_listxattr,
1078 .removexattr = ecryptfs_removexattr
1079};
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
new file mode 100644
index 000000000000..ba454785a0c5
--- /dev/null
+++ b/fs/ecryptfs/keystore.c
@@ -0,0 +1,1061 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 * In-kernel key management code. Includes functions to parse and
4 * write authentication token-related packets with the underlying
5 * file.
6 *
7 * Copyright (C) 2004-2006 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
9 * Michael C. Thompson <mcthomps@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License as
13 * published by the Free Software Foundation; either version 2 of the
14 * License, or (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 * 02111-1307, USA.
25 */
26
27#include <linux/string.h>
28#include <linux/sched.h>
29#include <linux/syscalls.h>
30#include <linux/pagemap.h>
31#include <linux/key.h>
32#include <linux/random.h>
33#include <linux/crypto.h>
34#include <linux/scatterlist.h>
35#include "ecryptfs_kernel.h"
36
37/**
38 * request_key returned an error instead of a valid key address;
39 * determine the type of error, make appropriate log entries, and
40 * return an error code.
41 */
42int process_request_key_err(long err_code)
43{
44 int rc = 0;
45
46 switch (err_code) {
47 case ENOKEY:
48 ecryptfs_printk(KERN_WARNING, "No key\n");
49 rc = -ENOENT;
50 break;
51 case EKEYEXPIRED:
52 ecryptfs_printk(KERN_WARNING, "Key expired\n");
53 rc = -ETIME;
54 break;
55 case EKEYREVOKED:
56 ecryptfs_printk(KERN_WARNING, "Key revoked\n");
57 rc = -EINVAL;
58 break;
59 default:
60 ecryptfs_printk(KERN_WARNING, "Unknown error code: "
61 "[0x%.16x]\n", err_code);
62 rc = -EINVAL;
63 }
64 return rc;
65}
66
67static void wipe_auth_tok_list(struct list_head *auth_tok_list_head)
68{
69 struct list_head *walker;
70 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
71
72 walker = auth_tok_list_head->next;
73 while (walker != auth_tok_list_head) {
74 auth_tok_list_item =
75 list_entry(walker, struct ecryptfs_auth_tok_list_item,
76 list);
77 walker = auth_tok_list_item->list.next;
78 memset(auth_tok_list_item, 0,
79 sizeof(struct ecryptfs_auth_tok_list_item));
80 kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
81 auth_tok_list_item);
82 }
83}
84
/* Cache from which parse_tag_3_packet() allocates authentication token
 * list items (kmem_cache_alloc) and to which they are returned with
 * kmem_cache_free(). */
85struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
86
87/**
88 * parse_packet_length
89 * @data: Pointer to memory containing length at offset
90 * @size: This function writes the decoded size to this memory
91 * address; zero on error
92 * @length_size: The number of bytes occupied by the encoded length
93 *
94 * Returns Zero on success
95 */
96static int parse_packet_length(unsigned char *data, size_t *size,
97 size_t *length_size)
98{
99 int rc = 0;
100
101 (*length_size) = 0;
102 (*size) = 0;
103 if (data[0] < 192) {
104 /* One-byte length */
105 (*size) = data[0];
106 (*length_size) = 1;
107 } else if (data[0] < 224) {
108 /* Two-byte length */
109 (*size) = ((data[0] - 192) * 256);
110 (*size) += (data[1] + 192);
111 (*length_size) = 2;
112 } else if (data[0] == 255) {
113 /* Five-byte length; we're not supposed to see this */
114 ecryptfs_printk(KERN_ERR, "Five-byte packet length not "
115 "supported\n");
116 rc = -EINVAL;
117 goto out;
118 } else {
119 ecryptfs_printk(KERN_ERR, "Error parsing packet length\n");
120 rc = -EINVAL;
121 goto out;
122 }
123out:
124 return rc;
125}
126
127/**
128 * write_packet_length
129 * @dest: The byte array target into which to write the
130 * length. Must have at least 5 bytes allocated.
131 * @size: The length to write.
132 * @packet_size_length: The number of bytes used to encode the
133 * packet length is written to this address.
134 *
135 * Returns zero on success; non-zero on error.
136 */
137static int write_packet_length(char *dest, size_t size,
138 size_t *packet_size_length)
139{
140 int rc = 0;
141
142 if (size < 192) {
143 dest[0] = size;
144 (*packet_size_length) = 1;
145 } else if (size < 65536) {
146 dest[0] = (((size - 192) / 256) + 192);
147 dest[1] = ((size - 192) % 256);
148 (*packet_size_length) = 2;
149 } else {
150 rc = -EINVAL;
151 ecryptfs_printk(KERN_WARNING,
152 "Unsupported packet size: [%d]\n", size);
153 }
154 return rc;
155}
156
157/**
158 * parse_tag_3_packet
159 * @crypt_stat: The cryptographic context to modify based on packet
160 * contents.
161 * @data: The raw bytes of the packet.
162 * @auth_tok_list: eCryptfs parses packets into authentication tokens;
163 * a new authentication token will be placed at the end
164 * of this list for this packet.
165 * @new_auth_tok: Pointer to a pointer to memory that this function
166 * allocates; sets the memory address of the pointer to
167 * NULL on error. This object is added to the
168 * auth_tok_list.
169 * @packet_size: This function writes the size of the parsed packet
170 * into this memory location; zero on error.
171 * @max_packet_size: maximum number of bytes to parse
172 *
173 * Returns zero on success; non-zero on error.
174 */
175static int
176parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
177 unsigned char *data, struct list_head *auth_tok_list,
178 struct ecryptfs_auth_tok **new_auth_tok,
179 size_t *packet_size, size_t max_packet_size)
180{
181 int rc = 0;
182 size_t body_size;
183 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
184 size_t length_size;
185
186 (*packet_size) = 0;
187 (*new_auth_tok) = NULL;
188
189 /* we check that:
190 * one byte for the Tag 3 ID flag
191 * two bytes for the body size
192 * do not exceed the maximum_packet_size
193 */
194 if (unlikely((*packet_size) + 3 > max_packet_size)) {
195 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
196 rc = -EINVAL;
197 goto out;
198 }
199
200 /* check for Tag 3 identifyer - one byte */
201 if (data[(*packet_size)++] != ECRYPTFS_TAG_3_PACKET_TYPE) {
202 ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n",
203 ECRYPTFS_TAG_3_PACKET_TYPE);
204 rc = -EINVAL;
205 goto out;
206 }
207 /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
208 * at end of function upon failure */
209 auth_tok_list_item =
210 kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL);
211 if (!auth_tok_list_item) {
212 ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
213 rc = -ENOMEM;
214 goto out;
215 }
216 memset(auth_tok_list_item, 0,
217 sizeof(struct ecryptfs_auth_tok_list_item));
218 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
219
220 /* check for body size - one to two bytes */
221 rc = parse_packet_length(&data[(*packet_size)], &body_size,
222 &length_size);
223 if (rc) {
224 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
225 "rc = [%d]\n", rc);
226 goto out_free;
227 }
228 if (unlikely(body_size < (0x05 + ECRYPTFS_SALT_SIZE))) {
229 ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
230 body_size);
231 rc = -EINVAL;
232 goto out_free;
233 }
234 (*packet_size) += length_size;
235
236 /* now we know the length of the remainting Tag 3 packet size:
237 * 5 fix bytes for: version string, cipher, S2K ID, hash algo,
238 * number of hash iterations
239 * ECRYPTFS_SALT_SIZE bytes for salt
240 * body_size bytes minus the stuff above is the encrypted key size
241 */
242 if (unlikely((*packet_size) + body_size > max_packet_size)) {
243 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
244 rc = -EINVAL;
245 goto out_free;
246 }
247
248 /* There are 5 characters of additional information in the
249 * packet */
250 (*new_auth_tok)->session_key.encrypted_key_size =
251 body_size - (0x05 + ECRYPTFS_SALT_SIZE);
252 ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n",
253 (*new_auth_tok)->session_key.encrypted_key_size);
254
255 /* Version 4 (from RFC2440) - one byte */
256 if (unlikely(data[(*packet_size)++] != 0x04)) {
257 ecryptfs_printk(KERN_DEBUG, "Unknown version number "
258 "[%d]\n", data[(*packet_size) - 1]);
259 rc = -EINVAL;
260 goto out_free;
261 }
262
263 /* cipher - one byte */
264 ecryptfs_cipher_code_to_string(crypt_stat->cipher,
265 (u16)data[(*packet_size)]);
266 /* A little extra work to differentiate among the AES key
267 * sizes; see RFC2440 */
268 switch(data[(*packet_size)++]) {
269 case RFC2440_CIPHER_AES_192:
270 crypt_stat->key_size = 24;
271 break;
272 default:
273 crypt_stat->key_size =
274 (*new_auth_tok)->session_key.encrypted_key_size;
275 }
276 ecryptfs_init_crypt_ctx(crypt_stat);
277 /* S2K identifier 3 (from RFC2440) */
278 if (unlikely(data[(*packet_size)++] != 0x03)) {
279 ecryptfs_printk(KERN_ERR, "Only S2K ID 3 is currently "
280 "supported\n");
281 rc = -ENOSYS;
282 goto out_free;
283 }
284
285 /* TODO: finish the hash mapping */
286 /* hash algorithm - one byte */
287 switch (data[(*packet_size)++]) {
288 case 0x01: /* See RFC2440 for these numbers and their mappings */
289 /* Choose MD5 */
290 /* salt - ECRYPTFS_SALT_SIZE bytes */
291 memcpy((*new_auth_tok)->token.password.salt,
292 &data[(*packet_size)], ECRYPTFS_SALT_SIZE);
293 (*packet_size) += ECRYPTFS_SALT_SIZE;
294
295 /* This conversion was taken straight from RFC2440 */
296 /* number of hash iterations - one byte */
297 (*new_auth_tok)->token.password.hash_iterations =
298 ((u32) 16 + (data[(*packet_size)] & 15))
299 << ((data[(*packet_size)] >> 4) + 6);
300 (*packet_size)++;
301
302 /* encrypted session key -
303 * (body_size-5-ECRYPTFS_SALT_SIZE) bytes */
304 memcpy((*new_auth_tok)->session_key.encrypted_key,
305 &data[(*packet_size)],
306 (*new_auth_tok)->session_key.encrypted_key_size);
307 (*packet_size) +=
308 (*new_auth_tok)->session_key.encrypted_key_size;
309 (*new_auth_tok)->session_key.flags &=
310 ~ECRYPTFS_CONTAINS_DECRYPTED_KEY;
311 (*new_auth_tok)->session_key.flags |=
312 ECRYPTFS_CONTAINS_ENCRYPTED_KEY;
313 (*new_auth_tok)->token.password.hash_algo = 0x01;
314 break;
315 default:
316 ecryptfs_printk(KERN_ERR, "Unsupported hash algorithm: "
317 "[%d]\n", data[(*packet_size) - 1]);
318 rc = -ENOSYS;
319 goto out_free;
320 }
321 (*new_auth_tok)->token_type = ECRYPTFS_PASSWORD;
322 /* TODO: Parametarize; we might actually want userspace to
323 * decrypt the session key. */
324 ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags,
325 ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
326 ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags,
327 ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT);
328 list_add(&auth_tok_list_item->list, auth_tok_list);
329 goto out;
330out_free:
331 (*new_auth_tok) = NULL;
332 memset(auth_tok_list_item, 0,
333 sizeof(struct ecryptfs_auth_tok_list_item));
334 kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
335 auth_tok_list_item);
336out:
337 if (rc)
338 (*packet_size) = 0;
339 return rc;
340}
341
342/**
343 * parse_tag_11_packet
344 * @data: The raw bytes of the packet
345 * @contents: This function writes the data contents of the literal
346 * packet into this memory location
347 * @max_contents_bytes: The maximum number of bytes that this function
348 * is allowed to write into contents
349 * @tag_11_contents_size: This function writes the size of the parsed
350 * contents into this memory location; zero on
351 * error
352 * @packet_size: This function writes the size of the parsed packet
353 * into this memory location; zero on error
354 * @max_packet_size: maximum number of bytes to parse
355 *
356 * Returns zero on success; non-zero on error.
357 */
358static int
359parse_tag_11_packet(unsigned char *data, unsigned char *contents,
360 size_t max_contents_bytes, size_t *tag_11_contents_size,
361 size_t *packet_size, size_t max_packet_size)
362{
363 int rc = 0;
364 size_t body_size;
365 size_t length_size;
366
367 (*packet_size) = 0;
368 (*tag_11_contents_size) = 0;
369
370 /* check that:
371 * one byte for the Tag 11 ID flag
372 * two bytes for the Tag 11 length
373 * do not exceed the maximum_packet_size
374 */
375 if (unlikely((*packet_size) + 3 > max_packet_size)) {
376 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
377 rc = -EINVAL;
378 goto out;
379 }
380
381 /* check for Tag 11 identifyer - one byte */
382 if (data[(*packet_size)++] != ECRYPTFS_TAG_11_PACKET_TYPE) {
383 ecryptfs_printk(KERN_WARNING,
384 "Invalid tag 11 packet format\n");
385 rc = -EINVAL;
386 goto out;
387 }
388
389 /* get Tag 11 content length - one or two bytes */
390 rc = parse_packet_length(&data[(*packet_size)], &body_size,
391 &length_size);
392 if (rc) {
393 ecryptfs_printk(KERN_WARNING,
394 "Invalid tag 11 packet format\n");
395 goto out;
396 }
397 (*packet_size) += length_size;
398
399 if (body_size < 13) {
400 ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
401 body_size);
402 rc = -EINVAL;
403 goto out;
404 }
405 /* We have 13 bytes of surrounding packet values */
406 (*tag_11_contents_size) = (body_size - 13);
407
408 /* now we know the length of the remainting Tag 11 packet size:
409 * 14 fix bytes for: special flag one, special flag two,
410 * 12 skipped bytes
411 * body_size bytes minus the stuff above is the Tag 11 content
412 */
413 /* FIXME why is the body size one byte smaller than the actual
414 * size of the body?
415 * this seems to be an error here as well as in
416 * write_tag_11_packet() */
417 if (unlikely((*packet_size) + body_size + 1 > max_packet_size)) {
418 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
419 rc = -EINVAL;
420 goto out;
421 }
422
423 /* special flag one - one byte */
424 if (data[(*packet_size)++] != 0x62) {
425 ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n");
426 rc = -EINVAL;
427 goto out;
428 }
429
430 /* special flag two - one byte */
431 if (data[(*packet_size)++] != 0x08) {
432 ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n");
433 rc = -EINVAL;
434 goto out;
435 }
436
437 /* skip the next 12 bytes */
438 (*packet_size) += 12; /* We don't care about the filename or
439 * the timestamp */
440
441 /* get the Tag 11 contents - tag_11_contents_size bytes */
442 memcpy(contents, &data[(*packet_size)], (*tag_11_contents_size));
443 (*packet_size) += (*tag_11_contents_size);
444
445out:
446 if (rc) {
447 (*packet_size) = 0;
448 (*tag_11_contents_size) = 0;
449 }
450 return rc;
451}
452
453/**
454 * decrypt_session_key - Decrypt the session key with the given auth_tok.
455 *
456 * Returns Zero on success; non-zero error otherwise.
457 */
458static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
459 struct ecryptfs_crypt_stat *crypt_stat)
460{
461 int rc = 0;
462 struct ecryptfs_password *password_s_ptr;
463 struct crypto_tfm *tfm = NULL;
464 struct scatterlist src_sg[2], dst_sg[2];
465 struct mutex *tfm_mutex = NULL;
466 /* TODO: Use virt_to_scatterlist for these */
467 char *encrypted_session_key;
468 char *session_key;
469
470 password_s_ptr = &auth_tok->token.password;
471 if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags,
472 ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET))
473 ecryptfs_printk(KERN_DEBUG, "Session key encryption key "
474 "set; skipping key generation\n");
475 ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])"
476 ":\n",
477 password_s_ptr->session_key_encryption_key_bytes);
478 if (ecryptfs_verbosity > 0)
479 ecryptfs_dump_hex(password_s_ptr->session_key_encryption_key,
480 password_s_ptr->
481 session_key_encryption_key_bytes);
482 if (!strcmp(crypt_stat->cipher,
483 crypt_stat->mount_crypt_stat->global_default_cipher_name)
484 && crypt_stat->mount_crypt_stat->global_key_tfm) {
485 tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
486 tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
487 } else {
488 tfm = crypto_alloc_tfm(crypt_stat->cipher,
489 CRYPTO_TFM_REQ_WEAK_KEY);
490 if (!tfm) {
491 printk(KERN_ERR "Error allocating crypto context\n");
492 rc = -ENOMEM;
493 goto out;
494 }
495 }
496 if (password_s_ptr->session_key_encryption_key_bytes
497 < crypto_tfm_alg_min_keysize(tfm)) {
498 printk(KERN_WARNING "Session key encryption key is [%d] bytes; "
499 "minimum keysize for selected cipher is [%d] bytes.\n",
500 password_s_ptr->session_key_encryption_key_bytes,
501 crypto_tfm_alg_min_keysize(tfm));
502 rc = -EINVAL;
503 goto out;
504 }
505 if (tfm_mutex)
506 mutex_lock(tfm_mutex);
507 crypto_cipher_setkey(tfm, password_s_ptr->session_key_encryption_key,
508 crypt_stat->key_size);
509 /* TODO: virt_to_scatterlist */
510 encrypted_session_key = (char *)__get_free_page(GFP_KERNEL);
511 if (!encrypted_session_key) {
512 ecryptfs_printk(KERN_ERR, "Out of memory\n");
513 rc = -ENOMEM;
514 goto out_free_tfm;
515 }
516 session_key = (char *)__get_free_page(GFP_KERNEL);
517 if (!session_key) {
518 kfree(encrypted_session_key);
519 ecryptfs_printk(KERN_ERR, "Out of memory\n");
520 rc = -ENOMEM;
521 goto out_free_tfm;
522 }
523 memcpy(encrypted_session_key, auth_tok->session_key.encrypted_key,
524 auth_tok->session_key.encrypted_key_size);
525 src_sg[0].page = virt_to_page(encrypted_session_key);
526 src_sg[0].offset = 0;
527 BUG_ON(auth_tok->session_key.encrypted_key_size > PAGE_CACHE_SIZE);
528 src_sg[0].length = auth_tok->session_key.encrypted_key_size;
529 dst_sg[0].page = virt_to_page(session_key);
530 dst_sg[0].offset = 0;
531 auth_tok->session_key.decrypted_key_size =
532 auth_tok->session_key.encrypted_key_size;
533 dst_sg[0].length = auth_tok->session_key.encrypted_key_size;
534 /* TODO: Handle error condition */
535 crypto_cipher_decrypt(tfm, dst_sg, src_sg,
536 auth_tok->session_key.encrypted_key_size);
537 auth_tok->session_key.decrypted_key_size =
538 auth_tok->session_key.encrypted_key_size;
539 memcpy(auth_tok->session_key.decrypted_key, session_key,
540 auth_tok->session_key.decrypted_key_size);
541 auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
542 memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
543 auth_tok->session_key.decrypted_key_size);
544 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
545 ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
546 if (ecryptfs_verbosity > 0)
547 ecryptfs_dump_hex(crypt_stat->key,
548 crypt_stat->key_size);
549 memset(encrypted_session_key, 0, PAGE_CACHE_SIZE);
550 free_page((unsigned long)encrypted_session_key);
551 memset(session_key, 0, PAGE_CACHE_SIZE);
552 free_page((unsigned long)session_key);
553out_free_tfm:
554 if (tfm_mutex)
555 mutex_unlock(tfm_mutex);
556 else
557 crypto_free_tfm(tfm);
558out:
559 return rc;
560}
561
562/**
563 * ecryptfs_parse_packet_set
564 * @dest: The header page in memory
565 * @version: Version of file format, to guide parsing behavior
566 *
567 * Get crypt_stat to have the file's session key if the requisite key
568 * is available to decrypt the session key.
569 *
570 * Returns Zero if a valid authentication token was retrieved and
571 * processed; negative value for file not encrypted or for error
572 * conditions.
573 */
574int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
575 unsigned char *src,
576 struct dentry *ecryptfs_dentry)
577{
578 size_t i = 0;
579 int rc = 0;
580 size_t found_auth_tok = 0;
581 size_t next_packet_is_auth_tok_packet;
582 char sig[ECRYPTFS_SIG_SIZE_HEX];
583 struct list_head auth_tok_list;
584 struct list_head *walker;
585 struct ecryptfs_auth_tok *chosen_auth_tok = NULL;
586 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
587 &ecryptfs_superblock_to_private(
588 ecryptfs_dentry->d_sb)->mount_crypt_stat;
589 struct ecryptfs_auth_tok *candidate_auth_tok = NULL;
590 size_t packet_size;
591 struct ecryptfs_auth_tok *new_auth_tok;
592 unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE];
593 size_t tag_11_contents_size;
594 size_t tag_11_packet_size;
595
596 INIT_LIST_HEAD(&auth_tok_list);
597 /* Parse the header to find as many packets as we can, these will be
598 * added the our &auth_tok_list */
599 next_packet_is_auth_tok_packet = 1;
600 while (next_packet_is_auth_tok_packet) {
601 size_t max_packet_size = ((PAGE_CACHE_SIZE - 8) - i);
602
603 switch (src[i]) {
604 case ECRYPTFS_TAG_3_PACKET_TYPE:
605 rc = parse_tag_3_packet(crypt_stat,
606 (unsigned char *)&src[i],
607 &auth_tok_list, &new_auth_tok,
608 &packet_size, max_packet_size);
609 if (rc) {
610 ecryptfs_printk(KERN_ERR, "Error parsing "
611 "tag 3 packet\n");
612 rc = -EIO;
613 goto out_wipe_list;
614 }
615 i += packet_size;
616 rc = parse_tag_11_packet((unsigned char *)&src[i],
617 sig_tmp_space,
618 ECRYPTFS_SIG_SIZE,
619 &tag_11_contents_size,
620 &tag_11_packet_size,
621 max_packet_size);
622 if (rc) {
623 ecryptfs_printk(KERN_ERR, "No valid "
624 "(ecryptfs-specific) literal "
625 "packet containing "
626 "authentication token "
627 "signature found after "
628 "tag 3 packet\n");
629 rc = -EIO;
630 goto out_wipe_list;
631 }
632 i += tag_11_packet_size;
633 if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) {
634 ecryptfs_printk(KERN_ERR, "Expected "
635 "signature of size [%d]; "
636 "read size [%d]\n",
637 ECRYPTFS_SIG_SIZE,
638 tag_11_contents_size);
639 rc = -EIO;
640 goto out_wipe_list;
641 }
642 ecryptfs_to_hex(new_auth_tok->token.password.signature,
643 sig_tmp_space, tag_11_contents_size);
644 new_auth_tok->token.password.signature[
645 ECRYPTFS_PASSWORD_SIG_SIZE] = '\0';
646 ECRYPTFS_SET_FLAG(crypt_stat->flags,
647 ECRYPTFS_ENCRYPTED);
648 break;
649 case ECRYPTFS_TAG_11_PACKET_TYPE:
650 ecryptfs_printk(KERN_WARNING, "Invalid packet set "
651 "(Tag 11 not allowed by itself)\n");
652 rc = -EIO;
653 goto out_wipe_list;
654 break;
655 default:
656 ecryptfs_printk(KERN_DEBUG, "No packet at offset "
657 "[%d] of the file header; hex value of "
658 "character is [0x%.2x]\n", i, src[i]);
659 next_packet_is_auth_tok_packet = 0;
660 }
661 }
662 if (list_empty(&auth_tok_list)) {
663 rc = -EINVAL; /* Do not support non-encrypted files in
664 * the 0.1 release */
665 goto out;
666 }
667 /* If we have a global auth tok, then we should try to use
668 * it */
669 if (mount_crypt_stat->global_auth_tok) {
670 memcpy(sig, mount_crypt_stat->global_auth_tok_sig,
671 ECRYPTFS_SIG_SIZE_HEX);
672 chosen_auth_tok = mount_crypt_stat->global_auth_tok;
673 } else
674 BUG(); /* We should always have a global auth tok in
675 * the 0.1 release */
676 /* Scan list to see if our chosen_auth_tok works */
677 list_for_each(walker, &auth_tok_list) {
678 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
679 auth_tok_list_item =
680 list_entry(walker, struct ecryptfs_auth_tok_list_item,
681 list);
682 candidate_auth_tok = &auth_tok_list_item->auth_tok;
683 if (unlikely(ecryptfs_verbosity > 0)) {
684 ecryptfs_printk(KERN_DEBUG,
685 "Considering cadidate auth tok:\n");
686 ecryptfs_dump_auth_tok(candidate_auth_tok);
687 }
688 /* TODO: Replace ECRYPTFS_SIG_SIZE_HEX w/ dynamic value */
689 if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD
690 && !strncmp(candidate_auth_tok->token.password.signature,
691 sig, ECRYPTFS_SIG_SIZE_HEX)) {
692 found_auth_tok = 1;
693 goto leave_list;
694 /* TODO: Transfer the common salt into the
695 * crypt_stat salt */
696 }
697 }
698leave_list:
699 if (!found_auth_tok) {
700 ecryptfs_printk(KERN_ERR, "Could not find authentication "
701 "token on temporary list for sig [%.*s]\n",
702 ECRYPTFS_SIG_SIZE_HEX, sig);
703 rc = -EIO;
704 goto out_wipe_list;
705 } else {
706 memcpy(&(candidate_auth_tok->token.password),
707 &(chosen_auth_tok->token.password),
708 sizeof(struct ecryptfs_password));
709 rc = decrypt_session_key(candidate_auth_tok, crypt_stat);
710 if (rc) {
711 ecryptfs_printk(KERN_ERR, "Error decrypting the "
712 "session key\n");
713 goto out_wipe_list;
714 }
715 rc = ecryptfs_compute_root_iv(crypt_stat);
716 if (rc) {
717 ecryptfs_printk(KERN_ERR, "Error computing "
718 "the root IV\n");
719 goto out_wipe_list;
720 }
721 }
722 rc = ecryptfs_init_crypt_ctx(crypt_stat);
723 if (rc) {
724 ecryptfs_printk(KERN_ERR, "Error initializing crypto "
725 "context for cipher [%s]; rc = [%d]\n",
726 crypt_stat->cipher, rc);
727 }
728out_wipe_list:
729 wipe_auth_tok_list(&auth_tok_list);
730out:
731 return rc;
732}
733
734/**
735 * write_tag_11_packet
736 * @dest: Target into which Tag 11 packet is to be written
737 * @max: Maximum packet length
738 * @contents: Byte array of contents to copy in
739 * @contents_length: Number of bytes in contents
740 * @packet_length: Length of the Tag 11 packet written; zero on error
741 *
742 * Returns zero on success; non-zero on error.
743 */
744static int
745write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length,
746 size_t *packet_length)
747{
748 int rc = 0;
749 size_t packet_size_length;
750
751 (*packet_length) = 0;
752 if ((13 + contents_length) > max) {
753 rc = -EINVAL;
754 ecryptfs_printk(KERN_ERR, "Packet length larger than "
755 "maximum allowable\n");
756 goto out;
757 }
758 /* General packet header */
759 /* Packet tag */
760 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
761 /* Packet length */
762 rc = write_packet_length(&dest[(*packet_length)],
763 (13 + contents_length), &packet_size_length);
764 if (rc) {
765 ecryptfs_printk(KERN_ERR, "Error generating tag 11 packet "
766 "header; cannot generate packet length\n");
767 goto out;
768 }
769 (*packet_length) += packet_size_length;
770 /* Tag 11 specific */
771 /* One-octet field that describes how the data is formatted */
772 dest[(*packet_length)++] = 0x62; /* binary data */
773 /* One-octet filename length followed by filename */
774 dest[(*packet_length)++] = 8;
775 memcpy(&dest[(*packet_length)], "_CONSOLE", 8);
776 (*packet_length) += 8;
777 /* Four-octet number indicating modification date */
778 memset(&dest[(*packet_length)], 0x00, 4);
779 (*packet_length) += 4;
780 /* Remainder is literal data */
781 memcpy(&dest[(*packet_length)], contents, contents_length);
782 (*packet_length) += contents_length;
783 out:
784 if (rc)
785 (*packet_length) = 0;
786 return rc;
787}
788
789/**
790 * write_tag_3_packet
791 * @dest: Buffer into which to write the packet
792 * @max: Maximum number of bytes that can be written
793 * @auth_tok: Authentication token
794 * @crypt_stat: The cryptographic context
795 * @key_rec: encrypted key
796 * @packet_size: This function will write the number of bytes that end
797 * up constituting the packet; set to zero on error
798 *
799 * Returns zero on success; non-zero on error.
800 */
801static int
802write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
803 struct ecryptfs_crypt_stat *crypt_stat,
804 struct ecryptfs_key_record *key_rec, size_t *packet_size)
805{
806 int rc = 0;
807
808 size_t i;
809 size_t signature_is_valid = 0;
810 size_t encrypted_session_key_valid = 0;
811 char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
812 struct scatterlist dest_sg[2];
813 struct scatterlist src_sg[2];
814 struct crypto_tfm *tfm = NULL;
815 struct mutex *tfm_mutex = NULL;
816 size_t key_rec_size;
817 size_t packet_size_length;
818 size_t cipher_code;
819
820 (*packet_size) = 0;
821 /* Check for a valid signature on the auth_tok */
822 for (i = 0; i < ECRYPTFS_SIG_SIZE_HEX; i++)
823 signature_is_valid |= auth_tok->token.password.signature[i];
824 if (!signature_is_valid)
825 BUG();
826 ecryptfs_from_hex((*key_rec).sig, auth_tok->token.password.signature,
827 ECRYPTFS_SIG_SIZE);
828 encrypted_session_key_valid = 0;
829 for (i = 0; i < crypt_stat->key_size; i++)
830 encrypted_session_key_valid |=
831 auth_tok->session_key.encrypted_key[i];
832 if (encrypted_session_key_valid) {
833 memcpy((*key_rec).enc_key,
834 auth_tok->session_key.encrypted_key,
835 auth_tok->session_key.encrypted_key_size);
836 goto encrypted_session_key_set;
837 }
838 if (auth_tok->session_key.encrypted_key_size == 0)
839 auth_tok->session_key.encrypted_key_size =
840 crypt_stat->key_size;
841 if (crypt_stat->key_size == 24
842 && strcmp("aes", crypt_stat->cipher) == 0) {
843 memset((crypt_stat->key + 24), 0, 8);
844 auth_tok->session_key.encrypted_key_size = 32;
845 }
846 (*key_rec).enc_key_size =
847 auth_tok->session_key.encrypted_key_size;
848 if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags,
849 ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)) {
850 ecryptfs_printk(KERN_DEBUG, "Using previously generated "
851 "session key encryption key of size [%d]\n",
852 auth_tok->token.password.
853 session_key_encryption_key_bytes);
854 memcpy(session_key_encryption_key,
855 auth_tok->token.password.session_key_encryption_key,
856 crypt_stat->key_size);
857 ecryptfs_printk(KERN_DEBUG,
858 "Cached session key " "encryption key: \n");
859 if (ecryptfs_verbosity > 0)
860 ecryptfs_dump_hex(session_key_encryption_key, 16);
861 }
862 if (unlikely(ecryptfs_verbosity > 0)) {
863 ecryptfs_printk(KERN_DEBUG, "Session key encryption key:\n");
864 ecryptfs_dump_hex(session_key_encryption_key, 16);
865 }
866 rc = virt_to_scatterlist(crypt_stat->key,
867 (*key_rec).enc_key_size, src_sg, 2);
868 if (!rc) {
869 ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
870 "for crypt_stat session key\n");
871 rc = -ENOMEM;
872 goto out;
873 }
874 rc = virt_to_scatterlist((*key_rec).enc_key,
875 (*key_rec).enc_key_size, dest_sg, 2);
876 if (!rc) {
877 ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
878 "for crypt_stat encrypted session key\n");
879 rc = -ENOMEM;
880 goto out;
881 }
882 if (!strcmp(crypt_stat->cipher,
883 crypt_stat->mount_crypt_stat->global_default_cipher_name)
884 && crypt_stat->mount_crypt_stat->global_key_tfm) {
885 tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
886 tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
887 } else
888 tfm = crypto_alloc_tfm(crypt_stat->cipher, 0);
889 if (!tfm) {
890 ecryptfs_printk(KERN_ERR, "Could not initialize crypto "
891 "context for cipher [%s]\n",
892 crypt_stat->cipher);
893 rc = -EINVAL;
894 goto out;
895 }
896 if (tfm_mutex)
897 mutex_lock(tfm_mutex);
898 rc = crypto_cipher_setkey(tfm, session_key_encryption_key,
899 crypt_stat->key_size);
900 if (rc < 0) {
901 if (tfm_mutex)
902 mutex_unlock(tfm_mutex);
903 ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
904 "context\n");
905 goto out;
906 }
907 rc = 0;
908 ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
909 crypt_stat->key_size);
910 crypto_cipher_encrypt(tfm, dest_sg, src_sg,
911 (*key_rec).enc_key_size);
912 if (tfm_mutex)
913 mutex_unlock(tfm_mutex);
914 ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
915 if (ecryptfs_verbosity > 0)
916 ecryptfs_dump_hex((*key_rec).enc_key,
917 (*key_rec).enc_key_size);
918encrypted_session_key_set:
919 /* Now we have a valid key_rec. Append it to the
920 * key_rec set. */
921 key_rec_size = (sizeof(struct ecryptfs_key_record)
922 - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
923 + ((*key_rec).enc_key_size));
924 /* TODO: Include a packet size limit as a parameter to this
925 * function once we have multi-packet headers (for versions
926 * later than 0.1 */
927 if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) {
928 ecryptfs_printk(KERN_ERR, "Keyset too large\n");
929 rc = -EINVAL;
930 goto out;
931 }
932 /* TODO: Packet size limit */
933 /* We have 5 bytes of surrounding packet data */
934 if ((0x05 + ECRYPTFS_SALT_SIZE
935 + (*key_rec).enc_key_size) >= max) {
936 ecryptfs_printk(KERN_ERR, "Authentication token is too "
937 "large\n");
938 rc = -EINVAL;
939 goto out;
940 }
941 /* This format is inspired by OpenPGP; see RFC 2440
942 * packet tag 3 */
943 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
944 /* ver+cipher+s2k+hash+salt+iter+enc_key */
945 rc = write_packet_length(&dest[(*packet_size)],
946 (0x05 + ECRYPTFS_SALT_SIZE
947 + (*key_rec).enc_key_size),
948 &packet_size_length);
949 if (rc) {
950 ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet "
951 "header; cannot generate packet length\n");
952 goto out;
953 }
954 (*packet_size) += packet_size_length;
955 dest[(*packet_size)++] = 0x04; /* version 4 */
956 cipher_code = ecryptfs_code_for_cipher_string(crypt_stat);
957 if (cipher_code == 0) {
958 ecryptfs_printk(KERN_WARNING, "Unable to generate code for "
959 "cipher [%s]\n", crypt_stat->cipher);
960 rc = -EINVAL;
961 goto out;
962 }
963 dest[(*packet_size)++] = cipher_code;
964 dest[(*packet_size)++] = 0x03; /* S2K */
965 dest[(*packet_size)++] = 0x01; /* MD5 (TODO: parameterize) */
966 memcpy(&dest[(*packet_size)], auth_tok->token.password.salt,
967 ECRYPTFS_SALT_SIZE);
968 (*packet_size) += ECRYPTFS_SALT_SIZE; /* salt */
969 dest[(*packet_size)++] = 0x60; /* hash iterations (65536) */
970 memcpy(&dest[(*packet_size)], (*key_rec).enc_key,
971 (*key_rec).enc_key_size);
972 (*packet_size) += (*key_rec).enc_key_size;
973out:
974 if (tfm && !tfm_mutex)
975 crypto_free_tfm(tfm);
976 if (rc)
977 (*packet_size) = 0;
978 return rc;
979}
980
981/**
982 * ecryptfs_generate_key_packet_set
983 * @dest: Virtual address from which to write the key record set
984 * @crypt_stat: The cryptographic context from which the
985 * authentication tokens will be retrieved
986 * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat
987 * for the global parameters
988 * @len: The amount written
989 * @max: The maximum amount of data allowed to be written
990 *
991 * Generates a key packet set and writes it to the virtual address
992 * passed in.
993 *
994 * Returns zero on success; non-zero on error.
995 */
996int
997ecryptfs_generate_key_packet_set(char *dest_base,
998 struct ecryptfs_crypt_stat *crypt_stat,
999 struct dentry *ecryptfs_dentry, size_t *len,
1000 size_t max)
1001{
1002 int rc = 0;
1003 struct ecryptfs_auth_tok *auth_tok;
1004 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
1005 &ecryptfs_superblock_to_private(
1006 ecryptfs_dentry->d_sb)->mount_crypt_stat;
1007 size_t written;
1008 struct ecryptfs_key_record key_rec;
1009
1010 (*len) = 0;
1011 if (mount_crypt_stat->global_auth_tok) {
1012 auth_tok = mount_crypt_stat->global_auth_tok;
1013 if (auth_tok->token_type == ECRYPTFS_PASSWORD) {
1014 rc = write_tag_3_packet((dest_base + (*len)),
1015 max, auth_tok,
1016 crypt_stat, &key_rec,
1017 &written);
1018 if (rc) {
1019 ecryptfs_printk(KERN_WARNING, "Error "
1020 "writing tag 3 packet\n");
1021 goto out;
1022 }
1023 (*len) += written;
1024 /* Write auth tok signature packet */
1025 rc = write_tag_11_packet(
1026 (dest_base + (*len)),
1027 (max - (*len)),
1028 key_rec.sig, ECRYPTFS_SIG_SIZE, &written);
1029 if (rc) {
1030 ecryptfs_printk(KERN_ERR, "Error writing "
1031 "auth tok signature packet\n");
1032 goto out;
1033 }
1034 (*len) += written;
1035 } else {
1036 ecryptfs_printk(KERN_WARNING, "Unsupported "
1037 "authentication token type\n");
1038 rc = -EINVAL;
1039 goto out;
1040 }
1041 if (rc) {
1042 ecryptfs_printk(KERN_WARNING, "Error writing "
1043 "authentication token packet with sig "
1044 "= [%s]\n",
1045 mount_crypt_stat->global_auth_tok_sig);
1046 rc = -EIO;
1047 goto out;
1048 }
1049 } else
1050 BUG();
1051 if (likely((max - (*len)) > 0)) {
1052 dest_base[(*len)] = 0x00;
1053 } else {
1054 ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n");
1055 rc = -EIO;
1056 }
1057out:
1058 if (rc)
1059 (*len) = 0;
1060 return rc;
1061}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
new file mode 100644
index 000000000000..7a11b8ae6644
--- /dev/null
+++ b/fs/ecryptfs/main.c
@@ -0,0 +1,831 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2003 Erez Zadok
5 * Copyright (C) 2001-2003 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#include <linux/dcache.h>
27#include <linux/file.h>
28#include <linux/module.h>
29#include <linux/namei.h>
30#include <linux/skbuff.h>
31#include <linux/crypto.h>
32#include <linux/netlink.h>
33#include <linux/mount.h>
34#include <linux/dcache.h>
35#include <linux/pagemap.h>
36#include <linux/key.h>
37#include <linux/parser.h>
38#include "ecryptfs_kernel.h"
39
40/**
41 * Module parameter that defines the ecryptfs_verbosity level.
42 */
43int ecryptfs_verbosity = 0;
44
45module_param(ecryptfs_verbosity, int, 0);
46MODULE_PARM_DESC(ecryptfs_verbosity,
47 "Initial verbosity level (0 or 1; defaults to "
48 "0, which is Quiet)");
49
50void __ecryptfs_printk(const char *fmt, ...)
51{
52 va_list args;
53 va_start(args, fmt);
54 if (fmt[1] == '7') { /* KERN_DEBUG */
55 if (ecryptfs_verbosity >= 1)
56 vprintk(fmt, args);
57 } else
58 vprintk(fmt, args);
59 va_end(args);
60}
61
62/**
63 * ecryptfs_interpose
64 * @lower_dentry: Existing dentry in the lower filesystem
65 * @dentry: ecryptfs' dentry
66 * @sb: ecryptfs's super_block
67 * @flag: If set to true, then d_add is called, else d_instantiate is called
68 *
69 * Interposes upper and lower dentries.
70 *
71 * Returns zero on success; non-zero otherwise
72 */
73int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
74 struct super_block *sb, int flag)
75{
76 struct inode *lower_inode;
77 struct inode *inode;
78 int rc = 0;
79
80 lower_inode = lower_dentry->d_inode;
81 if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) {
82 rc = -EXDEV;
83 goto out;
84 }
85 if (!igrab(lower_inode)) {
86 rc = -ESTALE;
87 goto out;
88 }
89 inode = iget5_locked(sb, (unsigned long)lower_inode,
90 ecryptfs_inode_test, ecryptfs_inode_set,
91 lower_inode);
92 if (!inode) {
93 rc = -EACCES;
94 iput(lower_inode);
95 goto out;
96 }
97 if (inode->i_state & I_NEW)
98 unlock_new_inode(inode);
99 else
100 iput(lower_inode);
101 if (S_ISLNK(lower_inode->i_mode))
102 inode->i_op = &ecryptfs_symlink_iops;
103 else if (S_ISDIR(lower_inode->i_mode))
104 inode->i_op = &ecryptfs_dir_iops;
105 if (S_ISDIR(lower_inode->i_mode))
106 inode->i_fop = &ecryptfs_dir_fops;
107 /* TODO: Is there a better way to identify if the inode is
108 * special? */
109 if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
110 S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
111 init_special_inode(inode, lower_inode->i_mode,
112 lower_inode->i_rdev);
113 dentry->d_op = &ecryptfs_dops;
114 if (flag)
115 d_add(dentry, inode);
116 else
117 d_instantiate(dentry, inode);
118 ecryptfs_copy_attr_all(inode, lower_inode);
119 /* This size will be overwritten for real files w/ headers and
120 * other metadata */
121 ecryptfs_copy_inode_size(inode, lower_inode);
122out:
123 return rc;
124}
125
/* Token identifiers returned by match_token() for the mount options */
enum {
	ecryptfs_opt_sig,
	ecryptfs_opt_ecryptfs_sig,
	ecryptfs_opt_debug,
	ecryptfs_opt_ecryptfs_debug,
	ecryptfs_opt_cipher,
	ecryptfs_opt_ecryptfs_cipher,
	ecryptfs_opt_ecryptfs_key_bytes,
	ecryptfs_opt_passthrough,
	ecryptfs_opt_err
};
130
131static match_table_t tokens = {
132 {ecryptfs_opt_sig, "sig=%s"},
133 {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"},
134 {ecryptfs_opt_debug, "debug=%u"},
135 {ecryptfs_opt_ecryptfs_debug, "ecryptfs_debug=%u"},
136 {ecryptfs_opt_cipher, "cipher=%s"},
137 {ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"},
138 {ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"},
139 {ecryptfs_opt_passthrough, "ecryptfs_passthrough"},
140 {ecryptfs_opt_err, NULL}
141};
142
143/**
144 * ecryptfs_verify_version
145 * @version: The version number to confirm
146 *
147 * Returns zero on good version; non-zero otherwise
148 */
149static int ecryptfs_verify_version(u16 version)
150{
151 int rc = 0;
152 unsigned char major;
153 unsigned char minor;
154
155 major = ((version >> 8) & 0xFF);
156 minor = (version & 0xFF);
157 if (major != ECRYPTFS_VERSION_MAJOR) {
158 ecryptfs_printk(KERN_ERR, "Major version number mismatch. "
159 "Expected [%d]; got [%d]\n",
160 ECRYPTFS_VERSION_MAJOR, major);
161 rc = -EINVAL;
162 goto out;
163 }
164 if (minor != ECRYPTFS_VERSION_MINOR) {
165 ecryptfs_printk(KERN_ERR, "Minor version number mismatch. "
166 "Expected [%d]; got [%d]\n",
167 ECRYPTFS_VERSION_MINOR, minor);
168 rc = -EINVAL;
169 goto out;
170 }
171out:
172 return rc;
173}
174
175/**
176 * ecryptfs_parse_options
177 * @sb: The ecryptfs super block
178 * @options: The options pased to the kernel
179 *
180 * Parse mount options:
181 * debug=N - ecryptfs_verbosity level for debug output
182 * sig=XXX - description(signature) of the key to use
183 *
184 * Returns the dentry object of the lower-level (lower/interposed)
185 * directory; We want to mount our stackable file system on top of
186 * that lower directory.
187 *
188 * The signature of the key to use must be the description of a key
189 * already in the keyring. Mounting will fail if the key can not be
190 * found.
191 *
192 * Returns zero on success; non-zero on error
193 */
194static int ecryptfs_parse_options(struct super_block *sb, char *options)
195{
196 char *p;
197 int rc = 0;
198 int sig_set = 0;
199 int cipher_name_set = 0;
200 int cipher_key_bytes;
201 int cipher_key_bytes_set = 0;
202 struct key *auth_tok_key = NULL;
203 struct ecryptfs_auth_tok *auth_tok = NULL;
204 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
205 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
206 substring_t args[MAX_OPT_ARGS];
207 int token;
208 char *sig_src;
209 char *sig_dst;
210 char *debug_src;
211 char *cipher_name_dst;
212 char *cipher_name_src;
213 char *cipher_key_bytes_src;
214 struct crypto_tfm *tmp_tfm;
215 int cipher_name_len;
216
217 if (!options) {
218 rc = -EINVAL;
219 goto out;
220 }
221 while ((p = strsep(&options, ",")) != NULL) {
222 if (!*p)
223 continue;
224 token = match_token(p, tokens, args);
225 switch (token) {
226 case ecryptfs_opt_sig:
227 case ecryptfs_opt_ecryptfs_sig:
228 sig_src = args[0].from;
229 sig_dst =
230 mount_crypt_stat->global_auth_tok_sig;
231 memcpy(sig_dst, sig_src, ECRYPTFS_SIG_SIZE_HEX);
232 sig_dst[ECRYPTFS_SIG_SIZE_HEX] = '\0';
233 ecryptfs_printk(KERN_DEBUG,
234 "The mount_crypt_stat "
235 "global_auth_tok_sig set to: "
236 "[%s]\n", sig_dst);
237 sig_set = 1;
238 break;
239 case ecryptfs_opt_debug:
240 case ecryptfs_opt_ecryptfs_debug:
241 debug_src = args[0].from;
242 ecryptfs_verbosity =
243 (int)simple_strtol(debug_src, &debug_src,
244 0);
245 ecryptfs_printk(KERN_DEBUG,
246 "Verbosity set to [%d]" "\n",
247 ecryptfs_verbosity);
248 break;
249 case ecryptfs_opt_cipher:
250 case ecryptfs_opt_ecryptfs_cipher:
251 cipher_name_src = args[0].from;
252 cipher_name_dst =
253 mount_crypt_stat->
254 global_default_cipher_name;
255 strncpy(cipher_name_dst, cipher_name_src,
256 ECRYPTFS_MAX_CIPHER_NAME_SIZE);
257 ecryptfs_printk(KERN_DEBUG,
258 "The mount_crypt_stat "
259 "global_default_cipher_name set to: "
260 "[%s]\n", cipher_name_dst);
261 cipher_name_set = 1;
262 break;
263 case ecryptfs_opt_ecryptfs_key_bytes:
264 cipher_key_bytes_src = args[0].from;
265 cipher_key_bytes =
266 (int)simple_strtol(cipher_key_bytes_src,
267 &cipher_key_bytes_src, 0);
268 mount_crypt_stat->global_default_cipher_key_size =
269 cipher_key_bytes;
270 ecryptfs_printk(KERN_DEBUG,
271 "The mount_crypt_stat "
272 "global_default_cipher_key_size "
273 "set to: [%d]\n", mount_crypt_stat->
274 global_default_cipher_key_size);
275 cipher_key_bytes_set = 1;
276 break;
277 case ecryptfs_opt_passthrough:
278 mount_crypt_stat->flags |=
279 ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED;
280 break;
281 case ecryptfs_opt_err:
282 default:
283 ecryptfs_printk(KERN_WARNING,
284 "eCryptfs: unrecognized option '%s'\n",
285 p);
286 }
287 }
288 /* Do not support lack of mount-wide signature in 0.1
289 * release */
290 if (!sig_set) {
291 rc = -EINVAL;
292 ecryptfs_printk(KERN_ERR, "You must supply a valid "
293 "passphrase auth tok signature as a mount "
294 "parameter; see the eCryptfs README\n");
295 goto out;
296 }
297 if (!cipher_name_set) {
298 cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
299 if (unlikely(cipher_name_len
300 >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) {
301 rc = -EINVAL;
302 BUG();
303 goto out;
304 }
305 memcpy(mount_crypt_stat->global_default_cipher_name,
306 ECRYPTFS_DEFAULT_CIPHER, cipher_name_len);
307 mount_crypt_stat->global_default_cipher_name[cipher_name_len]
308 = '\0';
309 }
310 if (!cipher_key_bytes_set) {
311 mount_crypt_stat->global_default_cipher_key_size =
312 ECRYPTFS_DEFAULT_KEY_BYTES;
313 ecryptfs_printk(KERN_DEBUG, "Cipher key size was not "
314 "specified. Defaulting to [%d]\n",
315 mount_crypt_stat->
316 global_default_cipher_key_size);
317 }
318 rc = ecryptfs_process_cipher(
319 &tmp_tfm,
320 &mount_crypt_stat->global_key_tfm,
321 mount_crypt_stat->global_default_cipher_name,
322 mount_crypt_stat->global_default_cipher_key_size);
323 if (tmp_tfm)
324 crypto_free_tfm(tmp_tfm);
325 if (rc) {
326 printk(KERN_ERR "Error attempting to initialize cipher [%s] "
327 "with key size [%Zd] bytes; rc = [%d]\n",
328 mount_crypt_stat->global_default_cipher_name,
329 mount_crypt_stat->global_default_cipher_key_size, rc);
330 rc = -EINVAL;
331 goto out;
332 }
333 mutex_init(&mount_crypt_stat->global_key_tfm_mutex);
334 ecryptfs_printk(KERN_DEBUG, "Requesting the key with description: "
335 "[%s]\n", mount_crypt_stat->global_auth_tok_sig);
336 /* The reference to this key is held until umount is done The
337 * call to key_put is done in ecryptfs_put_super() */
338 auth_tok_key = request_key(&key_type_user,
339 mount_crypt_stat->global_auth_tok_sig,
340 NULL);
341 if (!auth_tok_key || IS_ERR(auth_tok_key)) {
342 ecryptfs_printk(KERN_ERR, "Could not find key with "
343 "description: [%s]\n",
344 mount_crypt_stat->global_auth_tok_sig);
345 process_request_key_err(PTR_ERR(auth_tok_key));
346 rc = -EINVAL;
347 goto out;
348 }
349 auth_tok = ecryptfs_get_key_payload_data(auth_tok_key);
350 if (ecryptfs_verify_version(auth_tok->version)) {
351 ecryptfs_printk(KERN_ERR, "Data structure version mismatch. "
352 "Userspace tools must match eCryptfs kernel "
353 "module with major version [%d] and minor "
354 "version [%d]\n", ECRYPTFS_VERSION_MAJOR,
355 ECRYPTFS_VERSION_MINOR);
356 rc = -EINVAL;
357 goto out;
358 }
359 if (auth_tok->token_type != ECRYPTFS_PASSWORD) {
360 ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure "
361 "returned from key\n");
362 rc = -EINVAL;
363 goto out;
364 }
365 mount_crypt_stat->global_auth_tok_key = auth_tok_key;
366 mount_crypt_stat->global_auth_tok = auth_tok;
367out:
368 return rc;
369}
370
371struct kmem_cache *ecryptfs_sb_info_cache;
372
373/**
374 * ecryptfs_fill_super
375 * @sb: The ecryptfs super block
376 * @raw_data: The options passed to mount
377 * @silent: Not used but required by function prototype
378 *
379 * Sets up what we can of the sb, rest is done in ecryptfs_read_super
380 *
381 * Returns zero on success; non-zero otherwise
382 */
383static int
384ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
385{
386 int rc = 0;
387
388 /* Released in ecryptfs_put_super() */
389 ecryptfs_set_superblock_private(sb,
390 kmem_cache_alloc(ecryptfs_sb_info_cache,
391 SLAB_KERNEL));
392 if (!ecryptfs_superblock_to_private(sb)) {
393 ecryptfs_printk(KERN_WARNING, "Out of memory\n");
394 rc = -ENOMEM;
395 goto out;
396 }
397 memset(ecryptfs_superblock_to_private(sb), 0,
398 sizeof(struct ecryptfs_sb_info));
399 sb->s_op = &ecryptfs_sops;
400 /* Released through deactivate_super(sb) from get_sb_nodev */
401 sb->s_root = d_alloc(NULL, &(const struct qstr) {
402 .hash = 0,.name = "/",.len = 1});
403 if (!sb->s_root) {
404 ecryptfs_printk(KERN_ERR, "d_alloc failed\n");
405 rc = -ENOMEM;
406 goto out;
407 }
408 sb->s_root->d_op = &ecryptfs_dops;
409 sb->s_root->d_sb = sb;
410 sb->s_root->d_parent = sb->s_root;
411 /* Released in d_release when dput(sb->s_root) is called */
412 /* through deactivate_super(sb) from get_sb_nodev() */
413 ecryptfs_set_dentry_private(sb->s_root,
414 kmem_cache_alloc(ecryptfs_dentry_info_cache,
415 SLAB_KERNEL));
416 if (!ecryptfs_dentry_to_private(sb->s_root)) {
417 ecryptfs_printk(KERN_ERR,
418 "dentry_info_cache alloc failed\n");
419 rc = -ENOMEM;
420 goto out;
421 }
422 memset(ecryptfs_dentry_to_private(sb->s_root), 0,
423 sizeof(struct ecryptfs_dentry_info));
424 rc = 0;
425out:
426 /* Should be able to rely on deactivate_super called from
427 * get_sb_nodev */
428 return rc;
429}
430
431/**
432 * ecryptfs_read_super
433 * @sb: The ecryptfs super block
434 * @dev_name: The path to mount over
435 *
436 * Read the super block of the lower filesystem, and use
437 * ecryptfs_interpose to create our initial inode and super block
438 * struct.
439 */
440static int ecryptfs_read_super(struct super_block *sb, const char *dev_name)
441{
442 int rc;
443 struct nameidata nd;
444 struct dentry *lower_root;
445 struct vfsmount *lower_mnt;
446
447 memset(&nd, 0, sizeof(struct nameidata));
448 rc = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
449 if (rc) {
450 ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n");
451 goto out_free;
452 }
453 lower_root = nd.dentry;
454 if (!lower_root->d_inode) {
455 ecryptfs_printk(KERN_WARNING,
456 "No directory to interpose on\n");
457 rc = -ENOENT;
458 goto out_free;
459 }
460 lower_mnt = nd.mnt;
461 ecryptfs_set_superblock_lower(sb, lower_root->d_sb);
462 sb->s_maxbytes = lower_root->d_sb->s_maxbytes;
463 ecryptfs_set_dentry_lower(sb->s_root, lower_root);
464 ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt);
465 if ((rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0)))
466 goto out_free;
467 rc = 0;
468 goto out;
469out_free:
470 path_release(&nd);
471out:
472 return rc;
473}
474
475/**
476 * ecryptfs_get_sb
477 * @fs_type
478 * @flags
479 * @dev_name: The path to mount over
480 * @raw_data: The options passed into the kernel
481 *
482 * The whole ecryptfs_get_sb process is broken into 4 functions:
483 * ecryptfs_parse_options(): handle options passed to ecryptfs, if any
484 * ecryptfs_fill_super(): used by get_sb_nodev, fills out the super_block
485 * with as much information as it can before needing
486 * the lower filesystem.
487 * ecryptfs_read_super(): this accesses the lower filesystem and uses
488 * ecryptfs_interpolate to perform most of the linking
489 * ecryptfs_interpolate(): links the lower filesystem into ecryptfs
490 */
491static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
492 const char *dev_name, void *raw_data,
493 struct vfsmount *mnt)
494{
495 int rc;
496 struct super_block *sb;
497
498 rc = get_sb_nodev(fs_type, flags, raw_data, ecryptfs_fill_super, mnt);
499 if (rc < 0) {
500 printk(KERN_ERR "Getting sb failed; rc = [%d]\n", rc);
501 goto out;
502 }
503 sb = mnt->mnt_sb;
504 rc = ecryptfs_parse_options(sb, raw_data);
505 if (rc) {
506 printk(KERN_ERR "Error parsing options; rc = [%d]\n", rc);
507 goto out_abort;
508 }
509 rc = ecryptfs_read_super(sb, dev_name);
510 if (rc) {
511 printk(KERN_ERR "Reading sb failed; rc = [%d]\n", rc);
512 goto out_abort;
513 }
514 goto out;
515out_abort:
516 dput(sb->s_root);
517 up_write(&sb->s_umount);
518 deactivate_super(sb);
519out:
520 return rc;
521}
522
/**
 * ecryptfs_kill_block_super
 * @sb: The ecryptfs super block
 *
 * The ->kill_sb() hook; it only delegates to
 * generic_shutdown_super(). Private data is free'd in
 * ecryptfs_put_super().
 */
static void ecryptfs_kill_block_super(struct super_block *sb)
{
	generic_shutdown_super(sb);
}
534
535static struct file_system_type ecryptfs_fs_type = {
536 .owner = THIS_MODULE,
537 .name = "ecryptfs",
538 .get_sb = ecryptfs_get_sb,
539 .kill_sb = ecryptfs_kill_block_super,
540 .fs_flags = 0
541};
542
543/**
544 * inode_info_init_once
545 *
546 * Initializes the ecryptfs_inode_info_cache when it is created
547 */
548static void
549inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags)
550{
551 struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr;
552
553 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
554 SLAB_CTOR_CONSTRUCTOR)
555 inode_init_once(&ei->vfs_inode);
556}
557
558static struct ecryptfs_cache_info {
559 kmem_cache_t **cache;
560 const char *name;
561 size_t size;
562 void (*ctor)(void*, struct kmem_cache *, unsigned long);
563} ecryptfs_cache_infos[] = {
564 {
565 .cache = &ecryptfs_auth_tok_list_item_cache,
566 .name = "ecryptfs_auth_tok_list_item",
567 .size = sizeof(struct ecryptfs_auth_tok_list_item),
568 },
569 {
570 .cache = &ecryptfs_file_info_cache,
571 .name = "ecryptfs_file_cache",
572 .size = sizeof(struct ecryptfs_file_info),
573 },
574 {
575 .cache = &ecryptfs_dentry_info_cache,
576 .name = "ecryptfs_dentry_info_cache",
577 .size = sizeof(struct ecryptfs_dentry_info),
578 },
579 {
580 .cache = &ecryptfs_inode_info_cache,
581 .name = "ecryptfs_inode_cache",
582 .size = sizeof(struct ecryptfs_inode_info),
583 .ctor = inode_info_init_once,
584 },
585 {
586 .cache = &ecryptfs_sb_info_cache,
587 .name = "ecryptfs_sb_cache",
588 .size = sizeof(struct ecryptfs_sb_info),
589 },
590 {
591 .cache = &ecryptfs_header_cache_0,
592 .name = "ecryptfs_headers_0",
593 .size = PAGE_CACHE_SIZE,
594 },
595 {
596 .cache = &ecryptfs_header_cache_1,
597 .name = "ecryptfs_headers_1",
598 .size = PAGE_CACHE_SIZE,
599 },
600 {
601 .cache = &ecryptfs_header_cache_2,
602 .name = "ecryptfs_headers_2",
603 .size = PAGE_CACHE_SIZE,
604 },
605 {
606 .cache = &ecryptfs_lower_page_cache,
607 .name = "ecryptfs_lower_page_cache",
608 .size = PAGE_CACHE_SIZE,
609 },
610};
611
612static void ecryptfs_free_kmem_caches(void)
613{
614 int i;
615
616 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
617 struct ecryptfs_cache_info *info;
618
619 info = &ecryptfs_cache_infos[i];
620 if (*(info->cache))
621 kmem_cache_destroy(*(info->cache));
622 }
623}
624
625/**
626 * ecryptfs_init_kmem_caches
627 *
628 * Returns zero on success; non-zero otherwise
629 */
630static int ecryptfs_init_kmem_caches(void)
631{
632 int i;
633
634 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
635 struct ecryptfs_cache_info *info;
636
637 info = &ecryptfs_cache_infos[i];
638 *(info->cache) = kmem_cache_create(info->name, info->size,
639 0, SLAB_HWCACHE_ALIGN, info->ctor, NULL);
640 if (!*(info->cache)) {
641 ecryptfs_free_kmem_caches();
642 ecryptfs_printk(KERN_WARNING, "%s: "
643 "kmem_cache_create failed\n",
644 info->name);
645 return -ENOMEM;
646 }
647 }
648 return 0;
649}
650
651struct ecryptfs_obj {
652 char *name;
653 struct list_head slot_list;
654 struct kobject kobj;
655};
656
657struct ecryptfs_attribute {
658 struct attribute attr;
659 ssize_t(*show) (struct ecryptfs_obj *, char *);
660 ssize_t(*store) (struct ecryptfs_obj *, const char *, size_t);
661};
662
663static ssize_t
664ecryptfs_attr_store(struct kobject *kobj,
665 struct attribute *attr, const char *buf, size_t len)
666{
667 struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj,
668 kobj);
669 struct ecryptfs_attribute *attribute =
670 container_of(attr, struct ecryptfs_attribute, attr);
671
672 return (attribute->store ? attribute->store(obj, buf, len) : 0);
673}
674
675static ssize_t
676ecryptfs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
677{
678 struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj,
679 kobj);
680 struct ecryptfs_attribute *attribute =
681 container_of(attr, struct ecryptfs_attribute, attr);
682
683 return (attribute->show ? attribute->show(obj, buf) : 0);
684}
685
686static struct sysfs_ops ecryptfs_sysfs_ops = {
687 .show = ecryptfs_attr_show,
688 .store = ecryptfs_attr_store
689};
690
691static struct kobj_type ecryptfs_ktype = {
692 .sysfs_ops = &ecryptfs_sysfs_ops
693};
694
695static decl_subsys(ecryptfs, &ecryptfs_ktype, NULL);
696
697static ssize_t version_show(struct ecryptfs_obj *obj, char *buff)
698{
699 return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK);
700}
701
702static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version);
703
704struct ecryptfs_version_str_map_elem {
705 u32 flag;
706 char *str;
707} ecryptfs_version_str_map[] = {
708 {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"},
709 {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"},
710 {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"},
711 {ECRYPTFS_VERSIONING_POLICY, "policy"}
712};
713
714static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff)
715{
716 int i;
717 int remaining = PAGE_SIZE;
718 int total_written = 0;
719
720 buff[0] = '\0';
721 for (i = 0; i < ARRAY_SIZE(ecryptfs_version_str_map); i++) {
722 int entry_size;
723
724 if (!(ECRYPTFS_VERSIONING_MASK
725 & ecryptfs_version_str_map[i].flag))
726 continue;
727 entry_size = strlen(ecryptfs_version_str_map[i].str);
728 if ((entry_size + 2) > remaining)
729 goto out;
730 memcpy(buff, ecryptfs_version_str_map[i].str, entry_size);
731 buff[entry_size++] = '\n';
732 buff[entry_size] = '\0';
733 buff += entry_size;
734 total_written += entry_size;
735 remaining -= entry_size;
736 }
737out:
738 return total_written;
739}
740
741static struct ecryptfs_attribute sysfs_attr_version_str = __ATTR_RO(version_str);
742
743static int do_sysfs_registration(void)
744{
745 int rc;
746
747 if ((rc = subsystem_register(&ecryptfs_subsys))) {
748 printk(KERN_ERR
749 "Unable to register ecryptfs sysfs subsystem\n");
750 goto out;
751 }
752 rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj,
753 &sysfs_attr_version.attr);
754 if (rc) {
755 printk(KERN_ERR
756 "Unable to create ecryptfs version attribute\n");
757 subsystem_unregister(&ecryptfs_subsys);
758 goto out;
759 }
760 rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj,
761 &sysfs_attr_version_str.attr);
762 if (rc) {
763 printk(KERN_ERR
764 "Unable to create ecryptfs version_str attribute\n");
765 sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
766 &sysfs_attr_version.attr);
767 subsystem_unregister(&ecryptfs_subsys);
768 goto out;
769 }
770out:
771 return rc;
772}
773
774static int __init ecryptfs_init(void)
775{
776 int rc;
777
778 if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_CACHE_SIZE) {
779 rc = -EINVAL;
780 ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is "
781 "larger than the host's page size, and so "
782 "eCryptfs cannot run on this system. The "
783 "default eCryptfs extent size is [%d] bytes; "
784 "the page size is [%d] bytes.\n",
785 ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE);
786 goto out;
787 }
788 rc = ecryptfs_init_kmem_caches();
789 if (rc) {
790 printk(KERN_ERR
791 "Failed to allocate one or more kmem_cache objects\n");
792 goto out;
793 }
794 rc = register_filesystem(&ecryptfs_fs_type);
795 if (rc) {
796 printk(KERN_ERR "Failed to register filesystem\n");
797 ecryptfs_free_kmem_caches();
798 goto out;
799 }
800 kset_set_kset_s(&ecryptfs_subsys, fs_subsys);
801 sysfs_attr_version.attr.owner = THIS_MODULE;
802 sysfs_attr_version_str.attr.owner = THIS_MODULE;
803 rc = do_sysfs_registration();
804 if (rc) {
805 printk(KERN_ERR "sysfs registration failed\n");
806 unregister_filesystem(&ecryptfs_fs_type);
807 ecryptfs_free_kmem_caches();
808 goto out;
809 }
810out:
811 return rc;
812}
813
814static void __exit ecryptfs_exit(void)
815{
816 sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
817 &sysfs_attr_version.attr);
818 sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
819 &sysfs_attr_version_str.attr);
820 subsystem_unregister(&ecryptfs_subsys);
821 unregister_filesystem(&ecryptfs_fs_type);
822 ecryptfs_free_kmem_caches();
823}
824
825MODULE_AUTHOR("Michael A. Halcrow <mhalcrow@us.ibm.com>");
826MODULE_DESCRIPTION("eCryptfs");
827
828MODULE_LICENSE("GPL");
829
830module_init(ecryptfs_init)
831module_exit(ecryptfs_exit)
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
new file mode 100644
index 000000000000..924dd90a4cf5
--- /dev/null
+++ b/fs/ecryptfs/mmap.c
@@ -0,0 +1,788 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 * This is where eCryptfs coordinates the symmetric encryption and
4 * decryption of the file data as it passes between the lower
5 * encrypted file and the upper decrypted file.
6 *
7 * Copyright (C) 1997-2003 Erez Zadok
8 * Copyright (C) 2001-2003 Stony Brook University
9 * Copyright (C) 2004-2006 International Business Machines Corp.
10 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 * 02111-1307, USA.
26 */
27
28#include <linux/pagemap.h>
29#include <linux/writeback.h>
30#include <linux/page-flags.h>
31#include <linux/mount.h>
32#include <linux/file.h>
33#include <linux/crypto.h>
34#include <linux/scatterlist.h>
35#include "ecryptfs_kernel.h"
36
37struct kmem_cache *ecryptfs_lower_page_cache;
38
39/**
40 * ecryptfs_get1page
41 *
42 * Get one page from cache or lower f/s, return error otherwise.
43 *
44 * Returns unlocked and up-to-date page (if ok), with increased
45 * refcnt.
46 */
47static struct page *ecryptfs_get1page(struct file *file, int index)
48{
49 struct page *page;
50 struct dentry *dentry;
51 struct inode *inode;
52 struct address_space *mapping;
53
54 dentry = file->f_dentry;
55 inode = dentry->d_inode;
56 mapping = inode->i_mapping;
57 page = read_cache_page(mapping, index,
58 (filler_t *)mapping->a_ops->readpage,
59 (void *)file);
60 if (IS_ERR(page))
61 goto out;
62 wait_on_page_locked(page);
63out:
64 return page;
65}
66
67static
68int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros);
69
70/**
71 * ecryptfs_fill_zeros
72 * @file: The ecryptfs file
73 * @new_length: The new length of the data in the underlying file;
74 * everything between the prior end of the file and the
75 * new end of the file will be filled with zero's.
76 * new_length must be greater than current length
77 *
78 * Function for handling lseek-ing past the end of the file.
79 *
80 * This function does not support shrinking, only growing a file.
81 *
82 * Returns zero on success; non-zero otherwise.
83 */
84int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
85{
86 int rc = 0;
87 struct dentry *dentry = file->f_dentry;
88 struct inode *inode = dentry->d_inode;
89 pgoff_t old_end_page_index = 0;
90 pgoff_t index = old_end_page_index;
91 int old_end_pos_in_page = -1;
92 pgoff_t new_end_page_index;
93 int new_end_pos_in_page;
94 loff_t cur_length = i_size_read(inode);
95
96 if (cur_length != 0) {
97 index = old_end_page_index =
98 ((cur_length - 1) >> PAGE_CACHE_SHIFT);
99 old_end_pos_in_page = ((cur_length - 1) & ~PAGE_CACHE_MASK);
100 }
101 new_end_page_index = ((new_length - 1) >> PAGE_CACHE_SHIFT);
102 new_end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK);
103 ecryptfs_printk(KERN_DEBUG, "old_end_page_index = [0x%.16x]; "
104 "old_end_pos_in_page = [%d]; "
105 "new_end_page_index = [0x%.16x]; "
106 "new_end_pos_in_page = [%d]\n",
107 old_end_page_index, old_end_pos_in_page,
108 new_end_page_index, new_end_pos_in_page);
109 if (old_end_page_index == new_end_page_index) {
110 /* Start and end are in the same page; we just need to
111 * set a portion of the existing page to zero's */
112 rc = write_zeros(file, index, (old_end_pos_in_page + 1),
113 (new_end_pos_in_page - old_end_pos_in_page));
114 if (rc)
115 ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
116 "index=[0x%.16x], "
117 "old_end_pos_in_page=[d], "
118 "(PAGE_CACHE_SIZE - new_end_pos_in_page"
119 "=[%d]"
120 ")=[d]) returned [%d]\n", file, index,
121 old_end_pos_in_page,
122 new_end_pos_in_page,
123 (PAGE_CACHE_SIZE - new_end_pos_in_page),
124 rc);
125 goto out;
126 }
127 /* Fill the remainder of the previous last page with zeros */
128 rc = write_zeros(file, index, (old_end_pos_in_page + 1),
129 ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page));
130 if (rc) {
131 ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
132 "index=[0x%.16x], old_end_pos_in_page=[d], "
133 "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) "
134 "returned [%d]\n", file, index,
135 old_end_pos_in_page,
136 (PAGE_CACHE_SIZE - old_end_pos_in_page), rc);
137 goto out;
138 }
139 index++;
140 while (index < new_end_page_index) {
141 /* Fill all intermediate pages with zeros */
142 rc = write_zeros(file, index, 0, PAGE_CACHE_SIZE);
143 if (rc) {
144 ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
145 "index=[0x%.16x], "
146 "old_end_pos_in_page=[d], "
147 "(PAGE_CACHE_SIZE - new_end_pos_in_page"
148 "=[%d]"
149 ")=[d]) returned [%d]\n", file, index,
150 old_end_pos_in_page,
151 new_end_pos_in_page,
152 (PAGE_CACHE_SIZE - new_end_pos_in_page),
153 rc);
154 goto out;
155 }
156 index++;
157 }
158 /* Fill the portion at the beginning of the last new page with
159 * zero's */
160 rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1));
161 if (rc) {
162 ecryptfs_printk(KERN_ERR, "write_zeros(file="
163 "[%p], index=[0x%.16x], 0, "
164 "new_end_pos_in_page=[%d]"
165 "returned [%d]\n", file, index,
166 new_end_pos_in_page, rc);
167 goto out;
168 }
169out:
170 return rc;
171}
172
173/**
174 * ecryptfs_writepage
175 * @page: Page that is locked before this call is made
176 *
177 * Returns zero on success; non-zero otherwise
178 */
179static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
180{
181 struct ecryptfs_page_crypt_context ctx;
182 int rc;
183
184 ctx.page = page;
185 ctx.mode = ECRYPTFS_WRITEPAGE_MODE;
186 ctx.param.wbc = wbc;
187 rc = ecryptfs_encrypt_page(&ctx);
188 if (rc) {
189 ecryptfs_printk(KERN_WARNING, "Error encrypting "
190 "page (upper index [0x%.16x])\n", page->index);
191 ClearPageUptodate(page);
192 goto out;
193 }
194 SetPageUptodate(page);
195 unlock_page(page);
196out:
197 return rc;
198}
199
200/**
201 * Reads the data from the lower file file at index lower_page_index
202 * and copies that data into page.
203 *
204 * @param page Page to fill
205 * @param lower_page_index Index of the page in the lower file to get
206 */
207int ecryptfs_do_readpage(struct file *file, struct page *page,
208 pgoff_t lower_page_index)
209{
210 int rc;
211 struct dentry *dentry;
212 struct file *lower_file;
213 struct dentry *lower_dentry;
214 struct inode *inode;
215 struct inode *lower_inode;
216 char *page_data;
217 struct page *lower_page = NULL;
218 char *lower_page_data;
219 const struct address_space_operations *lower_a_ops;
220
221 dentry = file->f_dentry;
222 lower_file = ecryptfs_file_to_lower(file);
223 lower_dentry = ecryptfs_dentry_to_lower(dentry);
224 inode = dentry->d_inode;
225 lower_inode = ecryptfs_inode_to_lower(inode);
226 lower_a_ops = lower_inode->i_mapping->a_ops;
227 lower_page = read_cache_page(lower_inode->i_mapping, lower_page_index,
228 (filler_t *)lower_a_ops->readpage,
229 (void *)lower_file);
230 if (IS_ERR(lower_page)) {
231 rc = PTR_ERR(lower_page);
232 lower_page = NULL;
233 ecryptfs_printk(KERN_ERR, "Error reading from page cache\n");
234 goto out;
235 }
236 wait_on_page_locked(lower_page);
237 page_data = (char *)kmap(page);
238 if (!page_data) {
239 rc = -ENOMEM;
240 ecryptfs_printk(KERN_ERR, "Error mapping page\n");
241 goto out;
242 }
243 lower_page_data = (char *)kmap(lower_page);
244 if (!lower_page_data) {
245 rc = -ENOMEM;
246 ecryptfs_printk(KERN_ERR, "Error mapping page\n");
247 kunmap(page);
248 goto out;
249 }
250 memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
251 kunmap(lower_page);
252 kunmap(page);
253 rc = 0;
254out:
255 if (likely(lower_page))
256 page_cache_release(lower_page);
257 if (rc == 0)
258 SetPageUptodate(page);
259 else
260 ClearPageUptodate(page);
261 return rc;
262}
263
264/**
265 * ecryptfs_readpage
266 * @file: This is an ecryptfs file
267 * @page: ecryptfs associated page to stick the read data into
268 *
269 * Read in a page, decrypting if necessary.
270 *
271 * Returns zero on success; non-zero on error.
272 */
273static int ecryptfs_readpage(struct file *file, struct page *page)
274{
275 int rc = 0;
276 struct ecryptfs_crypt_stat *crypt_stat;
277
278 BUG_ON(!(file && file->f_dentry && file->f_dentry->d_inode));
279 crypt_stat =
280 &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat;
281 if (!crypt_stat
282 || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)
283 || ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
284 ecryptfs_printk(KERN_DEBUG,
285 "Passing through unencrypted page\n");
286 rc = ecryptfs_do_readpage(file, page, page->index);
287 if (rc) {
288 ecryptfs_printk(KERN_ERR, "Error reading page; rc = "
289 "[%d]\n", rc);
290 goto out;
291 }
292 } else {
293 rc = ecryptfs_decrypt_page(file, page);
294 if (rc) {
295
296 ecryptfs_printk(KERN_ERR, "Error decrypting page; "
297 "rc = [%d]\n", rc);
298 goto out;
299 }
300 }
301 SetPageUptodate(page);
302out:
303 if (rc)
304 ClearPageUptodate(page);
305 ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
306 page->index);
307 unlock_page(page);
308 return rc;
309}
310
311static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
312{
313 struct inode *inode = page->mapping->host;
314 int end_byte_in_page;
315 int rc = 0;
316 char *page_virt;
317
318 if ((i_size_read(inode) / PAGE_CACHE_SIZE) == page->index) {
319 end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
320 if (to > end_byte_in_page)
321 end_byte_in_page = to;
322 page_virt = kmap(page);
323 if (!page_virt) {
324 rc = -ENOMEM;
325 ecryptfs_printk(KERN_WARNING,
326 "Could not map page\n");
327 goto out;
328 }
329 memset((page_virt + end_byte_in_page), 0,
330 (PAGE_CACHE_SIZE - end_byte_in_page));
331 kunmap(page);
332 }
333out:
334 return rc;
335}
336
337static int ecryptfs_prepare_write(struct file *file, struct page *page,
338 unsigned from, unsigned to)
339{
340 int rc = 0;
341
342 kmap(page);
343 if (from == 0 && to == PAGE_CACHE_SIZE)
344 goto out; /* If we are writing a full page, it will be
345 up to date. */
346 if (!PageUptodate(page))
347 rc = ecryptfs_do_readpage(file, page, page->index);
348out:
349 return rc;
350}
351
352int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
353 char **lower_virt,
354 struct inode *lower_inode,
355 unsigned long lower_page_index)
356{
357 int rc = 0;
358
359 (*lower_page) = grab_cache_page(lower_inode->i_mapping,
360 lower_page_index);
361 if (!(*lower_page)) {
362 ecryptfs_printk(KERN_ERR, "grab_cache_page for "
363 "lower_page_index = [0x%.16x] failed\n",
364 lower_page_index);
365 rc = -EINVAL;
366 goto out;
367 }
368 if (lower_virt)
369 (*lower_virt) = kmap((*lower_page));
370 else
371 kmap((*lower_page));
372out:
373 return rc;
374}
375
376int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
377 struct inode *lower_inode,
378 struct writeback_control *wbc)
379{
380 int rc = 0;
381
382 rc = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
383 if (rc) {
384 ecryptfs_printk(KERN_ERR, "Error calling lower writepage(); "
385 "rc = [%d]\n", rc);
386 goto out;
387 }
388 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
389 page_cache_release(lower_page);
390out:
391 return rc;
392}
393
394static void ecryptfs_unmap_and_release_lower_page(struct page *lower_page)
395{
396 kunmap(lower_page);
397 ecryptfs_printk(KERN_DEBUG, "Unlocking lower page with index = "
398 "[0x%.16x]\n", lower_page->index);
399 unlock_page(lower_page);
400 page_cache_release(lower_page);
401}
402
403/**
404 * ecryptfs_write_inode_size_to_header
405 *
406 * Writes the lower file size to the first 8 bytes of the header.
407 *
408 * Returns zero on success; non-zero on error.
409 */
410int
411ecryptfs_write_inode_size_to_header(struct file *lower_file,
412 struct inode *lower_inode,
413 struct inode *inode)
414{
415 int rc = 0;
416 struct page *header_page;
417 char *header_virt;
418 const struct address_space_operations *lower_a_ops;
419 u64 file_size;
420
421 rc = ecryptfs_grab_and_map_lower_page(&header_page, &header_virt,
422 lower_inode, 0);
423 if (rc) {
424 ecryptfs_printk(KERN_ERR, "grab_cache_page for header page "
425 "failed\n");
426 goto out;
427 }
428 lower_a_ops = lower_inode->i_mapping->a_ops;
429 rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8);
430 file_size = (u64)i_size_read(inode);
431 ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size);
432 file_size = cpu_to_be64(file_size);
433 memcpy(header_virt, &file_size, sizeof(u64));
434 rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8);
435 if (rc < 0)
436 ecryptfs_printk(KERN_ERR, "Error commiting header page "
437 "write\n");
438 ecryptfs_unmap_and_release_lower_page(header_page);
439 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
440 mark_inode_dirty_sync(inode);
441out:
442 return rc;
443}
444
445int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
446 struct file *lower_file,
447 unsigned long lower_page_index, int byte_offset,
448 int region_bytes)
449{
450 int rc = 0;
451
452 rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, lower_inode,
453 lower_page_index);
454 if (rc) {
455 ecryptfs_printk(KERN_ERR, "Error attempting to grab and map "
456 "lower page with index [0x%.16x]\n",
457 lower_page_index);
458 goto out;
459 }
460 rc = lower_inode->i_mapping->a_ops->prepare_write(lower_file,
461 (*lower_page),
462 byte_offset,
463 region_bytes);
464 if (rc) {
465 ecryptfs_printk(KERN_ERR, "prepare_write for "
466 "lower_page_index = [0x%.16x] failed; rc = "
467 "[%d]\n", lower_page_index, rc);
468 }
469out:
470 if (rc && (*lower_page)) {
471 ecryptfs_unmap_and_release_lower_page(*lower_page);
472 (*lower_page) = NULL;
473 }
474 return rc;
475}
476
477/**
478 * ecryptfs_commit_lower_page
479 *
480 * Returns zero on success; non-zero on error
481 */
482int
483ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
484 struct file *lower_file, int byte_offset,
485 int region_size)
486{
487 int rc = 0;
488
489 rc = lower_inode->i_mapping->a_ops->commit_write(
490 lower_file, lower_page, byte_offset, region_size);
491 if (rc < 0) {
492 ecryptfs_printk(KERN_ERR,
493 "Error committing write; rc = [%d]\n", rc);
494 } else
495 rc = 0;
496 ecryptfs_unmap_and_release_lower_page(lower_page);
497 return rc;
498}
499
500/**
501 * ecryptfs_copy_page_to_lower
502 *
503 * Used for plaintext pass-through; no page index interpolation
504 * required.
505 */
506int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
507 struct file *lower_file)
508{
509 int rc = 0;
510 struct page *lower_page;
511
512 rc = ecryptfs_get_lower_page(&lower_page, lower_inode, lower_file,
513 page->index, 0, PAGE_CACHE_SIZE);
514 if (rc) {
515 ecryptfs_printk(KERN_ERR, "Error attempting to get page "
516 "at index [0x%.16x]\n", page->index);
517 goto out;
518 }
519 /* TODO: aops */
520 memcpy((char *)page_address(lower_page), page_address(page),
521 PAGE_CACHE_SIZE);
522 rc = ecryptfs_commit_lower_page(lower_page, lower_inode, lower_file,
523 0, PAGE_CACHE_SIZE);
524 if (rc)
525 ecryptfs_printk(KERN_ERR, "Error attempting to commit page "
526 "at index [0x%.16x]\n", page->index);
527out:
528 return rc;
529}
530
531static int
532process_new_file(struct ecryptfs_crypt_stat *crypt_stat,
533 struct file *file, struct inode *inode)
534{
535 struct page *header_page;
536 const struct address_space_operations *lower_a_ops;
537 struct inode *lower_inode;
538 struct file *lower_file;
539 char *header_virt;
540 int rc = 0;
541 int current_header_page = 0;
542 int header_pages;
543 int more_header_data_to_be_written = 1;
544
545 lower_inode = ecryptfs_inode_to_lower(inode);
546 lower_file = ecryptfs_file_to_lower(file);
547 lower_a_ops = lower_inode->i_mapping->a_ops;
548 header_pages = ((crypt_stat->header_extent_size
549 * crypt_stat->num_header_extents_at_front)
550 / PAGE_CACHE_SIZE);
551 BUG_ON(header_pages < 1);
552 while (current_header_page < header_pages) {
553 rc = ecryptfs_grab_and_map_lower_page(&header_page,
554 &header_virt,
555 lower_inode,
556 current_header_page);
557 if (rc) {
558 ecryptfs_printk(KERN_ERR, "grab_cache_page for "
559 "header page [%d] failed; rc = [%d]\n",
560 current_header_page, rc);
561 goto out;
562 }
563 rc = lower_a_ops->prepare_write(lower_file, header_page, 0,
564 PAGE_CACHE_SIZE);
565 if (rc) {
566 ecryptfs_printk(KERN_ERR, "Error preparing to write "
567 "header page out; rc = [%d]\n", rc);
568 goto out;
569 }
570 memset(header_virt, 0, PAGE_CACHE_SIZE);
571 if (more_header_data_to_be_written) {
572 rc = ecryptfs_write_headers_virt(header_virt,
573 crypt_stat,
574 file->f_dentry);
575 if (rc) {
576 ecryptfs_printk(KERN_WARNING, "Error "
577 "generating header; rc = "
578 "[%d]\n", rc);
579 rc = -EIO;
580 memset(header_virt, 0, PAGE_CACHE_SIZE);
581 ecryptfs_unmap_and_release_lower_page(
582 header_page);
583 goto out;
584 }
585 if (current_header_page == 0)
586 memset(header_virt, 0, 8);
587 more_header_data_to_be_written = 0;
588 }
589 rc = lower_a_ops->commit_write(lower_file, header_page, 0,
590 PAGE_CACHE_SIZE);
591 ecryptfs_unmap_and_release_lower_page(header_page);
592 if (rc < 0) {
593 ecryptfs_printk(KERN_ERR,
594 "Error commiting header page write; "
595 "rc = [%d]\n", rc);
596 break;
597 }
598 current_header_page++;
599 }
600 if (rc >= 0) {
601 rc = 0;
602 ecryptfs_printk(KERN_DEBUG, "lower_inode->i_blocks = "
603 "[0x%.16x]\n", lower_inode->i_blocks);
604 i_size_write(inode, 0);
605 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
606 mark_inode_dirty_sync(inode);
607 }
608 ecryptfs_printk(KERN_DEBUG, "Clearing ECRYPTFS_NEW_FILE flag in "
609 "crypt_stat at memory location [%p]\n", crypt_stat);
610 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
611out:
612 return rc;
613}
614
615/**
616 * ecryptfs_commit_write
617 * @file: The eCryptfs file object
618 * @page: The eCryptfs page
619 * @from: Ignored (we rotate the page IV on each write)
620 * @to: Ignored
621 *
622 * This is where we encrypt the data and pass the encrypted data to
623 * the lower filesystem. In OpenPGP-compatible mode, we operate on
624 * entire underlying packets.
625 */
626static int ecryptfs_commit_write(struct file *file, struct page *page,
627 unsigned from, unsigned to)
628{
629 struct ecryptfs_page_crypt_context ctx;
630 loff_t pos;
631 struct inode *inode;
632 struct inode *lower_inode;
633 struct file *lower_file;
634 struct ecryptfs_crypt_stat *crypt_stat;
635 int rc;
636
637 inode = page->mapping->host;
638 lower_inode = ecryptfs_inode_to_lower(inode);
639 lower_file = ecryptfs_file_to_lower(file);
640 mutex_lock(&lower_inode->i_mutex);
641 crypt_stat =
642 &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat;
643 if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
644 ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
645 "crypt_stat at memory location [%p]\n", crypt_stat);
646 rc = process_new_file(crypt_stat, file, inode);
647 if (rc) {
648 ecryptfs_printk(KERN_ERR, "Error processing new "
649 "file; rc = [%d]\n", rc);
650 goto out;
651 }
652 } else
653 ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
654 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
655 "(page w/ index = [0x%.16x], to = [%d])\n", page->index,
656 to);
657 rc = fill_zeros_to_end_of_page(page, to);
658 if (rc) {
659 ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
660 "zeros in page with index = [0x%.16x]\n",
661 page->index);
662 goto out;
663 }
664 ctx.page = page;
665 ctx.mode = ECRYPTFS_PREPARE_COMMIT_MODE;
666 ctx.param.lower_file = lower_file;
667 rc = ecryptfs_encrypt_page(&ctx);
668 if (rc) {
669 ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
670 "index [0x%.16x])\n", page->index);
671 goto out;
672 }
673 rc = 0;
674 inode->i_blocks = lower_inode->i_blocks;
675 pos = (page->index << PAGE_CACHE_SHIFT) + to;
676 if (pos > i_size_read(inode)) {
677 i_size_write(inode, pos);
678 ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
679 "[0x%.16x]\n", i_size_read(inode));
680 }
681 ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode);
682 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
683 mark_inode_dirty_sync(inode);
684out:
685 kunmap(page); /* mapped in prior call (prepare_write) */
686 if (rc < 0)
687 ClearPageUptodate(page);
688 else
689 SetPageUptodate(page);
690 mutex_unlock(&lower_inode->i_mutex);
691 return rc;
692}
693
694/**
695 * write_zeros
696 * @file: The ecryptfs file
697 * @index: The index in which we are writing
698 * @start: The position after the last block of data
699 * @num_zeros: The number of zeros to write
700 *
701 * Write a specified number of zero's to a page.
702 *
703 * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE
704 */
705static
706int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
707{
708 int rc = 0;
709 struct page *tmp_page;
710
711 tmp_page = ecryptfs_get1page(file, index);
712 if (IS_ERR(tmp_page)) {
713 ecryptfs_printk(KERN_ERR, "Error getting page at index "
714 "[0x%.16x]\n", index);
715 rc = PTR_ERR(tmp_page);
716 goto out;
717 }
718 kmap(tmp_page);
719 rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros);
720 if (rc) {
721 ecryptfs_printk(KERN_ERR, "Error preparing to write zero's "
722 "to remainder of page at index [0x%.16x]\n",
723 index);
724 kunmap(tmp_page);
725 page_cache_release(tmp_page);
726 goto out;
727 }
728 memset(((char *)page_address(tmp_page) + start), 0, num_zeros);
729 rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros);
730 if (rc < 0) {
731 ecryptfs_printk(KERN_ERR, "Error attempting to write zero's "
732 "to remainder of page at index [0x%.16x]\n",
733 index);
734 kunmap(tmp_page);
735 page_cache_release(tmp_page);
736 goto out;
737 }
738 rc = 0;
739 kunmap(tmp_page);
740 page_cache_release(tmp_page);
741out:
742 return rc;
743}
744
745static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
746{
747 int rc = 0;
748 struct inode *inode;
749 struct inode *lower_inode;
750
751 inode = (struct inode *)mapping->host;
752 lower_inode = ecryptfs_inode_to_lower(inode);
753 if (lower_inode->i_mapping->a_ops->bmap)
754 rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping,
755 block);
756 return rc;
757}
758
759static void ecryptfs_sync_page(struct page *page)
760{
761 struct inode *inode;
762 struct inode *lower_inode;
763 struct page *lower_page;
764
765 inode = page->mapping->host;
766 lower_inode = ecryptfs_inode_to_lower(inode);
767 /* NOTE: Recently swapped with grab_cache_page(), since
768 * sync_page() just makes sure that pending I/O gets done. */
769 lower_page = find_lock_page(lower_inode->i_mapping, page->index);
770 if (!lower_page) {
771 ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n");
772 return;
773 }
774 lower_page->mapping->a_ops->sync_page(lower_page);
775 ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
776 lower_page->index);
777 unlock_page(lower_page);
778 page_cache_release(lower_page);
779}
780
781struct address_space_operations ecryptfs_aops = {
782 .writepage = ecryptfs_writepage,
783 .readpage = ecryptfs_readpage,
784 .prepare_write = ecryptfs_prepare_write,
785 .commit_write = ecryptfs_commit_write,
786 .bmap = ecryptfs_bmap,
787 .sync_page = ecryptfs_sync_page,
788};
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
new file mode 100644
index 000000000000..c337c0410fb1
--- /dev/null
+++ b/fs/ecryptfs/super.c
@@ -0,0 +1,198 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2003 Erez Zadok
5 * Copyright (C) 2001-2003 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#include <linux/fs.h>
27#include <linux/mount.h>
28#include <linux/key.h>
29#include <linux/seq_file.h>
30#include <linux/crypto.h>
31#include "ecryptfs_kernel.h"
32
33struct kmem_cache *ecryptfs_inode_info_cache;
34
35/**
36 * ecryptfs_alloc_inode - allocate an ecryptfs inode
37 * @sb: Pointer to the ecryptfs super block
38 *
39 * Called to bring an inode into existence.
40 *
41 * Only handle allocation, setting up structures should be done in
42 * ecryptfs_read_inode. This is because the kernel, between now and
43 * then, will 0 out the private data pointer.
44 *
45 * Returns a pointer to a newly allocated inode, NULL otherwise
46 */
47static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
48{
49 struct ecryptfs_inode_info *ecryptfs_inode;
50 struct inode *inode = NULL;
51
52 ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache,
53 SLAB_KERNEL);
54 if (unlikely(!ecryptfs_inode))
55 goto out;
56 ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat);
57 inode = &ecryptfs_inode->vfs_inode;
58out:
59 return inode;
60}
61
62/**
63 * ecryptfs_destroy_inode
64 * @inode: The ecryptfs inode
65 *
66 * This is used during the final destruction of the inode.
67 * All allocation of memory related to the inode, including allocated
68 * memory in the crypt_stat struct, will be released here.
69 * There should be no chance that this deallocation will be missed.
70 */
71static void ecryptfs_destroy_inode(struct inode *inode)
72{
73 struct ecryptfs_inode_info *inode_info;
74
75 inode_info = ecryptfs_inode_to_private(inode);
76 ecryptfs_destruct_crypt_stat(&inode_info->crypt_stat);
77 kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
78}
79
80/**
81 * ecryptfs_init_inode
82 * @inode: The ecryptfs inode
83 *
84 * Set up the ecryptfs inode.
85 */
86void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
87{
88 ecryptfs_set_inode_lower(inode, lower_inode);
89 inode->i_ino = lower_inode->i_ino;
90 inode->i_version++;
91 inode->i_op = &ecryptfs_main_iops;
92 inode->i_fop = &ecryptfs_main_fops;
93 inode->i_mapping->a_ops = &ecryptfs_aops;
94}
95
96/**
97 * ecryptfs_put_super
98 * @sb: Pointer to the ecryptfs super block
99 *
100 * Final actions when unmounting a file system.
101 * This will handle deallocation and release of our private data.
102 */
103static void ecryptfs_put_super(struct super_block *sb)
104{
105 struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
106
107 ecryptfs_destruct_mount_crypt_stat(&sb_info->mount_crypt_stat);
108 kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
109 ecryptfs_set_superblock_private(sb, NULL);
110}
111
/**
 * ecryptfs_statfs
 * @dentry: An ecryptfs dentry
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics. Currently, we let this pass right
 * through to the lower filesystem, by way of the lower dentry, and take
 * no action ourselves.
 *
 * Returns whatever vfs_statfs() returns for the lower dentry
 */
static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);

	return vfs_statfs(lower_dentry, buf);
}
124
/**
 * ecryptfs_clear_inode
 * @inode: The ecryptfs inode
 *
 * Installed as the clear_inode super operation. We use this to drop
 * our reference to the lower inode.
 */
static void ecryptfs_clear_inode(struct inode *inode)
{
	struct inode *lower_inode = ecryptfs_inode_to_lower(inode);

	iput(lower_inode);
}
139
140/**
141 * ecryptfs_umount_begin
142 *
143 * Called in do_umount().
144 */
145static void ecryptfs_umount_begin(struct vfsmount *vfsmnt, int flags)
146{
147 struct vfsmount *lower_mnt =
148 ecryptfs_dentry_to_lower_mnt(vfsmnt->mnt_sb->s_root);
149 struct super_block *lower_sb;
150
151 mntput(lower_mnt);
152 lower_sb = lower_mnt->mnt_sb;
153 if (lower_sb->s_op->umount_begin)
154 lower_sb->s_op->umount_begin(lower_mnt, flags);
155}
156
157/**
158 * ecryptfs_show_options
159 *
160 * Prints the directory we are currently mounted over.
161 * Returns zero on success; non-zero otherwise
162 */
163static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
164{
165 struct super_block *sb = mnt->mnt_sb;
166 struct dentry *lower_root_dentry = ecryptfs_dentry_to_lower(sb->s_root);
167 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(sb->s_root);
168 char *tmp_page;
169 char *path;
170 int rc = 0;
171
172 tmp_page = (char *)__get_free_page(GFP_KERNEL);
173 if (!tmp_page) {
174 rc = -ENOMEM;
175 goto out;
176 }
177 path = d_path(lower_root_dentry, lower_mnt, tmp_page, PAGE_SIZE);
178 if (IS_ERR(path)) {
179 rc = PTR_ERR(path);
180 goto out;
181 }
182 seq_printf(m, ",dir=%s", path);
183 free_page((unsigned long)tmp_page);
184out:
185 return rc;
186}
187
188struct super_operations ecryptfs_sops = {
189 .alloc_inode = ecryptfs_alloc_inode,
190 .destroy_inode = ecryptfs_destroy_inode,
191 .drop_inode = generic_delete_inode,
192 .put_super = ecryptfs_put_super,
193 .statfs = ecryptfs_statfs,
194 .remount_fs = NULL,
195 .clear_inode = ecryptfs_clear_inode,
196 .umount_begin = ecryptfs_umount_begin,
197 .show_options = ecryptfs_show_options
198};
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8d544334bcd2..557d5b614fae 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -720,9 +720,10 @@ static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
720 720
721 /* Allocates an inode from the eventpoll file system */ 721 /* Allocates an inode from the eventpoll file system */
722 inode = ep_eventpoll_inode(); 722 inode = ep_eventpoll_inode();
723 error = PTR_ERR(inode); 723 if (IS_ERR(inode)) {
724 if (IS_ERR(inode)) 724 error = PTR_ERR(inode);
725 goto eexit_2; 725 goto eexit_2;
726 }
726 727
727 /* Allocates a free descriptor to plug the file onto */ 728 /* Allocates a free descriptor to plug the file onto */
728 error = get_unused_fd(); 729 error = get_unused_fd();
diff --git a/fs/exec.c b/fs/exec.c
index 6270f8f20a63..d993ea1a81ae 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1318,7 +1318,7 @@ static void format_corename(char *corename, const char *pattern, long signr)
1318 case 'h': 1318 case 'h':
1319 down_read(&uts_sem); 1319 down_read(&uts_sem);
1320 rc = snprintf(out_ptr, out_end - out_ptr, 1320 rc = snprintf(out_ptr, out_end - out_ptr,
1321 "%s", system_utsname.nodename); 1321 "%s", utsname()->nodename);
1322 up_read(&uts_sem); 1322 up_read(&uts_sem);
1323 if (rc > out_end - out_ptr) 1323 if (rc > out_end - out_ptr)
1324 goto out; 1324 goto out;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4c39009350f3..93e77c3d2490 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -315,7 +315,7 @@ struct getdents_callback {
315 * the name matching the specified inode number. 315 * the name matching the specified inode number.
316 */ 316 */
317static int filldir_one(void * __buf, const char * name, int len, 317static int filldir_one(void * __buf, const char * name, int len,
318 loff_t pos, ino_t ino, unsigned int d_type) 318 loff_t pos, u64 ino, unsigned int d_type)
319{ 319{
320 struct getdents_callback *buf = __buf; 320 struct getdents_callback *buf = __buf;
321 int result = 0; 321 int result = 0;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 3e50a4166283..69c439f44387 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -648,7 +648,7 @@ static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir)
648} 648}
649 649
650static int fat_ioctl_filldir(void *__buf, const char *name, int name_len, 650static int fat_ioctl_filldir(void *__buf, const char *name, int name_len,
651 loff_t offset, ino_t ino, unsigned int d_type) 651 loff_t offset, u64 ino, unsigned int d_type)
652{ 652{
653 struct fat_ioctl_filldir_callback *buf = __buf; 653 struct fat_ioctl_filldir_callback *buf = __buf;
654 struct dirent __user *d1 = buf->dirent; 654 struct dirent __user *d1 = buf->dirent;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index d35cbc6bc112..e4f26165f12a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -250,19 +250,22 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
250 return error; 250 return error;
251} 251}
252 252
253static void f_modown(struct file *filp, unsigned long pid, 253static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
254 uid_t uid, uid_t euid, int force) 254 uid_t uid, uid_t euid, int force)
255{ 255{
256 write_lock_irq(&filp->f_owner.lock); 256 write_lock_irq(&filp->f_owner.lock);
257 if (force || !filp->f_owner.pid) { 257 if (force || !filp->f_owner.pid) {
258 filp->f_owner.pid = pid; 258 put_pid(filp->f_owner.pid);
259 filp->f_owner.pid = get_pid(pid);
260 filp->f_owner.pid_type = type;
259 filp->f_owner.uid = uid; 261 filp->f_owner.uid = uid;
260 filp->f_owner.euid = euid; 262 filp->f_owner.euid = euid;
261 } 263 }
262 write_unlock_irq(&filp->f_owner.lock); 264 write_unlock_irq(&filp->f_owner.lock);
263} 265}
264 266
265int f_setown(struct file *filp, unsigned long arg, int force) 267int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
268 int force)
266{ 269{
267 int err; 270 int err;
268 271
@@ -270,15 +273,44 @@ int f_setown(struct file *filp, unsigned long arg, int force)
270 if (err) 273 if (err)
271 return err; 274 return err;
272 275
273 f_modown(filp, arg, current->uid, current->euid, force); 276 f_modown(filp, pid, type, current->uid, current->euid, force);
274 return 0; 277 return 0;
275} 278}
279EXPORT_SYMBOL(__f_setown);
276 280
281int f_setown(struct file *filp, unsigned long arg, int force)
282{
283 enum pid_type type;
284 struct pid *pid;
285 int who = arg;
286 int result;
287 type = PIDTYPE_PID;
288 if (who < 0) {
289 type = PIDTYPE_PGID;
290 who = -who;
291 }
292 rcu_read_lock();
293 pid = find_pid(who);
294 result = __f_setown(filp, pid, type, force);
295 rcu_read_unlock();
296 return result;
297}
277EXPORT_SYMBOL(f_setown); 298EXPORT_SYMBOL(f_setown);
278 299
279void f_delown(struct file *filp) 300void f_delown(struct file *filp)
280{ 301{
281 f_modown(filp, 0, 0, 0, 1); 302 f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1);
303}
304
305pid_t f_getown(struct file *filp)
306{
307 pid_t pid;
308 read_lock(&filp->f_owner.lock);
309 pid = pid_nr(filp->f_owner.pid);
310 if (filp->f_owner.pid_type == PIDTYPE_PGID)
311 pid = -pid;
312 read_unlock(&filp->f_owner.lock);
313 return pid;
282} 314}
283 315
284static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, 316static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
@@ -319,7 +351,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
319 * current syscall conventions, the only way 351 * current syscall conventions, the only way
320 * to fix this will be in libc. 352 * to fix this will be in libc.
321 */ 353 */
322 err = filp->f_owner.pid; 354 err = f_getown(filp);
323 force_successful_syscall_return(); 355 force_successful_syscall_return();
324 break; 356 break;
325 case F_SETOWN: 357 case F_SETOWN:
@@ -470,24 +502,19 @@ static void send_sigio_to_task(struct task_struct *p,
470void send_sigio(struct fown_struct *fown, int fd, int band) 502void send_sigio(struct fown_struct *fown, int fd, int band)
471{ 503{
472 struct task_struct *p; 504 struct task_struct *p;
473 int pid; 505 enum pid_type type;
506 struct pid *pid;
474 507
475 read_lock(&fown->lock); 508 read_lock(&fown->lock);
509 type = fown->pid_type;
476 pid = fown->pid; 510 pid = fown->pid;
477 if (!pid) 511 if (!pid)
478 goto out_unlock_fown; 512 goto out_unlock_fown;
479 513
480 read_lock(&tasklist_lock); 514 read_lock(&tasklist_lock);
481 if (pid > 0) { 515 do_each_pid_task(pid, type, p) {
482 p = find_task_by_pid(pid); 516 send_sigio_to_task(p, fown, fd, band);
483 if (p) { 517 } while_each_pid_task(pid, type, p);
484 send_sigio_to_task(p, fown, fd, band);
485 }
486 } else {
487 do_each_task_pid(-pid, PIDTYPE_PGID, p) {
488 send_sigio_to_task(p, fown, fd, band);
489 } while_each_task_pid(-pid, PIDTYPE_PGID, p);
490 }
491 read_unlock(&tasklist_lock); 518 read_unlock(&tasklist_lock);
492 out_unlock_fown: 519 out_unlock_fown:
493 read_unlock(&fown->lock); 520 read_unlock(&fown->lock);
@@ -503,9 +530,12 @@ static void send_sigurg_to_task(struct task_struct *p,
503int send_sigurg(struct fown_struct *fown) 530int send_sigurg(struct fown_struct *fown)
504{ 531{
505 struct task_struct *p; 532 struct task_struct *p;
506 int pid, ret = 0; 533 enum pid_type type;
534 struct pid *pid;
535 int ret = 0;
507 536
508 read_lock(&fown->lock); 537 read_lock(&fown->lock);
538 type = fown->pid_type;
509 pid = fown->pid; 539 pid = fown->pid;
510 if (!pid) 540 if (!pid)
511 goto out_unlock_fown; 541 goto out_unlock_fown;
@@ -513,16 +543,9 @@ int send_sigurg(struct fown_struct *fown)
513 ret = 1; 543 ret = 1;
514 544
515 read_lock(&tasklist_lock); 545 read_lock(&tasklist_lock);
516 if (pid > 0) { 546 do_each_pid_task(pid, type, p) {
517 p = find_task_by_pid(pid); 547 send_sigurg_to_task(p, fown);
518 if (p) { 548 } while_each_pid_task(pid, type, p);
519 send_sigurg_to_task(p, fown);
520 }
521 } else {
522 do_each_task_pid(-pid, PIDTYPE_PGID, p) {
523 send_sigurg_to_task(p, fown);
524 } while_each_task_pid(-pid, PIDTYPE_PGID, p);
525 }
526 read_unlock(&tasklist_lock); 549 read_unlock(&tasklist_lock);
527 out_unlock_fown: 550 out_unlock_fown:
528 read_unlock(&fown->lock); 551 read_unlock(&fown->lock);
diff --git a/fs/file_table.c b/fs/file_table.c
index bc35a40417d7..24f25a057d9c 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -174,6 +174,7 @@ void fastcall __fput(struct file *file)
174 fops_put(file->f_op); 174 fops_put(file->f_op);
175 if (file->f_mode & FMODE_WRITE) 175 if (file->f_mode & FMODE_WRITE)
176 put_write_access(inode); 176 put_write_access(inode);
177 put_pid(file->f_owner.pid);
177 file_kill(file); 178 file_kill(file);
178 file->f_dentry = NULL; 179 file->f_dentry = NULL;
179 file->f_vfsmnt = NULL; 180 file->f_vfsmnt = NULL;
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index ae783066fc3a..1528a6fd0299 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/hfs/part_tbl.c 2 * linux/fs/hfsplus/part_tbl.c
3 * 3 *
4 * Copyright (C) 1996-1997 Paul H. Hargrove 4 * Copyright (C) 1996-1997 Paul H. Hargrove
5 * This file may be distributed under the terms of the GNU General Public License. 5 * This file may be distributed under the terms of the GNU General Public License.
diff --git a/fs/inode.c b/fs/inode.c
index ada7643104e1..bf6bec4e54ff 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -657,7 +657,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
657 return inode; 657 return inode;
658} 658}
659 659
660static inline unsigned long hash(struct super_block *sb, unsigned long hashval) 660static unsigned long hash(struct super_block *sb, unsigned long hashval)
661{ 661{
662 unsigned long tmp; 662 unsigned long tmp;
663 663
@@ -1003,7 +1003,7 @@ void generic_delete_inode(struct inode *inode)
1003 1003
1004 list_del_init(&inode->i_list); 1004 list_del_init(&inode->i_list);
1005 list_del_init(&inode->i_sb_list); 1005 list_del_init(&inode->i_sb_list);
1006 inode->i_state|=I_FREEING; 1006 inode->i_state |= I_FREEING;
1007 inodes_stat.nr_inodes--; 1007 inodes_stat.nr_inodes--;
1008 spin_unlock(&inode_lock); 1008 spin_unlock(&inode_lock);
1009 1009
@@ -1210,13 +1210,15 @@ void file_update_time(struct file *file)
1210 return; 1210 return;
1211 1211
1212 now = current_fs_time(inode->i_sb); 1212 now = current_fs_time(inode->i_sb);
1213 if (!timespec_equal(&inode->i_mtime, &now)) 1213 if (!timespec_equal(&inode->i_mtime, &now)) {
1214 inode->i_mtime = now;
1214 sync_it = 1; 1215 sync_it = 1;
1215 inode->i_mtime = now; 1216 }
1216 1217
1217 if (!timespec_equal(&inode->i_ctime, &now)) 1218 if (!timespec_equal(&inode->i_ctime, &now)) {
1219 inode->i_ctime = now;
1218 sync_it = 1; 1220 sync_it = 1;
1219 inode->i_ctime = now; 1221 }
1220 1222
1221 if (sync_it) 1223 if (sync_it)
1222 mark_inode_dirty_sync(inode); 1224 mark_inode_dirty_sync(inode);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 32a8caf0c41e..10be51290a27 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/commit.c 2 * linux/fs/jbd/commit.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
5 * 5 *
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 7af6099c911c..c518dd8fe60a 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/journal.c 2 * linux/fs/jbd/journal.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
5 * 5 *
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index e2281300979c..4d84bdc88299 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -5,16 +5,16 @@
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version. 9 * (at your option) any later version.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, 11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
14 * the GNU General Public License for more details. 14 * the GNU General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */ 19 */
20 20
@@ -183,7 +183,7 @@ cleanup:
183 posix_acl_release(acl); 183 posix_acl_release(acl);
184 } else 184 } else
185 inode->i_mode &= ~current->fs->umask; 185 inode->i_mode &= ~current->fs->umask;
186 186
187 JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | 187 JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) |
188 inode->i_mode; 188 inode->i_mode;
189 189
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h
index ab7cd0567c95..79494c4f2b10 100644
--- a/fs/jfs/endian24.h
+++ b/fs/jfs/endian24.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2001 2 * Copyright (C) International Business Machines Corp., 2001
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 976e90dc2d1b..34181b8f5a0a 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
@@ -108,7 +108,7 @@ const struct file_operations jfs_file_operations = {
108 .aio_read = generic_file_aio_read, 108 .aio_read = generic_file_aio_read,
109 .aio_write = generic_file_aio_write, 109 .aio_write = generic_file_aio_write,
110 .mmap = generic_file_mmap, 110 .mmap = generic_file_mmap,
111 .sendfile = generic_file_sendfile, 111 .sendfile = generic_file_sendfile,
112 .fsync = jfs_fsync, 112 .fsync = jfs_fsync,
113 .release = jfs_release, 113 .release = jfs_release,
114 .ioctl = jfs_ioctl, 114 .ioctl = jfs_ioctl,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index a8cc169235d9..f5719117edfe 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
@@ -33,7 +33,7 @@
33 33
34void jfs_read_inode(struct inode *inode) 34void jfs_read_inode(struct inode *inode)
35{ 35{
36 if (diRead(inode)) { 36 if (diRead(inode)) {
37 make_bad_inode(inode); 37 make_bad_inode(inode);
38 return; 38 return;
39 } 39 }
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index a76293767c73..455fa4292045 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -1,18 +1,18 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2002 2 * Copyright (C) International Business Machines Corp., 2002
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_ACL 18#ifndef _H_JFS_ACL
diff --git a/fs/jfs/jfs_btree.h b/fs/jfs/jfs_btree.h
index 7f3e9ac454ff..79c61805bd33 100644
--- a/fs/jfs/jfs_btree.h
+++ b/fs/jfs/jfs_btree.h
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_BTREE 18#ifndef _H_JFS_BTREE
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 81f0e514c490..9c5d59632aac 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 9f2572aea561..40b20111383c 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -1,18 +1,18 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2000-2001 2 * Copyright (C) International Business Machines Corp., 2000-2001
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_DINODE 18#ifndef _H_JFS_DINODE
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index f05ebb629182..23546c8fd48b 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
@@ -30,28 +30,28 @@
30 * 30 *
31 * the working state of the block allocation map is accessed in 31 * the working state of the block allocation map is accessed in
32 * two directions: 32 * two directions:
33 * 33 *
34 * 1) allocation and free requests that start at the dmap 34 * 1) allocation and free requests that start at the dmap
35 * level and move up through the dmap control pages (i.e. 35 * level and move up through the dmap control pages (i.e.
36 * the vast majority of requests). 36 * the vast majority of requests).
37 * 37 *
38 * 2) allocation requests that start at dmap control page 38 * 2) allocation requests that start at dmap control page
39 * level and work down towards the dmaps. 39 * level and work down towards the dmaps.
40 *
41 * the serialization scheme used here is as follows.
42 * 40 *
43 * requests which start at the bottom are serialized against each 41 * the serialization scheme used here is as follows.
44 * other through buffers and each requests holds onto its buffers 42 *
45 * as it works it way up from a single dmap to the required level 43 * requests which start at the bottom are serialized against each
44 * other through buffers and each requests holds onto its buffers
45 * as it works it way up from a single dmap to the required level
46 * of dmap control page. 46 * of dmap control page.
47 * requests that start at the top are serialized against each other 47 * requests that start at the top are serialized against each other
48 * and request that start from the bottom by the multiple read/single 48 * and request that start from the bottom by the multiple read/single
49 * write inode lock of the bmap inode. requests starting at the top 49 * write inode lock of the bmap inode. requests starting at the top
50 * take this lock in write mode while request starting at the bottom 50 * take this lock in write mode while request starting at the bottom
51 * take the lock in read mode. a single top-down request may proceed 51 * take the lock in read mode. a single top-down request may proceed
52 * exclusively while multiple bottoms-up requests may proceed 52 * exclusively while multiple bottoms-up requests may proceed
53 * simultaneously (under the protection of busy buffers). 53 * simultaneously (under the protection of busy buffers).
54 * 54 *
55 * in addition to information found in dmaps and dmap control pages, 55 * in addition to information found in dmaps and dmap control pages,
56 * the working state of the block allocation map also includes read/ 56 * the working state of the block allocation map also includes read/
57 * write information maintained in the bmap descriptor (i.e. total 57 * write information maintained in the bmap descriptor (i.e. total
@@ -59,7 +59,7 @@
59 * a single exclusive lock (BMAP_LOCK) is used to guard this information 59 * a single exclusive lock (BMAP_LOCK) is used to guard this information
60 * in the face of multiple-bottoms up requests. 60 * in the face of multiple-bottoms up requests.
61 * (lock ordering: IREAD_LOCK, BMAP_LOCK); 61 * (lock ordering: IREAD_LOCK, BMAP_LOCK);
62 * 62 *
63 * accesses to the persistent state of the block allocation map (limited 63 * accesses to the persistent state of the block allocation map (limited
64 * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. 64 * to the persistent bitmaps in dmaps) is guarded by (busy) buffers.
65 */ 65 */
@@ -120,7 +120,7 @@ static int dbGetL2AGSize(s64 nblocks);
120/* 120/*
121 * buddy table 121 * buddy table
122 * 122 *
123 * table used for determining buddy sizes within characters of 123 * table used for determining buddy sizes within characters of
124 * dmap bitmap words. the characters themselves serve as indexes 124 * dmap bitmap words. the characters themselves serve as indexes
125 * into the table, with the table elements yielding the maximum 125 * into the table, with the table elements yielding the maximum
126 * binary buddy of free bits within the character. 126 * binary buddy of free bits within the character.
@@ -146,7 +146,7 @@ static const s8 budtab[256] = {
146 146
147 147
148/* 148/*
149 * NAME: dbMount() 149 * NAME: dbMount()
150 * 150 *
151 * FUNCTION: initializate the block allocation map. 151 * FUNCTION: initializate the block allocation map.
152 * 152 *
@@ -223,12 +223,12 @@ int dbMount(struct inode *ipbmap)
223 223
224 224
225/* 225/*
226 * NAME: dbUnmount() 226 * NAME: dbUnmount()
227 * 227 *
228 * FUNCTION: terminate the block allocation map in preparation for 228 * FUNCTION: terminate the block allocation map in preparation for
229 * file system unmount. 229 * file system unmount.
230 * 230 *
231 * the in-core bmap descriptor is written to disk and 231 * the in-core bmap descriptor is written to disk and
232 * the memory for this descriptor is freed. 232 * the memory for this descriptor is freed.
233 * 233 *
234 * PARAMETERS: 234 * PARAMETERS:
@@ -311,7 +311,7 @@ int dbSync(struct inode *ipbmap)
311 311
312 312
313/* 313/*
314 * NAME: dbFree() 314 * NAME: dbFree()
315 * 315 *
316 * FUNCTION: free the specified block range from the working block 316 * FUNCTION: free the specified block range from the working block
317 * allocation map. 317 * allocation map.
@@ -397,7 +397,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
397 * 397 *
398 * FUNCTION: update the allocation state (free or allocate) of the 398 * FUNCTION: update the allocation state (free or allocate) of the
399 * specified block range in the persistent block allocation map. 399 * specified block range in the persistent block allocation map.
400 * 400 *
401 * the blocks will be updated in the persistent map one 401 * the blocks will be updated in the persistent map one
402 * dmap at a time. 402 * dmap at a time.
403 * 403 *
@@ -475,7 +475,7 @@ dbUpdatePMap(struct inode *ipbmap,
475 /* update the bits of the dmap words. the first and last 475 /* update the bits of the dmap words. the first and last
476 * words may only have a subset of their bits updated. if 476 * words may only have a subset of their bits updated. if
477 * this is the case, we'll work against that word (i.e. 477 * this is the case, we'll work against that word (i.e.
478 * partial first and/or last) only in a single pass. a 478 * partial first and/or last) only in a single pass. a
479 * single pass will also be used to update all words that 479 * single pass will also be used to update all words that
480 * are to have all their bits updated. 480 * are to have all their bits updated.
481 */ 481 */
@@ -662,11 +662,11 @@ unlock:
662 * the block allocation policy uses hints and a multi-step 662 * the block allocation policy uses hints and a multi-step
663 * approach. 663 * approach.
664 * 664 *
665 * for allocation requests smaller than the number of blocks 665 * for allocation requests smaller than the number of blocks
666 * per dmap, we first try to allocate the new blocks 666 * per dmap, we first try to allocate the new blocks
667 * immediately following the hint. if these blocks are not 667 * immediately following the hint. if these blocks are not
668 * available, we try to allocate blocks near the hint. if 668 * available, we try to allocate blocks near the hint. if
669 * no blocks near the hint are available, we next try to 669 * no blocks near the hint are available, we next try to
670 * allocate within the same dmap as contains the hint. 670 * allocate within the same dmap as contains the hint.
671 * 671 *
672 * if no blocks are available in the dmap or the allocation 672 * if no blocks are available in the dmap or the allocation
@@ -713,7 +713,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
713#endif /* _STILL_TO_PORT */ 713#endif /* _STILL_TO_PORT */
714 714
715 /* get the log2 number of blocks to be allocated. 715 /* get the log2 number of blocks to be allocated.
716 * if the number of blocks is not a log2 multiple, 716 * if the number of blocks is not a log2 multiple,
717 * it will be rounded up to the next log2 multiple. 717 * it will be rounded up to the next log2 multiple.
718 */ 718 */
719 l2nb = BLKSTOL2(nblocks); 719 l2nb = BLKSTOL2(nblocks);
@@ -906,7 +906,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
906 * validate extent request: 906 * validate extent request:
907 * 907 *
908 * note: defragfs policy: 908 * note: defragfs policy:
909 * max 64 blocks will be moved. 909 * max 64 blocks will be moved.
910 * allocation request size must be satisfied from a single dmap. 910 * allocation request size must be satisfied from a single dmap.
911 */ 911 */
912 if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { 912 if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) {
@@ -1333,7 +1333,7 @@ dbAllocNear(struct bmap * bmp,
1333 * or two sub-trees, depending on the allocation group size. 1333 * or two sub-trees, depending on the allocation group size.
1334 * we search the top nodes of these subtrees left to right for 1334 * we search the top nodes of these subtrees left to right for
1335 * sufficient free space. if sufficient free space is found, 1335 * sufficient free space. if sufficient free space is found,
1336 * the subtree is searched to find the leftmost leaf that 1336 * the subtree is searched to find the leftmost leaf that
1337 * has free space. once we have made it to the leaf, we 1337 * has free space. once we have made it to the leaf, we
1338 * move the search to the next lower level dmap control page 1338 * move the search to the next lower level dmap control page
1339 * corresponding to this leaf. we continue down the dmap control 1339 * corresponding to this leaf. we continue down the dmap control
@@ -1398,7 +1398,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1398 * that fully describes the allocation group since the allocation 1398 * that fully describes the allocation group since the allocation
1399 * group is already fully described by a dmap. in this case, we 1399 * group is already fully described by a dmap. in this case, we
1400 * just call dbAllocCtl() to search the dmap tree and allocate the 1400 * just call dbAllocCtl() to search the dmap tree and allocate the
1401 * required space if available. 1401 * required space if available.
1402 * 1402 *
1403 * if the allocation group is completely free, dbAllocCtl() is 1403 * if the allocation group is completely free, dbAllocCtl() is
1404 * also called to allocate the required space. this is done for 1404 * also called to allocate the required space. this is done for
@@ -1450,7 +1450,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1450 (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; 1450 (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth;
1451 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); 1451 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
1452 1452
1453 /* dmap control page trees fan-out by 4 and a single allocation 1453 /* dmap control page trees fan-out by 4 and a single allocation
1454 * group may be described by 1 or 2 subtrees within the ag level 1454 * group may be described by 1 or 2 subtrees within the ag level
1455 * dmap control page, depending upon the ag size. examine the ag's 1455 * dmap control page, depending upon the ag size. examine the ag's
1456 * subtrees for sufficient free space, starting with the leftmost 1456 * subtrees for sufficient free space, starting with the leftmost
@@ -1633,7 +1633,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1633 1633
1634 /* starting at the specified dmap control page level and block 1634 /* starting at the specified dmap control page level and block
1635 * number, search down the dmap control levels for the starting 1635 * number, search down the dmap control levels for the starting
1636 * block number of a dmap page that contains or starts off 1636 * block number of a dmap page that contains or starts off
1637 * sufficient free blocks. 1637 * sufficient free blocks.
1638 */ 1638 */
1639 for (lev = level, b = *blkno; lev >= 0; lev--) { 1639 for (lev = level, b = *blkno; lev >= 0; lev--) {
@@ -1677,7 +1677,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1677 } 1677 }
1678 1678
1679 /* adjust the block number to reflect the location within 1679 /* adjust the block number to reflect the location within
1680 * the dmap control page (i.e. the leaf) at which free 1680 * the dmap control page (i.e. the leaf) at which free
1681 * space was found. 1681 * space was found.
1682 */ 1682 */
1683 b += (((s64) leafidx) << budmin); 1683 b += (((s64) leafidx) << budmin);
@@ -1700,12 +1700,12 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1700 * NAME: dbAllocCtl() 1700 * NAME: dbAllocCtl()
1701 * 1701 *
1702 * FUNCTION: attempt to allocate a specified number of contiguous 1702 * FUNCTION: attempt to allocate a specified number of contiguous
1703 * blocks starting within a specific dmap. 1703 * blocks starting within a specific dmap.
1704 * 1704 *
1705 * this routine is called by higher level routines that search 1705 * this routine is called by higher level routines that search
1706 * the dmap control pages above the actual dmaps for contiguous 1706 * the dmap control pages above the actual dmaps for contiguous
1707 * free space. the result of successful searches by these 1707 * free space. the result of successful searches by these
1708 * routines are the starting block numbers within dmaps, with 1708 * routines are the starting block numbers within dmaps, with
1709 * the dmaps themselves containing the desired contiguous free 1709 * the dmaps themselves containing the desired contiguous free
1710 * space or starting a contiguous free space of desired size 1710 * space or starting a contiguous free space of desired size
1711 * that is made up of the blocks of one or more dmaps. these 1711 * that is made up of the blocks of one or more dmaps. these
@@ -1872,14 +1872,14 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
1872 * 1872 *
1873 * FUNCTION: attempt to allocate a specified number of contiguous blocks 1873 * FUNCTION: attempt to allocate a specified number of contiguous blocks
1874 * from a specified dmap. 1874 * from a specified dmap.
1875 * 1875 *
1876 * this routine checks if the contiguous blocks are available. 1876 * this routine checks if the contiguous blocks are available.
1877 * if so, nblocks of blocks are allocated; otherwise, ENOSPC is 1877 * if so, nblocks of blocks are allocated; otherwise, ENOSPC is
1878 * returned. 1878 * returned.
1879 * 1879 *
1880 * PARAMETERS: 1880 * PARAMETERS:
1881 * mp - pointer to bmap descriptor 1881 * mp - pointer to bmap descriptor
1882 * dp - pointer to dmap to attempt to allocate blocks from. 1882 * dp - pointer to dmap to attempt to allocate blocks from.
1883 * l2nb - log2 number of contiguous block desired. 1883 * l2nb - log2 number of contiguous block desired.
1884 * nblocks - actual number of contiguous block desired. 1884 * nblocks - actual number of contiguous block desired.
1885 * results - on successful return, set to the starting block number 1885 * results - on successful return, set to the starting block number
@@ -1890,7 +1890,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
1890 * -ENOSPC - insufficient disk resources 1890 * -ENOSPC - insufficient disk resources
1891 * -EIO - i/o error 1891 * -EIO - i/o error
1892 * 1892 *
1893 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or 1893 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
1894 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; 1894 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
1895 */ 1895 */
1896static int 1896static int
@@ -2032,7 +2032,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2032 2032
2033 /* root changed. bubble the change up to the dmap control pages. 2033 /* root changed. bubble the change up to the dmap control pages.
2034 * if the adjustment of the upper level control pages fails, 2034 * if the adjustment of the upper level control pages fails,
2035 * backout the deallocation. 2035 * backout the deallocation.
2036 */ 2036 */
2037 if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { 2037 if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) {
2038 word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; 2038 word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
@@ -2245,7 +2245,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2245 * words (i.e. partial first and/or last) on an individual basis 2245 * words (i.e. partial first and/or last) on an individual basis
2246 * (a single pass), freeing the bits of interest by hand and updating 2246 * (a single pass), freeing the bits of interest by hand and updating
2247 * the leaf corresponding to the dmap word. a single pass will be used 2247 * the leaf corresponding to the dmap word. a single pass will be used
2248 * for all dmap words fully contained within the specified range. 2248 * for all dmap words fully contained within the specified range.
2249 * within this pass, the bits of all fully contained dmap words will 2249 * within this pass, the bits of all fully contained dmap words will
2250 * be marked as free in a single shot and the leaves will be updated. a 2250 * be marked as free in a single shot and the leaves will be updated. a
2251 * single leaf may describe the free space of multiple dmap words, 2251 * single leaf may describe the free space of multiple dmap words,
@@ -2267,7 +2267,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2267 */ 2267 */
2268 if (nb < DBWORD) { 2268 if (nb < DBWORD) {
2269 /* free (zero) the appropriate bits within this 2269 /* free (zero) the appropriate bits within this
2270 * dmap word. 2270 * dmap word.
2271 */ 2271 */
2272 dp->wmap[word] &= 2272 dp->wmap[word] &=
2273 cpu_to_le32(~(ONES << (DBWORD - nb) 2273 cpu_to_le32(~(ONES << (DBWORD - nb)
@@ -2327,7 +2327,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2327 2327
2328 BMAP_LOCK(bmp); 2328 BMAP_LOCK(bmp);
2329 2329
2330 /* update the free count for the allocation group and 2330 /* update the free count for the allocation group and
2331 * map. 2331 * map.
2332 */ 2332 */
2333 agno = blkno >> bmp->db_agl2size; 2333 agno = blkno >> bmp->db_agl2size;
@@ -2378,7 +2378,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2378 * or deallocation resulted in the root change. this range 2378 * or deallocation resulted in the root change. this range
2379 * is respresented by a single leaf of the current dmapctl 2379 * is respresented by a single leaf of the current dmapctl
2380 * and the leaf will be updated with this value, possibly 2380 * and the leaf will be updated with this value, possibly
2381 * causing a binary buddy system within the leaves to be 2381 * causing a binary buddy system within the leaves to be
2382 * split or joined. the update may also cause the dmapctl's 2382 * split or joined. the update may also cause the dmapctl's
2383 * dmtree to be updated. 2383 * dmtree to be updated.
2384 * 2384 *
@@ -2590,7 +2590,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
2590 } 2590 }
2591 } 2591 }
2592 2592
2593 /* adjust the dmap tree to reflect the specified leaf's new 2593 /* adjust the dmap tree to reflect the specified leaf's new
2594 * value. 2594 * value.
2595 */ 2595 */
2596 dbAdjTree(tp, leafno, newval); 2596 dbAdjTree(tp, leafno, newval);
@@ -2638,7 +2638,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
2638 /* the back split is accomplished by iteratively finding the leaf 2638 /* the back split is accomplished by iteratively finding the leaf
2639 * that starts the buddy system that contains the specified leaf and 2639 * that starts the buddy system that contains the specified leaf and
2640 * splitting that system in two. this iteration continues until 2640 * splitting that system in two. this iteration continues until
2641 * the specified leaf becomes the start of a buddy system. 2641 * the specified leaf becomes the start of a buddy system.
2642 * 2642 *
2643 * determine maximum possible l2 size for the specified leaf. 2643 * determine maximum possible l2 size for the specified leaf.
2644 */ 2644 */
@@ -2853,7 +2853,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
2853 * NAME: dbFindLeaf() 2853 * NAME: dbFindLeaf()
2854 * 2854 *
2855 * FUNCTION: search a dmtree_t for sufficient free blocks, returning 2855 * FUNCTION: search a dmtree_t for sufficient free blocks, returning
2856 * the index of a leaf describing the free blocks if 2856 * the index of a leaf describing the free blocks if
2857 * sufficient free blocks are found. 2857 * sufficient free blocks are found.
2858 * 2858 *
2859 * the search starts at the top of the dmtree_t tree and 2859 * the search starts at the top of the dmtree_t tree and
@@ -2869,7 +2869,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
2869 * 2869 *
2870 * RETURN VALUES: 2870 * RETURN VALUES:
2871 * 0 - success 2871 * 0 - success
2872 * -ENOSPC - insufficient free blocks. 2872 * -ENOSPC - insufficient free blocks.
2873 */ 2873 */
2874static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) 2874static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
2875{ 2875{
@@ -3090,7 +3090,7 @@ static int blkstol2(s64 nb)
3090 3090
3091 3091
3092/* 3092/*
3093 * NAME: dbAllocBottomUp() 3093 * NAME: dbAllocBottomUp()
3094 * 3094 *
3095 * FUNCTION: alloc the specified block range from the working block 3095 * FUNCTION: alloc the specified block range from the working block
3096 * allocation map. 3096 * allocation map.
@@ -3241,7 +3241,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3241 BMAP_LOCK(bmp); 3241 BMAP_LOCK(bmp);
3242 3242
3243 /* if this allocation group is completely free, 3243 /* if this allocation group is completely free,
3244 * update the highest active allocation group number 3244 * update the highest active allocation group number
3245 * if this allocation group is the new max. 3245 * if this allocation group is the new max.
3246 */ 3246 */
3247 agno = blkno >> bmp->db_agl2size; 3247 agno = blkno >> bmp->db_agl2size;
@@ -3273,7 +3273,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3273 * NAME: dbExtendFS() 3273 * NAME: dbExtendFS()
3274 * 3274 *
3275 * FUNCTION: extend bmap from blkno for nblocks; 3275 * FUNCTION: extend bmap from blkno for nblocks;
3276 * dbExtendFS() updates bmap ready for dbAllocBottomUp(); 3276 * dbExtendFS() updates bmap ready for dbAllocBottomUp();
3277 * 3277 *
3278 * L2 3278 * L2
3279 * | 3279 * |
@@ -3284,7 +3284,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3284 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; 3284 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm;
3285 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm 3285 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
3286 * 3286 *
3287 * <---old---><----------------------------extend-----------------------> 3287 * <---old---><----------------------------extend----------------------->
3288 */ 3288 */
3289int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) 3289int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3290{ 3290{
@@ -3330,7 +3330,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3330 bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; 3330 bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
3331 3331
3332 /* 3332 /*
3333 * reconfigure db_agfree[] 3333 * reconfigure db_agfree[]
3334 * from old AG configuration to new AG configuration; 3334 * from old AG configuration to new AG configuration;
3335 * 3335 *
3336 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 3336 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
@@ -3491,7 +3491,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3491 } /* for each dmap in a L0 */ 3491 } /* for each dmap in a L0 */
3492 3492
3493 /* 3493 /*
3494 * build current L0 page from its leaves, and 3494 * build current L0 page from its leaves, and
3495 * initialize corresponding parent L1 leaf 3495 * initialize corresponding parent L1 leaf
3496 */ 3496 */
3497 *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i); 3497 *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i);
@@ -3515,7 +3515,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3515 } /* for each L0 in a L1 */ 3515 } /* for each L0 in a L1 */
3516 3516
3517 /* 3517 /*
3518 * build current L1 page from its leaves, and 3518 * build current L1 page from its leaves, and
3519 * initialize corresponding parent L2 leaf 3519 * initialize corresponding parent L2 leaf
3520 */ 3520 */
3521 *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j); 3521 *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j);
@@ -3570,7 +3570,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
3570 * finalize bmap control page 3570 * finalize bmap control page
3571 */ 3571 */
3572//finalize: 3572//finalize:
3573 /* 3573 /*
3574 * compute db_agpref: preferred ag to allocate from 3574 * compute db_agpref: preferred ag to allocate from
3575 * (the leftmost ag with average free space in it); 3575 * (the leftmost ag with average free space in it);
3576 */ 3576 */
@@ -3614,9 +3614,9 @@ void dbFinalizeBmap(struct inode *ipbmap)
3614 3614
3615 /* 3615 /*
3616 * compute db_aglevel, db_agheigth, db_width, db_agstart: 3616 * compute db_aglevel, db_agheigth, db_width, db_agstart:
3617 * an ag is covered in aglevel dmapctl summary tree, 3617 * an ag is covered in aglevel dmapctl summary tree,
3618 * at agheight level height (from leaf) with agwidth number of nodes 3618 * at agheight level height (from leaf) with agwidth number of nodes
3619 * each, which starts at agstart index node of the smmary tree node 3619 * each, which starts at agstart index node of the smmary tree node
3620 * array; 3620 * array;
3621 */ 3621 */
3622 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); 3622 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
@@ -3635,13 +3635,13 @@ void dbFinalizeBmap(struct inode *ipbmap)
3635 3635
3636/* 3636/*
3637 * NAME: dbInitDmap()/ujfs_idmap_page() 3637 * NAME: dbInitDmap()/ujfs_idmap_page()
3638 * 3638 *
3639 * FUNCTION: initialize working/persistent bitmap of the dmap page 3639 * FUNCTION: initialize working/persistent bitmap of the dmap page
3640 * for the specified number of blocks: 3640 * for the specified number of blocks:
3641 * 3641 *
3642 * at entry, the bitmaps had been initialized as free (ZEROS); 3642 * at entry, the bitmaps had been initialized as free (ZEROS);
3643 * The number of blocks will only account for the actually 3643 * The number of blocks will only account for the actually
3644 * existing blocks. Blocks which don't actually exist in 3644 * existing blocks. Blocks which don't actually exist in
3645 * the aggregate will be marked as allocated (ONES); 3645 * the aggregate will be marked as allocated (ONES);
3646 * 3646 *
3647 * PARAMETERS: 3647 * PARAMETERS:
@@ -3677,7 +3677,7 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
3677 3677
3678 /* 3678 /*
3679 * free the bits corresponding to the block range (ZEROS): 3679 * free the bits corresponding to the block range (ZEROS):
3680 * note: not all bits of the first and last words may be contained 3680 * note: not all bits of the first and last words may be contained
3681 * within the block range. 3681 * within the block range.
3682 */ 3682 */
3683 for (r = nblocks; r > 0; r -= nb, blkno += nb) { 3683 for (r = nblocks; r > 0; r -= nb, blkno += nb) {
@@ -3709,7 +3709,7 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
3709 } 3709 }
3710 3710
3711 /* 3711 /*
3712 * mark bits following the range to be freed (non-existing 3712 * mark bits following the range to be freed (non-existing
3713 * blocks) as allocated (ONES) 3713 * blocks) as allocated (ONES)
3714 */ 3714 */
3715 3715
@@ -3741,11 +3741,11 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
3741 3741
3742/* 3742/*
3743 * NAME: dbInitDmapTree()/ujfs_complete_dmap() 3743 * NAME: dbInitDmapTree()/ujfs_complete_dmap()
3744 * 3744 *
3745 * FUNCTION: initialize summary tree of the specified dmap: 3745 * FUNCTION: initialize summary tree of the specified dmap:
3746 * 3746 *
3747 * at entry, bitmap of the dmap has been initialized; 3747 * at entry, bitmap of the dmap has been initialized;
3748 * 3748 *
3749 * PARAMETERS: 3749 * PARAMETERS:
3750 * dp - dmap to complete 3750 * dp - dmap to complete
3751 * blkno - starting block number for this dmap 3751 * blkno - starting block number for this dmap
@@ -3769,7 +3769,7 @@ static int dbInitDmapTree(struct dmap * dp)
3769 3769
3770 /* init each leaf from corresponding wmap word: 3770 /* init each leaf from corresponding wmap word:
3771 * note: leaf is set to NOFREE(-1) if all blocks of corresponding 3771 * note: leaf is set to NOFREE(-1) if all blocks of corresponding
3772 * bitmap word are allocated. 3772 * bitmap word are allocated.
3773 */ 3773 */
3774 cp = tp->stree + le32_to_cpu(tp->leafidx); 3774 cp = tp->stree + le32_to_cpu(tp->leafidx);
3775 for (i = 0; i < LPERDMAP; i++) 3775 for (i = 0; i < LPERDMAP; i++)
@@ -3782,10 +3782,10 @@ static int dbInitDmapTree(struct dmap * dp)
3782 3782
3783/* 3783/*
3784 * NAME: dbInitTree()/ujfs_adjtree() 3784 * NAME: dbInitTree()/ujfs_adjtree()
3785 * 3785 *
3786 * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl. 3786 * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl.
3787 * 3787 *
3788 * at entry, the leaves of the tree has been initialized 3788 * at entry, the leaves of the tree has been initialized
3789 * from corresponding bitmap word or root of summary tree 3789 * from corresponding bitmap word or root of summary tree
3790 * of the child control page; 3790 * of the child control page;
3791 * configure binary buddy system at the leaf level, then 3791 * configure binary buddy system at the leaf level, then
@@ -3813,15 +3813,15 @@ static int dbInitTree(struct dmaptree * dtp)
3813 /* 3813 /*
3814 * configure the leaf levevl into binary buddy system 3814 * configure the leaf levevl into binary buddy system
3815 * 3815 *
3816 * Try to combine buddies starting with a buddy size of 1 3816 * Try to combine buddies starting with a buddy size of 1
3817 * (i.e. two leaves). At a buddy size of 1 two buddy leaves 3817 * (i.e. two leaves). At a buddy size of 1 two buddy leaves
3818 * can be combined if both buddies have a maximum free of l2min; 3818 * can be combined if both buddies have a maximum free of l2min;
3819 * the combination will result in the left-most buddy leaf having 3819 * the combination will result in the left-most buddy leaf having
3820 * a maximum free of l2min+1. 3820 * a maximum free of l2min+1.
3821 * After processing all buddies for a given size, process buddies 3821 * After processing all buddies for a given size, process buddies
3822 * at the next higher buddy size (i.e. current size * 2) and 3822 * at the next higher buddy size (i.e. current size * 2) and
3823 * the next maximum free (current free + 1). 3823 * the next maximum free (current free + 1).
3824 * This continues until the maximum possible buddy combination 3824 * This continues until the maximum possible buddy combination
3825 * yields maximum free. 3825 * yields maximum free.
3826 */ 3826 */
3827 for (l2free = dtp->budmin, bsize = 1; l2free < l2max; 3827 for (l2free = dtp->budmin, bsize = 1; l2free < l2max;
@@ -3845,10 +3845,10 @@ static int dbInitTree(struct dmaptree * dtp)
3845 * bubble summary information of leaves up the tree. 3845 * bubble summary information of leaves up the tree.
3846 * 3846 *
3847 * Starting at the leaf node level, the four nodes described by 3847 * Starting at the leaf node level, the four nodes described by
3848 * the higher level parent node are compared for a maximum free and 3848 * the higher level parent node are compared for a maximum free and
3849 * this maximum becomes the value of the parent node. 3849 * this maximum becomes the value of the parent node.
3850 * when all lower level nodes are processed in this fashion then 3850 * when all lower level nodes are processed in this fashion then
3851 * move up to the next level (parent becomes a lower level node) and 3851 * move up to the next level (parent becomes a lower level node) and
3852 * continue the process for that level. 3852 * continue the process for that level.
3853 */ 3853 */
3854 for (child = le32_to_cpu(dtp->leafidx), 3854 for (child = le32_to_cpu(dtp->leafidx),
@@ -3857,7 +3857,7 @@ static int dbInitTree(struct dmaptree * dtp)
3857 /* get index of 1st node of parent level */ 3857 /* get index of 1st node of parent level */
3858 parent = (child - 1) >> 2; 3858 parent = (child - 1) >> 2;
3859 3859
3860 /* set the value of the parent node as the maximum 3860 /* set the value of the parent node as the maximum
3861 * of the four nodes of the current level. 3861 * of the four nodes of the current level.
3862 */ 3862 */
3863 for (i = 0, cp = tp + child, cp1 = tp + parent; 3863 for (i = 0, cp = tp + child, cp1 = tp + parent;
@@ -3885,8 +3885,8 @@ static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i)
3885 dcp->budmin = L2BPERDMAP + L2LPERCTL * level; 3885 dcp->budmin = L2BPERDMAP + L2LPERCTL * level;
3886 3886
3887 /* 3887 /*
3888 * initialize the leaves of current level that were not covered 3888 * initialize the leaves of current level that were not covered
3889 * by the specified input block range (i.e. the leaves have no 3889 * by the specified input block range (i.e. the leaves have no
3890 * low level dmapctl or dmap). 3890 * low level dmapctl or dmap).
3891 */ 3891 */
3892 cp = &dcp->stree[CTLLEAFIND + i]; 3892 cp = &dcp->stree[CTLLEAFIND + i];
@@ -3900,9 +3900,9 @@ static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i)
3900 3900
3901/* 3901/*
3902 * NAME: dbGetL2AGSize()/ujfs_getagl2size() 3902 * NAME: dbGetL2AGSize()/ujfs_getagl2size()
3903 * 3903 *
3904 * FUNCTION: Determine log2(allocation group size) from aggregate size 3904 * FUNCTION: Determine log2(allocation group size) from aggregate size
3905 * 3905 *
3906 * PARAMETERS: 3906 * PARAMETERS:
3907 * nblocks - Number of blocks in aggregate 3907 * nblocks - Number of blocks in aggregate
3908 * 3908 *
@@ -3935,8 +3935,8 @@ static int dbGetL2AGSize(s64 nblocks)
3935 3935
3936/* 3936/*
3937 * NAME: dbMapFileSizeToMapSize() 3937 * NAME: dbMapFileSizeToMapSize()
3938 * 3938 *
3939 * FUNCTION: compute number of blocks the block allocation map file 3939 * FUNCTION: compute number of blocks the block allocation map file
3940 * can cover from the map file size; 3940 * can cover from the map file size;
3941 * 3941 *
3942 * RETURNS: Number of blocks which can be covered by this block map file; 3942 * RETURNS: Number of blocks which can be covered by this block map file;
@@ -3968,7 +3968,7 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
3968 npages = nblocks >> JFS_SBI(sb)->l2nbperpage; 3968 npages = nblocks >> JFS_SBI(sb)->l2nbperpage;
3969 level = BMAPPGTOLEV(npages); 3969 level = BMAPPGTOLEV(npages);
3970 3970
3971 /* At each level, accumulate the number of dmap pages covered by 3971 /* At each level, accumulate the number of dmap pages covered by
3972 * the number of full child levels below it; 3972 * the number of full child levels below it;
3973 * repeat for the last incomplete child level. 3973 * repeat for the last incomplete child level.
3974 */ 3974 */
@@ -3990,7 +3990,7 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
3990 npages--; 3990 npages--;
3991 } 3991 }
3992 3992
3993 /* convert the number of dmaps into the number of blocks 3993 /* convert the number of dmaps into the number of blocks
3994 * which can be covered by the dmaps; 3994 * which can be covered by the dmaps;
3995 */ 3995 */
3996 nblocks = ndmaps << L2BPERDMAP; 3996 nblocks = ndmaps << L2BPERDMAP;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 8b14cc8e0228..45ea454c74bd 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -1,18 +1,18 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2000-2002 2 * Copyright (C) International Business Machines Corp., 2000-2002
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_DMAP 18#ifndef _H_JFS_DMAP
@@ -27,7 +27,7 @@
27#define L2LPERDMAP 8 /* l2 number of leaves per dmap tree */ 27#define L2LPERDMAP 8 /* l2 number of leaves per dmap tree */
28#define DBWORD 32 /* # of blks covered by a map word */ 28#define DBWORD 32 /* # of blks covered by a map word */
29#define L2DBWORD 5 /* l2 # of blks covered by a mword */ 29#define L2DBWORD 5 /* l2 # of blks covered by a mword */
30#define BUDMIN L2DBWORD /* max free string in a map word */ 30#define BUDMIN L2DBWORD /* max free string in a map word */
31#define BPERDMAP (LPERDMAP * DBWORD) /* num of blks per dmap */ 31#define BPERDMAP (LPERDMAP * DBWORD) /* num of blks per dmap */
32#define L2BPERDMAP 13 /* l2 num of blks per dmap */ 32#define L2BPERDMAP 13 /* l2 num of blks per dmap */
33#define CTLTREESIZE (1024+256+64+16+4+1) /* size of a dmapctl tree */ 33#define CTLTREESIZE (1024+256+64+16+4+1) /* size of a dmapctl tree */
@@ -57,7 +57,7 @@
57 57
58#define MAXMAPSIZE MAXL2SIZE /* maximum aggregate map size */ 58#define MAXMAPSIZE MAXL2SIZE /* maximum aggregate map size */
59 59
60/* 60/*
61 * determine the maximum free string for four (lower level) nodes 61 * determine the maximum free string for four (lower level) nodes
62 * of the tree. 62 * of the tree.
63 */ 63 */
@@ -122,7 +122,7 @@ static __inline signed char TREEMAX(signed char *cp)
122#define BLKTOCTL(b,s,l) \ 122#define BLKTOCTL(b,s,l) \
123 (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) 123 (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
124 124
125/* 125/*
126 * convert aggregate map size to the zero origin dmapctl level of the 126 * convert aggregate map size to the zero origin dmapctl level of the
127 * top dmapctl. 127 * top dmapctl.
128 */ 128 */
@@ -192,13 +192,13 @@ typedef union dmtree {
192 192
193/* macros for accessing fields within dmtree */ 193/* macros for accessing fields within dmtree */
194#define dmt_nleafs t1.nleafs 194#define dmt_nleafs t1.nleafs
195#define dmt_l2nleafs t1.l2nleafs 195#define dmt_l2nleafs t1.l2nleafs
196#define dmt_leafidx t1.leafidx 196#define dmt_leafidx t1.leafidx
197#define dmt_height t1.height 197#define dmt_height t1.height
198#define dmt_budmin t1.budmin 198#define dmt_budmin t1.budmin
199#define dmt_stree t1.stree 199#define dmt_stree t1.stree
200 200
201/* 201/*
202 * on-disk aggregate disk allocation map descriptor. 202 * on-disk aggregate disk allocation map descriptor.
203 */ 203 */
204struct dbmap_disk { 204struct dbmap_disk {
@@ -237,7 +237,7 @@ struct dbmap {
237 s64 dn_agsize; /* num of blks per alloc group */ 237 s64 dn_agsize; /* num of blks per alloc group */
238 signed char dn_maxfreebud; /* max free buddy system */ 238 signed char dn_maxfreebud; /* max free buddy system */
239}; /* - 4096 - */ 239}; /* - 4096 - */
240/* 240/*
241 * in-memory aggregate disk allocation map descriptor. 241 * in-memory aggregate disk allocation map descriptor.
242 */ 242 */
243struct bmap { 243struct bmap {
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 6c3f08319846..ecb2216d881c 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
@@ -78,7 +78,7 @@
78 * 78 *
79 * case-insensitive search: 79 * case-insensitive search:
80 * 80 *
81 * fold search key; 81 * fold search key;
82 * 82 *
83 * case-insensitive search of B-tree: 83 * case-insensitive search of B-tree:
84 * for internal entry, router key is already folded; 84 * for internal entry, router key is already folded;
@@ -93,7 +93,7 @@
93 * else 93 * else
94 * return no match; 94 * return no match;
95 * 95 *
96 * serialization: 96 * serialization:
97 * target directory inode lock is being held on entry/exit 97 * target directory inode lock is being held on entry/exit
98 * of all main directory service routines. 98 * of all main directory service routines.
99 * 99 *
@@ -925,7 +925,7 @@ int dtInsert(tid_t tid, struct inode *ip,
925 * 925 *
926 * return: 0 - success; 926 * return: 0 - success;
927 * errno - failure; 927 * errno - failure;
928 * leaf page unpinned; 928 * leaf page unpinned;
929 */ 929 */
930static int dtSplitUp(tid_t tid, 930static int dtSplitUp(tid_t tid,
931 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 931 struct inode *ip, struct dtsplit * split, struct btstack * btstack)
@@ -3767,7 +3767,7 @@ static int ciCompare(struct component_name * key, /* search key */
3767 * across page boundary 3767 * across page boundary
3768 * 3768 *
3769 * return: non-zero on error 3769 * return: non-zero on error
3770 * 3770 *
3771 */ 3771 */
3772static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 3772static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
3773 int ri, struct component_name * key, int flag) 3773 int ri, struct component_name * key, int flag)
@@ -3780,13 +3780,13 @@ static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
3780 lkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 3780 lkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
3781 GFP_KERNEL); 3781 GFP_KERNEL);
3782 if (lkey.name == NULL) 3782 if (lkey.name == NULL)
3783 return -ENOSPC; 3783 return -ENOMEM;
3784 3784
3785 rkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 3785 rkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
3786 GFP_KERNEL); 3786 GFP_KERNEL);
3787 if (rkey.name == NULL) { 3787 if (rkey.name == NULL) {
3788 kfree(lkey.name); 3788 kfree(lkey.name);
3789 return -ENOSPC; 3789 return -ENOMEM;
3790 } 3790 }
3791 3791
3792 /* get left and right key */ 3792 /* get left and right key */
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index 13e4fdf07724..af8513f78648 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -1,18 +1,18 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2000-2002 2 * Copyright (C) International Business Machines Corp., 2000-2002
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_DTREE 18#ifndef _H_JFS_DTREE
@@ -80,7 +80,7 @@ struct idtentry {
80/* 80/*
81 * leaf node entry head/only segment 81 * leaf node entry head/only segment
82 * 82 *
83 * For legacy filesystems, name contains 13 wchars -- no index field 83 * For legacy filesystems, name contains 13 wchars -- no index field
84 */ 84 */
85struct ldtentry { 85struct ldtentry {
86 __le32 inumber; /* 4: 4-byte aligned */ 86 __le32 inumber; /* 4: 4-byte aligned */
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 933b7457bfbd..a35bdca6a805 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
@@ -125,7 +125,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
125 } 125 }
126 126
127 /* allocate the disk blocks for the extent. initially, extBalloc() 127 /* allocate the disk blocks for the extent. initially, extBalloc()
128 * will try to allocate disk blocks for the requested size (xlen). 128 * will try to allocate disk blocks for the requested size (xlen).
129 * if this fails (xlen contiguous free blocks not avaliable), it'll 129 * if this fails (xlen contiguous free blocks not avaliable), it'll
130 * try to allocate a smaller number of blocks (producing a smaller 130 * try to allocate a smaller number of blocks (producing a smaller
131 * extent), with this smaller number of blocks consisting of the 131 * extent), with this smaller number of blocks consisting of the
@@ -150,7 +150,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
150 /* determine the value of the extent flag */ 150 /* determine the value of the extent flag */
151 xflag = abnr ? XAD_NOTRECORDED : 0; 151 xflag = abnr ? XAD_NOTRECORDED : 0;
152 152
153 /* if we can extend the hint extent to cover the current request, 153 /* if we can extend the hint extent to cover the current request,
154 * extend it. otherwise, insert a new extent to 154 * extend it. otherwise, insert a new extent to
155 * cover the current request. 155 * cover the current request.
156 */ 156 */
@@ -159,7 +159,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
159 else 159 else
160 rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0); 160 rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0);
161 161
162 /* if the extend or insert failed, 162 /* if the extend or insert failed,
163 * free the newly allocated blocks and return the error. 163 * free the newly allocated blocks and return the error.
164 */ 164 */
165 if (rc) { 165 if (rc) {
@@ -235,7 +235,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
235 xoff = offsetXAD(xp); 235 xoff = offsetXAD(xp);
236 236
237 /* if the extend page is abnr and if the request is for 237 /* if the extend page is abnr and if the request is for
238 * the extent to be allocated and recorded, 238 * the extent to be allocated and recorded,
239 * make the page allocated and recorded. 239 * make the page allocated and recorded.
240 */ 240 */
241 if ((xp->flag & XAD_NOTRECORDED) && !abnr) { 241 if ((xp->flag & XAD_NOTRECORDED) && !abnr) {
@@ -397,7 +397,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
397 if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) 397 if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
398 return (rc); 398 return (rc);
399 399
400 /* check if not extent exists for the previous page. 400 /* check if not extent exists for the previous page.
401 * this is possible for sparse files. 401 * this is possible for sparse files.
402 */ 402 */
403 if (xadl.nxad == 0) { 403 if (xadl.nxad == 0) {
@@ -410,7 +410,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
410 */ 410 */
411 xp->flag &= XAD_NOTRECORDED; 411 xp->flag &= XAD_NOTRECORDED;
412 412
413 if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { 413 if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
414 jfs_error(ip->i_sb, "extHint: corrupt xtree"); 414 jfs_error(ip->i_sb, "extHint: corrupt xtree");
415 return -EIO; 415 return -EIO;
416 } 416 }
@@ -492,7 +492,7 @@ int extFill(struct inode *ip, xad_t * xp)
492 * FUNCTION: allocate disk blocks to form an extent. 492 * FUNCTION: allocate disk blocks to form an extent.
493 * 493 *
494 * initially, we will try to allocate disk blocks for the 494 * initially, we will try to allocate disk blocks for the
495 * requested size (nblocks). if this fails (nblocks 495 * requested size (nblocks). if this fails (nblocks
496 * contiguous free blocks not avaliable), we'll try to allocate 496 * contiguous free blocks not avaliable), we'll try to allocate
497 * a smaller number of blocks (producing a smaller extent), with 497 * a smaller number of blocks (producing a smaller extent), with
498 * this smaller number of blocks consisting of the requested 498 * this smaller number of blocks consisting of the requested
@@ -500,7 +500,7 @@ int extFill(struct inode *ip, xad_t * xp)
500 * number (i.e. 16 -> 8). we'll continue to round down and 500 * number (i.e. 16 -> 8). we'll continue to round down and
501 * retry the allocation until the number of blocks to allocate 501 * retry the allocation until the number of blocks to allocate
502 * is smaller than the number of blocks per page. 502 * is smaller than the number of blocks per page.
503 * 503 *
504 * PARAMETERS: 504 * PARAMETERS:
505 * ip - the inode of the file. 505 * ip - the inode of the file.
506 * hint - disk block number to be used as an allocation hint. 506 * hint - disk block number to be used as an allocation hint.
@@ -509,7 +509,7 @@ int extFill(struct inode *ip, xad_t * xp)
509 * exit, this value is set to the number of blocks actually 509 * exit, this value is set to the number of blocks actually
510 * allocated. 510 * allocated.
511 * blkno - pointer to a block address that is filled in on successful 511 * blkno - pointer to a block address that is filled in on successful
512 * return with the starting block number of the newly 512 * return with the starting block number of the newly
513 * allocated block range. 513 * allocated block range.
514 * 514 *
515 * RETURN VALUES: 515 * RETURN VALUES:
@@ -530,7 +530,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
530 /* get the number of blocks to initially attempt to allocate. 530 /* get the number of blocks to initially attempt to allocate.
531 * we'll first try the number of blocks requested unless this 531 * we'll first try the number of blocks requested unless this
532 * number is greater than the maximum number of contiguous free 532 * number is greater than the maximum number of contiguous free
533 * blocks in the map. in that case, we'll start off with the 533 * blocks in the map. in that case, we'll start off with the
534 * maximum free. 534 * maximum free.
535 */ 535 */
536 max = (s64) 1 << bmp->db_maxfreebud; 536 max = (s64) 1 << bmp->db_maxfreebud;
@@ -582,19 +582,19 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
582 * 582 *
583 * FUNCTION: attempt to extend an extent's allocation. 583 * FUNCTION: attempt to extend an extent's allocation.
584 * 584 *
585 * initially, we will try to extend the extent's allocation 585 * Initially, we will try to extend the extent's allocation
586 * in place. if this fails, we'll try to move the extent 586 * in place. If this fails, we'll try to move the extent
587 * to a new set of blocks. if moving the extent, we initially 587 * to a new set of blocks. If moving the extent, we initially
588 * will try to allocate disk blocks for the requested size 588 * will try to allocate disk blocks for the requested size
589 * (nnew). if this fails (new contiguous free blocks not 589 * (newnblks). if this fails (new contiguous free blocks not
590 * avaliable), we'll try to allocate a smaller number of 590 * avaliable), we'll try to allocate a smaller number of
591 * blocks (producing a smaller extent), with this smaller 591 * blocks (producing a smaller extent), with this smaller
592 * number of blocks consisting of the requested number of 592 * number of blocks consisting of the requested number of
593 * blocks rounded down to the next smaller power of 2 593 * blocks rounded down to the next smaller power of 2
594 * number (i.e. 16 -> 8). we'll continue to round down and 594 * number (i.e. 16 -> 8). We'll continue to round down and
595 * retry the allocation until the number of blocks to allocate 595 * retry the allocation until the number of blocks to allocate
596 * is smaller than the number of blocks per page. 596 * is smaller than the number of blocks per page.
597 * 597 *
598 * PARAMETERS: 598 * PARAMETERS:
599 * ip - the inode of the file. 599 * ip - the inode of the file.
600 * blkno - starting block number of the extents current allocation. 600 * blkno - starting block number of the extents current allocation.
@@ -625,7 +625,7 @@ extBrealloc(struct inode *ip,
625 return (rc); 625 return (rc);
626 } 626 }
627 627
628 /* in place extension not possible. 628 /* in place extension not possible.
629 * try to move the extent to a new set of blocks. 629 * try to move the extent to a new set of blocks.
630 */ 630 */
631 return (extBalloc(ip, blkno, newnblks, newblkno)); 631 return (extBalloc(ip, blkno, newnblks, newblkno));
diff --git a/fs/jfs/jfs_extent.h b/fs/jfs/jfs_extent.h
index 3a7f3f22e989..b567e12c52d3 100644
--- a/fs/jfs/jfs_extent.h
+++ b/fs/jfs/jfs_extent.h
@@ -1,18 +1,18 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2000-2001 2 * Copyright (C) International Business Machines Corp., 2000-2001
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_EXTENT 18#ifndef _H_JFS_EXTENT
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 72a5588faeca..9901928668cf 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_FILSYS 18#ifndef _H_JFS_FILSYS
@@ -21,9 +21,9 @@
21/* 21/*
22 * jfs_filsys.h 22 * jfs_filsys.h
23 * 23 *
24 * file system (implementation-dependent) constants 24 * file system (implementation-dependent) constants
25 * 25 *
26 * refer to <limits.h> for system wide implementation-dependent constants 26 * refer to <limits.h> for system wide implementation-dependent constants
27 */ 27 */
28 28
29/* 29/*
@@ -49,7 +49,7 @@
49 49
50#define JFS_DFS 0x20000000 /* DCE DFS LFS support */ 50#define JFS_DFS 0x20000000 /* DCE DFS LFS support */
51 51
52#define JFS_LINUX 0x10000000 /* Linux support */ 52#define JFS_LINUX 0x10000000 /* Linux support */
53/* case-sensitive name/directory support */ 53/* case-sensitive name/directory support */
54 54
55/* directory option */ 55/* directory option */
@@ -59,7 +59,7 @@
59#define JFS_COMMIT 0x00000f00 /* commit option mask */ 59#define JFS_COMMIT 0x00000f00 /* commit option mask */
60#define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ 60#define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */
61#define JFS_LAZYCOMMIT 0x00000200 /* lazy commit */ 61#define JFS_LAZYCOMMIT 0x00000200 /* lazy commit */
62#define JFS_TMPFS 0x00000400 /* temporary file system - 62#define JFS_TMPFS 0x00000400 /* temporary file system -
63 * do not log/commit: 63 * do not log/commit:
64 */ 64 */
65 65
@@ -196,7 +196,7 @@
196 * followed by 1st extent of map 196 * followed by 1st extent of map
197 */ 197 */
198#define AITBL_OFF (AIMAP_OFF + (SIZE_OF_MAP_PAGE << 1)) 198#define AITBL_OFF (AIMAP_OFF + (SIZE_OF_MAP_PAGE << 1))
199 /* 199 /*
200 * 1st extent of aggregate inode table 200 * 1st extent of aggregate inode table
201 */ 201 */
202#define SUPER2_OFF (AITBL_OFF + INODE_EXTENT_SIZE) 202#define SUPER2_OFF (AITBL_OFF + INODE_EXTENT_SIZE)
@@ -270,13 +270,13 @@
270 */ 270 */
271#define FM_CLEAN 0x00000000 /* file system is unmounted and clean */ 271#define FM_CLEAN 0x00000000 /* file system is unmounted and clean */
272#define FM_MOUNT 0x00000001 /* file system is mounted cleanly */ 272#define FM_MOUNT 0x00000001 /* file system is mounted cleanly */
273#define FM_DIRTY 0x00000002 /* file system was not unmounted and clean 273#define FM_DIRTY 0x00000002 /* file system was not unmounted and clean
274 * when mounted or 274 * when mounted or
275 * commit failure occurred while being mounted: 275 * commit failure occurred while being mounted:
276 * fsck() must be run to repair 276 * fsck() must be run to repair
277 */ 277 */
278#define FM_LOGREDO 0x00000004 /* log based recovery (logredo()) failed: 278#define FM_LOGREDO 0x00000004 /* log based recovery (logredo()) failed:
279 * fsck() must be run to repair 279 * fsck() must be run to repair
280 */ 280 */
281#define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */ 281#define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */
282 282
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index a45ee2489580..489a3d63002d 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
@@ -98,7 +98,7 @@ static void copy_to_dinode(struct dinode *, struct inode *);
98 * FUNCTION: initialize the incore inode map control structures for 98 * FUNCTION: initialize the incore inode map control structures for
99 * a fileset or aggregate init time. 99 * a fileset or aggregate init time.
100 * 100 *
101 * the inode map's control structure (dinomap) is 101 * the inode map's control structure (dinomap) is
102 * brought in from disk and placed in virtual memory. 102 * brought in from disk and placed in virtual memory.
103 * 103 *
104 * PARAMETERS: 104 * PARAMETERS:
@@ -107,7 +107,7 @@ static void copy_to_dinode(struct dinode *, struct inode *);
107 * RETURN VALUES: 107 * RETURN VALUES:
108 * 0 - success 108 * 0 - success
109 * -ENOMEM - insufficient free virtual memory. 109 * -ENOMEM - insufficient free virtual memory.
110 * -EIO - i/o error. 110 * -EIO - i/o error.
111 */ 111 */
112int diMount(struct inode *ipimap) 112int diMount(struct inode *ipimap)
113{ 113{
@@ -191,7 +191,7 @@ int diMount(struct inode *ipimap)
191 * RETURN VALUES: 191 * RETURN VALUES:
192 * 0 - success 192 * 0 - success
193 * -ENOMEM - insufficient free virtual memory. 193 * -ENOMEM - insufficient free virtual memory.
194 * -EIO - i/o error. 194 * -EIO - i/o error.
195 */ 195 */
196int diUnmount(struct inode *ipimap, int mounterror) 196int diUnmount(struct inode *ipimap, int mounterror)
197{ 197{
@@ -281,7 +281,7 @@ int diSync(struct inode *ipimap)
281 * on entry, the specifed incore inode should itself 281 * on entry, the specifed incore inode should itself
282 * specify the disk inode number corresponding to the 282 * specify the disk inode number corresponding to the
283 * incore inode (i.e. i_number should be initialized). 283 * incore inode (i.e. i_number should be initialized).
284 * 284 *
285 * this routine handles incore inode initialization for 285 * this routine handles incore inode initialization for
286 * both "special" and "regular" inodes. special inodes 286 * both "special" and "regular" inodes. special inodes
287 * are those required early in the mount process and 287 * are those required early in the mount process and
@@ -289,7 +289,7 @@ int diSync(struct inode *ipimap)
289 * is not yet initialized. these "special" inodes are 289 * is not yet initialized. these "special" inodes are
290 * identified by a NULL inode map inode pointer and are 290 * identified by a NULL inode map inode pointer and are
291 * actually initialized by a call to diReadSpecial(). 291 * actually initialized by a call to diReadSpecial().
292 * 292 *
293 * for regular inodes, the iag describing the disk inode 293 * for regular inodes, the iag describing the disk inode
294 * is read from disk to determine the inode extent address 294 * is read from disk to determine the inode extent address
295 * for the disk inode. with the inode extent address in 295 * for the disk inode. with the inode extent address in
@@ -302,9 +302,9 @@ int diSync(struct inode *ipimap)
302 * 302 *
303 * RETURN VALUES: 303 * RETURN VALUES:
304 * 0 - success 304 * 0 - success
305 * -EIO - i/o error. 305 * -EIO - i/o error.
306 * -ENOMEM - insufficient memory 306 * -ENOMEM - insufficient memory
307 * 307 *
308 */ 308 */
309int diRead(struct inode *ip) 309int diRead(struct inode *ip)
310{ 310{
@@ -586,14 +586,14 @@ void diFreeSpecial(struct inode *ip)
586 * page of the extent that contains the disk inode is 586 * page of the extent that contains the disk inode is
587 * read and the disk inode portion of the incore inode 587 * read and the disk inode portion of the incore inode
588 * is copied to the disk inode. 588 * is copied to the disk inode.
589 * 589 *
590 * PARAMETERS: 590 * PARAMETERS:
591 * tid - transacation id 591 * tid - transacation id
592 * ip - pointer to incore inode to be written to the inode extent. 592 * ip - pointer to incore inode to be written to the inode extent.
593 * 593 *
594 * RETURN VALUES: 594 * RETURN VALUES:
595 * 0 - success 595 * 0 - success
596 * -EIO - i/o error. 596 * -EIO - i/o error.
597 */ 597 */
598int diWrite(tid_t tid, struct inode *ip) 598int diWrite(tid_t tid, struct inode *ip)
599{ 599{
@@ -676,11 +676,11 @@ int diWrite(tid_t tid, struct inode *ip)
676 * copy btree root from in-memory inode to on-disk inode 676 * copy btree root from in-memory inode to on-disk inode
677 * 677 *
678 * (tlock is taken from inline B+-tree root in in-memory 678 * (tlock is taken from inline B+-tree root in in-memory
679 * inode when the B+-tree root is updated, which is pointed 679 * inode when the B+-tree root is updated, which is pointed
680 * by jfs_ip->blid as well as being on tx tlock list) 680 * by jfs_ip->blid as well as being on tx tlock list)
681 * 681 *
682 * further processing of btree root is based on the copy 682 * further processing of btree root is based on the copy
683 * in in-memory inode, where txLog() will log from, and, 683 * in in-memory inode, where txLog() will log from, and,
684 * for xtree root, txUpdateMap() will update map and reset 684 * for xtree root, txUpdateMap() will update map and reset
685 * XAD_NEW bit; 685 * XAD_NEW bit;
686 */ 686 */
@@ -824,7 +824,7 @@ int diWrite(tid_t tid, struct inode *ip)
824 memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd)); 824 memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd));
825#endif /* _JFS_FASTDASD */ 825#endif /* _JFS_FASTDASD */
826 826
827 /* release the buffer holding the updated on-disk inode. 827 /* release the buffer holding the updated on-disk inode.
828 * the buffer will be later written by commit processing. 828 * the buffer will be later written by commit processing.
829 */ 829 */
830 write_metapage(mp); 830 write_metapage(mp);
@@ -842,7 +842,7 @@ int diWrite(tid_t tid, struct inode *ip)
842 * if the inode to be freed represents the first (only) 842 * if the inode to be freed represents the first (only)
843 * free inode within the iag, the iag will be placed on 843 * free inode within the iag, the iag will be placed on
844 * the ag free inode list. 844 * the ag free inode list.
845 * 845 *
846 * freeing the inode will cause the inode extent to be 846 * freeing the inode will cause the inode extent to be
847 * freed if the inode is the only allocated inode within 847 * freed if the inode is the only allocated inode within
848 * the extent. in this case all the disk resource backing 848 * the extent. in this case all the disk resource backing
@@ -865,11 +865,11 @@ int diWrite(tid_t tid, struct inode *ip)
865 * any updates and are held until all updates are complete. 865 * any updates and are held until all updates are complete.
866 * 866 *
867 * PARAMETERS: 867 * PARAMETERS:
868 * ip - inode to be freed. 868 * ip - inode to be freed.
869 * 869 *
870 * RETURN VALUES: 870 * RETURN VALUES:
871 * 0 - success 871 * 0 - success
872 * -EIO - i/o error. 872 * -EIO - i/o error.
873 */ 873 */
874int diFree(struct inode *ip) 874int diFree(struct inode *ip)
875{ 875{
@@ -898,7 +898,7 @@ int diFree(struct inode *ip)
898 */ 898 */
899 iagno = INOTOIAG(inum); 899 iagno = INOTOIAG(inum);
900 900
901 /* make sure that the iag is contained within 901 /* make sure that the iag is contained within
902 * the map. 902 * the map.
903 */ 903 */
904 if (iagno >= imap->im_nextiag) { 904 if (iagno >= imap->im_nextiag) {
@@ -1013,7 +1013,7 @@ int diFree(struct inode *ip)
1013 1013
1014 /* update the free inode summary map for the extent if 1014 /* update the free inode summary map for the extent if
1015 * freeing the inode means the extent will now have free 1015 * freeing the inode means the extent will now have free
1016 * inodes (i.e., the inode being freed is the first free 1016 * inodes (i.e., the inode being freed is the first free
1017 * inode of extent), 1017 * inode of extent),
1018 */ 1018 */
1019 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 1019 if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
@@ -1204,9 +1204,9 @@ int diFree(struct inode *ip)
1204 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 1204 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
1205 } 1205 }
1206 1206
1207 /* update the inode extent address and working map 1207 /* update the inode extent address and working map
1208 * to reflect the free extent. 1208 * to reflect the free extent.
1209 * the permanent map should have been updated already 1209 * the permanent map should have been updated already
1210 * for the inode being freed. 1210 * for the inode being freed.
1211 */ 1211 */
1212 if (iagp->pmap[extno] != 0) { 1212 if (iagp->pmap[extno] != 0) {
@@ -1218,7 +1218,7 @@ int diFree(struct inode *ip)
1218 1218
1219 /* update the free extent and free inode summary maps 1219 /* update the free extent and free inode summary maps
1220 * to reflect the freed extent. 1220 * to reflect the freed extent.
1221 * the inode summary map is marked to indicate no inodes 1221 * the inode summary map is marked to indicate no inodes
1222 * available for the freed extent. 1222 * available for the freed extent.
1223 */ 1223 */
1224 sword = extno >> L2EXTSPERSUM; 1224 sword = extno >> L2EXTSPERSUM;
@@ -1255,17 +1255,17 @@ int diFree(struct inode *ip)
1255 * start transaction to update block allocation map 1255 * start transaction to update block allocation map
1256 * for the inode extent freed; 1256 * for the inode extent freed;
1257 * 1257 *
1258 * N.B. AG_LOCK is released and iag will be released below, and 1258 * N.B. AG_LOCK is released and iag will be released below, and
1259 * other thread may allocate inode from/reusing the ixad freed 1259 * other thread may allocate inode from/reusing the ixad freed
1260 * BUT with new/different backing inode extent from the extent 1260 * BUT with new/different backing inode extent from the extent
1261 * to be freed by the transaction; 1261 * to be freed by the transaction;
1262 */ 1262 */
1263 tid = txBegin(ipimap->i_sb, COMMIT_FORCE); 1263 tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
1264 mutex_lock(&JFS_IP(ipimap)->commit_mutex); 1264 mutex_lock(&JFS_IP(ipimap)->commit_mutex);
1265 1265
1266 /* acquire tlock of the iag page of the freed ixad 1266 /* acquire tlock of the iag page of the freed ixad
1267 * to force the page NOHOMEOK (even though no data is 1267 * to force the page NOHOMEOK (even though no data is
1268 * logged from the iag page) until NOREDOPAGE|FREEXTENT log 1268 * logged from the iag page) until NOREDOPAGE|FREEXTENT log
1269 * for the free of the extent is committed; 1269 * for the free of the extent is committed;
1270 * write FREEXTENT|NOREDOPAGE log record 1270 * write FREEXTENT|NOREDOPAGE log record
1271 * N.B. linelock is overlaid as freed extent descriptor; 1271 * N.B. linelock is overlaid as freed extent descriptor;
@@ -1284,8 +1284,8 @@ int diFree(struct inode *ip)
1284 * logredo needs the IAG number and IAG extent index in order 1284 * logredo needs the IAG number and IAG extent index in order
1285 * to ensure that the IMap is consistent. The least disruptive 1285 * to ensure that the IMap is consistent. The least disruptive
1286 * way to pass these values through to the transaction manager 1286 * way to pass these values through to the transaction manager
1287 * is in the iplist array. 1287 * is in the iplist array.
1288 * 1288 *
1289 * It's not pretty, but it works. 1289 * It's not pretty, but it works.
1290 */ 1290 */
1291 iplist[1] = (struct inode *) (size_t)iagno; 1291 iplist[1] = (struct inode *) (size_t)iagno;
@@ -1340,18 +1340,18 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1340/* 1340/*
1341 * NAME: diAlloc(pip,dir,ip) 1341 * NAME: diAlloc(pip,dir,ip)
1342 * 1342 *
1343 * FUNCTION: allocate a disk inode from the inode working map 1343 * FUNCTION: allocate a disk inode from the inode working map
1344 * for a fileset or aggregate. 1344 * for a fileset or aggregate.
1345 * 1345 *
1346 * PARAMETERS: 1346 * PARAMETERS:
1347 * pip - pointer to incore inode for the parent inode. 1347 * pip - pointer to incore inode for the parent inode.
1348 * dir - 'true' if the new disk inode is for a directory. 1348 * dir - 'true' if the new disk inode is for a directory.
1349 * ip - pointer to a new inode 1349 * ip - pointer to a new inode
1350 * 1350 *
1351 * RETURN VALUES: 1351 * RETURN VALUES:
1352 * 0 - success. 1352 * 0 - success.
1353 * -ENOSPC - insufficient disk resources. 1353 * -ENOSPC - insufficient disk resources.
1354 * -EIO - i/o error. 1354 * -EIO - i/o error.
1355 */ 1355 */
1356int diAlloc(struct inode *pip, bool dir, struct inode *ip) 1356int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1357{ 1357{
@@ -1372,7 +1372,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1372 JFS_IP(ip)->ipimap = ipimap; 1372 JFS_IP(ip)->ipimap = ipimap;
1373 JFS_IP(ip)->fileset = FILESYSTEM_I; 1373 JFS_IP(ip)->fileset = FILESYSTEM_I;
1374 1374
1375 /* for a directory, the allocation policy is to start 1375 /* for a directory, the allocation policy is to start
1376 * at the ag level using the preferred ag. 1376 * at the ag level using the preferred ag.
1377 */ 1377 */
1378 if (dir) { 1378 if (dir) {
@@ -1435,7 +1435,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1435 /* 1435 /*
1436 * try to allocate from the IAG 1436 * try to allocate from the IAG
1437 */ 1437 */
1438 /* check if the inode may be allocated from the iag 1438 /* check if the inode may be allocated from the iag
1439 * (i.e. the inode has free inodes or new extent can be added). 1439 * (i.e. the inode has free inodes or new extent can be added).
1440 */ 1440 */
1441 if (iagp->nfreeinos || addext) { 1441 if (iagp->nfreeinos || addext) {
@@ -1490,7 +1490,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1490 * hint or, if appropriate (i.e. addext is true), allocate 1490 * hint or, if appropriate (i.e. addext is true), allocate
1491 * an extent of free inodes at or following the extent 1491 * an extent of free inodes at or following the extent
1492 * containing the hint. 1492 * containing the hint.
1493 * 1493 *
1494 * the free inode and free extent summary maps are used 1494 * the free inode and free extent summary maps are used
1495 * here, so determine the starting summary map position 1495 * here, so determine the starting summary map position
1496 * and the number of words we'll have to examine. again, 1496 * and the number of words we'll have to examine. again,
@@ -1641,7 +1641,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1641 * inodes should be added for the allocation group, with 1641 * inodes should be added for the allocation group, with
1642 * the current request satisfied from this extent. if this 1642 * the current request satisfied from this extent. if this
1643 * is the case, an attempt will be made to do just that. if 1643 * is the case, an attempt will be made to do just that. if
1644 * this attempt fails or it has been determined that a new 1644 * this attempt fails or it has been determined that a new
1645 * extent should not be added, an attempt is made to satisfy 1645 * extent should not be added, an attempt is made to satisfy
1646 * the request by allocating an existing (backed) free inode 1646 * the request by allocating an existing (backed) free inode
1647 * from the allocation group. 1647 * from the allocation group.
@@ -1649,24 +1649,24 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1649 * PRE CONDITION: Already have the AG lock for this AG. 1649 * PRE CONDITION: Already have the AG lock for this AG.
1650 * 1650 *
1651 * PARAMETERS: 1651 * PARAMETERS:
1652 * imap - pointer to inode map control structure. 1652 * imap - pointer to inode map control structure.
1653 * agno - allocation group to allocate from. 1653 * agno - allocation group to allocate from.
1654 * dir - 'true' if the new disk inode is for a directory. 1654 * dir - 'true' if the new disk inode is for a directory.
1655 * ip - pointer to the new inode to be filled in on successful return 1655 * ip - pointer to the new inode to be filled in on successful return
1656 * with the disk inode number allocated, its extent address 1656 * with the disk inode number allocated, its extent address
1657 * and the start of the ag. 1657 * and the start of the ag.
1658 * 1658 *
1659 * RETURN VALUES: 1659 * RETURN VALUES:
1660 * 0 - success. 1660 * 0 - success.
1661 * -ENOSPC - insufficient disk resources. 1661 * -ENOSPC - insufficient disk resources.
1662 * -EIO - i/o error. 1662 * -EIO - i/o error.
1663 */ 1663 */
1664static int 1664static int
1665diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) 1665diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1666{ 1666{
1667 int rc, addext, numfree, numinos; 1667 int rc, addext, numfree, numinos;
1668 1668
1669 /* get the number of free and the number of backed disk 1669 /* get the number of free and the number of backed disk
1670 * inodes currently within the ag. 1670 * inodes currently within the ag.
1671 */ 1671 */
1672 numfree = imap->im_agctl[agno].numfree; 1672 numfree = imap->im_agctl[agno].numfree;
@@ -1719,17 +1719,17 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1719 * specified primary group. 1719 * specified primary group.
1720 * 1720 *
1721 * PARAMETERS: 1721 * PARAMETERS:
1722 * imap - pointer to inode map control structure. 1722 * imap - pointer to inode map control structure.
1723 * agno - primary allocation group (to avoid). 1723 * agno - primary allocation group (to avoid).
1724 * dir - 'true' if the new disk inode is for a directory. 1724 * dir - 'true' if the new disk inode is for a directory.
1725 * ip - pointer to a new inode to be filled in on successful return 1725 * ip - pointer to a new inode to be filled in on successful return
1726 * with the disk inode number allocated, its extent address 1726 * with the disk inode number allocated, its extent address
1727 * and the start of the ag. 1727 * and the start of the ag.
1728 * 1728 *
1729 * RETURN VALUES: 1729 * RETURN VALUES:
1730 * 0 - success. 1730 * 0 - success.
1731 * -ENOSPC - insufficient disk resources. 1731 * -ENOSPC - insufficient disk resources.
1732 * -EIO - i/o error. 1732 * -EIO - i/o error.
1733 */ 1733 */
1734static int 1734static int
1735diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) 1735diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1738,7 +1738,7 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1738 int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; 1738 int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
1739 1739
1740 1740
1741 /* try to allocate from the ags following agno up to 1741 /* try to allocate from the ags following agno up to
1742 * the maximum ag number. 1742 * the maximum ag number.
1743 */ 1743 */
1744 for (ag = agno + 1; ag <= maxag; ag++) { 1744 for (ag = agno + 1; ag <= maxag; ag++) {
@@ -1780,21 +1780,21 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1780 * 1780 *
1781 * allocation occurs from the first iag on the list using 1781 * allocation occurs from the first iag on the list using
1782 * the iag's free inode summary map to find the leftmost 1782 * the iag's free inode summary map to find the leftmost
1783 * free inode in the iag. 1783 * free inode in the iag.
1784 * 1784 *
1785 * PRE CONDITION: Already have AG lock for this AG. 1785 * PRE CONDITION: Already have AG lock for this AG.
1786 * 1786 *
1787 * PARAMETERS: 1787 * PARAMETERS:
1788 * imap - pointer to inode map control structure. 1788 * imap - pointer to inode map control structure.
1789 * agno - allocation group. 1789 * agno - allocation group.
1790 * ip - pointer to new inode to be filled in on successful return 1790 * ip - pointer to new inode to be filled in on successful return
1791 * with the disk inode number allocated, its extent address 1791 * with the disk inode number allocated, its extent address
1792 * and the start of the ag. 1792 * and the start of the ag.
1793 * 1793 *
1794 * RETURN VALUES: 1794 * RETURN VALUES:
1795 * 0 - success. 1795 * 0 - success.
1796 * -ENOSPC - insufficient disk resources. 1796 * -ENOSPC - insufficient disk resources.
1797 * -EIO - i/o error. 1797 * -EIO - i/o error.
1798 */ 1798 */
1799static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) 1799static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1800{ 1800{
@@ -1867,7 +1867,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1867 return -EIO; 1867 return -EIO;
1868 } 1868 }
1869 1869
1870 /* compute the inode number within the iag. 1870 /* compute the inode number within the iag.
1871 */ 1871 */
1872 ino = (extno << L2INOSPEREXT) + rem; 1872 ino = (extno << L2INOSPEREXT) + rem;
1873 1873
@@ -1892,17 +1892,17 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1892/* 1892/*
1893 * NAME: diAllocExt(imap,agno,ip) 1893 * NAME: diAllocExt(imap,agno,ip)
1894 * 1894 *
1895 * FUNCTION: add a new extent of free inodes to an iag, allocating 1895 * FUNCTION: add a new extent of free inodes to an iag, allocating
1896 * an inode from this extent to satisfy the current allocation 1896 * an inode from this extent to satisfy the current allocation
1897 * request. 1897 * request.
1898 * 1898 *
1899 * this routine first tries to find an existing iag with free 1899 * this routine first tries to find an existing iag with free
1900 * extents through the ag free extent list. if list is not 1900 * extents through the ag free extent list. if list is not
1901 * empty, the head of the list will be selected as the home 1901 * empty, the head of the list will be selected as the home
1902 * of the new extent of free inodes. otherwise (the list is 1902 * of the new extent of free inodes. otherwise (the list is
1903 * empty), a new iag will be allocated for the ag to contain 1903 * empty), a new iag will be allocated for the ag to contain
1904 * the extent. 1904 * the extent.
1905 * 1905 *
1906 * once an iag has been selected, the free extent summary map 1906 * once an iag has been selected, the free extent summary map
1907 * is used to locate a free extent within the iag and diNewExt() 1907 * is used to locate a free extent within the iag and diNewExt()
1908 * is called to initialize the extent, with initialization 1908 * is called to initialize the extent, with initialization
@@ -1910,16 +1910,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1910 * for the purpose of satisfying this request. 1910 * for the purpose of satisfying this request.
1911 * 1911 *
1912 * PARAMETERS: 1912 * PARAMETERS:
1913 * imap - pointer to inode map control structure. 1913 * imap - pointer to inode map control structure.
1914 * agno - allocation group number. 1914 * agno - allocation group number.
1915 * ip - pointer to new inode to be filled in on successful return 1915 * ip - pointer to new inode to be filled in on successful return
1916 * with the disk inode number allocated, its extent address 1916 * with the disk inode number allocated, its extent address
1917 * and the start of the ag. 1917 * and the start of the ag.
1918 * 1918 *
1919 * RETURN VALUES: 1919 * RETURN VALUES:
1920 * 0 - success. 1920 * 0 - success.
1921 * -ENOSPC - insufficient disk resources. 1921 * -ENOSPC - insufficient disk resources.
1922 * -EIO - i/o error. 1922 * -EIO - i/o error.
1923 */ 1923 */
1924static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) 1924static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
1925{ 1925{
@@ -2012,7 +2012,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
2012/* 2012/*
2013 * NAME: diAllocBit(imap,iagp,ino) 2013 * NAME: diAllocBit(imap,iagp,ino)
2014 * 2014 *
2015 * FUNCTION: allocate a backed inode from an iag. 2015 * FUNCTION: allocate a backed inode from an iag.
2016 * 2016 *
2017 * this routine performs the mechanics of allocating a 2017 * this routine performs the mechanics of allocating a
2018 * specified inode from a backed extent. 2018 * specified inode from a backed extent.
@@ -2025,19 +2025,19 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
2025 * in the face of updates to multiple buffers. under this 2025 * in the face of updates to multiple buffers. under this
2026 * approach, all required buffers are obtained before making 2026 * approach, all required buffers are obtained before making
2027 * any updates and are held until all updates are complete. 2027 * any updates and are held until all updates are complete.
2028 * 2028 *
2029 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 2029 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on
2030 * this AG. Must have read lock on imap inode. 2030 * this AG. Must have read lock on imap inode.
2031 * 2031 *
2032 * PARAMETERS: 2032 * PARAMETERS:
2033 * imap - pointer to inode map control structure. 2033 * imap - pointer to inode map control structure.
2034 * iagp - pointer to iag. 2034 * iagp - pointer to iag.
2035 * ino - inode number to be allocated within the iag. 2035 * ino - inode number to be allocated within the iag.
2036 * 2036 *
2037 * RETURN VALUES: 2037 * RETURN VALUES:
2038 * 0 - success. 2038 * 0 - success.
2039 * -ENOSPC - insufficient disk resources. 2039 * -ENOSPC - insufficient disk resources.
2040 * -EIO - i/o error. 2040 * -EIO - i/o error.
2041 */ 2041 */
2042static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) 2042static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2043{ 2043{
@@ -2172,19 +2172,19 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2172 * buffers. under this approach, all required buffers are 2172 * buffers. under this approach, all required buffers are
2173 * obtained before making any updates and are held until all 2173 * obtained before making any updates and are held until all
2174 * updates are complete. 2174 * updates are complete.
2175 * 2175 *
2176 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 2176 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on
2177 * this AG. Must have read lock on imap inode. 2177 * this AG. Must have read lock on imap inode.
2178 * 2178 *
2179 * PARAMETERS: 2179 * PARAMETERS:
2180 * imap - pointer to inode map control structure. 2180 * imap - pointer to inode map control structure.
2181 * iagp - pointer to iag. 2181 * iagp - pointer to iag.
2182 * extno - extent number. 2182 * extno - extent number.
2183 * 2183 *
2184 * RETURN VALUES: 2184 * RETURN VALUES:
2185 * 0 - success. 2185 * 0 - success.
2186 * -ENOSPC - insufficient disk resources. 2186 * -ENOSPC - insufficient disk resources.
2187 * -EIO - i/o error. 2187 * -EIO - i/o error.
2188 */ 2188 */
2189static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) 2189static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2190{ 2190{
@@ -2432,34 +2432,34 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2432/* 2432/*
2433 * NAME: diNewIAG(imap,iagnop,agno) 2433 * NAME: diNewIAG(imap,iagnop,agno)
2434 * 2434 *
2435 * FUNCTION: allocate a new iag for an allocation group. 2435 * FUNCTION: allocate a new iag for an allocation group.
2436 * 2436 *
2437 * first tries to allocate the iag from the inode map 2437 * first tries to allocate the iag from the inode map
2438 * iagfree list: 2438 * iagfree list:
2439 * if the list has free iags, the head of the list is removed 2439 * if the list has free iags, the head of the list is removed
2440 * and returned to satisfy the request. 2440 * and returned to satisfy the request.
2441 * if the inode map's iag free list is empty, the inode map 2441 * if the inode map's iag free list is empty, the inode map
2442 * is extended to hold a new iag. this new iag is initialized 2442 * is extended to hold a new iag. this new iag is initialized
2443 * and returned to satisfy the request. 2443 * and returned to satisfy the request.
2444 * 2444 *
2445 * PARAMETERS: 2445 * PARAMETERS:
2446 * imap - pointer to inode map control structure. 2446 * imap - pointer to inode map control structure.
2447 * iagnop - pointer to an iag number set with the number of the 2447 * iagnop - pointer to an iag number set with the number of the
2448 * newly allocated iag upon successful return. 2448 * newly allocated iag upon successful return.
2449 * agno - allocation group number. 2449 * agno - allocation group number.
2450 * bpp - Buffer pointer to be filled in with new IAG's buffer 2450 * bpp - Buffer pointer to be filled in with new IAG's buffer
2451 * 2451 *
2452 * RETURN VALUES: 2452 * RETURN VALUES:
2453 * 0 - success. 2453 * 0 - success.
2454 * -ENOSPC - insufficient disk resources. 2454 * -ENOSPC - insufficient disk resources.
2455 * -EIO - i/o error. 2455 * -EIO - i/o error.
2456 * 2456 *
2457 * serialization: 2457 * serialization:
2458 * AG lock held on entry/exit; 2458 * AG lock held on entry/exit;
2459 * write lock on the map is held inside; 2459 * write lock on the map is held inside;
2460 * read lock on the map is held on successful completion; 2460 * read lock on the map is held on successful completion;
2461 * 2461 *
2462 * note: new iag transaction: 2462 * note: new iag transaction:
2463 * . synchronously write iag; 2463 * . synchronously write iag;
2464 * . write log of xtree and inode of imap; 2464 * . write log of xtree and inode of imap;
2465 * . commit; 2465 * . commit;
@@ -2494,7 +2494,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2494 /* acquire the free iag lock */ 2494 /* acquire the free iag lock */
2495 IAGFREE_LOCK(imap); 2495 IAGFREE_LOCK(imap);
2496 2496
2497 /* if there are any iags on the inode map free iag list, 2497 /* if there are any iags on the inode map free iag list,
2498 * allocate the iag from the head of the list. 2498 * allocate the iag from the head of the list.
2499 */ 2499 */
2500 if (imap->im_freeiag >= 0) { 2500 if (imap->im_freeiag >= 0) {
@@ -2618,8 +2618,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2618 flush_metapage(mp); 2618 flush_metapage(mp);
2619 2619
2620 /* 2620 /*
2621 * txCommit(COMMIT_FORCE) will synchronously write address 2621 * txCommit(COMMIT_FORCE) will synchronously write address
2622 * index pages and inode after commit in careful update order 2622 * index pages and inode after commit in careful update order
2623 * of address index pages (right to left, bottom up); 2623 * of address index pages (right to left, bottom up);
2624 */ 2624 */
2625 iplist[0] = ipimap; 2625 iplist[0] = ipimap;
@@ -2678,11 +2678,11 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2678 * 2678 *
2679 * FUNCTION: get the buffer for the specified iag within a fileset 2679 * FUNCTION: get the buffer for the specified iag within a fileset
2680 * or aggregate inode map. 2680 * or aggregate inode map.
2681 * 2681 *
2682 * PARAMETERS: 2682 * PARAMETERS:
2683 * imap - pointer to inode map control structure. 2683 * imap - pointer to inode map control structure.
2684 * iagno - iag number. 2684 * iagno - iag number.
2685 * bpp - pointer to buffer pointer to be filled in on successful 2685 * bpp - pointer to buffer pointer to be filled in on successful
2686 * exit. 2686 * exit.
2687 * 2687 *
2688 * SERIALIZATION: 2688 * SERIALIZATION:
@@ -2692,7 +2692,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2692 * 2692 *
2693 * RETURN VALUES: 2693 * RETURN VALUES:
2694 * 0 - success. 2694 * 0 - success.
2695 * -EIO - i/o error. 2695 * -EIO - i/o error.
2696 */ 2696 */
2697static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) 2697static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2698{ 2698{
@@ -2718,8 +2718,8 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2718 * the specified bit position. 2718 * the specified bit position.
2719 * 2719 *
2720 * PARAMETERS: 2720 * PARAMETERS:
2721 * word - word to be examined. 2721 * word - word to be examined.
2722 * start - starting bit position. 2722 * start - starting bit position.
2723 * 2723 *
2724 * RETURN VALUES: 2724 * RETURN VALUES:
2725 * bit position of first free bit in the word or 32 if 2725 * bit position of first free bit in the word or 32 if
@@ -2740,10 +2740,10 @@ static int diFindFree(u32 word, int start)
2740 2740
2741/* 2741/*
2742 * NAME: diUpdatePMap() 2742 * NAME: diUpdatePMap()
2743 * 2743 *
2744 * FUNCTION: Update the persistent map in an IAG for the allocation or 2744 * FUNCTION: Update the persistent map in an IAG for the allocation or
2745 * freeing of the specified inode. 2745 * freeing of the specified inode.
2746 * 2746 *
2747 * PRE CONDITIONS: Working map has already been updated for allocate. 2747 * PRE CONDITIONS: Working map has already been updated for allocate.
2748 * 2748 *
2749 * PARAMETERS: 2749 * PARAMETERS:
@@ -2752,7 +2752,7 @@ static int diFindFree(u32 word, int start)
2752 * is_free - If 'true' indicates inode should be marked freed, otherwise 2752 * is_free - If 'true' indicates inode should be marked freed, otherwise
2753 * indicates inode should be marked allocated. 2753 * indicates inode should be marked allocated.
2754 * 2754 *
2755 * RETURN VALUES: 2755 * RETURN VALUES:
2756 * 0 for success 2756 * 0 for success
2757 */ 2757 */
2758int 2758int
@@ -2793,7 +2793,7 @@ diUpdatePMap(struct inode *ipimap,
2793 extno = ino >> L2INOSPEREXT; 2793 extno = ino >> L2INOSPEREXT;
2794 bitno = ino & (INOSPEREXT - 1); 2794 bitno = ino & (INOSPEREXT - 1);
2795 mask = HIGHORDER >> bitno; 2795 mask = HIGHORDER >> bitno;
2796 /* 2796 /*
2797 * mark the inode free in persistent map: 2797 * mark the inode free in persistent map:
2798 */ 2798 */
2799 if (is_free) { 2799 if (is_free) {
@@ -2803,7 +2803,7 @@ diUpdatePMap(struct inode *ipimap,
2803 * of last reference release; 2803 * of last reference release;
2804 */ 2804 */
2805 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2805 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2806 jfs_error(ipimap->i_sb, 2806 jfs_error(ipimap->i_sb,
2807 "diUpdatePMap: inode %ld not marked as " 2807 "diUpdatePMap: inode %ld not marked as "
2808 "allocated in wmap!", inum); 2808 "allocated in wmap!", inum);
2809 } 2809 }
@@ -2877,8 +2877,8 @@ diUpdatePMap(struct inode *ipimap,
2877 * diExtendFS() 2877 * diExtendFS()
2878 * 2878 *
2879 * function: update imap for extendfs(); 2879 * function: update imap for extendfs();
2880 * 2880 *
2881 * note: AG size has been increased s.t. each k old contiguous AGs are 2881 * note: AG size has been increased s.t. each k old contiguous AGs are
2882 * coalesced into a new AG; 2882 * coalesced into a new AG;
2883 */ 2883 */
2884int diExtendFS(struct inode *ipimap, struct inode *ipbmap) 2884int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
@@ -2897,7 +2897,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2897 atomic_read(&imap->im_numfree)); 2897 atomic_read(&imap->im_numfree));
2898 2898
2899 /* 2899 /*
2900 * reconstruct imap 2900 * reconstruct imap
2901 * 2901 *
2902 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 2902 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
2903 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; 2903 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
@@ -2931,7 +2931,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2931 } 2931 }
2932 2932
2933 /* leave free iag in the free iag list */ 2933 /* leave free iag in the free iag list */
2934 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2934 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2935 release_metapage(bp); 2935 release_metapage(bp);
2936 continue; 2936 continue;
2937 } 2937 }
diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h
index e3b7db47db6b..4f9c346ed498 100644
--- a/fs/jfs/jfs_imap.h
+++ b/fs/jfs/jfs_imap.h
@@ -1,18 +1,18 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2000-2002 2 * Copyright (C) International Business Machines Corp., 2000-2002
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_IMAP 18#ifndef _H_JFS_IMAP
@@ -45,13 +45,13 @@
45/* get the starting block number of the 4K page of an inode extent 45/* get the starting block number of the 4K page of an inode extent
46 * that contains ino. 46 * that contains ino.
47 */ 47 */
48#define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \ 48#define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \
49 ((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg))) 49 ((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg)))
50 50
51/* 51/*
52 * inode allocation map: 52 * inode allocation map:
53 * 53 *
54 * inode allocation map consists of 54 * inode allocation map consists of
55 * . the inode map control page and 55 * . the inode map control page and
56 * . inode allocation group pages (per 4096 inodes) 56 * . inode allocation group pages (per 4096 inodes)
57 * which are addressed by standard JFS xtree. 57 * which are addressed by standard JFS xtree.
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 54d73716ca8c..94005584445a 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -4,18 +4,18 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19#ifndef _H_JFS_INCORE 19#ifndef _H_JFS_INCORE
20#define _H_JFS_INCORE 20#define _H_JFS_INCORE
21 21
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index bffaca9ae3a2..4c67ed97682b 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
@@ -61,7 +61,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
61 inode = new_inode(sb); 61 inode = new_inode(sb);
62 if (!inode) { 62 if (!inode) {
63 jfs_warn("ialloc: new_inode returned NULL!"); 63 jfs_warn("ialloc: new_inode returned NULL!");
64 return inode; 64 return ERR_PTR(-ENOMEM);
65 } 65 }
66 66
67 jfs_inode = JFS_IP(inode); 67 jfs_inode = JFS_IP(inode);
@@ -69,9 +69,10 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
69 rc = diAlloc(parent, S_ISDIR(mode), inode); 69 rc = diAlloc(parent, S_ISDIR(mode), inode);
70 if (rc) { 70 if (rc) {
71 jfs_warn("ialloc: diAlloc returned %d!", rc); 71 jfs_warn("ialloc: diAlloc returned %d!", rc);
72 make_bad_inode(inode); 72 if (rc == -EIO)
73 make_bad_inode(inode);
73 iput(inode); 74 iput(inode);
74 return NULL; 75 return ERR_PTR(rc);
75 } 76 }
76 77
77 inode->i_uid = current->fsuid; 78 inode->i_uid = current->fsuid;
@@ -97,7 +98,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
97 inode->i_flags |= S_NOQUOTA; 98 inode->i_flags |= S_NOQUOTA;
98 inode->i_nlink = 0; 99 inode->i_nlink = 0;
99 iput(inode); 100 iput(inode);
100 return NULL; 101 return ERR_PTR(-EDQUOT);
101 } 102 }
102 103
103 inode->i_mode = mode; 104 inode->i_mode = mode;
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 1fc48df670c8..0d06ccfaff0e 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_INODE 18#ifndef _H_JFS_INODE
diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h
index 70ac9f7d1e00..7d78e83d7c40 100644
--- a/fs/jfs/jfs_lock.h
+++ b/fs/jfs/jfs_lock.h
@@ -1,19 +1,19 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2000-2001 2 * Copyright (C) International Business Machines Corp., 2000-2001
3 * Portions Copyright (c) Christoph Hellwig, 2001-2002 3 * Portions Copyright (C) Christoph Hellwig, 2001-2002
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19#ifndef _H_JFS_LOCK 19#ifndef _H_JFS_LOCK
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 3315f0b1fbc0..b89c9aba0466 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
@@ -337,7 +337,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
337 * PARAMETER: cd - commit descriptor 337 * PARAMETER: cd - commit descriptor
338 * 338 *
339 * RETURN: end-of-log address 339 * RETURN: end-of-log address
340 * 340 *
341 * serialization: LOG_LOCK() held on entry/exit 341 * serialization: LOG_LOCK() held on entry/exit
342 */ 342 */
343static int 343static int
@@ -554,7 +554,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
554 * PARAMETER: log 554 * PARAMETER: log
555 * 555 *
556 * RETURN: 0 556 * RETURN: 0
557 * 557 *
558 * serialization: LOG_LOCK() held on entry/exit 558 * serialization: LOG_LOCK() held on entry/exit
559 */ 559 */
560static int lmNextPage(struct jfs_log * log) 560static int lmNextPage(struct jfs_log * log)
@@ -656,7 +656,7 @@ static int lmNextPage(struct jfs_log * log)
656 * page number - redrive pageout of the page at the head of 656 * page number - redrive pageout of the page at the head of
657 * pageout queue until full page has been written. 657 * pageout queue until full page has been written.
658 * 658 *
659 * RETURN: 659 * RETURN:
660 * 660 *
661 * NOTE: 661 * NOTE:
662 * LOGGC_LOCK serializes log group commit queue, and 662 * LOGGC_LOCK serializes log group commit queue, and
@@ -920,10 +920,10 @@ static void lmPostGC(struct lbuf * bp)
920 * this code is called again. 920 * this code is called again.
921 * 921 *
922 * PARAMETERS: log - log structure 922 * PARAMETERS: log - log structure
923 * hard_sync - 1 to force all metadata to be written 923 * hard_sync - 1 to force all metadata to be written
924 * 924 *
925 * RETURN: 0 925 * RETURN: 0
926 * 926 *
927 * serialization: LOG_LOCK() held on entry/exit 927 * serialization: LOG_LOCK() held on entry/exit
928 */ 928 */
929static int lmLogSync(struct jfs_log * log, int hard_sync) 929static int lmLogSync(struct jfs_log * log, int hard_sync)
@@ -1052,7 +1052,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
1052 * FUNCTION: write log SYNCPT record for specified log 1052 * FUNCTION: write log SYNCPT record for specified log
1053 * 1053 *
1054 * PARAMETERS: log - log structure 1054 * PARAMETERS: log - log structure
1055 * hard_sync - set to 1 to force metadata to be written 1055 * hard_sync - set to 1 to force metadata to be written
1056 */ 1056 */
1057void jfs_syncpt(struct jfs_log *log, int hard_sync) 1057void jfs_syncpt(struct jfs_log *log, int hard_sync)
1058{ LOG_LOCK(log); 1058{ LOG_LOCK(log);
@@ -1067,7 +1067,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync)
1067 * insert filesystem in the active list of the log. 1067 * insert filesystem in the active list of the log.
1068 * 1068 *
1069 * PARAMETER: ipmnt - file system mount inode 1069 * PARAMETER: ipmnt - file system mount inode
1070 * iplog - log inode (out) 1070 * iplog - log inode (out)
1071 * 1071 *
1072 * RETURN: 1072 * RETURN:
1073 * 1073 *
@@ -1082,7 +1082,7 @@ int lmLogOpen(struct super_block *sb)
1082 1082
1083 if (sbi->flag & JFS_NOINTEGRITY) 1083 if (sbi->flag & JFS_NOINTEGRITY)
1084 return open_dummy_log(sb); 1084 return open_dummy_log(sb);
1085 1085
1086 if (sbi->mntflag & JFS_INLINELOG) 1086 if (sbi->mntflag & JFS_INLINELOG)
1087 return open_inline_log(sb); 1087 return open_inline_log(sb);
1088 1088
@@ -1131,7 +1131,7 @@ int lmLogOpen(struct super_block *sb)
1131 1131
1132 log->bdev = bdev; 1132 log->bdev = bdev;
1133 memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); 1133 memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
1134 1134
1135 /* 1135 /*
1136 * initialize log: 1136 * initialize log:
1137 */ 1137 */
@@ -1253,13 +1253,13 @@ static int open_dummy_log(struct super_block *sb)
1253 * initialize the log from log superblock. 1253 * initialize the log from log superblock.
1254 * set the log state in the superblock to LOGMOUNT and 1254 * set the log state in the superblock to LOGMOUNT and
1255 * write SYNCPT log record. 1255 * write SYNCPT log record.
1256 * 1256 *
1257 * PARAMETER: log - log structure 1257 * PARAMETER: log - log structure
1258 * 1258 *
1259 * RETURN: 0 - if ok 1259 * RETURN: 0 - if ok
1260 * -EINVAL - bad log magic number or superblock dirty 1260 * -EINVAL - bad log magic number or superblock dirty
1261 * error returned from logwait() 1261 * error returned from logwait()
1262 * 1262 *
1263 * serialization: single first open thread 1263 * serialization: single first open thread
1264 */ 1264 */
1265int lmLogInit(struct jfs_log * log) 1265int lmLogInit(struct jfs_log * log)
@@ -1297,7 +1297,7 @@ int lmLogInit(struct jfs_log * log)
1297 1297
1298 if (!test_bit(log_INLINELOG, &log->flag)) 1298 if (!test_bit(log_INLINELOG, &log->flag))
1299 log->l2bsize = L2LOGPSIZE; 1299 log->l2bsize = L2LOGPSIZE;
1300 1300
1301 /* check for disabled journaling to disk */ 1301 /* check for disabled journaling to disk */
1302 if (log->no_integrity) { 1302 if (log->no_integrity) {
1303 /* 1303 /*
@@ -1651,7 +1651,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
1651 * PARAMETER: log - log inode 1651 * PARAMETER: log - log inode
1652 * 1652 *
1653 * RETURN: 0 - success 1653 * RETURN: 0 - success
1654 * 1654 *
1655 * serialization: single last close thread 1655 * serialization: single last close thread
1656 */ 1656 */
1657int lmLogShutdown(struct jfs_log * log) 1657int lmLogShutdown(struct jfs_log * log)
@@ -1677,7 +1677,7 @@ int lmLogShutdown(struct jfs_log * log)
1677 lrd.type = cpu_to_le16(LOG_SYNCPT); 1677 lrd.type = cpu_to_le16(LOG_SYNCPT);
1678 lrd.length = 0; 1678 lrd.length = 0;
1679 lrd.log.syncpt.sync = 0; 1679 lrd.log.syncpt.sync = 0;
1680 1680
1681 lsn = lmWriteRecord(log, NULL, &lrd, NULL); 1681 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1682 bp = log->bp; 1682 bp = log->bp;
1683 lp = (struct logpage *) bp->l_ldata; 1683 lp = (struct logpage *) bp->l_ldata;
@@ -1703,7 +1703,7 @@ int lmLogShutdown(struct jfs_log * log)
1703 jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d", 1703 jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1704 lsn, log->page, log->eor); 1704 lsn, log->page, log->eor);
1705 1705
1706 out: 1706 out:
1707 /* 1707 /*
1708 * shutdown per log i/o 1708 * shutdown per log i/o
1709 */ 1709 */
@@ -1769,7 +1769,7 @@ static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1769 lbmFree(bpsuper); 1769 lbmFree(bpsuper);
1770 return -EIO; 1770 return -EIO;
1771 } 1771 }
1772 1772
1773 } 1773 }
1774 1774
1775 /* 1775 /*
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 8c6909b80014..a53fb17ea219 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19#ifndef _H_JFS_LOGMGR 19#ifndef _H_JFS_LOGMGR
@@ -35,19 +35,19 @@
35/* 35/*
36 * log logical volume 36 * log logical volume
37 * 37 *
38 * a log is used to make the commit operation on journalled 38 * a log is used to make the commit operation on journalled
39 * files within the same logical volume group atomic. 39 * files within the same logical volume group atomic.
40 * a log is implemented with a logical volume. 40 * a log is implemented with a logical volume.
41 * there is one log per logical volume group. 41 * there is one log per logical volume group.
42 * 42 *
43 * block 0 of the log logical volume is not used (ipl etc). 43 * block 0 of the log logical volume is not used (ipl etc).
44 * block 1 contains a log "superblock" and is used by logFormat(), 44 * block 1 contains a log "superblock" and is used by logFormat(),
45 * lmLogInit(), lmLogShutdown(), and logRedo() to record status 45 * lmLogInit(), lmLogShutdown(), and logRedo() to record status
46 * of the log but is not otherwise used during normal processing. 46 * of the log but is not otherwise used during normal processing.
47 * blocks 2 - (N-1) are used to contain log records. 47 * blocks 2 - (N-1) are used to contain log records.
48 * 48 *
49 * when a volume group is varied-on-line, logRedo() must have 49 * when a volume group is varied-on-line, logRedo() must have
50 * been executed before the file systems (logical volumes) in 50 * been executed before the file systems (logical volumes) in
51 * the volume group can be mounted. 51 * the volume group can be mounted.
52 */ 52 */
53/* 53/*
@@ -97,26 +97,26 @@ struct logsuper {
97 * log logical page 97 * log logical page
98 * 98 *
99 * (this comment should be rewritten !) 99 * (this comment should be rewritten !)
100 * the header and trailer structures (h,t) will normally have 100 * the header and trailer structures (h,t) will normally have
101 * the same page and eor value. 101 * the same page and eor value.
102 * An exception to this occurs when a complete page write is not 102 * An exception to this occurs when a complete page write is not
103 * accomplished on a power failure. Since the hardware may "split write" 103 * accomplished on a power failure. Since the hardware may "split write"
104 * sectors in the page, any out of order sequence may occur during powerfail 104 * sectors in the page, any out of order sequence may occur during powerfail
105 * and needs to be recognized during log replay. The xor value is 105 * and needs to be recognized during log replay. The xor value is
106 * an "exclusive or" of all log words in the page up to eor. This 106 * an "exclusive or" of all log words in the page up to eor. This
107 * 32 bit eor is stored with the top 16 bits in the header and the 107 * 32 bit eor is stored with the top 16 bits in the header and the
108 * bottom 16 bits in the trailer. logredo can easily recognize pages 108 * bottom 16 bits in the trailer. logredo can easily recognize pages
109 * that were not completed by reconstructing this eor and checking 109 * that were not completed by reconstructing this eor and checking
110 * the log page. 110 * the log page.
111 * 111 *
112 * Previous versions of the operating system did not allow split 112 * Previous versions of the operating system did not allow split
113 * writes and detected partially written records in logredo by 113 * writes and detected partially written records in logredo by
114 * ordering the updates to the header, trailer, and the move of data 114 * ordering the updates to the header, trailer, and the move of data
115 * into the logdata area. The order: (1) data is moved (2) header 115 * into the logdata area. The order: (1) data is moved (2) header
116 * is updated (3) trailer is updated. In logredo, when the header 116 * is updated (3) trailer is updated. In logredo, when the header
117 * differed from the trailer, the header and trailer were reconciled 117 * differed from the trailer, the header and trailer were reconciled
118 * as follows: if h.page != t.page they were set to the smaller of 118 * as follows: if h.page != t.page they were set to the smaller of
119 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) 119 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
120 * h.eor != t.eor they were set to the smaller of their two values. 120 * h.eor != t.eor they were set to the smaller of their two values.
121 */ 121 */
122struct logpage { 122struct logpage {
@@ -147,20 +147,20 @@ struct logpage {
147 * in a page, pages are written to temporary paging space if 147 * in a page, pages are written to temporary paging space if
148 * they must be written to disk before commit, and i/o is 148 * they must be written to disk before commit, and i/o is
149 * scheduled for modified pages to their home location after 149 * scheduled for modified pages to their home location after
150 * the log records containing the after values and the commit 150 * the log records containing the after values and the commit
151 * record is written to the log on disk, undo discards the copy 151 * record is written to the log on disk, undo discards the copy
152 * in main-memory.) 152 * in main-memory.)
153 * 153 *
154 * a log record consists of a data area of variable length followed by 154 * a log record consists of a data area of variable length followed by
155 * a descriptor of fixed size LOGRDSIZE bytes. 155 * a descriptor of fixed size LOGRDSIZE bytes.
156 * the data area is rounded up to an integral number of 4-bytes and 156 * the data area is rounded up to an integral number of 4-bytes and
157 * must be no longer than LOGPSIZE. 157 * must be no longer than LOGPSIZE.
158 * the descriptor is of size of multiple of 4-bytes and aligned on a 158 * the descriptor is of size of multiple of 4-bytes and aligned on a
159 * 4-byte boundary. 159 * 4-byte boundary.
160 * records are packed one after the other in the data area of log pages. 160 * records are packed one after the other in the data area of log pages.
161 * (sometimes a DUMMY record is inserted so that at least one record ends 161 * (sometimes a DUMMY record is inserted so that at least one record ends
162 * on every page or the longest record is placed on at most two pages). 162 * on every page or the longest record is placed on at most two pages).
163 * the field eor in page header/trailer points to the byte following 163 * the field eor in page header/trailer points to the byte following
164 * the last record on a page. 164 * the last record on a page.
165 */ 165 */
166 166
@@ -270,11 +270,11 @@ struct lrd {
270 /* 270 /*
271 * NOREDOINOEXT: the inode extent is freed 271 * NOREDOINOEXT: the inode extent is freed
272 * 272 *
273 * do not apply after-image records which precede this 273 * do not apply after-image records which precede this
274 * record in the log with any of the 4 page block 274 * record in the log with any of the 4 page block
275 * numbers in this inode extent. 275 * numbers in this inode extent.
276 * 276 *
277 * NOTE: The fileset and pxd fields MUST remain in 277 * NOTE: The fileset and pxd fields MUST remain in
278 * the same fields in the REDOPAGE record format. 278 * the same fields in the REDOPAGE record format.
279 * 279 *
280 */ 280 */
@@ -319,12 +319,10 @@ struct lrd {
319 * do not apply records which precede this record in the log 319 * do not apply records which precede this record in the log
320 * with the same inode number. 320 * with the same inode number.
321 * 321 *
322 * NOREDILE must be the first to be written at commit 322 * NOREDOFILE must be the first to be written at commit
323 * (last to be read in logredo()) - it prevents 323 * (last to be read in logredo()) - it prevents
324 * replay of preceding updates of all preceding generations 324 * replay of preceding updates of all preceding generations
325 * of the inumber esp. the on-disk inode itself, 325 * of the inumber esp. the on-disk inode itself.
326 * but does NOT prevent
327 * replay of the
328 */ 326 */
329 struct { 327 struct {
330 __le32 fileset; /* 4: fileset number */ 328 __le32 fileset; /* 4: fileset number */
@@ -332,7 +330,7 @@ struct lrd {
332 } noredofile; 330 } noredofile;
333 331
334 /* 332 /*
335 * ? NEWPAGE: 333 * ? NEWPAGE:
336 * 334 *
337 * metadata type dependent 335 * metadata type dependent
338 */ 336 */
@@ -464,7 +462,7 @@ struct lbuf {
464 s64 l_blkno; /* 8: log page block number */ 462 s64 l_blkno; /* 8: log page block number */
465 caddr_t l_ldata; /* 4: data page */ 463 caddr_t l_ldata; /* 4: data page */
466 struct page *l_page; /* The page itself */ 464 struct page *l_page; /* The page itself */
467 uint l_offset; /* Offset of l_ldata within the page */ 465 uint l_offset; /* Offset of l_ldata within the page */
468 466
469 wait_queue_head_t l_ioevent; /* 4: i/o done event */ 467 wait_queue_head_t l_ioevent; /* 4: i/o done event */
470}; 468};
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index f5afc129d6b1..0cccd1c39d75 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
@@ -461,7 +461,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
461 goto add_failed; 461 goto add_failed;
462 if (!bio->bi_size) 462 if (!bio->bi_size)
463 goto dump_bio; 463 goto dump_bio;
464 464
465 submit_bio(WRITE, bio); 465 submit_bio(WRITE, bio);
466 } 466 }
467 if (redirty) 467 if (redirty)
@@ -648,7 +648,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
648 jfs_err("logical_size = %d, size = %d", 648 jfs_err("logical_size = %d, size = %d",
649 mp->logical_size, size); 649 mp->logical_size, size);
650 dump_stack(); 650 dump_stack();
651 goto unlock; 651 goto unlock;
652 } 652 }
653 mp->count++; 653 mp->count++;
654 lock_metapage(mp); 654 lock_metapage(mp);
@@ -658,7 +658,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
658 "__get_metapage: using a " 658 "__get_metapage: using a "
659 "discarded metapage"); 659 "discarded metapage");
660 discard_metapage(mp); 660 discard_metapage(mp);
661 goto unlock; 661 goto unlock;
662 } 662 }
663 clear_bit(META_discard, &mp->flag); 663 clear_bit(META_discard, &mp->flag);
664 } 664 }
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index 01a5a455e012..d94f8d9e87d7 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19#ifndef _H_JFS_METAPAGE 19#ifndef _H_JFS_METAPAGE
@@ -33,7 +33,7 @@ struct metapage {
33 unsigned long flag; /* See Below */ 33 unsigned long flag; /* See Below */
34 unsigned long count; /* Reference count */ 34 unsigned long count; /* Reference count */
35 void *data; /* Data pointer */ 35 void *data; /* Data pointer */
36 sector_t index; /* block address of page */ 36 sector_t index; /* block address of page */
37 wait_queue_head_t wait; 37 wait_queue_head_t wait;
38 38
39 /* implementation */ 39 /* implementation */
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 032d111bc330..4dd479834897 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
@@ -21,18 +21,18 @@
21 * 21 *
22 * note: file system in transition to aggregate/fileset: 22 * note: file system in transition to aggregate/fileset:
23 * 23 *
24 * file system mount is interpreted as the mount of aggregate, 24 * file system mount is interpreted as the mount of aggregate,
25 * if not already mounted, and mount of the single/only fileset in 25 * if not already mounted, and mount of the single/only fileset in
26 * the aggregate; 26 * the aggregate;
27 * 27 *
28 * a file system/aggregate is represented by an internal inode 28 * a file system/aggregate is represented by an internal inode
29 * (aka mount inode) initialized with aggregate superblock; 29 * (aka mount inode) initialized with aggregate superblock;
30 * each vfs represents a fileset, and points to its "fileset inode 30 * each vfs represents a fileset, and points to its "fileset inode
31 * allocation map inode" (aka fileset inode): 31 * allocation map inode" (aka fileset inode):
32 * (an aggregate itself is structured recursively as a fileset: 32 * (an aggregate itself is structured recursively as a fileset:
33 * an internal vfs is constructed and points to its "fileset inode 33 * an internal vfs is constructed and points to its "fileset inode
34 * allocation map inode" (aka aggregate inode) where each inode 34 * allocation map inode" (aka aggregate inode) where each inode
35 * represents a fileset inode) so that inode number is mapped to 35 * represents a fileset inode) so that inode number is mapped to
36 * on-disk inode in uniform way at both aggregate and fileset level; 36 * on-disk inode in uniform way at both aggregate and fileset level;
37 * 37 *
38 * each vnode/inode of a fileset is linked to its vfs (to facilitate 38 * each vnode/inode of a fileset is linked to its vfs (to facilitate
@@ -41,7 +41,7 @@
41 * per aggregate information, e.g., block size, etc.) as well as 41 * per aggregate information, e.g., block size, etc.) as well as
42 * its file set inode. 42 * its file set inode.
43 * 43 *
44 * aggregate 44 * aggregate
45 * ipmnt 45 * ipmnt
46 * mntvfs -> fileset ipimap+ -> aggregate ipbmap -> aggregate ipaimap; 46 * mntvfs -> fileset ipimap+ -> aggregate ipbmap -> aggregate ipaimap;
47 * fileset vfs -> vp(1) <-> ... <-> vp(n) <->vproot; 47 * fileset vfs -> vp(1) <-> ... <-> vp(n) <->vproot;
@@ -88,7 +88,7 @@ int jfs_mount(struct super_block *sb)
88 struct inode *ipbmap = NULL; 88 struct inode *ipbmap = NULL;
89 89
90 /* 90 /*
91 * read/validate superblock 91 * read/validate superblock
92 * (initialize mount inode from the superblock) 92 * (initialize mount inode from the superblock)
93 */ 93 */
94 if ((rc = chkSuper(sb))) { 94 if ((rc = chkSuper(sb))) {
@@ -238,7 +238,7 @@ int jfs_mount(struct super_block *sb)
238 */ 238 */
239int jfs_mount_rw(struct super_block *sb, int remount) 239int jfs_mount_rw(struct super_block *sb, int remount)
240{ 240{
241 struct jfs_sb_info *sbi = JFS_SBI(sb); 241 struct jfs_sb_info *sbi = JFS_SBI(sb);
242 int rc; 242 int rc;
243 243
244 /* 244 /*
@@ -291,7 +291,7 @@ int jfs_mount_rw(struct super_block *sb, int remount)
291/* 291/*
292 * chkSuper() 292 * chkSuper()
293 * 293 *
294 * validate the superblock of the file system to be mounted and 294 * validate the superblock of the file system to be mounted and
295 * get the file system parameters. 295 * get the file system parameters.
296 * 296 *
297 * returns 297 * returns
@@ -426,7 +426,7 @@ int updateSuper(struct super_block *sb, uint state)
426 jfs_err("updateSuper: bad state"); 426 jfs_err("updateSuper: bad state");
427 } else if (sbi->state == FM_DIRTY) 427 } else if (sbi->state == FM_DIRTY)
428 return 0; 428 return 0;
429 429
430 if ((rc = readSuper(sb, &bh))) 430 if ((rc = readSuper(sb, &bh)))
431 return rc; 431 return rc;
432 432
@@ -486,9 +486,9 @@ int readSuper(struct super_block *sb, struct buffer_head **bpp)
486 * for this file system past this point in log. 486 * for this file system past this point in log.
487 * it is harmless if mount fails. 487 * it is harmless if mount fails.
488 * 488 *
489 * note: MOUNT record is at aggregate level, not at fileset level, 489 * note: MOUNT record is at aggregate level, not at fileset level,
490 * since log records of previous mounts of a fileset 490 * since log records of previous mounts of a fileset
491 * (e.g., AFTER record of extent allocation) have to be processed 491 * (e.g., AFTER record of extent allocation) have to be processed
492 * to update block allocation map at aggregate level. 492 * to update block allocation map at aggregate level.
493 */ 493 */
494static int logMOUNT(struct super_block *sb) 494static int logMOUNT(struct super_block *sb)
diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h
index 682cf1a68a18..884fc21ab8ee 100644
--- a/fs/jfs/jfs_superblock.h
+++ b/fs/jfs/jfs_superblock.h
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_SUPERBLOCK 18#ifndef _H_JFS_SUPERBLOCK
@@ -21,14 +21,14 @@
21/* 21/*
22 * make the magic number something a human could read 22 * make the magic number something a human could read
23 */ 23 */
24#define JFS_MAGIC "JFS1" /* Magic word */ 24#define JFS_MAGIC "JFS1" /* Magic word */
25 25
26#define JFS_VERSION 2 /* Version number: Version 2 */ 26#define JFS_VERSION 2 /* Version number: Version 2 */
27 27
28#define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */ 28#define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */
29 29
30/* 30/*
31 * aggregate superblock 31 * aggregate superblock
32 * 32 *
33 * The name superblock is too close to super_block, so the name has been 33 * The name superblock is too close to super_block, so the name has been
34 * changed to jfs_superblock. The utilities are still using the old name. 34 * changed to jfs_superblock. The utilities are still using the old name.
@@ -40,7 +40,7 @@ struct jfs_superblock {
40 __le64 s_size; /* 8: aggregate size in hardware/LVM blocks; 40 __le64 s_size; /* 8: aggregate size in hardware/LVM blocks;
41 * VFS: number of blocks 41 * VFS: number of blocks
42 */ 42 */
43 __le32 s_bsize; /* 4: aggregate block size in bytes; 43 __le32 s_bsize; /* 4: aggregate block size in bytes;
44 * VFS: fragment size 44 * VFS: fragment size
45 */ 45 */
46 __le16 s_l2bsize; /* 2: log2 of s_bsize */ 46 __le16 s_l2bsize; /* 2: log2 of s_bsize */
@@ -54,7 +54,7 @@ struct jfs_superblock {
54 __le32 s_flag; /* 4: aggregate attributes: 54 __le32 s_flag; /* 4: aggregate attributes:
55 * see jfs_filsys.h 55 * see jfs_filsys.h
56 */ 56 */
57 __le32 s_state; /* 4: mount/unmount/recovery state: 57 __le32 s_state; /* 4: mount/unmount/recovery state:
58 * see jfs_filsys.h 58 * see jfs_filsys.h
59 */ 59 */
60 __le32 s_compress; /* 4: > 0 if data compression */ 60 __le32 s_compress; /* 4: > 0 if data compression */
@@ -75,11 +75,11 @@ struct jfs_superblock {
75 struct timestruc_t s_time; /* 8: time last updated */ 75 struct timestruc_t s_time; /* 8: time last updated */
76 76
77 __le32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for 77 __le32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for
78 * the fsck service log. 78 * the fsck service log.
79 * N.B. These blocks are divided among the 79 * N.B. These blocks are divided among the
80 * versions kept. This is not a per 80 * versions kept. This is not a per
81 * version size. 81 * version size.
82 * N.B. These blocks are included in the 82 * N.B. These blocks are included in the
83 * length field of s_fsckpxd. 83 * length field of s_fsckpxd.
84 */ 84 */
85 s8 s_fscklog; /* 1: which fsck service log is most recent 85 s8 s_fscklog; /* 1: which fsck service log is most recent
@@ -87,7 +87,7 @@ struct jfs_superblock {
87 * 1 => the first one 87 * 1 => the first one
88 * 2 => the 2nd one 88 * 2 => the 2nd one
89 */ 89 */
90 char s_fpack[11]; /* 11: file system volume name 90 char s_fpack[11]; /* 11: file system volume name
91 * N.B. This must be 11 bytes to 91 * N.B. This must be 11 bytes to
92 * conform with the OS/2 BootSector 92 * conform with the OS/2 BootSector
93 * requirements 93 * requirements
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index ebfa6c061d78..81f6f04af192 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
@@ -2026,8 +2026,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2026 * truncate entry XAD[twm == next - 1]: 2026 * truncate entry XAD[twm == next - 1]:
2027 */ 2027 */
2028 if (twm == next - 1) { 2028 if (twm == next - 1) {
2029 struct pxd_lock *pxdlock;
2030
2031 /* format a maplock for txUpdateMap() to update bmap 2029 /* format a maplock for txUpdateMap() to update bmap
2032 * to free truncated delta extent of the truncated 2030 * to free truncated delta extent of the truncated
2033 * entry XAD[next - 1]; 2031 * entry XAD[next - 1];
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index 0e4dc4514c47..7863cf21afca 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_TXNMGR 18#ifndef _H_JFS_TXNMGR
@@ -179,7 +179,7 @@ struct linelock {
179 /* (8) */ 179 /* (8) */
180 180
181 struct lv lv[20]; /* 40: */ 181 struct lv lv[20]; /* 40: */
182}; /* (48) */ 182}; /* (48) */
183 183
184#define dt_lock linelock 184#define dt_lock linelock
185 185
@@ -211,8 +211,8 @@ struct xtlock {
211 * at tlock.lock/linelock: watch for alignment; 211 * at tlock.lock/linelock: watch for alignment;
212 * N.B. next field may be set by linelock, and should not 212 * N.B. next field may be set by linelock, and should not
213 * be modified by maplock; 213 * be modified by maplock;
214 * N.B. index of the first pxdlock specifies index of next 214 * N.B. index of the first pxdlock specifies index of next
215 * free maplock (i.e., number of maplock) in the tlock; 215 * free maplock (i.e., number of maplock) in the tlock;
216 */ 216 */
217struct maplock { 217struct maplock {
218 lid_t next; /* 2: */ 218 lid_t next; /* 2: */
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index 21eaf7ac0fcb..a386f48c73fc 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
@@ -22,8 +22,8 @@
22 * note: file system in transition to aggregate/fileset: 22 * note: file system in transition to aggregate/fileset:
23 * (ref. jfs_mount.c) 23 * (ref. jfs_mount.c)
24 * 24 *
25 * file system unmount is interpreted as unmount of the single/only 25 * file system unmount is interpreted as unmount of the single/only
26 * fileset in the aggregate and, if unmount of the last fileset, 26 * fileset in the aggregate and, if unmount of the last fileset,
27 * as unmount of the aggregate; 27 * as unmount of the aggregate;
28 */ 28 */
29 29
@@ -60,13 +60,13 @@ int jfs_umount(struct super_block *sb)
60 jfs_info("UnMount JFS: sb:0x%p", sb); 60 jfs_info("UnMount JFS: sb:0x%p", sb);
61 61
62 /* 62 /*
63 * update superblock and close log 63 * update superblock and close log
64 * 64 *
65 * if mounted read-write and log based recovery was enabled 65 * if mounted read-write and log based recovery was enabled
66 */ 66 */
67 if ((log = sbi->log)) 67 if ((log = sbi->log))
68 /* 68 /*
69 * Wait for outstanding transactions to be written to log: 69 * Wait for outstanding transactions to be written to log:
70 */ 70 */
71 jfs_flush_journal(log, 2); 71 jfs_flush_journal(log, 2);
72 72
@@ -112,17 +112,17 @@ int jfs_umount(struct super_block *sb)
112 112
113 /* 113 /*
114 * ensure all file system file pages are propagated to their 114 * ensure all file system file pages are propagated to their
115 * home blocks on disk (and their in-memory buffer pages are 115 * home blocks on disk (and their in-memory buffer pages are
116 * invalidated) BEFORE updating file system superblock state 116 * invalidated) BEFORE updating file system superblock state
117 * (to signify file system is unmounted cleanly, and thus in 117 * (to signify file system is unmounted cleanly, and thus in
118 * consistent state) and log superblock active file system 118 * consistent state) and log superblock active file system
119 * list (to signify skip logredo()). 119 * list (to signify skip logredo()).
120 */ 120 */
121 if (log) { /* log = NULL if read-only mount */ 121 if (log) { /* log = NULL if read-only mount */
122 updateSuper(sb, FM_CLEAN); 122 updateSuper(sb, FM_CLEAN);
123 123
124 /* 124 /*
125 * close log: 125 * close log:
126 * 126 *
127 * remove file system from log active file system list. 127 * remove file system from log active file system list.
128 */ 128 */
@@ -142,7 +142,7 @@ int jfs_umount_rw(struct super_block *sb)
142 return 0; 142 return 0;
143 143
144 /* 144 /*
145 * close log: 145 * close log:
146 * 146 *
147 * remove file system from log active file system list. 147 * remove file system from log active file system list.
148 */ 148 */
diff --git a/fs/jfs/jfs_unicode.c b/fs/jfs/jfs_unicode.c
index f327decfb155..c7de6f5bbefc 100644
--- a/fs/jfs/jfs_unicode.c
+++ b/fs/jfs/jfs_unicode.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
@@ -57,8 +57,8 @@ int jfs_strfromUCS_le(char *to, const __le16 * from,
57 warn--; 57 warn--;
58 warn_again--; 58 warn_again--;
59 printk(KERN_ERR 59 printk(KERN_ERR
60 "non-latin1 character 0x%x found in JFS file name\n", 60 "non-latin1 character 0x%x found in JFS file name\n",
61 le16_to_cpu(from[i])); 61 le16_to_cpu(from[i]));
62 printk(KERN_ERR 62 printk(KERN_ERR
63 "mount with iocharset=utf8 to access\n"); 63 "mount with iocharset=utf8 to access\n");
64 } 64 }
@@ -124,7 +124,7 @@ int get_UCSname(struct component_name * uniName, struct dentry *dentry)
124 kmalloc((length + 1) * sizeof(wchar_t), GFP_NOFS); 124 kmalloc((length + 1) * sizeof(wchar_t), GFP_NOFS);
125 125
126 if (uniName->name == NULL) 126 if (uniName->name == NULL)
127 return -ENOSPC; 127 return -ENOMEM;
128 128
129 uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name, 129 uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name,
130 length, nls_tab); 130 length, nls_tab);
diff --git a/fs/jfs/jfs_unicode.h b/fs/jfs/jfs_unicode.h
index 69e25ebe87ac..3fbb3a225590 100644
--- a/fs/jfs/jfs_unicode.h
+++ b/fs/jfs/jfs_unicode.h
@@ -1,19 +1,19 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2000-2002 2 * Copyright (C) International Business Machines Corp., 2000-2002
3 * Portions Copyright (c) Christoph Hellwig, 2001-2002 3 * Portions Copyright (C) Christoph Hellwig, 2001-2002
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19#ifndef _H_JFS_UNICODE 19#ifndef _H_JFS_UNICODE
diff --git a/fs/jfs/jfs_uniupr.c b/fs/jfs/jfs_uniupr.c
index 4ab185d26308..cfe50666d312 100644
--- a/fs/jfs/jfs_uniupr.c
+++ b/fs/jfs/jfs_uniupr.c
@@ -1,18 +1,18 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2000-2002 2 * Copyright (C) International Business Machines Corp., 2000-2002
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index 25e9990bccd1..88b6cc535bf2 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2000-2002 2 * Copyright (C) International Business Machines Corp., 2000-2002
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index c92307d3a57e..e98eb03e5310 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18/* 18/*
@@ -2428,7 +2428,7 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2428 * return: 2428 * return:
2429 */ 2429 */
2430int xtAppend(tid_t tid, /* transaction id */ 2430int xtAppend(tid_t tid, /* transaction id */
2431 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2431 struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
2432 s32 * xlenp, /* (in/out) */ 2432 s32 * xlenp, /* (in/out) */
2433 s64 * xaddrp, /* (in/out) */ 2433 s64 * xaddrp, /* (in/out) */
2434 int flag) 2434 int flag)
@@ -2499,7 +2499,7 @@ int xtAppend(tid_t tid, /* transaction id */
2499 pxdlist.maxnpxd = pxdlist.npxd = 0; 2499 pxdlist.maxnpxd = pxdlist.npxd = 0;
2500 pxd = &pxdlist.pxd[0]; 2500 pxd = &pxdlist.pxd[0];
2501 nblocks = JFS_SBI(ip->i_sb)->nbperpage; 2501 nblocks = JFS_SBI(ip->i_sb)->nbperpage;
2502 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { 2502 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2503 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { 2503 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2504 PXDaddress(pxd, xaddr); 2504 PXDaddress(pxd, xaddr);
2505 PXDlength(pxd, nblocks); 2505 PXDlength(pxd, nblocks);
@@ -2514,7 +2514,7 @@ int xtAppend(tid_t tid, /* transaction id */
2514 goto out; 2514 goto out;
2515 } 2515 }
2516 2516
2517 xlen = min(xlen, maxblocks); 2517 xlen = min(xlen, maxblocks);
2518 2518
2519 /* 2519 /*
2520 * allocate data extent requested 2520 * allocate data extent requested
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index af668a80b40f..164f6f2b1019 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -1,18 +1,18 @@
1/* 1/*
2 * Copyright (c) International Business Machines Corp., 2000-2002 2 * Copyright (C) International Business Machines Corp., 2000-2002
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18#ifndef _H_JFS_XTREE 18#ifndef _H_JFS_XTREE
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index b8d16a6aa88f..a6a8c16c872c 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
@@ -41,7 +41,7 @@ static s64 commitZeroLink(tid_t, struct inode *);
41/* 41/*
42 * NAME: free_ea_wmap(inode) 42 * NAME: free_ea_wmap(inode)
43 * 43 *
44 * FUNCTION: free uncommitted extended attributes from working map 44 * FUNCTION: free uncommitted extended attributes from working map
45 * 45 *
46 */ 46 */
47static inline void free_ea_wmap(struct inode *inode) 47static inline void free_ea_wmap(struct inode *inode)
@@ -62,7 +62,7 @@ static inline void free_ea_wmap(struct inode *inode)
62 * FUNCTION: create a regular file in the parent directory <dip> 62 * FUNCTION: create a regular file in the parent directory <dip>
63 * with name = <from dentry> and mode = <mode> 63 * with name = <from dentry> and mode = <mode>
64 * 64 *
65 * PARAMETER: dip - parent directory vnode 65 * PARAMETER: dip - parent directory vnode
66 * dentry - dentry of new file 66 * dentry - dentry of new file
67 * mode - create mode (rwxrwxrwx). 67 * mode - create mode (rwxrwxrwx).
68 * nd- nd struct 68 * nd- nd struct
@@ -97,8 +97,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
97 * begin the transaction before we search the directory. 97 * begin the transaction before we search the directory.
98 */ 98 */
99 ip = ialloc(dip, mode); 99 ip = ialloc(dip, mode);
100 if (ip == NULL) { 100 if (IS_ERR(ip)) {
101 rc = -ENOSPC; 101 rc = PTR_ERR(ip);
102 goto out2; 102 goto out2;
103 } 103 }
104 104
@@ -190,7 +190,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
190 * FUNCTION: create a child directory in the parent directory <dip> 190 * FUNCTION: create a child directory in the parent directory <dip>
191 * with name = <from dentry> and mode = <mode> 191 * with name = <from dentry> and mode = <mode>
192 * 192 *
193 * PARAMETER: dip - parent directory vnode 193 * PARAMETER: dip - parent directory vnode
194 * dentry - dentry of child directory 194 * dentry - dentry of child directory
195 * mode - create mode (rwxrwxrwx). 195 * mode - create mode (rwxrwxrwx).
196 * 196 *
@@ -231,8 +231,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
231 * begin the transaction before we search the directory. 231 * begin the transaction before we search the directory.
232 */ 232 */
233 ip = ialloc(dip, S_IFDIR | mode); 233 ip = ialloc(dip, S_IFDIR | mode);
234 if (ip == NULL) { 234 if (IS_ERR(ip)) {
235 rc = -ENOSPC; 235 rc = PTR_ERR(ip);
236 goto out2; 236 goto out2;
237 } 237 }
238 238
@@ -324,7 +324,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
324 * 324 *
325 * FUNCTION: remove a link to child directory 325 * FUNCTION: remove a link to child directory
326 * 326 *
327 * PARAMETER: dip - parent inode 327 * PARAMETER: dip - parent inode
328 * dentry - child directory dentry 328 * dentry - child directory dentry
329 * 329 *
330 * RETURN: -EINVAL - if name is . or .. 330 * RETURN: -EINVAL - if name is . or ..
@@ -332,10 +332,10 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
332 * errors from subroutines 332 * errors from subroutines
333 * 333 *
334 * note: 334 * note:
335 * if other threads have the directory open when the last link 335 * if other threads have the directory open when the last link
336 * is removed, the "." and ".." entries, if present, are removed before 336 * is removed, the "." and ".." entries, if present, are removed before
337 * rmdir() returns and no new entries may be created in the directory, 337 * rmdir() returns and no new entries may be created in the directory,
338 * but the directory is not removed until the last reference to 338 * but the directory is not removed until the last reference to
339 * the directory is released (cf.unlink() of regular file). 339 * the directory is released (cf.unlink() of regular file).
340 */ 340 */
341static int jfs_rmdir(struct inode *dip, struct dentry *dentry) 341static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
@@ -446,11 +446,11 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
446/* 446/*
447 * NAME: jfs_unlink(dip, dentry) 447 * NAME: jfs_unlink(dip, dentry)
448 * 448 *
449 * FUNCTION: remove a link to object <vp> named by <name> 449 * FUNCTION: remove a link to object <vp> named by <name>
450 * from parent directory <dvp> 450 * from parent directory <dvp>
451 * 451 *
452 * PARAMETER: dip - inode of parent directory 452 * PARAMETER: dip - inode of parent directory
453 * dentry - dentry of object to be removed 453 * dentry - dentry of object to be removed
454 * 454 *
455 * RETURN: errors from subroutines 455 * RETURN: errors from subroutines
456 * 456 *
@@ -598,7 +598,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
598 * 598 *
599 * FUNCTION: for non-directory, called by jfs_remove(), 599 * FUNCTION: for non-directory, called by jfs_remove(),
600 * truncate a regular file, directory or symbolic 600 * truncate a regular file, directory or symbolic
601 * link to zero length. return 0 if type is not 601 * link to zero length. return 0 if type is not
602 * one of these. 602 * one of these.
603 * 603 *
604 * if the file is currently associated with a VM segment 604 * if the file is currently associated with a VM segment
@@ -608,7 +608,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
608 * map by ctrunc1. 608 * map by ctrunc1.
609 * if there is no VM segment on entry, the resources are 609 * if there is no VM segment on entry, the resources are
610 * freed in both work and permanent map. 610 * freed in both work and permanent map.
611 * (? for temporary file - memory object is cached even 611 * (? for temporary file - memory object is cached even
612 * after no reference: 612 * after no reference:
613 * reference count > 0 - ) 613 * reference count > 0 - )
614 * 614 *
@@ -662,7 +662,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
662 662
663 /* 663 /*
664 * free xtree/data (truncate to zero length): 664 * free xtree/data (truncate to zero length):
665 * free xtree/data pages from cache if COMMIT_PWMAP, 665 * free xtree/data pages from cache if COMMIT_PWMAP,
666 * free xtree/data blocks from persistent block map, and 666 * free xtree/data blocks from persistent block map, and
667 * free xtree/data blocks from working block map if COMMIT_PWMAP; 667 * free xtree/data blocks from working block map if COMMIT_PWMAP;
668 */ 668 */
@@ -677,7 +677,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
677 * NAME: jfs_free_zero_link() 677 * NAME: jfs_free_zero_link()
678 * 678 *
679 * FUNCTION: for non-directory, called by iClose(), 679 * FUNCTION: for non-directory, called by iClose(),
680 * free resources of a file from cache and WORKING map 680 * free resources of a file from cache and WORKING map
681 * for a file previously committed with zero link count 681 * for a file previously committed with zero link count
682 * while associated with a pager object, 682 * while associated with a pager object,
683 * 683 *
@@ -762,7 +762,7 @@ void jfs_free_zero_link(struct inode *ip)
762 * FUNCTION: create a link to <vp> by the name = <name> 762 * FUNCTION: create a link to <vp> by the name = <name>
763 * in the parent directory <dvp> 763 * in the parent directory <dvp>
764 * 764 *
765 * PARAMETER: vp - target object 765 * PARAMETER: vp - target object
766 * dvp - parent directory of new link 766 * dvp - parent directory of new link
767 * name - name of new link to target object 767 * name - name of new link to target object
768 * crp - credential 768 * crp - credential
@@ -858,8 +858,8 @@ static int jfs_link(struct dentry *old_dentry,
858 * in directory <dip> 858 * in directory <dip>
859 * 859 *
860 * PARAMETER: dip - parent directory vnode 860 * PARAMETER: dip - parent directory vnode
861 * dentry - dentry of symbolic link 861 * dentry - dentry of symbolic link
862 * name - the path name of the existing object 862 * name - the path name of the existing object
863 * that will be the source of the link 863 * that will be the source of the link
864 * 864 *
865 * RETURN: errors from subroutines 865 * RETURN: errors from subroutines
@@ -906,8 +906,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
906 * (iAlloc() returns new, locked inode) 906 * (iAlloc() returns new, locked inode)
907 */ 907 */
908 ip = ialloc(dip, S_IFLNK | 0777); 908 ip = ialloc(dip, S_IFLNK | 0777);
909 if (ip == NULL) { 909 if (IS_ERR(ip)) {
910 rc = -ENOSPC; 910 rc = PTR_ERR(ip);
911 goto out2; 911 goto out2;
912 } 912 }
913 913
@@ -926,7 +926,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
926 tblk->u.ixpxd = JFS_IP(ip)->ixpxd; 926 tblk->u.ixpxd = JFS_IP(ip)->ixpxd;
927 927
928 /* fix symlink access permission 928 /* fix symlink access permission
929 * (dir_create() ANDs in the u.u_cmask, 929 * (dir_create() ANDs in the u.u_cmask,
930 * but symlinks really need to be 777 access) 930 * but symlinks really need to be 777 access)
931 */ 931 */
932 ip->i_mode |= 0777; 932 ip->i_mode |= 0777;
@@ -967,7 +967,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
967 ip->i_mapping->a_ops = &jfs_aops; 967 ip->i_mapping->a_ops = &jfs_aops;
968 968
969 /* 969 /*
970 * even though the data of symlink object (source 970 * even though the data of symlink object (source
971 * path name) is treated as non-journaled user data, 971 * path name) is treated as non-journaled user data,
972 * it is read/written thru buffer cache for performance. 972 * it is read/written thru buffer cache for performance.
973 */ 973 */
@@ -978,7 +978,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
978 xlen = xsize >> JFS_SBI(sb)->l2bsize; 978 xlen = xsize >> JFS_SBI(sb)->l2bsize;
979 if ((rc = xtInsert(tid, ip, 0, 0, xlen, &xaddr, 0))) { 979 if ((rc = xtInsert(tid, ip, 0, 0, xlen, &xaddr, 0))) {
980 txAbort(tid, 0); 980 txAbort(tid, 0);
981 rc = -ENOSPC;
982 goto out3; 981 goto out3;
983 } 982 }
984 extent = xaddr; 983 extent = xaddr;
@@ -1176,7 +1175,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1176 /* free block resources */ 1175 /* free block resources */
1177 if ((new_size = commitZeroLink(tid, new_ip)) < 0) { 1176 if ((new_size = commitZeroLink(tid, new_ip)) < 0) {
1178 txAbort(tid, 1); /* Marks FS Dirty */ 1177 txAbort(tid, 1); /* Marks FS Dirty */
1179 rc = new_size; 1178 rc = new_size;
1180 goto out4; 1179 goto out4;
1181 } 1180 }
1182 tblk = tid_to_tblock(tid); 1181 tblk = tid_to_tblock(tid);
@@ -1292,7 +1291,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1292 new_size = xtTruncate_pmap(tid, new_ip, new_size); 1291 new_size = xtTruncate_pmap(tid, new_ip, new_size);
1293 if (new_size < 0) { 1292 if (new_size < 0) {
1294 txAbort(tid, 1); 1293 txAbort(tid, 1);
1295 rc = new_size; 1294 rc = new_size;
1296 } else 1295 } else
1297 rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC); 1296 rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC);
1298 txEnd(tid); 1297 txEnd(tid);
@@ -1350,8 +1349,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1350 goto out; 1349 goto out;
1351 1350
1352 ip = ialloc(dir, mode); 1351 ip = ialloc(dir, mode);
1353 if (ip == NULL) { 1352 if (IS_ERR(ip)) {
1354 rc = -ENOSPC; 1353 rc = PTR_ERR(ip);
1355 goto out1; 1354 goto out1;
1356 } 1355 }
1357 jfs_ip = JFS_IP(ip); 1356 jfs_ip = JFS_IP(ip);
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 45180361871c..79d625f3f733 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/ 17*/
18 18
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 143bcd1d5eaa..9c1c6e0e633d 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
@@ -82,7 +82,7 @@ static void jfs_handle_error(struct super_block *sb)
82 "as read-only\n", 82 "as read-only\n",
83 sb->s_id); 83 sb->s_id);
84 sb->s_flags |= MS_RDONLY; 84 sb->s_flags |= MS_RDONLY;
85 } 85 }
86 86
87 /* nothing is done for continue beyond marking the superblock dirty */ 87 /* nothing is done for continue beyond marking the superblock dirty */
88} 88}
@@ -422,7 +422,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
422 422
423 sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); 423 sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL);
424 if (!sbi) 424 if (!sbi)
425 return -ENOSPC; 425 return -ENOMEM;
426 sb->s_fs_info = sbi; 426 sb->s_fs_info = sbi;
427 sbi->sb = sb; 427 sbi->sb = sb;
428 sbi->uid = sbi->gid = sbi->umask = -1; 428 sbi->uid = sbi->gid = sbi->umask = -1;
@@ -775,7 +775,7 @@ static int __init init_jfs_fs(void)
775 int rc; 775 int rc;
776 776
777 jfs_inode_cachep = 777 jfs_inode_cachep =
778 kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, 778 kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0,
779 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, 779 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
780 init_once, NULL); 780 init_once, NULL);
781 if (jfs_inode_cachep == NULL) 781 if (jfs_inode_cachep == NULL)
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 16477b3835e1..cee43f36f51d 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -3,16 +3,16 @@
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details. 12 * the GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 7a10e1928961..4c7985ebca92 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -4,16 +4,16 @@
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details. 13 * the GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
@@ -57,7 +57,7 @@
57 * 57 *
58 * 0 4 4 + EA_SIZE(ea1) 58 * 0 4 4 + EA_SIZE(ea1)
59 * +------------+-------------------+--------------------+----- 59 * +------------+-------------------+--------------------+-----
60 * | Overall EA | First FEA Element | Second FEA Element | ..... 60 * | Overall EA | First FEA Element | Second FEA Element | .....
61 * | List Size | | | 61 * | List Size | | |
62 * +------------+-------------------+--------------------+----- 62 * +------------+-------------------+--------------------+-----
63 * 63 *
@@ -155,9 +155,9 @@ static void ea_release(struct inode *inode, struct ea_buffer *ea_buf);
155 155
156/* 156/*
157 * NAME: ea_write_inline 157 * NAME: ea_write_inline
158 * 158 *
159 * FUNCTION: Attempt to write an EA inline if area is available 159 * FUNCTION: Attempt to write an EA inline if area is available
160 * 160 *
161 * PRE CONDITIONS: 161 * PRE CONDITIONS:
162 * Already verified that the specified EA is small enough to fit inline 162 * Already verified that the specified EA is small enough to fit inline
163 * 163 *
@@ -216,10 +216,10 @@ static int ea_write_inline(struct inode *ip, struct jfs_ea_list *ealist,
216 216
217/* 217/*
218 * NAME: ea_write 218 * NAME: ea_write
219 * 219 *
220 * FUNCTION: Write an EA for an inode 220 * FUNCTION: Write an EA for an inode
221 * 221 *
222 * PRE CONDITIONS: EA has been verified 222 * PRE CONDITIONS: EA has been verified
223 * 223 *
224 * PARAMETERS: 224 * PARAMETERS:
225 * ip - Inode pointer 225 * ip - Inode pointer
@@ -340,9 +340,9 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size,
340 340
341/* 341/*
342 * NAME: ea_read_inline 342 * NAME: ea_read_inline
343 * 343 *
344 * FUNCTION: Read an inlined EA into user's buffer 344 * FUNCTION: Read an inlined EA into user's buffer
345 * 345 *
346 * PARAMETERS: 346 * PARAMETERS:
347 * ip - Inode pointer 347 * ip - Inode pointer
348 * ealist - Pointer to buffer to fill in with EA 348 * ealist - Pointer to buffer to fill in with EA
@@ -372,9 +372,9 @@ static int ea_read_inline(struct inode *ip, struct jfs_ea_list *ealist)
372 372
373/* 373/*
374 * NAME: ea_read 374 * NAME: ea_read
375 * 375 *
376 * FUNCTION: copy EA data into user's buffer 376 * FUNCTION: copy EA data into user's buffer
377 * 377 *
378 * PARAMETERS: 378 * PARAMETERS:
379 * ip - Inode pointer 379 * ip - Inode pointer
380 * ealist - Pointer to buffer to fill in with EA 380 * ealist - Pointer to buffer to fill in with EA
@@ -406,7 +406,7 @@ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist)
406 return -EIO; 406 return -EIO;
407 } 407 }
408 408
409 /* 409 /*
410 * Figure out how many blocks were allocated when this EA list was 410 * Figure out how many blocks were allocated when this EA list was
411 * originally written to disk. 411 * originally written to disk.
412 */ 412 */
@@ -443,14 +443,14 @@ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist)
443 443
444/* 444/*
445 * NAME: ea_get 445 * NAME: ea_get
446 * 446 *
447 * FUNCTION: Returns buffer containing existing extended attributes. 447 * FUNCTION: Returns buffer containing existing extended attributes.
448 * The size of the buffer will be the larger of the existing 448 * The size of the buffer will be the larger of the existing
449 * attributes size, or min_size. 449 * attributes size, or min_size.
450 * 450 *
451 * The buffer, which may be inlined in the inode or in the 451 * The buffer, which may be inlined in the inode or in the
452 * page cache must be release by calling ea_release or ea_put 452 * page cache must be release by calling ea_release or ea_put
453 * 453 *
454 * PARAMETERS: 454 * PARAMETERS:
455 * inode - Inode pointer 455 * inode - Inode pointer
456 * ea_buf - Structure to be populated with ealist and its metadata 456 * ea_buf - Structure to be populated with ealist and its metadata
@@ -1054,7 +1054,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
1054 1054
1055 /* compute required size of list */ 1055 /* compute required size of list */
1056 for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { 1056 for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) {
1057 if (can_list(ea)) 1057 if (can_list(ea))
1058 size += name_size(ea) + 1; 1058 size += name_size(ea) + 1;
1059 } 1059 }
1060 1060
@@ -1069,7 +1069,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
1069 /* Copy attribute names to buffer */ 1069 /* Copy attribute names to buffer */
1070 buffer = data; 1070 buffer = data;
1071 for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { 1071 for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) {
1072 if (can_list(ea)) { 1072 if (can_list(ea)) {
1073 int namelen = copy_name(buffer, ea); 1073 int namelen = copy_name(buffer, ea);
1074 buffer += namelen + 1; 1074 buffer += namelen + 1;
1075 } 1075 }
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index f95cc3f3c42d..e8c7765419e8 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -144,42 +144,12 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
144 */ 144 */
145 145
146/* 146/*
147 * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
148 * that we mark locks for reclaiming, and that we bump the pseudo NSM state.
149 */
150static void nlmclnt_prepare_reclaim(struct nlm_host *host)
151{
152 down_write(&host->h_rwsem);
153 host->h_monitored = 0;
154 host->h_state++;
155 host->h_nextrebind = 0;
156 nlm_rebind_host(host);
157
158 /*
159 * Mark the locks for reclaiming.
160 */
161 list_splice_init(&host->h_granted, &host->h_reclaim);
162
163 dprintk("NLM: reclaiming locks for host %s\n", host->h_name);
164}
165
166static void nlmclnt_finish_reclaim(struct nlm_host *host)
167{
168 host->h_reclaiming = 0;
169 up_write(&host->h_rwsem);
170 dprintk("NLM: done reclaiming locks for host %s", host->h_name);
171}
172
173/*
174 * Reclaim all locks on server host. We do this by spawning a separate 147 * Reclaim all locks on server host. We do this by spawning a separate
175 * reclaimer thread. 148 * reclaimer thread.
176 */ 149 */
177void 150void
178nlmclnt_recovery(struct nlm_host *host, u32 newstate) 151nlmclnt_recovery(struct nlm_host *host)
179{ 152{
180 if (host->h_nsmstate == newstate)
181 return;
182 host->h_nsmstate = newstate;
183 if (!host->h_reclaiming++) { 153 if (!host->h_reclaiming++) {
184 nlm_get_host(host); 154 nlm_get_host(host);
185 __module_get(THIS_MODULE); 155 __module_get(THIS_MODULE);
@@ -199,18 +169,30 @@ reclaimer(void *ptr)
199 daemonize("%s-reclaim", host->h_name); 169 daemonize("%s-reclaim", host->h_name);
200 allow_signal(SIGKILL); 170 allow_signal(SIGKILL);
201 171
172 down_write(&host->h_rwsem);
173
202 /* This one ensures that our parent doesn't terminate while the 174 /* This one ensures that our parent doesn't terminate while the
203 * reclaim is in progress */ 175 * reclaim is in progress */
204 lock_kernel(); 176 lock_kernel();
205 lockd_up(); 177 lockd_up(0); /* note: this cannot fail as lockd is already running */
178
179 dprintk("lockd: reclaiming locks for host %s", host->h_name);
206 180
207 nlmclnt_prepare_reclaim(host);
208 /* First, reclaim all locks that have been marked. */
209restart: 181restart:
210 nsmstate = host->h_nsmstate; 182 nsmstate = host->h_nsmstate;
183
184 /* Force a portmap getport - the peer's lockd will
185 * most likely end up on a different port.
186 */
187 host->h_nextrebind = jiffies;
188 nlm_rebind_host(host);
189
190 /* First, reclaim all locks that have been granted. */
191 list_splice_init(&host->h_granted, &host->h_reclaim);
211 list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { 192 list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
212 list_del_init(&fl->fl_u.nfs_fl.list); 193 list_del_init(&fl->fl_u.nfs_fl.list);
213 194
195 /* Why are we leaking memory here? --okir */
214 if (signalled()) 196 if (signalled())
215 continue; 197 continue;
216 if (nlmclnt_reclaim(host, fl) != 0) 198 if (nlmclnt_reclaim(host, fl) != 0)
@@ -218,11 +200,13 @@ restart:
218 list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); 200 list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
219 if (host->h_nsmstate != nsmstate) { 201 if (host->h_nsmstate != nsmstate) {
220 /* Argh! The server rebooted again! */ 202 /* Argh! The server rebooted again! */
221 list_splice_init(&host->h_granted, &host->h_reclaim);
222 goto restart; 203 goto restart;
223 } 204 }
224 } 205 }
225 nlmclnt_finish_reclaim(host); 206
207 host->h_reclaiming = 0;
208 up_write(&host->h_rwsem);
209 dprintk("NLM: done reclaiming locks for host %s", host->h_name);
226 210
227 /* Now, wake up all processes that sleep on a blocked lock */ 211 /* Now, wake up all processes that sleep on a blocked lock */
228 list_for_each_entry(block, &nlm_blocked, b_list) { 212 list_for_each_entry(block, &nlm_blocked, b_list) {
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 271e2165fff6..3d84f600b633 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -36,14 +36,14 @@ static const struct rpc_call_ops nlmclnt_cancel_ops;
36/* 36/*
37 * Cookie counter for NLM requests 37 * Cookie counter for NLM requests
38 */ 38 */
39static u32 nlm_cookie = 0x1234; 39static atomic_t nlm_cookie = ATOMIC_INIT(0x1234);
40 40
41static inline void nlmclnt_next_cookie(struct nlm_cookie *c) 41void nlmclnt_next_cookie(struct nlm_cookie *c)
42{ 42{
43 memcpy(c->data, &nlm_cookie, 4); 43 u32 cookie = atomic_inc_return(&nlm_cookie);
44 memset(c->data+4, 0, 4); 44
45 memcpy(c->data, &cookie, 4);
45 c->len=4; 46 c->len=4;
46 nlm_cookie++;
47} 47}
48 48
49static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner) 49static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner)
@@ -129,11 +129,11 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
129 nlmclnt_next_cookie(&argp->cookie); 129 nlmclnt_next_cookie(&argp->cookie);
130 argp->state = nsm_local_state; 130 argp->state = nsm_local_state;
131 memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh)); 131 memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
132 lock->caller = system_utsname.nodename; 132 lock->caller = utsname()->nodename;
133 lock->oh.data = req->a_owner; 133 lock->oh.data = req->a_owner;
134 lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", 134 lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
135 (unsigned int)fl->fl_u.nfs_fl.owner->pid, 135 (unsigned int)fl->fl_u.nfs_fl.owner->pid,
136 system_utsname.nodename); 136 utsname()->nodename);
137 lock->svid = fl->fl_u.nfs_fl.owner->pid; 137 lock->svid = fl->fl_u.nfs_fl.owner->pid;
138 lock->fl.fl_start = fl->fl_start; 138 lock->fl.fl_start = fl->fl_start;
139 lock->fl.fl_end = fl->fl_end; 139 lock->fl.fl_end = fl->fl_end;
@@ -153,6 +153,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
153{ 153{
154 struct rpc_clnt *client = NFS_CLIENT(inode); 154 struct rpc_clnt *client = NFS_CLIENT(inode);
155 struct sockaddr_in addr; 155 struct sockaddr_in addr;
156 struct nfs_server *nfssrv = NFS_SERVER(inode);
156 struct nlm_host *host; 157 struct nlm_host *host;
157 struct nlm_rqst *call; 158 struct nlm_rqst *call;
158 sigset_t oldset; 159 sigset_t oldset;
@@ -166,7 +167,9 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
166 } 167 }
167 168
168 rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr)); 169 rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
169 host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers); 170 host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers,
171 nfssrv->nfs_client->cl_hostname,
172 strlen(nfssrv->nfs_client->cl_hostname));
170 if (host == NULL) 173 if (host == NULL)
171 return -ENOLCK; 174 return -ENOLCK;
172 175
@@ -499,7 +502,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
499 unsigned char fl_flags = fl->fl_flags; 502 unsigned char fl_flags = fl->fl_flags;
500 int status = -ENOLCK; 503 int status = -ENOLCK;
501 504
502 if (!host->h_monitored && nsm_monitor(host) < 0) { 505 if (nsm_monitor(host) < 0) {
503 printk(KERN_NOTICE "lockd: failed to monitor %s\n", 506 printk(KERN_NOTICE "lockd: failed to monitor %s\n",
504 host->h_name); 507 host->h_name);
505 goto out; 508 goto out;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index a0d0b58ce7a4..fb24a9730345 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -27,46 +27,60 @@
27#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) 27#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
28#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) 28#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ)
29 29
30static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH]; 30static struct hlist_head nlm_hosts[NLM_HOST_NRHASH];
31static unsigned long next_gc; 31static unsigned long next_gc;
32static int nrhosts; 32static int nrhosts;
33static DEFINE_MUTEX(nlm_host_mutex); 33static DEFINE_MUTEX(nlm_host_mutex);
34 34
35 35
36static void nlm_gc_hosts(void); 36static void nlm_gc_hosts(void);
37static struct nsm_handle * __nsm_find(const struct sockaddr_in *,
38 const char *, int, int);
37 39
38/* 40/*
39 * Find an NLM server handle in the cache. If there is none, create it. 41 * Find an NLM server handle in the cache. If there is none, create it.
40 */ 42 */
41struct nlm_host * 43struct nlm_host *
42nlmclnt_lookup_host(struct sockaddr_in *sin, int proto, int version) 44nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
45 const char *hostname, int hostname_len)
43{ 46{
44 return nlm_lookup_host(0, sin, proto, version); 47 return nlm_lookup_host(0, sin, proto, version,
48 hostname, hostname_len);
45} 49}
46 50
47/* 51/*
48 * Find an NLM client handle in the cache. If there is none, create it. 52 * Find an NLM client handle in the cache. If there is none, create it.
49 */ 53 */
50struct nlm_host * 54struct nlm_host *
51nlmsvc_lookup_host(struct svc_rqst *rqstp) 55nlmsvc_lookup_host(struct svc_rqst *rqstp,
56 const char *hostname, int hostname_len)
52{ 57{
53 return nlm_lookup_host(1, &rqstp->rq_addr, 58 return nlm_lookup_host(1, &rqstp->rq_addr,
54 rqstp->rq_prot, rqstp->rq_vers); 59 rqstp->rq_prot, rqstp->rq_vers,
60 hostname, hostname_len);
55} 61}
56 62
57/* 63/*
58 * Common host lookup routine for server & client 64 * Common host lookup routine for server & client
59 */ 65 */
60struct nlm_host * 66struct nlm_host *
61nlm_lookup_host(int server, struct sockaddr_in *sin, 67nlm_lookup_host(int server, const struct sockaddr_in *sin,
62 int proto, int version) 68 int proto, int version,
69 const char *hostname,
70 int hostname_len)
63{ 71{
64 struct nlm_host *host, **hp; 72 struct hlist_head *chain;
65 u32 addr; 73 struct hlist_node *pos;
74 struct nlm_host *host;
75 struct nsm_handle *nsm = NULL;
66 int hash; 76 int hash;
67 77
68 dprintk("lockd: nlm_lookup_host(%08x, p=%d, v=%d)\n", 78 dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
69 (unsigned)(sin? ntohl(sin->sin_addr.s_addr) : 0), proto, version); 79 NIPQUAD(sin->sin_addr.s_addr), proto, version,
80 server? "server" : "client",
81 hostname_len,
82 hostname? hostname : "<none>");
83
70 84
71 hash = NLM_ADDRHASH(sin->sin_addr.s_addr); 85 hash = NLM_ADDRHASH(sin->sin_addr.s_addr);
72 86
@@ -76,7 +90,22 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
76 if (time_after_eq(jiffies, next_gc)) 90 if (time_after_eq(jiffies, next_gc))
77 nlm_gc_hosts(); 91 nlm_gc_hosts();
78 92
79 for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) { 93 /* We may keep several nlm_host objects for a peer, because each
94 * nlm_host is identified by
95 * (address, protocol, version, server/client)
96 * We could probably simplify this a little by putting all those
97 * different NLM rpc_clients into one single nlm_host object.
98 * This would allow us to have one nlm_host per address.
99 */
100 chain = &nlm_hosts[hash];
101 hlist_for_each_entry(host, pos, chain, h_hash) {
102 if (!nlm_cmp_addr(&host->h_addr, sin))
103 continue;
104
105 /* See if we have an NSM handle for this client */
106 if (!nsm)
107 nsm = host->h_nsmhandle;
108
80 if (host->h_proto != proto) 109 if (host->h_proto != proto)
81 continue; 110 continue;
82 if (host->h_version != version) 111 if (host->h_version != version)
@@ -84,28 +113,30 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
84 if (host->h_server != server) 113 if (host->h_server != server)
85 continue; 114 continue;
86 115
87 if (nlm_cmp_addr(&host->h_addr, sin)) { 116 /* Move to head of hash chain. */
88 if (hp != nlm_hosts + hash) { 117 hlist_del(&host->h_hash);
89 *hp = host->h_next; 118 hlist_add_head(&host->h_hash, chain);
90 host->h_next = nlm_hosts[hash];
91 nlm_hosts[hash] = host;
92 }
93 nlm_get_host(host);
94 mutex_unlock(&nlm_host_mutex);
95 return host;
96 }
97 }
98 119
99 /* Ooops, no host found, create it */ 120 nlm_get_host(host);
100 dprintk("lockd: creating host entry\n"); 121 goto out;
122 }
123 if (nsm)
124 atomic_inc(&nsm->sm_count);
101 125
102 host = kzalloc(sizeof(*host), GFP_KERNEL); 126 host = NULL;
103 if (!host)
104 goto nohost;
105 127
106 addr = sin->sin_addr.s_addr; 128 /* Sadly, the host isn't in our hash table yet. See if
107 sprintf(host->h_name, "%u.%u.%u.%u", NIPQUAD(addr)); 129 * we have an NSM handle for it. If not, create one.
130 */
131 if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len)))
132 goto out;
108 133
134 host = kzalloc(sizeof(*host), GFP_KERNEL);
135 if (!host) {
136 nsm_release(nsm);
137 goto out;
138 }
139 host->h_name = nsm->sm_name;
109 host->h_addr = *sin; 140 host->h_addr = *sin;
110 host->h_addr.sin_port = 0; /* ouch! */ 141 host->h_addr.sin_port = 0; /* ouch! */
111 host->h_version = version; 142 host->h_version = version;
@@ -119,9 +150,9 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
119 init_rwsem(&host->h_rwsem); 150 init_rwsem(&host->h_rwsem);
120 host->h_state = 0; /* pseudo NSM state */ 151 host->h_state = 0; /* pseudo NSM state */
121 host->h_nsmstate = 0; /* real NSM state */ 152 host->h_nsmstate = 0; /* real NSM state */
153 host->h_nsmhandle = nsm;
122 host->h_server = server; 154 host->h_server = server;
123 host->h_next = nlm_hosts[hash]; 155 hlist_add_head(&host->h_hash, chain);
124 nlm_hosts[hash] = host;
125 INIT_LIST_HEAD(&host->h_lockowners); 156 INIT_LIST_HEAD(&host->h_lockowners);
126 spin_lock_init(&host->h_lock); 157 spin_lock_init(&host->h_lock);
127 INIT_LIST_HEAD(&host->h_granted); 158 INIT_LIST_HEAD(&host->h_granted);
@@ -130,35 +161,39 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
130 if (++nrhosts > NLM_HOST_MAX) 161 if (++nrhosts > NLM_HOST_MAX)
131 next_gc = 0; 162 next_gc = 0;
132 163
133nohost: 164out:
134 mutex_unlock(&nlm_host_mutex); 165 mutex_unlock(&nlm_host_mutex);
135 return host; 166 return host;
136} 167}
137 168
138struct nlm_host * 169/*
139nlm_find_client(void) 170 * Destroy a host
171 */
172static void
173nlm_destroy_host(struct nlm_host *host)
140{ 174{
141 /* find a nlm_host for a client for which h_killed == 0. 175 struct rpc_clnt *clnt;
142 * and return it 176
177 BUG_ON(!list_empty(&host->h_lockowners));
178 BUG_ON(atomic_read(&host->h_count));
179
180 /*
181 * Release NSM handle and unmonitor host.
143 */ 182 */
144 int hash; 183 nsm_unmonitor(host);
145 mutex_lock(&nlm_host_mutex); 184
146 for (hash = 0 ; hash < NLM_HOST_NRHASH; hash++) { 185 if ((clnt = host->h_rpcclnt) != NULL) {
147 struct nlm_host *host, **hp; 186 if (atomic_read(&clnt->cl_users)) {
148 for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) { 187 printk(KERN_WARNING
149 if (host->h_server && 188 "lockd: active RPC handle\n");
150 host->h_killed == 0) { 189 clnt->cl_dead = 1;
151 nlm_get_host(host); 190 } else {
152 mutex_unlock(&nlm_host_mutex); 191 rpc_destroy_client(host->h_rpcclnt);
153 return host;
154 }
155 } 192 }
156 } 193 }
157 mutex_unlock(&nlm_host_mutex); 194 kfree(host);
158 return NULL;
159} 195}
160 196
161
162/* 197/*
163 * Create the NLM RPC client for an NLM peer 198 * Create the NLM RPC client for an NLM peer
164 */ 199 */
@@ -260,22 +295,82 @@ void nlm_release_host(struct nlm_host *host)
260} 295}
261 296
262/* 297/*
298 * We were notified that the host indicated by address &sin
299 * has rebooted.
300 * Release all resources held by that peer.
301 */
302void nlm_host_rebooted(const struct sockaddr_in *sin,
303 const char *hostname, int hostname_len,
304 u32 new_state)
305{
306 struct hlist_head *chain;
307 struct hlist_node *pos;
308 struct nsm_handle *nsm;
309 struct nlm_host *host;
310
311 dprintk("lockd: nlm_host_rebooted(%s, %u.%u.%u.%u)\n",
312 hostname, NIPQUAD(sin->sin_addr));
313
314 /* Find the NSM handle for this peer */
315 if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0)))
316 return;
317
318 /* When reclaiming locks on this peer, make sure that
319 * we set up a new notification */
320 nsm->sm_monitored = 0;
321
322 /* Mark all hosts tied to this NSM state as having rebooted.
323 * We run the loop repeatedly, because we drop the host table
324 * lock for this.
325 * To avoid processing a host several times, we match the nsmstate.
326 */
327again: mutex_lock(&nlm_host_mutex);
328 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
329 hlist_for_each_entry(host, pos, chain, h_hash) {
330 if (host->h_nsmhandle == nsm
331 && host->h_nsmstate != new_state) {
332 host->h_nsmstate = new_state;
333 host->h_state++;
334
335 nlm_get_host(host);
336 mutex_unlock(&nlm_host_mutex);
337
338 if (host->h_server) {
339 /* We're server for this guy, just ditch
340 * all the locks he held. */
341 nlmsvc_free_host_resources(host);
342 } else {
343 /* He's the server, initiate lock recovery. */
344 nlmclnt_recovery(host);
345 }
346
347 nlm_release_host(host);
348 goto again;
349 }
350 }
351 }
352
353 mutex_unlock(&nlm_host_mutex);
354}
355
356/*
263 * Shut down the hosts module. 357 * Shut down the hosts module.
264 * Note that this routine is called only at server shutdown time. 358 * Note that this routine is called only at server shutdown time.
265 */ 359 */
266void 360void
267nlm_shutdown_hosts(void) 361nlm_shutdown_hosts(void)
268{ 362{
363 struct hlist_head *chain;
364 struct hlist_node *pos;
269 struct nlm_host *host; 365 struct nlm_host *host;
270 int i;
271 366
272 dprintk("lockd: shutting down host module\n"); 367 dprintk("lockd: shutting down host module\n");
273 mutex_lock(&nlm_host_mutex); 368 mutex_lock(&nlm_host_mutex);
274 369
275 /* First, make all hosts eligible for gc */ 370 /* First, make all hosts eligible for gc */
276 dprintk("lockd: nuking all hosts...\n"); 371 dprintk("lockd: nuking all hosts...\n");
277 for (i = 0; i < NLM_HOST_NRHASH; i++) { 372 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
278 for (host = nlm_hosts[i]; host; host = host->h_next) 373 hlist_for_each_entry(host, pos, chain, h_hash)
279 host->h_expires = jiffies - 1; 374 host->h_expires = jiffies - 1;
280 } 375 }
281 376
@@ -287,8 +382,8 @@ nlm_shutdown_hosts(void)
287 if (nrhosts) { 382 if (nrhosts) {
288 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); 383 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
289 dprintk("lockd: %d hosts left:\n", nrhosts); 384 dprintk("lockd: %d hosts left:\n", nrhosts);
290 for (i = 0; i < NLM_HOST_NRHASH; i++) { 385 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
291 for (host = nlm_hosts[i]; host; host = host->h_next) { 386 hlist_for_each_entry(host, pos, chain, h_hash) {
292 dprintk(" %s (cnt %d use %d exp %ld)\n", 387 dprintk(" %s (cnt %d use %d exp %ld)\n",
293 host->h_name, atomic_read(&host->h_count), 388 host->h_name, atomic_read(&host->h_count),
294 host->h_inuse, host->h_expires); 389 host->h_inuse, host->h_expires);
@@ -305,45 +400,32 @@ nlm_shutdown_hosts(void)
305static void 400static void
306nlm_gc_hosts(void) 401nlm_gc_hosts(void)
307{ 402{
308 struct nlm_host **q, *host; 403 struct hlist_head *chain;
309 struct rpc_clnt *clnt; 404 struct hlist_node *pos, *next;
310 int i; 405 struct nlm_host *host;
311 406
312 dprintk("lockd: host garbage collection\n"); 407 dprintk("lockd: host garbage collection\n");
313 for (i = 0; i < NLM_HOST_NRHASH; i++) { 408 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
314 for (host = nlm_hosts[i]; host; host = host->h_next) 409 hlist_for_each_entry(host, pos, chain, h_hash)
315 host->h_inuse = 0; 410 host->h_inuse = 0;
316 } 411 }
317 412
318 /* Mark all hosts that hold locks, blocks or shares */ 413 /* Mark all hosts that hold locks, blocks or shares */
319 nlmsvc_mark_resources(); 414 nlmsvc_mark_resources();
320 415
321 for (i = 0; i < NLM_HOST_NRHASH; i++) { 416 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
322 q = &nlm_hosts[i]; 417 hlist_for_each_entry_safe(host, pos, next, chain, h_hash) {
323 while ((host = *q) != NULL) {
324 if (atomic_read(&host->h_count) || host->h_inuse 418 if (atomic_read(&host->h_count) || host->h_inuse
325 || time_before(jiffies, host->h_expires)) { 419 || time_before(jiffies, host->h_expires)) {
326 dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n", 420 dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
327 host->h_name, atomic_read(&host->h_count), 421 host->h_name, atomic_read(&host->h_count),
328 host->h_inuse, host->h_expires); 422 host->h_inuse, host->h_expires);
329 q = &host->h_next;
330 continue; 423 continue;
331 } 424 }
332 dprintk("lockd: delete host %s\n", host->h_name); 425 dprintk("lockd: delete host %s\n", host->h_name);
333 *q = host->h_next; 426 hlist_del_init(&host->h_hash);
334 /* Don't unmonitor hosts that have been invalidated */ 427
335 if (host->h_monitored && !host->h_killed) 428 nlm_destroy_host(host);
336 nsm_unmonitor(host);
337 if ((clnt = host->h_rpcclnt) != NULL) {
338 if (atomic_read(&clnt->cl_users)) {
339 printk(KERN_WARNING
340 "lockd: active RPC handle\n");
341 clnt->cl_dead = 1;
342 } else {
343 rpc_destroy_client(host->h_rpcclnt);
344 }
345 }
346 kfree(host);
347 nrhosts--; 429 nrhosts--;
348 } 430 }
349 } 431 }
@@ -351,3 +433,88 @@ nlm_gc_hosts(void)
351 next_gc = jiffies + NLM_HOST_COLLECT; 433 next_gc = jiffies + NLM_HOST_COLLECT;
352} 434}
353 435
436
437/*
438 * Manage NSM handles
439 */
440static LIST_HEAD(nsm_handles);
441static DEFINE_MUTEX(nsm_mutex);
442
443static struct nsm_handle *
444__nsm_find(const struct sockaddr_in *sin,
445 const char *hostname, int hostname_len,
446 int create)
447{
448 struct nsm_handle *nsm = NULL;
449 struct list_head *pos;
450
451 if (!sin)
452 return NULL;
453
454 if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
455 if (printk_ratelimit()) {
456 printk(KERN_WARNING "Invalid hostname \"%.*s\" "
457 "in NFS lock request\n",
458 hostname_len, hostname);
459 }
460 return NULL;
461 }
462
463 mutex_lock(&nsm_mutex);
464 list_for_each(pos, &nsm_handles) {
465 nsm = list_entry(pos, struct nsm_handle, sm_link);
466
467 if (hostname && nsm_use_hostnames) {
468 if (strlen(nsm->sm_name) != hostname_len
469 || memcmp(nsm->sm_name, hostname, hostname_len))
470 continue;
471 } else if (!nlm_cmp_addr(&nsm->sm_addr, sin))
472 continue;
473 atomic_inc(&nsm->sm_count);
474 goto out;
475 }
476
477 if (!create) {
478 nsm = NULL;
479 goto out;
480 }
481
482 nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
483 if (nsm != NULL) {
484 nsm->sm_addr = *sin;
485 nsm->sm_name = (char *) (nsm + 1);
486 memcpy(nsm->sm_name, hostname, hostname_len);
487 nsm->sm_name[hostname_len] = '\0';
488 atomic_set(&nsm->sm_count, 1);
489
490 list_add(&nsm->sm_link, &nsm_handles);
491 }
492
493out:
494 mutex_unlock(&nsm_mutex);
495 return nsm;
496}
497
498struct nsm_handle *
499nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
500{
501 return __nsm_find(sin, hostname, hostname_len, 1);
502}
503
504/*
505 * Release an NSM handle
506 */
507void
508nsm_release(struct nsm_handle *nsm)
509{
510 if (!nsm)
511 return;
512 if (atomic_dec_and_test(&nsm->sm_count)) {
513 mutex_lock(&nsm_mutex);
514 if (atomic_read(&nsm->sm_count) == 0) {
515 list_del(&nsm->sm_link);
516 kfree(nsm);
517 }
518 mutex_unlock(&nsm_mutex);
519 }
520}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 5954dcb497e4..e0179f8c327f 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -24,13 +24,13 @@ static struct rpc_program nsm_program;
24/* 24/*
25 * Local NSM state 25 * Local NSM state
26 */ 26 */
27u32 nsm_local_state; 27int nsm_local_state;
28 28
29/* 29/*
30 * Common procedure for SM_MON/SM_UNMON calls 30 * Common procedure for SM_MON/SM_UNMON calls
31 */ 31 */
32static int 32static int
33nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) 33nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
34{ 34{
35 struct rpc_clnt *clnt; 35 struct rpc_clnt *clnt;
36 int status; 36 int status;
@@ -46,10 +46,11 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
46 goto out; 46 goto out;
47 } 47 }
48 48
49 args.addr = host->h_addr.sin_addr.s_addr; 49 memset(&args, 0, sizeof(args));
50 args.proto= (host->h_proto<<1) | host->h_server; 50 args.mon_name = nsm->sm_name;
51 args.addr = nsm->sm_addr.sin_addr.s_addr;
51 args.prog = NLM_PROGRAM; 52 args.prog = NLM_PROGRAM;
52 args.vers = host->h_version; 53 args.vers = 3;
53 args.proc = NLMPROC_NSM_NOTIFY; 54 args.proc = NLMPROC_NSM_NOTIFY;
54 memset(res, 0, sizeof(*res)); 55 memset(res, 0, sizeof(*res));
55 56
@@ -70,17 +71,22 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
70int 71int
71nsm_monitor(struct nlm_host *host) 72nsm_monitor(struct nlm_host *host)
72{ 73{
74 struct nsm_handle *nsm = host->h_nsmhandle;
73 struct nsm_res res; 75 struct nsm_res res;
74 int status; 76 int status;
75 77
76 dprintk("lockd: nsm_monitor(%s)\n", host->h_name); 78 dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
79 BUG_ON(nsm == NULL);
77 80
78 status = nsm_mon_unmon(host, SM_MON, &res); 81 if (nsm->sm_monitored)
82 return 0;
83
84 status = nsm_mon_unmon(nsm, SM_MON, &res);
79 85
80 if (status < 0 || res.status != 0) 86 if (status < 0 || res.status != 0)
81 printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name); 87 printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name);
82 else 88 else
83 host->h_monitored = 1; 89 nsm->sm_monitored = 1;
84 return status; 90 return status;
85} 91}
86 92
@@ -90,16 +96,26 @@ nsm_monitor(struct nlm_host *host)
90int 96int
91nsm_unmonitor(struct nlm_host *host) 97nsm_unmonitor(struct nlm_host *host)
92{ 98{
99 struct nsm_handle *nsm = host->h_nsmhandle;
93 struct nsm_res res; 100 struct nsm_res res;
94 int status; 101 int status = 0;
95 102
96 dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name); 103 if (nsm == NULL)
97 104 return 0;
98 status = nsm_mon_unmon(host, SM_UNMON, &res); 105 host->h_nsmhandle = NULL;
99 if (status < 0) 106
100 printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", host->h_name); 107 if (atomic_read(&nsm->sm_count) == 1
101 else 108 && nsm->sm_monitored && !nsm->sm_sticky) {
102 host->h_monitored = 0; 109 dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
110
111 status = nsm_mon_unmon(nsm, SM_UNMON, &res);
112 if (status < 0)
113 printk(KERN_NOTICE "lockd: cannot unmonitor %s\n",
114 host->h_name);
115 else
116 nsm->sm_monitored = 0;
117 }
118 nsm_release(nsm);
103 return status; 119 return status;
104} 120}
105 121
@@ -135,7 +151,7 @@ nsm_create(void)
135static u32 * 151static u32 *
136xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) 152xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
137{ 153{
138 char buffer[20]; 154 char buffer[20], *name;
139 155
140 /* 156 /*
141 * Use the dotted-quad IP address of the remote host as 157 * Use the dotted-quad IP address of the remote host as
@@ -143,9 +159,14 @@ xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
143 * hostname first for whatever remote hostname it receives, 159 * hostname first for whatever remote hostname it receives,
144 * so this works alright. 160 * so this works alright.
145 */ 161 */
146 sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr)); 162 if (nsm_use_hostnames) {
147 if (!(p = xdr_encode_string(p, buffer)) 163 name = argp->mon_name;
148 || !(p = xdr_encode_string(p, system_utsname.nodename))) 164 } else {
165 sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
166 name = buffer;
167 }
168 if (!(p = xdr_encode_string(p, name))
169 || !(p = xdr_encode_string(p, utsname()->nodename)))
149 return ERR_PTR(-EIO); 170 return ERR_PTR(-EIO);
150 *p++ = htonl(argp->prog); 171 *p++ = htonl(argp->prog);
151 *p++ = htonl(argp->vers); 172 *p++ = htonl(argp->vers);
@@ -160,9 +181,11 @@ xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
160 p = xdr_encode_common(rqstp, p, argp); 181 p = xdr_encode_common(rqstp, p, argp);
161 if (IS_ERR(p)) 182 if (IS_ERR(p))
162 return PTR_ERR(p); 183 return PTR_ERR(p);
184
185 /* Surprise - there may even be room for an IPv6 address now */
163 *p++ = argp->addr; 186 *p++ = argp->addr;
164 *p++ = argp->vers; 187 *p++ = 0;
165 *p++ = argp->proto; 188 *p++ = 0;
166 *p++ = 0; 189 *p++ = 0;
167 rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); 190 rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p);
168 return 0; 191 return 0;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 9a991b52c647..634139232aaf 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -31,7 +31,9 @@
31#include <linux/sunrpc/clnt.h> 31#include <linux/sunrpc/clnt.h>
32#include <linux/sunrpc/svc.h> 32#include <linux/sunrpc/svc.h>
33#include <linux/sunrpc/svcsock.h> 33#include <linux/sunrpc/svcsock.h>
34#include <net/ip.h>
34#include <linux/lockd/lockd.h> 35#include <linux/lockd/lockd.h>
36#include <linux/lockd/sm_inter.h>
35#include <linux/nfs.h> 37#include <linux/nfs.h>
36 38
37#define NLMDBG_FACILITY NLMDBG_SVC 39#define NLMDBG_FACILITY NLMDBG_SVC
@@ -46,6 +48,7 @@ EXPORT_SYMBOL(nlmsvc_ops);
46static DEFINE_MUTEX(nlmsvc_mutex); 48static DEFINE_MUTEX(nlmsvc_mutex);
47static unsigned int nlmsvc_users; 49static unsigned int nlmsvc_users;
48static pid_t nlmsvc_pid; 50static pid_t nlmsvc_pid;
51static struct svc_serv *nlmsvc_serv;
49int nlmsvc_grace_period; 52int nlmsvc_grace_period;
50unsigned long nlmsvc_timeout; 53unsigned long nlmsvc_timeout;
51 54
@@ -59,6 +62,7 @@ static DECLARE_WAIT_QUEUE_HEAD(lockd_exit);
59static unsigned long nlm_grace_period; 62static unsigned long nlm_grace_period;
60static unsigned long nlm_timeout = LOCKD_DFLT_TIMEO; 63static unsigned long nlm_timeout = LOCKD_DFLT_TIMEO;
61static int nlm_udpport, nlm_tcpport; 64static int nlm_udpport, nlm_tcpport;
65int nsm_use_hostnames = 0;
62 66
63/* 67/*
64 * Constants needed for the sysctl interface. 68 * Constants needed for the sysctl interface.
@@ -96,7 +100,6 @@ static inline void clear_grace_period(void)
96static void 100static void
97lockd(struct svc_rqst *rqstp) 101lockd(struct svc_rqst *rqstp)
98{ 102{
99 struct svc_serv *serv = rqstp->rq_server;
100 int err = 0; 103 int err = 0;
101 unsigned long grace_period_expire; 104 unsigned long grace_period_expire;
102 105
@@ -112,6 +115,7 @@ lockd(struct svc_rqst *rqstp)
112 * Let our maker know we're running. 115 * Let our maker know we're running.
113 */ 116 */
114 nlmsvc_pid = current->pid; 117 nlmsvc_pid = current->pid;
118 nlmsvc_serv = rqstp->rq_server;
115 complete(&lockd_start_done); 119 complete(&lockd_start_done);
116 120
117 daemonize("lockd"); 121 daemonize("lockd");
@@ -161,7 +165,7 @@ lockd(struct svc_rqst *rqstp)
161 * Find a socket with data available and call its 165 * Find a socket with data available and call its
162 * recvfrom routine. 166 * recvfrom routine.
163 */ 167 */
164 err = svc_recv(serv, rqstp, timeout); 168 err = svc_recv(rqstp, timeout);
165 if (err == -EAGAIN || err == -EINTR) 169 if (err == -EAGAIN || err == -EINTR)
166 continue; 170 continue;
167 if (err < 0) { 171 if (err < 0) {
@@ -174,7 +178,7 @@ lockd(struct svc_rqst *rqstp)
174 dprintk("lockd: request from %08x\n", 178 dprintk("lockd: request from %08x\n",
175 (unsigned)ntohl(rqstp->rq_addr.sin_addr.s_addr)); 179 (unsigned)ntohl(rqstp->rq_addr.sin_addr.s_addr));
176 180
177 svc_process(serv, rqstp); 181 svc_process(rqstp);
178 182
179 } 183 }
180 184
@@ -189,6 +193,7 @@ lockd(struct svc_rqst *rqstp)
189 nlmsvc_invalidate_all(); 193 nlmsvc_invalidate_all();
190 nlm_shutdown_hosts(); 194 nlm_shutdown_hosts();
191 nlmsvc_pid = 0; 195 nlmsvc_pid = 0;
196 nlmsvc_serv = NULL;
192 } else 197 } else
193 printk(KERN_DEBUG 198 printk(KERN_DEBUG
194 "lockd: new process, skipping host shutdown\n"); 199 "lockd: new process, skipping host shutdown\n");
@@ -205,54 +210,77 @@ lockd(struct svc_rqst *rqstp)
205 module_put_and_exit(0); 210 module_put_and_exit(0);
206} 211}
207 212
213
214static int find_socket(struct svc_serv *serv, int proto)
215{
216 struct svc_sock *svsk;
217 int found = 0;
218 list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
219 if (svsk->sk_sk->sk_protocol == proto) {
220 found = 1;
221 break;
222 }
223 return found;
224}
225
226static int make_socks(struct svc_serv *serv, int proto)
227{
228 /* Make any sockets that are needed but not present.
229 * If nlm_udpport or nlm_tcpport were set as module
230 * options, make those sockets unconditionally
231 */
232 static int warned;
233 int err = 0;
234 if (proto == IPPROTO_UDP || nlm_udpport)
235 if (!find_socket(serv, IPPROTO_UDP))
236 err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport);
237 if (err == 0 && (proto == IPPROTO_TCP || nlm_tcpport))
238 if (!find_socket(serv, IPPROTO_TCP))
239 err= svc_makesock(serv, IPPROTO_TCP, nlm_tcpport);
240 if (!err)
241 warned = 0;
242 else if (warned++ == 0)
243 printk(KERN_WARNING
244 "lockd_up: makesock failed, error=%d\n", err);
245 return err;
246}
247
208/* 248/*
209 * Bring up the lockd process if it's not already up. 249 * Bring up the lockd process if it's not already up.
210 */ 250 */
211int 251int
212lockd_up(void) 252lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
213{ 253{
214 static int warned;
215 struct svc_serv * serv; 254 struct svc_serv * serv;
216 int error = 0; 255 int error = 0;
217 256
218 mutex_lock(&nlmsvc_mutex); 257 mutex_lock(&nlmsvc_mutex);
219 /* 258 /*
220 * Unconditionally increment the user count ... this is
221 * the number of clients who _want_ a lockd process.
222 */
223 nlmsvc_users++;
224 /*
225 * Check whether we're already up and running. 259 * Check whether we're already up and running.
226 */ 260 */
227 if (nlmsvc_pid) 261 if (nlmsvc_pid) {
262 if (proto)
263 error = make_socks(nlmsvc_serv, proto);
228 goto out; 264 goto out;
265 }
229 266
230 /* 267 /*
231 * Sanity check: if there's no pid, 268 * Sanity check: if there's no pid,
232 * we should be the first user ... 269 * we should be the first user ...
233 */ 270 */
234 if (nlmsvc_users > 1) 271 if (nlmsvc_users)
235 printk(KERN_WARNING 272 printk(KERN_WARNING
236 "lockd_up: no pid, %d users??\n", nlmsvc_users); 273 "lockd_up: no pid, %d users??\n", nlmsvc_users);
237 274
238 error = -ENOMEM; 275 error = -ENOMEM;
239 serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE); 276 serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
240 if (!serv) { 277 if (!serv) {
241 printk(KERN_WARNING "lockd_up: create service failed\n"); 278 printk(KERN_WARNING "lockd_up: create service failed\n");
242 goto out; 279 goto out;
243 } 280 }
244 281
245 if ((error = svc_makesock(serv, IPPROTO_UDP, nlm_udpport)) < 0 282 if ((error = make_socks(serv, proto)) < 0)
246#ifdef CONFIG_NFSD_TCP
247 || (error = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport)) < 0
248#endif
249 ) {
250 if (warned++ == 0)
251 printk(KERN_WARNING
252 "lockd_up: makesock failed, error=%d\n", error);
253 goto destroy_and_out; 283 goto destroy_and_out;
254 }
255 warned = 0;
256 284
257 /* 285 /*
258 * Create the kernel thread and wait for it to start. 286 * Create the kernel thread and wait for it to start.
@@ -272,6 +300,8 @@ lockd_up(void)
272destroy_and_out: 300destroy_and_out:
273 svc_destroy(serv); 301 svc_destroy(serv);
274out: 302out:
303 if (!error)
304 nlmsvc_users++;
275 mutex_unlock(&nlmsvc_mutex); 305 mutex_unlock(&nlmsvc_mutex);
276 return error; 306 return error;
277} 307}
@@ -367,6 +397,22 @@ static ctl_table nlm_sysctls[] = {
367 .extra1 = (int *) &nlm_port_min, 397 .extra1 = (int *) &nlm_port_min,
368 .extra2 = (int *) &nlm_port_max, 398 .extra2 = (int *) &nlm_port_max,
369 }, 399 },
400 {
401 .ctl_name = CTL_UNNUMBERED,
402 .procname = "nsm_use_hostnames",
403 .data = &nsm_use_hostnames,
404 .maxlen = sizeof(int),
405 .mode = 0644,
406 .proc_handler = &proc_dointvec,
407 },
408 {
409 .ctl_name = CTL_UNNUMBERED,
410 .procname = "nsm_local_state",
411 .data = &nsm_local_state,
412 .maxlen = sizeof(int),
413 .mode = 0644,
414 .proc_handler = &proc_dointvec,
415 },
370 { .ctl_name = 0 } 416 { .ctl_name = 0 }
371}; 417};
372 418
@@ -455,6 +501,7 @@ module_param_call(nlm_udpport, param_set_port, param_get_int,
455 &nlm_udpport, 0644); 501 &nlm_udpport, 0644);
456module_param_call(nlm_tcpport, param_set_port, param_get_int, 502module_param_call(nlm_tcpport, param_set_port, param_get_int,
457 &nlm_tcpport, 0644); 503 &nlm_tcpport, 0644);
504module_param(nsm_use_hostnames, bool, 0644);
458 505
459/* 506/*
460 * Initialising and terminating the module. 507 * Initialising and terminating the module.
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index a2dd9ccb9b32..fa370f6eb07b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -38,8 +38,8 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
38 return nlm_lck_denied_nolocks; 38 return nlm_lck_denied_nolocks;
39 39
40 /* Obtain host handle */ 40 /* Obtain host handle */
41 if (!(host = nlmsvc_lookup_host(rqstp)) 41 if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
42 || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0)) 42 || (argp->monitor && nsm_monitor(host) < 0))
43 goto no_locks; 43 goto no_locks;
44 *hostp = host; 44 *hostp = host;
45 45
@@ -260,7 +260,9 @@ static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *a
260 struct nlm_rqst *call; 260 struct nlm_rqst *call;
261 int stat; 261 int stat;
262 262
263 host = nlmsvc_lookup_host(rqstp); 263 host = nlmsvc_lookup_host(rqstp,
264 argp->lock.caller,
265 argp->lock.len);
264 if (host == NULL) 266 if (host == NULL)
265 return rpc_system_err; 267 return rpc_system_err;
266 268
@@ -420,10 +422,6 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
420 void *resp) 422 void *resp)
421{ 423{
422 struct sockaddr_in saddr = rqstp->rq_addr; 424 struct sockaddr_in saddr = rqstp->rq_addr;
423 int vers = argp->vers;
424 int prot = argp->proto >> 1;
425
426 struct nlm_host *host;
427 425
428 dprintk("lockd: SM_NOTIFY called\n"); 426 dprintk("lockd: SM_NOTIFY called\n");
429 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) 427 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK)
@@ -438,21 +436,10 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
438 /* Obtain the host pointer for this NFS server and try to 436 /* Obtain the host pointer for this NFS server and try to
439 * reclaim all locks we hold on this server. 437 * reclaim all locks we hold on this server.
440 */ 438 */
439 memset(&saddr, 0, sizeof(saddr));
441 saddr.sin_addr.s_addr = argp->addr; 440 saddr.sin_addr.s_addr = argp->addr;
441 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
442 442
443 if ((argp->proto & 1)==0) {
444 if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) {
445 nlmclnt_recovery(host, argp->state);
446 nlm_release_host(host);
447 }
448 } else {
449 /* If we run on an NFS server, delete all locks held by the client */
450
451 if ((host = nlm_lookup_host(1, &saddr, prot, vers)) != NULL) {
452 nlmsvc_free_host_resources(host);
453 nlm_release_host(host);
454 }
455 }
456 return rpc_success; 443 return rpc_success;
457} 444}
458 445
@@ -468,7 +455,7 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
468 455
469 dprintk("lockd: GRANTED_RES called\n"); 456 dprintk("lockd: GRANTED_RES called\n");
470 457
471 nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); 458 nlmsvc_grant_reply(&argp->cookie, argp->status);
472 return rpc_success; 459 return rpc_success;
473} 460}
474 461
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c9d419703cf3..814c6064c9e0 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -40,7 +40,7 @@
40 40
41static void nlmsvc_release_block(struct nlm_block *block); 41static void nlmsvc_release_block(struct nlm_block *block);
42static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); 42static void nlmsvc_insert_block(struct nlm_block *block, unsigned long);
43static int nlmsvc_remove_block(struct nlm_block *block); 43static void nlmsvc_remove_block(struct nlm_block *block);
44 44
45static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); 45static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
46static void nlmsvc_freegrantargs(struct nlm_rqst *call); 46static void nlmsvc_freegrantargs(struct nlm_rqst *call);
@@ -49,7 +49,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops;
49/* 49/*
50 * The list of blocked locks to retry 50 * The list of blocked locks to retry
51 */ 51 */
52static struct nlm_block * nlm_blocked; 52static LIST_HEAD(nlm_blocked);
53 53
54/* 54/*
55 * Insert a blocked lock into the global list 55 * Insert a blocked lock into the global list
@@ -57,48 +57,44 @@ static struct nlm_block * nlm_blocked;
57static void 57static void
58nlmsvc_insert_block(struct nlm_block *block, unsigned long when) 58nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
59{ 59{
60 struct nlm_block **bp, *b; 60 struct nlm_block *b;
61 struct list_head *pos;
61 62
62 dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when); 63 dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
63 kref_get(&block->b_count); 64 if (list_empty(&block->b_list)) {
64 if (block->b_queued) 65 kref_get(&block->b_count);
65 nlmsvc_remove_block(block); 66 } else {
66 bp = &nlm_blocked; 67 list_del_init(&block->b_list);
68 }
69
70 pos = &nlm_blocked;
67 if (when != NLM_NEVER) { 71 if (when != NLM_NEVER) {
68 if ((when += jiffies) == NLM_NEVER) 72 if ((when += jiffies) == NLM_NEVER)
69 when ++; 73 when ++;
70 while ((b = *bp) && time_before_eq(b->b_when,when) && b->b_when != NLM_NEVER) 74 list_for_each(pos, &nlm_blocked) {
71 bp = &b->b_next; 75 b = list_entry(pos, struct nlm_block, b_list);
72 } else 76 if (time_after(b->b_when,when) || b->b_when == NLM_NEVER)
73 while ((b = *bp) != 0) 77 break;
74 bp = &b->b_next; 78 }
79 /* On normal exit from the loop, pos == &nlm_blocked,
80 * so we will be adding to the end of the list - good
81 */
82 }
75 83
76 block->b_queued = 1; 84 list_add_tail(&block->b_list, pos);
77 block->b_when = when; 85 block->b_when = when;
78 block->b_next = b;
79 *bp = block;
80} 86}
81 87
82/* 88/*
83 * Remove a block from the global list 89 * Remove a block from the global list
84 */ 90 */
85static int 91static inline void
86nlmsvc_remove_block(struct nlm_block *block) 92nlmsvc_remove_block(struct nlm_block *block)
87{ 93{
88 struct nlm_block **bp, *b; 94 if (!list_empty(&block->b_list)) {
89 95 list_del_init(&block->b_list);
90 if (!block->b_queued) 96 nlmsvc_release_block(block);
91 return 1;
92 for (bp = &nlm_blocked; (b = *bp) != 0; bp = &b->b_next) {
93 if (b == block) {
94 *bp = block->b_next;
95 block->b_queued = 0;
96 nlmsvc_release_block(block);
97 return 1;
98 }
99 } 97 }
100
101 return 0;
102} 98}
103 99
104/* 100/*
@@ -107,14 +103,14 @@ nlmsvc_remove_block(struct nlm_block *block)
107static struct nlm_block * 103static struct nlm_block *
108nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock) 104nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
109{ 105{
110 struct nlm_block **head, *block; 106 struct nlm_block *block;
111 struct file_lock *fl; 107 struct file_lock *fl;
112 108
113 dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n", 109 dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n",
114 file, lock->fl.fl_pid, 110 file, lock->fl.fl_pid,
115 (long long)lock->fl.fl_start, 111 (long long)lock->fl.fl_start,
116 (long long)lock->fl.fl_end, lock->fl.fl_type); 112 (long long)lock->fl.fl_end, lock->fl.fl_type);
117 for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) { 113 list_for_each_entry(block, &nlm_blocked, b_list) {
118 fl = &block->b_call->a_args.lock.fl; 114 fl = &block->b_call->a_args.lock.fl;
119 dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n", 115 dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
120 block->b_file, fl->fl_pid, 116 block->b_file, fl->fl_pid,
@@ -143,20 +139,20 @@ static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
143 * Find a block with a given NLM cookie. 139 * Find a block with a given NLM cookie.
144 */ 140 */
145static inline struct nlm_block * 141static inline struct nlm_block *
146nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) 142nlmsvc_find_block(struct nlm_cookie *cookie)
147{ 143{
148 struct nlm_block *block; 144 struct nlm_block *block;
149 145
150 for (block = nlm_blocked; block; block = block->b_next) { 146 list_for_each_entry(block, &nlm_blocked, b_list) {
151 dprintk("cookie: head of blocked queue %p, block %p\n", 147 if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie))
152 nlm_blocked, block); 148 goto found;
153 if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
154 && nlm_cmp_addr(sin, &block->b_host->h_addr))
155 break;
156 } 149 }
157 150
158 if (block != NULL) 151 return NULL;
159 kref_get(&block->b_count); 152
153found:
154 dprintk("nlmsvc_find_block(%s): block=%p\n", nlmdbg_cookie2a(cookie), block);
155 kref_get(&block->b_count);
160 return block; 156 return block;
161} 157}
162 158
@@ -169,6 +165,11 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin)
169 * request, but (as I found out later) that's because some implementations 165 * request, but (as I found out later) that's because some implementations
170 * do just this. Never mind the standards comittees, they support our 166 * do just this. Never mind the standards comittees, they support our
171 * logging industries. 167 * logging industries.
168 *
169 * 10 years later: I hope we can safely ignore these old and broken
170 * clients by now. Let's fix this so we can uniquely identify an incoming
171 * GRANTED_RES message by cookie, without having to rely on the client's IP
172 * address. --okir
172 */ 173 */
173static inline struct nlm_block * 174static inline struct nlm_block *
174nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, 175nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
@@ -179,7 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
179 struct nlm_rqst *call = NULL; 180 struct nlm_rqst *call = NULL;
180 181
181 /* Create host handle for callback */ 182 /* Create host handle for callback */
182 host = nlmsvc_lookup_host(rqstp); 183 host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
183 if (host == NULL) 184 if (host == NULL)
184 return NULL; 185 return NULL;
185 186
@@ -192,6 +193,8 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
192 if (block == NULL) 193 if (block == NULL)
193 goto failed; 194 goto failed;
194 kref_init(&block->b_count); 195 kref_init(&block->b_count);
196 INIT_LIST_HEAD(&block->b_list);
197 INIT_LIST_HEAD(&block->b_flist);
195 198
196 if (!nlmsvc_setgrantargs(call, lock)) 199 if (!nlmsvc_setgrantargs(call, lock))
197 goto failed_free; 200 goto failed_free;
@@ -199,7 +202,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
199 /* Set notifier function for VFS, and init args */ 202 /* Set notifier function for VFS, and init args */
200 call->a_args.lock.fl.fl_flags |= FL_SLEEP; 203 call->a_args.lock.fl.fl_flags |= FL_SLEEP;
201 call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; 204 call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
202 call->a_args.cookie = *cookie; /* see above */ 205 nlmclnt_next_cookie(&call->a_args.cookie);
203 206
204 dprintk("lockd: created block %p...\n", block); 207 dprintk("lockd: created block %p...\n", block);
205 208
@@ -210,8 +213,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
210 file->f_count++; 213 file->f_count++;
211 214
212 /* Add to file's list of blocks */ 215 /* Add to file's list of blocks */
213 block->b_fnext = file->f_blocks; 216 list_add(&block->b_flist, &file->f_blocks);
214 file->f_blocks = block;
215 217
216 /* Set up RPC arguments for callback */ 218 /* Set up RPC arguments for callback */
217 block->b_call = call; 219 block->b_call = call;
@@ -248,19 +250,13 @@ static void nlmsvc_free_block(struct kref *kref)
248{ 250{
249 struct nlm_block *block = container_of(kref, struct nlm_block, b_count); 251 struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
250 struct nlm_file *file = block->b_file; 252 struct nlm_file *file = block->b_file;
251 struct nlm_block **bp;
252 253
253 dprintk("lockd: freeing block %p...\n", block); 254 dprintk("lockd: freeing block %p...\n", block);
254 255
255 down(&file->f_sema);
256 /* Remove block from file's list of blocks */ 256 /* Remove block from file's list of blocks */
257 for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) { 257 mutex_lock(&file->f_mutex);
258 if (*bp == block) { 258 list_del_init(&block->b_flist);
259 *bp = block->b_fnext; 259 mutex_unlock(&file->f_mutex);
260 break;
261 }
262 }
263 up(&file->f_sema);
264 260
265 nlmsvc_freegrantargs(block->b_call); 261 nlmsvc_freegrantargs(block->b_call);
266 nlm_release_call(block->b_call); 262 nlm_release_call(block->b_call);
@@ -274,47 +270,32 @@ static void nlmsvc_release_block(struct nlm_block *block)
274 kref_put(&block->b_count, nlmsvc_free_block); 270 kref_put(&block->b_count, nlmsvc_free_block);
275} 271}
276 272
277static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file) 273/*
278{ 274 * Loop over all blocks and delete blocks held by
279 struct nlm_block *block; 275 * a matching host.
280 276 */
281 down(&file->f_sema); 277void nlmsvc_traverse_blocks(struct nlm_host *host,
282 for (block = file->f_blocks; block != NULL; block = block->b_fnext) 278 struct nlm_file *file,
283 block->b_host->h_inuse = 1; 279 nlm_host_match_fn_t match)
284 up(&file->f_sema);
285}
286
287static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file)
288{ 280{
289 struct nlm_block *block; 281 struct nlm_block *block, *next;
290 282
291restart: 283restart:
292 down(&file->f_sema); 284 mutex_lock(&file->f_mutex);
293 for (block = file->f_blocks; block != NULL; block = block->b_fnext) { 285 list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) {
294 if (host != NULL && host != block->b_host) 286 if (!match(block->b_host, host))
295 continue; 287 continue;
296 if (!block->b_queued) 288 /* Do not destroy blocks that are not on
289 * the global retry list - why? */
290 if (list_empty(&block->b_list))
297 continue; 291 continue;
298 kref_get(&block->b_count); 292 kref_get(&block->b_count);
299 up(&file->f_sema); 293 mutex_unlock(&file->f_mutex);
300 nlmsvc_unlink_block(block); 294 nlmsvc_unlink_block(block);
301 nlmsvc_release_block(block); 295 nlmsvc_release_block(block);
302 goto restart; 296 goto restart;
303 } 297 }
304 up(&file->f_sema); 298 mutex_unlock(&file->f_mutex);
305}
306
307/*
308 * Loop over all blocks and perform the action specified.
309 * (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
310 */
311void
312nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
313{
314 if (action == NLM_ACT_MARK)
315 nlmsvc_act_mark(host, file);
316 else
317 nlmsvc_act_unlock(host, file);
318} 299}
319 300
320/* 301/*
@@ -325,7 +306,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
325{ 306{
326 locks_copy_lock(&call->a_args.lock.fl, &lock->fl); 307 locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
327 memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh)); 308 memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
328 call->a_args.lock.caller = system_utsname.nodename; 309 call->a_args.lock.caller = utsname()->nodename;
329 call->a_args.lock.oh.len = lock->oh.len; 310 call->a_args.lock.oh.len = lock->oh.len;
330 311
331 /* set default data area */ 312 /* set default data area */
@@ -373,7 +354,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
373 lock->fl.fl_flags &= ~FL_SLEEP; 354 lock->fl.fl_flags &= ~FL_SLEEP;
374again: 355again:
375 /* Lock file against concurrent access */ 356 /* Lock file against concurrent access */
376 down(&file->f_sema); 357 mutex_lock(&file->f_mutex);
377 /* Get existing block (in case client is busy-waiting) */ 358 /* Get existing block (in case client is busy-waiting) */
378 block = nlmsvc_lookup_block(file, lock); 359 block = nlmsvc_lookup_block(file, lock);
379 if (block == NULL) { 360 if (block == NULL) {
@@ -411,10 +392,10 @@ again:
411 392
412 /* If we don't have a block, create and initialize it. Then 393 /* If we don't have a block, create and initialize it. Then
413 * retry because we may have slept in kmalloc. */ 394 * retry because we may have slept in kmalloc. */
414 /* We have to release f_sema as nlmsvc_create_block may try to 395 /* We have to release f_mutex as nlmsvc_create_block may try to
415 * to claim it while doing host garbage collection */ 396 * to claim it while doing host garbage collection */
416 if (newblock == NULL) { 397 if (newblock == NULL) {
417 up(&file->f_sema); 398 mutex_unlock(&file->f_mutex);
418 dprintk("lockd: blocking on this lock (allocating).\n"); 399 dprintk("lockd: blocking on this lock (allocating).\n");
419 if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie))) 400 if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
420 return nlm_lck_denied_nolocks; 401 return nlm_lck_denied_nolocks;
@@ -424,7 +405,7 @@ again:
424 /* Append to list of blocked */ 405 /* Append to list of blocked */
425 nlmsvc_insert_block(newblock, NLM_NEVER); 406 nlmsvc_insert_block(newblock, NLM_NEVER);
426out: 407out:
427 up(&file->f_sema); 408 mutex_unlock(&file->f_mutex);
428 nlmsvc_release_block(newblock); 409 nlmsvc_release_block(newblock);
429 nlmsvc_release_block(block); 410 nlmsvc_release_block(block);
430 dprintk("lockd: nlmsvc_lock returned %u\n", ret); 411 dprintk("lockd: nlmsvc_lock returned %u\n", ret);
@@ -451,6 +432,7 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
451 (long long)conflock->fl.fl_start, 432 (long long)conflock->fl.fl_start,
452 (long long)conflock->fl.fl_end); 433 (long long)conflock->fl.fl_end);
453 conflock->caller = "somehost"; /* FIXME */ 434 conflock->caller = "somehost"; /* FIXME */
435 conflock->len = strlen(conflock->caller);
454 conflock->oh.len = 0; /* don't return OH info */ 436 conflock->oh.len = 0; /* don't return OH info */
455 conflock->svid = conflock->fl.fl_pid; 437 conflock->svid = conflock->fl.fl_pid;
456 return nlm_lck_denied; 438 return nlm_lck_denied;
@@ -507,9 +489,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
507 (long long)lock->fl.fl_start, 489 (long long)lock->fl.fl_start,
508 (long long)lock->fl.fl_end); 490 (long long)lock->fl.fl_end);
509 491
510 down(&file->f_sema); 492 mutex_lock(&file->f_mutex);
511 block = nlmsvc_lookup_block(file, lock); 493 block = nlmsvc_lookup_block(file, lock);
512 up(&file->f_sema); 494 mutex_unlock(&file->f_mutex);
513 if (block != NULL) { 495 if (block != NULL) {
514 status = nlmsvc_unlink_block(block); 496 status = nlmsvc_unlink_block(block);
515 nlmsvc_release_block(block); 497 nlmsvc_release_block(block);
@@ -527,10 +509,10 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
527static void 509static void
528nlmsvc_notify_blocked(struct file_lock *fl) 510nlmsvc_notify_blocked(struct file_lock *fl)
529{ 511{
530 struct nlm_block **bp, *block; 512 struct nlm_block *block;
531 513
532 dprintk("lockd: VFS unblock notification for block %p\n", fl); 514 dprintk("lockd: VFS unblock notification for block %p\n", fl);
533 for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) { 515 list_for_each_entry(block, &nlm_blocked, b_list) {
534 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { 516 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
535 nlmsvc_insert_block(block, 0); 517 nlmsvc_insert_block(block, 0);
536 svc_wake_up(block->b_daemon); 518 svc_wake_up(block->b_daemon);
@@ -663,17 +645,14 @@ static const struct rpc_call_ops nlmsvc_grant_ops = {
663 * block. 645 * block.
664 */ 646 */
665void 647void
666nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status) 648nlmsvc_grant_reply(struct nlm_cookie *cookie, u32 status)
667{ 649{
668 struct nlm_block *block; 650 struct nlm_block *block;
669 struct nlm_file *file;
670 651
671 dprintk("grant_reply: looking for cookie %x, host (%08x), s=%d \n", 652 dprintk("grant_reply: looking for cookie %x, s=%d \n",
672 *(unsigned int *)(cookie->data), 653 *(unsigned int *)(cookie->data), status);
673 ntohl(rqstp->rq_addr.sin_addr.s_addr), status); 654 if (!(block = nlmsvc_find_block(cookie)))
674 if (!(block = nlmsvc_find_block(cookie, &rqstp->rq_addr)))
675 return; 655 return;
676 file = block->b_file;
677 656
678 if (block) { 657 if (block) {
679 if (status == NLM_LCK_DENIED_GRACE_PERIOD) { 658 if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
@@ -696,16 +675,19 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
696unsigned long 675unsigned long
697nlmsvc_retry_blocked(void) 676nlmsvc_retry_blocked(void)
698{ 677{
699 struct nlm_block *block; 678 unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
679 struct nlm_block *block;
680
681 while (!list_empty(&nlm_blocked)) {
682 block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
700 683
701 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
702 nlm_blocked,
703 nlm_blocked? nlm_blocked->b_when : 0);
704 while ((block = nlm_blocked) != 0) {
705 if (block->b_when == NLM_NEVER) 684 if (block->b_when == NLM_NEVER)
706 break; 685 break;
707 if (time_after(block->b_when,jiffies)) 686 if (time_after(block->b_when,jiffies)) {
687 timeout = block->b_when - jiffies;
708 break; 688 break;
689 }
690
709 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", 691 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
710 block, block->b_when); 692 block, block->b_when);
711 kref_get(&block->b_count); 693 kref_get(&block->b_count);
@@ -713,8 +695,5 @@ nlmsvc_retry_blocked(void)
713 nlmsvc_release_block(block); 695 nlmsvc_release_block(block);
714 } 696 }
715 697
716 if ((block = nlm_blocked) && block->b_when != NLM_NEVER) 698 return timeout;
717 return (block->b_when - jiffies);
718
719 return MAX_SCHEDULE_TIMEOUT;
720} 699}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index dbb66a3b5cd9..75b2c81bcb93 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -66,8 +66,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
66 return nlm_lck_denied_nolocks; 66 return nlm_lck_denied_nolocks;
67 67
68 /* Obtain host handle */ 68 /* Obtain host handle */
69 if (!(host = nlmsvc_lookup_host(rqstp)) 69 if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
70 || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0)) 70 || (argp->monitor && nsm_monitor(host) < 0))
71 goto no_locks; 71 goto no_locks;
72 *hostp = host; 72 *hostp = host;
73 73
@@ -287,7 +287,9 @@ static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *ar
287 struct nlm_rqst *call; 287 struct nlm_rqst *call;
288 int stat; 288 int stat;
289 289
290 host = nlmsvc_lookup_host(rqstp); 290 host = nlmsvc_lookup_host(rqstp,
291 argp->lock.caller,
292 argp->lock.len);
291 if (host == NULL) 293 if (host == NULL)
292 return rpc_system_err; 294 return rpc_system_err;
293 295
@@ -449,9 +451,6 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
449 void *resp) 451 void *resp)
450{ 452{
451 struct sockaddr_in saddr = rqstp->rq_addr; 453 struct sockaddr_in saddr = rqstp->rq_addr;
452 int vers = argp->vers;
453 int prot = argp->proto >> 1;
454 struct nlm_host *host;
455 454
456 dprintk("lockd: SM_NOTIFY called\n"); 455 dprintk("lockd: SM_NOTIFY called\n");
457 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) 456 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK)
@@ -466,19 +465,9 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
466 /* Obtain the host pointer for this NFS server and try to 465 /* Obtain the host pointer for this NFS server and try to
467 * reclaim all locks we hold on this server. 466 * reclaim all locks we hold on this server.
468 */ 467 */
468 memset(&saddr, 0, sizeof(saddr));
469 saddr.sin_addr.s_addr = argp->addr; 469 saddr.sin_addr.s_addr = argp->addr;
470 if ((argp->proto & 1)==0) { 470 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
471 if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) {
472 nlmclnt_recovery(host, argp->state);
473 nlm_release_host(host);
474 }
475 } else {
476 /* If we run on an NFS server, delete all locks held by the client */
477 if ((host = nlm_lookup_host(1, &saddr, prot, vers)) != NULL) {
478 nlmsvc_free_host_resources(host);
479 nlm_release_host(host);
480 }
481 }
482 471
483 return rpc_success; 472 return rpc_success;
484} 473}
@@ -495,7 +484,7 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
495 484
496 dprintk("lockd: GRANTED_RES called\n"); 485 dprintk("lockd: GRANTED_RES called\n");
497 486
498 nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); 487 nlmsvc_grant_reply(&argp->cookie, argp->status);
499 return rpc_success; 488 return rpc_success;
500} 489}
501 490
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 27288c83da96..b9926ce8782e 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -85,24 +85,20 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
85} 85}
86 86
87/* 87/*
88 * Traverse all shares for a given file (and host). 88 * Traverse all shares for a given file, and delete
89 * NLM_ACT_CHECK is handled by nlmsvc_inspect_file. 89 * those owned by the given (type of) host
90 */ 90 */
91void 91void nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file,
92nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action) 92 nlm_host_match_fn_t match)
93{ 93{
94 struct nlm_share *share, **shpp; 94 struct nlm_share *share, **shpp;
95 95
96 shpp = &file->f_shares; 96 shpp = &file->f_shares;
97 while ((share = *shpp) != NULL) { 97 while ((share = *shpp) != NULL) {
98 if (action == NLM_ACT_MARK) 98 if (match(share->s_host, host)) {
99 share->s_host->h_inuse = 1; 99 *shpp = share->s_next;
100 else if (action == NLM_ACT_UNLOCK) { 100 kfree(share);
101 if (host == NULL || host == share->s_host) { 101 continue;
102 *shpp = share->s_next;
103 kfree(share);
104 continue;
105 }
106 } 102 }
107 shpp = &share->s_next; 103 shpp = &share->s_next;
108 } 104 }
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index a92dd98f8401..514f5f20701e 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -25,9 +25,9 @@
25/* 25/*
26 * Global file hash table 26 * Global file hash table
27 */ 27 */
28#define FILE_HASH_BITS 5 28#define FILE_HASH_BITS 7
29#define FILE_NRHASH (1<<FILE_HASH_BITS) 29#define FILE_NRHASH (1<<FILE_HASH_BITS)
30static struct nlm_file * nlm_files[FILE_NRHASH]; 30static struct hlist_head nlm_files[FILE_NRHASH];
31static DEFINE_MUTEX(nlm_file_mutex); 31static DEFINE_MUTEX(nlm_file_mutex);
32 32
33#ifdef NFSD_DEBUG 33#ifdef NFSD_DEBUG
@@ -82,6 +82,7 @@ u32
82nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, 82nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
83 struct nfs_fh *f) 83 struct nfs_fh *f)
84{ 84{
85 struct hlist_node *pos;
85 struct nlm_file *file; 86 struct nlm_file *file;
86 unsigned int hash; 87 unsigned int hash;
87 u32 nfserr; 88 u32 nfserr;
@@ -93,7 +94,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
93 /* Lock file table */ 94 /* Lock file table */
94 mutex_lock(&nlm_file_mutex); 95 mutex_lock(&nlm_file_mutex);
95 96
96 for (file = nlm_files[hash]; file; file = file->f_next) 97 hlist_for_each_entry(file, pos, &nlm_files[hash], f_list)
97 if (!nfs_compare_fh(&file->f_handle, f)) 98 if (!nfs_compare_fh(&file->f_handle, f))
98 goto found; 99 goto found;
99 100
@@ -105,8 +106,9 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
105 goto out_unlock; 106 goto out_unlock;
106 107
107 memcpy(&file->f_handle, f, sizeof(struct nfs_fh)); 108 memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
108 file->f_hash = hash; 109 mutex_init(&file->f_mutex);
109 init_MUTEX(&file->f_sema); 110 INIT_HLIST_NODE(&file->f_list);
111 INIT_LIST_HEAD(&file->f_blocks);
110 112
111 /* Open the file. Note that this must not sleep for too long, else 113 /* Open the file. Note that this must not sleep for too long, else
112 * we would lock up lockd:-) So no NFS re-exports, folks. 114 * we would lock up lockd:-) So no NFS re-exports, folks.
@@ -115,12 +117,11 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
115 * the file. 117 * the file.
116 */ 118 */
117 if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) { 119 if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) {
118 dprintk("lockd: open failed (nfserr %d)\n", ntohl(nfserr)); 120 dprintk("lockd: open failed (error %d)\n", nfserr);
119 goto out_free; 121 goto out_free;
120 } 122 }
121 123
122 file->f_next = nlm_files[hash]; 124 hlist_add_head(&file->f_list, &nlm_files[hash]);
123 nlm_files[hash] = file;
124 125
125found: 126found:
126 dprintk("lockd: found file %p (count %d)\n", file, file->f_count); 127 dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
@@ -149,22 +150,14 @@ out_free:
149static inline void 150static inline void
150nlm_delete_file(struct nlm_file *file) 151nlm_delete_file(struct nlm_file *file)
151{ 152{
152 struct nlm_file **fp, *f;
153
154 nlm_debug_print_file("closing file", file); 153 nlm_debug_print_file("closing file", file);
155 154 if (!hlist_unhashed(&file->f_list)) {
156 fp = nlm_files + file->f_hash; 155 hlist_del(&file->f_list);
157 while ((f = *fp) != NULL) { 156 nlmsvc_ops->fclose(file->f_file);
158 if (f == file) { 157 kfree(file);
159 *fp = file->f_next; 158 } else {
160 nlmsvc_ops->fclose(file->f_file); 159 printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
161 kfree(file);
162 return;
163 }
164 fp = &f->f_next;
165 } 160 }
166
167 printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
168} 161}
169 162
170/* 163/*
@@ -172,7 +165,8 @@ nlm_delete_file(struct nlm_file *file)
172 * action. 165 * action.
173 */ 166 */
174static int 167static int
175nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action) 168nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
169 nlm_host_match_fn_t match)
176{ 170{
177 struct inode *inode = nlmsvc_file_inode(file); 171 struct inode *inode = nlmsvc_file_inode(file);
178 struct file_lock *fl; 172 struct file_lock *fl;
@@ -186,17 +180,11 @@ again:
186 180
187 /* update current lock count */ 181 /* update current lock count */
188 file->f_locks++; 182 file->f_locks++;
183
189 lockhost = (struct nlm_host *) fl->fl_owner; 184 lockhost = (struct nlm_host *) fl->fl_owner;
190 if (action == NLM_ACT_MARK) 185 if (match(lockhost, host)) {
191 lockhost->h_inuse = 1;
192 else if (action == NLM_ACT_CHECK)
193 return 1;
194 else if (action == NLM_ACT_UNLOCK) {
195 struct file_lock lock = *fl; 186 struct file_lock lock = *fl;
196 187
197 if (host && lockhost != host)
198 continue;
199
200 lock.fl_type = F_UNLCK; 188 lock.fl_type = F_UNLCK;
201 lock.fl_start = 0; 189 lock.fl_start = 0;
202 lock.fl_end = OFFSET_MAX; 190 lock.fl_end = OFFSET_MAX;
@@ -213,53 +201,66 @@ again:
213} 201}
214 202
215/* 203/*
216 * Operate on a single file 204 * Inspect a single file
217 */ 205 */
218static inline int 206static inline int
219nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action) 207nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, nlm_host_match_fn_t match)
220{ 208{
221 if (action == NLM_ACT_CHECK) { 209 nlmsvc_traverse_blocks(host, file, match);
222 /* Fast path for mark and sweep garbage collection */ 210 nlmsvc_traverse_shares(host, file, match);
223 if (file->f_count || file->f_blocks || file->f_shares) 211 return nlm_traverse_locks(host, file, match);
212}
213
214/*
215 * Quick check whether there are still any locks, blocks or
216 * shares on a given file.
217 */
218static inline int
219nlm_file_inuse(struct nlm_file *file)
220{
221 struct inode *inode = nlmsvc_file_inode(file);
222 struct file_lock *fl;
223
224 if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
225 return 1;
226
227 for (fl = inode->i_flock; fl; fl = fl->fl_next) {
228 if (fl->fl_lmops == &nlmsvc_lock_operations)
224 return 1; 229 return 1;
225 } else {
226 nlmsvc_traverse_blocks(host, file, action);
227 nlmsvc_traverse_shares(host, file, action);
228 } 230 }
229 return nlm_traverse_locks(host, file, action); 231 file->f_locks = 0;
232 return 0;
230} 233}
231 234
232/* 235/*
233 * Loop over all files in the file table. 236 * Loop over all files in the file table.
234 */ 237 */
235static int 238static int
236nlm_traverse_files(struct nlm_host *host, int action) 239nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match)
237{ 240{
238 struct nlm_file *file, **fp; 241 struct hlist_node *pos, *next;
242 struct nlm_file *file;
239 int i, ret = 0; 243 int i, ret = 0;
240 244
241 mutex_lock(&nlm_file_mutex); 245 mutex_lock(&nlm_file_mutex);
242 for (i = 0; i < FILE_NRHASH; i++) { 246 for (i = 0; i < FILE_NRHASH; i++) {
243 fp = nlm_files + i; 247 hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) {
244 while ((file = *fp) != NULL) {
245 file->f_count++; 248 file->f_count++;
246 mutex_unlock(&nlm_file_mutex); 249 mutex_unlock(&nlm_file_mutex);
247 250
248 /* Traverse locks, blocks and shares of this file 251 /* Traverse locks, blocks and shares of this file
249 * and update file->f_locks count */ 252 * and update file->f_locks count */
250 if (nlm_inspect_file(host, file, action)) 253 if (nlm_inspect_file(host, file, match))
251 ret = 1; 254 ret = 1;
252 255
253 mutex_lock(&nlm_file_mutex); 256 mutex_lock(&nlm_file_mutex);
254 file->f_count--; 257 file->f_count--;
255 /* No more references to this file. Let go of it. */ 258 /* No more references to this file. Let go of it. */
256 if (!file->f_blocks && !file->f_locks 259 if (list_empty(&file->f_blocks) && !file->f_locks
257 && !file->f_shares && !file->f_count) { 260 && !file->f_shares && !file->f_count) {
258 *fp = file->f_next; 261 hlist_del(&file->f_list);
259 nlmsvc_ops->fclose(file->f_file); 262 nlmsvc_ops->fclose(file->f_file);
260 kfree(file); 263 kfree(file);
261 } else {
262 fp = &file->f_next;
263 } 264 }
264 } 265 }
265 } 266 }
@@ -286,23 +287,54 @@ nlm_release_file(struct nlm_file *file)
286 mutex_lock(&nlm_file_mutex); 287 mutex_lock(&nlm_file_mutex);
287 288
288 /* If there are no more locks etc, delete the file */ 289 /* If there are no more locks etc, delete the file */
289 if(--file->f_count == 0) { 290 if (--file->f_count == 0 && !nlm_file_inuse(file))
290 if(!nlm_inspect_file(NULL, file, NLM_ACT_CHECK)) 291 nlm_delete_file(file);
291 nlm_delete_file(file);
292 }
293 292
294 mutex_unlock(&nlm_file_mutex); 293 mutex_unlock(&nlm_file_mutex);
295} 294}
296 295
297/* 296/*
297 * Helpers function for resource traversal
298 *
299 * nlmsvc_mark_host:
300 * used by the garbage collector; simply sets h_inuse.
301 * Always returns 0.
302 *
303 * nlmsvc_same_host:
304 * returns 1 iff the two hosts match. Used to release
305 * all resources bound to a specific host.
306 *
307 * nlmsvc_is_client:
308 * returns 1 iff the host is a client.
309 * Used by nlmsvc_invalidate_all
310 */
311static int
312nlmsvc_mark_host(struct nlm_host *host, struct nlm_host *dummy)
313{
314 host->h_inuse = 1;
315 return 0;
316}
317
318static int
319nlmsvc_same_host(struct nlm_host *host, struct nlm_host *other)
320{
321 return host == other;
322}
323
324static int
325nlmsvc_is_client(struct nlm_host *host, struct nlm_host *dummy)
326{
327 return host->h_server;
328}
329
330/*
298 * Mark all hosts that still hold resources 331 * Mark all hosts that still hold resources
299 */ 332 */
300void 333void
301nlmsvc_mark_resources(void) 334nlmsvc_mark_resources(void)
302{ 335{
303 dprintk("lockd: nlmsvc_mark_resources\n"); 336 dprintk("lockd: nlmsvc_mark_resources\n");
304 337 nlm_traverse_files(NULL, nlmsvc_mark_host);
305 nlm_traverse_files(NULL, NLM_ACT_MARK);
306} 338}
307 339
308/* 340/*
@@ -313,23 +345,25 @@ nlmsvc_free_host_resources(struct nlm_host *host)
313{ 345{
314 dprintk("lockd: nlmsvc_free_host_resources\n"); 346 dprintk("lockd: nlmsvc_free_host_resources\n");
315 347
316 if (nlm_traverse_files(host, NLM_ACT_UNLOCK)) 348 if (nlm_traverse_files(host, nlmsvc_same_host)) {
317 printk(KERN_WARNING 349 printk(KERN_WARNING
318 "lockd: couldn't remove all locks held by %s", 350 "lockd: couldn't remove all locks held by %s\n",
319 host->h_name); 351 host->h_name);
352 BUG();
353 }
320} 354}
321 355
322/* 356/*
323 * delete all hosts structs for clients 357 * Remove all locks held for clients
324 */ 358 */
325void 359void
326nlmsvc_invalidate_all(void) 360nlmsvc_invalidate_all(void)
327{ 361{
328 struct nlm_host *host; 362 /* Release all locks held by NFS clients.
329 while ((host = nlm_find_client()) != NULL) { 363 * Previously, the code would call
330 nlmsvc_free_host_resources(host); 364 * nlmsvc_free_host_resources for each client in
331 host->h_expires = 0; 365 * turn, which is about as inefficient as it gets.
332 host->h_killed = 1; 366 * Now we just do it once in nlm_traverse_files.
333 nlm_release_host(host); 367 */
334 } 368 nlm_traverse_files(NULL, nlmsvc_is_client);
335} 369}
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 033ea4ac2c30..61c46facf257 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -515,7 +515,7 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
515 */ 515 */
516#define NLM_void_sz 0 516#define NLM_void_sz 0
517#define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) 517#define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
518#define NLM_caller_sz 1+XDR_QUADLEN(sizeof(system_utsname.nodename)) 518#define NLM_caller_sz 1+XDR_QUADLEN(sizeof(utsname()->nodename))
519#define NLM_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) 519#define NLM_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ)
520/* #define NLM_owner_sz 1+XDR_QUADLEN(NLM_MAXOWNER) */ 520/* #define NLM_owner_sz 1+XDR_QUADLEN(NLM_MAXOWNER) */
521#define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE) 521#define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE)
diff --git a/fs/locks.c b/fs/locks.c
index 21dfadfca2bc..e0b6a80649a0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1514,7 +1514,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1514 goto out_unlock; 1514 goto out_unlock;
1515 } 1515 }
1516 1516
1517 error = f_setown(filp, current->pid, 0); 1517 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
1518out_unlock: 1518out_unlock:
1519 unlock_kernel(); 1519 unlock_kernel();
1520 return error; 1520 return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index 66d921e14fee..55442a6cf221 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -133,7 +133,7 @@ struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
133 133
134static inline int check_mnt(struct vfsmount *mnt) 134static inline int check_mnt(struct vfsmount *mnt)
135{ 135{
136 return mnt->mnt_namespace == current->namespace; 136 return mnt->mnt_namespace == current->nsproxy->namespace;
137} 137}
138 138
139static void touch_namespace(struct namespace *ns) 139static void touch_namespace(struct namespace *ns)
@@ -830,7 +830,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
830 if (parent_nd) { 830 if (parent_nd) {
831 detach_mnt(source_mnt, parent_nd); 831 detach_mnt(source_mnt, parent_nd);
832 attach_mnt(source_mnt, nd); 832 attach_mnt(source_mnt, nd);
833 touch_namespace(current->namespace); 833 touch_namespace(current->nsproxy->namespace);
834 } else { 834 } else {
835 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); 835 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
836 commit_tree(source_mnt); 836 commit_tree(source_mnt);
@@ -1441,7 +1441,7 @@ dput_out:
1441 */ 1441 */
1442struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs) 1442struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
1443{ 1443{
1444 struct namespace *namespace = tsk->namespace; 1444 struct namespace *namespace = tsk->nsproxy->namespace;
1445 struct namespace *new_ns; 1445 struct namespace *new_ns;
1446 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; 1446 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
1447 struct vfsmount *p, *q; 1447 struct vfsmount *p, *q;
@@ -1508,7 +1508,7 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
1508 1508
1509int copy_namespace(int flags, struct task_struct *tsk) 1509int copy_namespace(int flags, struct task_struct *tsk)
1510{ 1510{
1511 struct namespace *namespace = tsk->namespace; 1511 struct namespace *namespace = tsk->nsproxy->namespace;
1512 struct namespace *new_ns; 1512 struct namespace *new_ns;
1513 int err = 0; 1513 int err = 0;
1514 1514
@@ -1531,7 +1531,7 @@ int copy_namespace(int flags, struct task_struct *tsk)
1531 goto out; 1531 goto out;
1532 } 1532 }
1533 1533
1534 tsk->namespace = new_ns; 1534 tsk->nsproxy->namespace = new_ns;
1535 1535
1536out: 1536out:
1537 put_namespace(namespace); 1537 put_namespace(namespace);
@@ -1754,7 +1754,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
1754 detach_mnt(user_nd.mnt, &root_parent); 1754 detach_mnt(user_nd.mnt, &root_parent);
1755 attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */ 1755 attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */
1756 attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */ 1756 attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
1757 touch_namespace(current->namespace); 1757 touch_namespace(current->nsproxy->namespace);
1758 spin_unlock(&vfsmount_lock); 1758 spin_unlock(&vfsmount_lock);
1759 chroot_fs_refs(&user_nd, &new_nd); 1759 chroot_fs_refs(&user_nd, &new_nd);
1760 security_sb_post_pivotroot(&user_nd, &new_nd); 1760 security_sb_post_pivotroot(&user_nd, &new_nd);
@@ -1780,7 +1780,6 @@ static void __init init_mount_tree(void)
1780{ 1780{
1781 struct vfsmount *mnt; 1781 struct vfsmount *mnt;
1782 struct namespace *namespace; 1782 struct namespace *namespace;
1783 struct task_struct *g, *p;
1784 1783
1785 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 1784 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
1786 if (IS_ERR(mnt)) 1785 if (IS_ERR(mnt))
@@ -1796,13 +1795,8 @@ static void __init init_mount_tree(void)
1796 namespace->root = mnt; 1795 namespace->root = mnt;
1797 mnt->mnt_namespace = namespace; 1796 mnt->mnt_namespace = namespace;
1798 1797
1799 init_task.namespace = namespace; 1798 init_task.nsproxy->namespace = namespace;
1800 read_lock(&tasklist_lock); 1799 get_namespace(namespace);
1801 do_each_thread(g, p) {
1802 get_namespace(namespace);
1803 p->namespace = namespace;
1804 } while_each_thread(g, p);
1805 read_unlock(&tasklist_lock);
1806 1800
1807 set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root); 1801 set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
1808 set_fs_root(current->fs, namespace->root, namespace->root->mnt_root); 1802 set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a3ee11364db0..7933e2e99dbc 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -58,7 +58,6 @@ module_param_call(callback_tcpport, param_set_port, param_get_int,
58 */ 58 */
59static void nfs_callback_svc(struct svc_rqst *rqstp) 59static void nfs_callback_svc(struct svc_rqst *rqstp)
60{ 60{
61 struct svc_serv *serv = rqstp->rq_server;
62 int err; 61 int err;
63 62
64 __module_get(THIS_MODULE); 63 __module_get(THIS_MODULE);
@@ -80,7 +79,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
80 /* 79 /*
81 * Listen for a request on the socket 80 * Listen for a request on the socket
82 */ 81 */
83 err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT); 82 err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
84 if (err == -EAGAIN || err == -EINTR) 83 if (err == -EAGAIN || err == -EINTR)
85 continue; 84 continue;
86 if (err < 0) { 85 if (err < 0) {
@@ -91,7 +90,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
91 } 90 }
92 dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__, 91 dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__,
93 NIPQUAD(rqstp->rq_addr.sin_addr.s_addr)); 92 NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
94 svc_process(serv, rqstp); 93 svc_process(rqstp);
95 } 94 }
96 95
97 svc_exit_thread(rqstp); 96 svc_exit_thread(rqstp);
@@ -116,7 +115,7 @@ int nfs_callback_up(void)
116 goto out; 115 goto out;
117 init_completion(&nfs_callback_info.started); 116 init_completion(&nfs_callback_info.started);
118 init_completion(&nfs_callback_info.stopped); 117 init_completion(&nfs_callback_info.stopped);
119 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE); 118 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
120 ret = -ENOMEM; 119 ret = -ENOMEM;
121 if (!serv) 120 if (!serv)
122 goto out_err; 121 goto out_err;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ec1938d4b814..8106f3b29e4a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -460,7 +460,8 @@ static int nfs_start_lockd(struct nfs_server *server)
460 goto out; 460 goto out;
461 if (server->flags & NFS_MOUNT_NONLM) 461 if (server->flags & NFS_MOUNT_NONLM)
462 goto out; 462 goto out;
463 error = lockd_up(); 463 error = lockd_up((server->flags & NFS_MOUNT_TCP) ?
464 IPPROTO_TCP : IPPROTO_UDP);
464 if (error < 0) 465 if (error < 0)
465 server->flags |= NFS_MOUNT_NONLM; 466 server->flags |= NFS_MOUNT_NONLM;
466 else 467 else
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index c0a754ecdee6..1d656a645199 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -312,7 +312,7 @@ static int __init root_nfs_name(char *name)
312 /* Override them by options set on kernel command-line */ 312 /* Override them by options set on kernel command-line */
313 root_nfs_parse(name, buf); 313 root_nfs_parse(name, buf);
314 314
315 cp = system_utsname.nodename; 315 cp = utsname()->nodename;
316 if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { 316 if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
317 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); 317 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
318 return -1; 318 return -1;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 01bc68c628ad..e13fa23bd108 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -319,12 +319,25 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
319 319
320static struct cache_head *export_table[EXPORT_HASHMAX]; 320static struct cache_head *export_table[EXPORT_HASHMAX];
321 321
322static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
323{
324 int i;
325
326 for (i = 0; i < fsloc->locations_count; i++) {
327 kfree(fsloc->locations[i].path);
328 kfree(fsloc->locations[i].hosts);
329 }
330 kfree(fsloc->locations);
331}
332
322static void svc_export_put(struct kref *ref) 333static void svc_export_put(struct kref *ref)
323{ 334{
324 struct svc_export *exp = container_of(ref, struct svc_export, h.ref); 335 struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
325 dput(exp->ex_dentry); 336 dput(exp->ex_dentry);
326 mntput(exp->ex_mnt); 337 mntput(exp->ex_mnt);
327 auth_domain_put(exp->ex_client); 338 auth_domain_put(exp->ex_client);
339 kfree(exp->ex_path);
340 nfsd4_fslocs_free(&exp->ex_fslocs);
328 kfree(exp); 341 kfree(exp);
329} 342}
330 343
@@ -370,7 +383,7 @@ static int check_export(struct inode *inode, int flags)
370 */ 383 */
371 if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && 384 if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
372 !(flags & NFSEXP_FSID)) { 385 !(flags & NFSEXP_FSID)) {
373 dprintk("exp_export: export of non-dev fs without fsid"); 386 dprintk("exp_export: export of non-dev fs without fsid\n");
374 return -EINVAL; 387 return -EINVAL;
375 } 388 }
376 if (!inode->i_sb->s_export_op) { 389 if (!inode->i_sb->s_export_op) {
@@ -386,6 +399,69 @@ static int check_export(struct inode *inode, int flags)
386 399
387} 400}
388 401
402#ifdef CONFIG_NFSD_V4
403
404static int
405fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
406{
407 int len;
408 int migrated, i, err;
409
410 len = qword_get(mesg, buf, PAGE_SIZE);
411 if (len != 5 || memcmp(buf, "fsloc", 5))
412 return 0;
413
414 /* listsize */
415 err = get_int(mesg, &fsloc->locations_count);
416 if (err)
417 return err;
418 if (fsloc->locations_count > MAX_FS_LOCATIONS)
419 return -EINVAL;
420 if (fsloc->locations_count == 0)
421 return 0;
422
423 fsloc->locations = kzalloc(fsloc->locations_count
424 * sizeof(struct nfsd4_fs_location), GFP_KERNEL);
425 if (!fsloc->locations)
426 return -ENOMEM;
427 for (i=0; i < fsloc->locations_count; i++) {
428 /* colon separated host list */
429 err = -EINVAL;
430 len = qword_get(mesg, buf, PAGE_SIZE);
431 if (len <= 0)
432 goto out_free_all;
433 err = -ENOMEM;
434 fsloc->locations[i].hosts = kstrdup(buf, GFP_KERNEL);
435 if (!fsloc->locations[i].hosts)
436 goto out_free_all;
437 err = -EINVAL;
438 /* slash separated path component list */
439 len = qword_get(mesg, buf, PAGE_SIZE);
440 if (len <= 0)
441 goto out_free_all;
442 err = -ENOMEM;
443 fsloc->locations[i].path = kstrdup(buf, GFP_KERNEL);
444 if (!fsloc->locations[i].path)
445 goto out_free_all;
446 }
447 /* migrated */
448 err = get_int(mesg, &migrated);
449 if (err)
450 goto out_free_all;
451 err = -EINVAL;
452 if (migrated < 0 || migrated > 1)
453 goto out_free_all;
454 fsloc->migrated = migrated;
455 return 0;
456out_free_all:
457 nfsd4_fslocs_free(fsloc);
458 return err;
459}
460
461#else /* CONFIG_NFSD_V4 */
462static inline int fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) { return 0; }
463#endif
464
389static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) 465static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
390{ 466{
391 /* client path expiry [flags anonuid anongid fsid] */ 467 /* client path expiry [flags anonuid anongid fsid] */
@@ -398,6 +474,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
398 int an_int; 474 int an_int;
399 475
400 nd.dentry = NULL; 476 nd.dentry = NULL;
477 exp.ex_path = NULL;
401 478
402 if (mesg[mlen-1] != '\n') 479 if (mesg[mlen-1] != '\n')
403 return -EINVAL; 480 return -EINVAL;
@@ -428,6 +505,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
428 exp.ex_client = dom; 505 exp.ex_client = dom;
429 exp.ex_mnt = nd.mnt; 506 exp.ex_mnt = nd.mnt;
430 exp.ex_dentry = nd.dentry; 507 exp.ex_dentry = nd.dentry;
508 exp.ex_path = kstrdup(buf, GFP_KERNEL);
509 err = -ENOMEM;
510 if (!exp.ex_path)
511 goto out;
431 512
432 /* expiry */ 513 /* expiry */
433 err = -EINVAL; 514 err = -EINVAL;
@@ -435,6 +516,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
435 if (exp.h.expiry_time == 0) 516 if (exp.h.expiry_time == 0)
436 goto out; 517 goto out;
437 518
519 /* fs locations */
520 exp.ex_fslocs.locations = NULL;
521 exp.ex_fslocs.locations_count = 0;
522 exp.ex_fslocs.migrated = 0;
523
438 /* flags */ 524 /* flags */
439 err = get_int(&mesg, &an_int); 525 err = get_int(&mesg, &an_int);
440 if (err == -ENOENT) 526 if (err == -ENOENT)
@@ -460,6 +546,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
460 546
461 err = check_export(nd.dentry->d_inode, exp.ex_flags); 547 err = check_export(nd.dentry->d_inode, exp.ex_flags);
462 if (err) goto out; 548 if (err) goto out;
549
550 err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
551 if (err)
552 goto out;
463 } 553 }
464 554
465 expp = svc_export_lookup(&exp); 555 expp = svc_export_lookup(&exp);
@@ -473,6 +563,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
473 else 563 else
474 exp_put(expp); 564 exp_put(expp);
475 out: 565 out:
566 kfree(exp.ex_path);
476 if (nd.dentry) 567 if (nd.dentry)
477 path_release(&nd); 568 path_release(&nd);
478 out_no_path: 569 out_no_path:
@@ -482,7 +573,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
482 return err; 573 return err;
483} 574}
484 575
485static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong); 576static void exp_flags(struct seq_file *m, int flag, int fsid,
577 uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs);
486 578
487static int svc_export_show(struct seq_file *m, 579static int svc_export_show(struct seq_file *m,
488 struct cache_detail *cd, 580 struct cache_detail *cd,
@@ -501,8 +593,8 @@ static int svc_export_show(struct seq_file *m,
501 seq_putc(m, '('); 593 seq_putc(m, '(');
502 if (test_bit(CACHE_VALID, &h->flags) && 594 if (test_bit(CACHE_VALID, &h->flags) &&
503 !test_bit(CACHE_NEGATIVE, &h->flags)) 595 !test_bit(CACHE_NEGATIVE, &h->flags))
504 exp_flags(m, exp->ex_flags, exp->ex_fsid, 596 exp_flags(m, exp->ex_flags, exp->ex_fsid,
505 exp->ex_anon_uid, exp->ex_anon_gid); 597 exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs);
506 seq_puts(m, ")\n"); 598 seq_puts(m, ")\n");
507 return 0; 599 return 0;
508} 600}
@@ -524,6 +616,10 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
524 new->ex_client = item->ex_client; 616 new->ex_client = item->ex_client;
525 new->ex_dentry = dget(item->ex_dentry); 617 new->ex_dentry = dget(item->ex_dentry);
526 new->ex_mnt = mntget(item->ex_mnt); 618 new->ex_mnt = mntget(item->ex_mnt);
619 new->ex_path = NULL;
620 new->ex_fslocs.locations = NULL;
621 new->ex_fslocs.locations_count = 0;
622 new->ex_fslocs.migrated = 0;
527} 623}
528 624
529static void export_update(struct cache_head *cnew, struct cache_head *citem) 625static void export_update(struct cache_head *cnew, struct cache_head *citem)
@@ -535,6 +631,14 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
535 new->ex_anon_uid = item->ex_anon_uid; 631 new->ex_anon_uid = item->ex_anon_uid;
536 new->ex_anon_gid = item->ex_anon_gid; 632 new->ex_anon_gid = item->ex_anon_gid;
537 new->ex_fsid = item->ex_fsid; 633 new->ex_fsid = item->ex_fsid;
634 new->ex_path = item->ex_path;
635 item->ex_path = NULL;
636 new->ex_fslocs.locations = item->ex_fslocs.locations;
637 item->ex_fslocs.locations = NULL;
638 new->ex_fslocs.locations_count = item->ex_fslocs.locations_count;
639 item->ex_fslocs.locations_count = 0;
640 new->ex_fslocs.migrated = item->ex_fslocs.migrated;
641 item->ex_fslocs.migrated = 0;
538} 642}
539 643
540static struct cache_head *svc_export_alloc(void) 644static struct cache_head *svc_export_alloc(void)
@@ -1048,36 +1152,28 @@ int
1048exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, 1152exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
1049 struct cache_req *creq) 1153 struct cache_req *creq)
1050{ 1154{
1051 struct svc_expkey *fsid_key;
1052 struct svc_export *exp; 1155 struct svc_export *exp;
1053 int rv; 1156 int rv;
1054 u32 fsidv[2]; 1157 u32 fsidv[2];
1055 1158
1056 mk_fsid_v1(fsidv, 0); 1159 mk_fsid_v1(fsidv, 0);
1057 1160
1058 fsid_key = exp_find_key(clp, 1, fsidv, creq); 1161 exp = exp_find(clp, 1, fsidv, creq);
1059 if (IS_ERR(fsid_key) && PTR_ERR(fsid_key) == -EAGAIN) 1162 if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN)
1060 return nfserr_dropit; 1163 return nfserr_dropit;
1061 if (!fsid_key || IS_ERR(fsid_key))
1062 return nfserr_perm;
1063
1064 exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq);
1065 if (exp == NULL) 1164 if (exp == NULL)
1066 rv = nfserr_perm; 1165 return nfserr_perm;
1067 else if (IS_ERR(exp)) 1166 else if (IS_ERR(exp))
1068 rv = nfserrno(PTR_ERR(exp)); 1167 return nfserrno(PTR_ERR(exp));
1069 else { 1168 rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
1070 rv = fh_compose(fhp, exp, 1169 exp_put(exp);
1071 fsid_key->ek_dentry, NULL);
1072 exp_put(exp);
1073 }
1074 cache_put(&fsid_key->h, &svc_expkey_cache);
1075 return rv; 1170 return rv;
1076} 1171}
1077 1172
1078/* Iterator */ 1173/* Iterator */
1079 1174
1080static void *e_start(struct seq_file *m, loff_t *pos) 1175static void *e_start(struct seq_file *m, loff_t *pos)
1176 __acquires(svc_export_cache.hash_lock)
1081{ 1177{
1082 loff_t n = *pos; 1178 loff_t n = *pos;
1083 unsigned hash, export; 1179 unsigned hash, export;
@@ -1086,7 +1182,7 @@ static void *e_start(struct seq_file *m, loff_t *pos)
1086 exp_readlock(); 1182 exp_readlock();
1087 read_lock(&svc_export_cache.hash_lock); 1183 read_lock(&svc_export_cache.hash_lock);
1088 if (!n--) 1184 if (!n--)
1089 return (void *)1; 1185 return SEQ_START_TOKEN;
1090 hash = n >> 32; 1186 hash = n >> 32;
1091 export = n & ((1LL<<32) - 1); 1187 export = n & ((1LL<<32) - 1);
1092 1188
@@ -1110,7 +1206,7 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos)
1110 struct cache_head *ch = p; 1206 struct cache_head *ch = p;
1111 int hash = (*pos >> 32); 1207 int hash = (*pos >> 32);
1112 1208
1113 if (p == (void *)1) 1209 if (p == SEQ_START_TOKEN)
1114 hash = 0; 1210 hash = 0;
1115 else if (ch->next == NULL) { 1211 else if (ch->next == NULL) {
1116 hash++; 1212 hash++;
@@ -1131,6 +1227,7 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos)
1131} 1227}
1132 1228
1133static void e_stop(struct seq_file *m, void *p) 1229static void e_stop(struct seq_file *m, void *p)
1230 __releases(svc_export_cache.hash_lock)
1134{ 1231{
1135 read_unlock(&svc_export_cache.hash_lock); 1232 read_unlock(&svc_export_cache.hash_lock);
1136 exp_readunlock(); 1233 exp_readunlock();
@@ -1156,7 +1253,8 @@ static struct flags {
1156 { 0, {"", ""}} 1253 { 0, {"", ""}}
1157}; 1254};
1158 1255
1159static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong) 1256static void exp_flags(struct seq_file *m, int flag, int fsid,
1257 uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc)
1160{ 1258{
1161 int first = 0; 1259 int first = 0;
1162 struct flags *flg; 1260 struct flags *flg;
@@ -1172,21 +1270,34 @@ static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t
1172 seq_printf(m, "%sanonuid=%d", first++?",":"", anonu); 1270 seq_printf(m, "%sanonuid=%d", first++?",":"", anonu);
1173 if (anong != (gid_t)-2 && anong != (0x10000-2)) 1271 if (anong != (gid_t)-2 && anong != (0x10000-2))
1174 seq_printf(m, "%sanongid=%d", first++?",":"", anong); 1272 seq_printf(m, "%sanongid=%d", first++?",":"", anong);
1273 if (fsloc && fsloc->locations_count > 0) {
1274 char *loctype = (fsloc->migrated) ? "refer" : "replicas";
1275 int i;
1276
1277 seq_printf(m, "%s%s=", first++?",":"", loctype);
1278 seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\");
1279 seq_putc(m, '@');
1280 seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\");
1281 for (i = 1; i < fsloc->locations_count; i++) {
1282 seq_putc(m, ';');
1283 seq_escape(m, fsloc->locations[i].path, ",;@ \t\n\\");
1284 seq_putc(m, '@');
1285 seq_escape(m, fsloc->locations[i].hosts, ",;@ \t\n\\");
1286 }
1287 }
1175} 1288}
1176 1289
1177static int e_show(struct seq_file *m, void *p) 1290static int e_show(struct seq_file *m, void *p)
1178{ 1291{
1179 struct cache_head *cp = p; 1292 struct cache_head *cp = p;
1180 struct svc_export *exp = container_of(cp, struct svc_export, h); 1293 struct svc_export *exp = container_of(cp, struct svc_export, h);
1181 svc_client *clp;
1182 1294
1183 if (p == (void *)1) { 1295 if (p == SEQ_START_TOKEN) {
1184 seq_puts(m, "# Version 1.1\n"); 1296 seq_puts(m, "# Version 1.1\n");
1185 seq_puts(m, "# Path Client(Flags) # IPs\n"); 1297 seq_puts(m, "# Path Client(Flags) # IPs\n");
1186 return 0; 1298 return 0;
1187 } 1299 }
1188 1300
1189 clp = exp->ex_client;
1190 cache_get(&exp->h); 1301 cache_get(&exp->h);
1191 if (cache_check(&svc_export_cache, &exp->h, NULL)) 1302 if (cache_check(&svc_export_cache, &exp->h, NULL))
1192 return 0; 1303 return 0;
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index fc95c4df6693..9187755661df 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nfsd/nfsacl.c 2 * linux/fs/nfsd/nfs2acl.c
3 * 3 *
4 * Process version 2 NFSACL requests. 4 * Process version 2 NFSACL requests.
5 * 5 *
@@ -241,7 +241,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
241 241
242 rqstp->rq_res.page_len = w; 242 rqstp->rq_res.page_len = w;
243 while (w > 0) { 243 while (w > 0) {
244 if (!svc_take_res_page(rqstp)) 244 if (!rqstp->rq_respages[rqstp->rq_resused++])
245 return 0; 245 return 0;
246 w -= PAGE_SIZE; 246 w -= PAGE_SIZE;
247 } 247 }
@@ -333,4 +333,5 @@ struct svc_version nfsd_acl_version2 = {
333 .vs_proc = nfsd_acl_procedures2, 333 .vs_proc = nfsd_acl_procedures2,
334 .vs_dispatch = nfsd_dispatch, 334 .vs_dispatch = nfsd_dispatch,
335 .vs_xdrsize = NFS3_SVC_XDRSIZE, 335 .vs_xdrsize = NFS3_SVC_XDRSIZE,
336 .vs_hidden = 1,
336}; 337};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 16e10c170aed..d4bdc00c1169 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -185,7 +185,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
185 185
186 rqstp->rq_res.page_len = w; 186 rqstp->rq_res.page_len = w;
187 while (w > 0) { 187 while (w > 0) {
188 if (!svc_take_res_page(rqstp)) 188 if (!rqstp->rq_respages[rqstp->rq_resused++])
189 return 0; 189 return 0;
190 w -= PAGE_SIZE; 190 w -= PAGE_SIZE;
191 } 191 }
@@ -263,5 +263,6 @@ struct svc_version nfsd_acl_version3 = {
263 .vs_proc = nfsd_acl_procedures3, 263 .vs_proc = nfsd_acl_procedures3,
264 .vs_dispatch = nfsd_dispatch, 264 .vs_dispatch = nfsd_dispatch,
265 .vs_xdrsize = NFS3_SVC_XDRSIZE, 265 .vs_xdrsize = NFS3_SVC_XDRSIZE,
266 .vs_hidden = 1,
266}; 267};
267 268
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index f61142afea44..a5ebc7dbb384 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -160,6 +160,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
160 struct nfsd3_readres *resp) 160 struct nfsd3_readres *resp)
161{ 161{
162 int nfserr; 162 int nfserr;
163 u32 max_blocksize = svc_max_payload(rqstp);
163 164
164 dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", 165 dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
165 SVCFH_fmt(&argp->fh), 166 SVCFH_fmt(&argp->fh),
@@ -172,15 +173,15 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
172 */ 173 */
173 174
174 resp->count = argp->count; 175 resp->count = argp->count;
175 if (NFSSVC_MAXBLKSIZE < resp->count) 176 if (max_blocksize < resp->count)
176 resp->count = NFSSVC_MAXBLKSIZE; 177 resp->count = max_blocksize;
177 178
178 svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); 179 svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
179 180
180 fh_copy(&resp->fh, &argp->fh); 181 fh_copy(&resp->fh, &argp->fh);
181 nfserr = nfsd_read(rqstp, &resp->fh, NULL, 182 nfserr = nfsd_read(rqstp, &resp->fh, NULL,
182 argp->offset, 183 argp->offset,
183 argp->vec, argp->vlen, 184 rqstp->rq_vec, argp->vlen,
184 &resp->count); 185 &resp->count);
185 if (nfserr == 0) { 186 if (nfserr == 0) {
186 struct inode *inode = resp->fh.fh_dentry->d_inode; 187 struct inode *inode = resp->fh.fh_dentry->d_inode;
@@ -210,7 +211,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
210 resp->committed = argp->stable; 211 resp->committed = argp->stable;
211 nfserr = nfsd_write(rqstp, &resp->fh, NULL, 212 nfserr = nfsd_write(rqstp, &resp->fh, NULL,
212 argp->offset, 213 argp->offset,
213 argp->vec, argp->vlen, 214 rqstp->rq_vec, argp->vlen,
214 argp->len, 215 argp->len,
215 &resp->committed); 216 &resp->committed);
216 resp->count = argp->count; 217 resp->count = argp->count;
@@ -538,15 +539,16 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
538 struct nfsd3_fsinfores *resp) 539 struct nfsd3_fsinfores *resp)
539{ 540{
540 int nfserr; 541 int nfserr;
542 u32 max_blocksize = svc_max_payload(rqstp);
541 543
542 dprintk("nfsd: FSINFO(3) %s\n", 544 dprintk("nfsd: FSINFO(3) %s\n",
543 SVCFH_fmt(&argp->fh)); 545 SVCFH_fmt(&argp->fh));
544 546
545 resp->f_rtmax = NFSSVC_MAXBLKSIZE; 547 resp->f_rtmax = max_blocksize;
546 resp->f_rtpref = NFSSVC_MAXBLKSIZE; 548 resp->f_rtpref = max_blocksize;
547 resp->f_rtmult = PAGE_SIZE; 549 resp->f_rtmult = PAGE_SIZE;
548 resp->f_wtmax = NFSSVC_MAXBLKSIZE; 550 resp->f_wtmax = max_blocksize;
549 resp->f_wtpref = NFSSVC_MAXBLKSIZE; 551 resp->f_wtpref = max_blocksize;
550 resp->f_wtmult = PAGE_SIZE; 552 resp->f_wtmult = PAGE_SIZE;
551 resp->f_dtpref = PAGE_SIZE; 553 resp->f_dtpref = PAGE_SIZE;
552 resp->f_maxfilesize = ~(u32) 0; 554 resp->f_maxfilesize = ~(u32) 0;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 243d94b9653a..247d518248bf 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -330,6 +330,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
330{ 330{
331 unsigned int len; 331 unsigned int len;
332 int v,pn; 332 int v,pn;
333 u32 max_blocksize = svc_max_payload(rqstp);
333 334
334 if (!(p = decode_fh(p, &args->fh)) 335 if (!(p = decode_fh(p, &args->fh))
335 || !(p = xdr_decode_hyper(p, &args->offset))) 336 || !(p = xdr_decode_hyper(p, &args->offset)))
@@ -337,17 +338,16 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
337 338
338 len = args->count = ntohl(*p++); 339 len = args->count = ntohl(*p++);
339 340
340 if (len > NFSSVC_MAXBLKSIZE) 341 if (len > max_blocksize)
341 len = NFSSVC_MAXBLKSIZE; 342 len = max_blocksize;
342 343
343 /* set up the kvec */ 344 /* set up the kvec */
344 v=0; 345 v=0;
345 while (len > 0) { 346 while (len > 0) {
346 pn = rqstp->rq_resused; 347 pn = rqstp->rq_resused++;
347 svc_take_page(rqstp); 348 rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
348 args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); 349 rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
349 args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; 350 len -= rqstp->rq_vec[v].iov_len;
350 len -= args->vec[v].iov_len;
351 v++; 351 v++;
352 } 352 }
353 args->vlen = v; 353 args->vlen = v;
@@ -359,6 +359,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
359 struct nfsd3_writeargs *args) 359 struct nfsd3_writeargs *args)
360{ 360{
361 unsigned int len, v, hdr; 361 unsigned int len, v, hdr;
362 u32 max_blocksize = svc_max_payload(rqstp);
362 363
363 if (!(p = decode_fh(p, &args->fh)) 364 if (!(p = decode_fh(p, &args->fh))
364 || !(p = xdr_decode_hyper(p, &args->offset))) 365 || !(p = xdr_decode_hyper(p, &args->offset)))
@@ -373,22 +374,22 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
373 rqstp->rq_arg.len - hdr < len) 374 rqstp->rq_arg.len - hdr < len)
374 return 0; 375 return 0;
375 376
376 args->vec[0].iov_base = (void*)p; 377 rqstp->rq_vec[0].iov_base = (void*)p;
377 args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; 378 rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
378 379
379 if (len > NFSSVC_MAXBLKSIZE) 380 if (len > max_blocksize)
380 len = NFSSVC_MAXBLKSIZE; 381 len = max_blocksize;
381 v= 0; 382 v= 0;
382 while (len > args->vec[v].iov_len) { 383 while (len > rqstp->rq_vec[v].iov_len) {
383 len -= args->vec[v].iov_len; 384 len -= rqstp->rq_vec[v].iov_len;
384 v++; 385 v++;
385 args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); 386 rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
386 args->vec[v].iov_len = PAGE_SIZE; 387 rqstp->rq_vec[v].iov_len = PAGE_SIZE;
387 } 388 }
388 args->vec[v].iov_len = len; 389 rqstp->rq_vec[v].iov_len = len;
389 args->vlen = v+1; 390 args->vlen = v+1;
390 391
391 return args->count == args->len && args->vec[0].iov_len > 0; 392 return args->count == args->len && rqstp->rq_vec[0].iov_len > 0;
392} 393}
393 394
394int 395int
@@ -446,11 +447,11 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
446 * This page appears in the rq_res.pages list, but as pages_len is always 447 * This page appears in the rq_res.pages list, but as pages_len is always
447 * 0, it won't get in the way 448 * 0, it won't get in the way
448 */ 449 */
449 svc_take_page(rqstp);
450 len = ntohl(*p++); 450 len = ntohl(*p++);
451 if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) 451 if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
452 return 0; 452 return 0;
453 args->tname = new = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); 453 args->tname = new =
454 page_address(rqstp->rq_respages[rqstp->rq_resused++]);
454 args->tlen = len; 455 args->tlen = len;
455 /* first copy and check from the first page */ 456 /* first copy and check from the first page */
456 old = (char*)p; 457 old = (char*)p;
@@ -522,8 +523,8 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p,
522{ 523{
523 if (!(p = decode_fh(p, &args->fh))) 524 if (!(p = decode_fh(p, &args->fh)))
524 return 0; 525 return 0;
525 svc_take_page(rqstp); 526 args->buffer =
526 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); 527 page_address(rqstp->rq_respages[rqstp->rq_resused++]);
527 528
528 return xdr_argsize_check(rqstp, p); 529 return xdr_argsize_check(rqstp, p);
529} 530}
@@ -554,8 +555,8 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
554 if (args->count > PAGE_SIZE) 555 if (args->count > PAGE_SIZE)
555 args->count = PAGE_SIZE; 556 args->count = PAGE_SIZE;
556 557
557 svc_take_page(rqstp); 558 args->buffer =
558 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); 559 page_address(rqstp->rq_respages[rqstp->rq_resused++]);
559 560
560 return xdr_argsize_check(rqstp, p); 561 return xdr_argsize_check(rqstp, p);
561} 562}
@@ -565,6 +566,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
565 struct nfsd3_readdirargs *args) 566 struct nfsd3_readdirargs *args)
566{ 567{
567 int len, pn; 568 int len, pn;
569 u32 max_blocksize = svc_max_payload(rqstp);
568 570
569 if (!(p = decode_fh(p, &args->fh))) 571 if (!(p = decode_fh(p, &args->fh)))
570 return 0; 572 return 0;
@@ -573,13 +575,12 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
573 args->dircount = ntohl(*p++); 575 args->dircount = ntohl(*p++);
574 args->count = ntohl(*p++); 576 args->count = ntohl(*p++);
575 577
576 len = (args->count > NFSSVC_MAXBLKSIZE) ? NFSSVC_MAXBLKSIZE : 578 len = (args->count > max_blocksize) ? max_blocksize :
577 args->count; 579 args->count;
578 args->count = len; 580 args->count = len;
579 581
580 while (len > 0) { 582 while (len > 0) {
581 pn = rqstp->rq_resused; 583 pn = rqstp->rq_resused++;
582 svc_take_page(rqstp);
583 if (!args->buffer) 584 if (!args->buffer)
584 args->buffer = page_address(rqstp->rq_respages[pn]); 585 args->buffer = page_address(rqstp->rq_respages[pn]);
585 len -= PAGE_SIZE; 586 len -= PAGE_SIZE;
@@ -668,7 +669,6 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
668 rqstp->rq_res.page_len = resp->len; 669 rqstp->rq_res.page_len = resp->len;
669 if (resp->len & 3) { 670 if (resp->len & 3) {
670 /* need to pad the tail */ 671 /* need to pad the tail */
671 rqstp->rq_restailpage = 0;
672 rqstp->rq_res.tail[0].iov_base = p; 672 rqstp->rq_res.tail[0].iov_base = p;
673 *p = 0; 673 *p = 0;
674 rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); 674 rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
@@ -693,7 +693,6 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
693 rqstp->rq_res.page_len = resp->count; 693 rqstp->rq_res.page_len = resp->count;
694 if (resp->count & 3) { 694 if (resp->count & 3) {
695 /* need to pad the tail */ 695 /* need to pad the tail */
696 rqstp->rq_restailpage = 0;
697 rqstp->rq_res.tail[0].iov_base = p; 696 rqstp->rq_res.tail[0].iov_base = p;
698 *p = 0; 697 *p = 0;
699 rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); 698 rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
@@ -768,7 +767,6 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
768 rqstp->rq_res.page_len = (resp->count) << 2; 767 rqstp->rq_res.page_len = (resp->count) << 2;
769 768
770 /* add the 'tail' to the end of the 'head' page - page 0. */ 769 /* add the 'tail' to the end of the 'head' page - page 0. */
771 rqstp->rq_restailpage = 0;
772 rqstp->rq_res.tail[0].iov_base = p; 770 rqstp->rq_res.tail[0].iov_base = p;
773 *p++ = 0; /* no more entries */ 771 *p++ = 0; /* no more entries */
774 *p++ = htonl(resp->common.err == nfserr_eof); 772 *p++ = htonl(resp->common.err == nfserr_eof);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index edb107e61b91..5d94555cdc83 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -63,6 +63,8 @@
63#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ 63#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
64 | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE) 64 | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE)
65 65
66#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS | NFS4_ACE_IDENTIFIER_GROUP)
67
66#define MASK_EQUAL(mask1, mask2) \ 68#define MASK_EQUAL(mask1, mask2) \
67 ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) 69 ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
68 70
@@ -96,24 +98,26 @@ deny_mask(u32 allow_mask, unsigned int flags)
96/* XXX: modify functions to return NFS errors; they're only ever 98/* XXX: modify functions to return NFS errors; they're only ever
97 * used by nfs code, after all.... */ 99 * used by nfs code, after all.... */
98 100
99static int 101/* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the
100mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) 102 * side of being more restrictive, so the mode bit mapping below is
103 * pessimistic. An optimistic version would be needed to handle DENY's,
104 * but we expect to coalesce all ALLOWs and DENYs before mapping to mode
105 * bits. */
106
107static void
108low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
101{ 109{
102 u32 ignore = 0; 110 u32 write_mode = NFS4_WRITE_MODE;
103 111
104 if (!(flags & NFS4_ACL_DIR)) 112 if (flags & NFS4_ACL_DIR)
105 ignore |= NFS4_ACE_DELETE_CHILD; /* ignore it */ 113 write_mode |= NFS4_ACE_DELETE_CHILD;
106 perm |= ignore;
107 *mode = 0; 114 *mode = 0;
108 if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE) 115 if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE)
109 *mode |= ACL_READ; 116 *mode |= ACL_READ;
110 if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE) 117 if ((perm & write_mode) == write_mode)
111 *mode |= ACL_WRITE; 118 *mode |= ACL_WRITE;
112 if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE) 119 if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE)
113 *mode |= ACL_EXECUTE; 120 *mode |= ACL_EXECUTE;
114 if (!MASK_EQUAL(perm, ignore|mask_from_posix(*mode, flags)))
115 return -EINVAL;
116 return 0;
117} 121}
118 122
119struct ace_container { 123struct ace_container {
@@ -338,38 +342,6 @@ sort_pacl(struct posix_acl *pacl)
338 return; 342 return;
339} 343}
340 344
341static int
342write_pace(struct nfs4_ace *ace, struct posix_acl *pacl,
343 struct posix_acl_entry **pace, short tag, unsigned int flags)
344{
345 struct posix_acl_entry *this = *pace;
346
347 if (*pace == pacl->a_entries + pacl->a_count)
348 return -EINVAL; /* fell off the end */
349 (*pace)++;
350 this->e_tag = tag;
351 if (tag == ACL_USER_OBJ)
352 flags |= NFS4_ACL_OWNER;
353 if (mode_from_nfs4(ace->access_mask, &this->e_perm, flags))
354 return -EINVAL;
355 this->e_id = (tag == ACL_USER || tag == ACL_GROUP ?
356 ace->who : ACL_UNDEFINED_ID);
357 return 0;
358}
359
360static struct nfs4_ace *
361get_next_v4_ace(struct list_head **p, struct list_head *head)
362{
363 struct nfs4_ace *ace;
364
365 *p = (*p)->next;
366 if (*p == head)
367 return NULL;
368 ace = list_entry(*p, struct nfs4_ace, l_ace);
369
370 return ace;
371}
372
373int 345int
374nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, 346nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
375 struct posix_acl **dpacl, unsigned int flags) 347 struct posix_acl **dpacl, unsigned int flags)
@@ -385,42 +357,23 @@ nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
385 goto out; 357 goto out;
386 358
387 error = nfs4_acl_split(acl, dacl); 359 error = nfs4_acl_split(acl, dacl);
388 if (error < 0) 360 if (error)
389 goto out_acl; 361 goto out_acl;
390 362
391 if (pacl != NULL) { 363 *pacl = _nfsv4_to_posix_one(acl, flags);
392 if (acl->naces == 0) { 364 if (IS_ERR(*pacl)) {
393 error = -ENODATA; 365 error = PTR_ERR(*pacl);
394 goto try_dpacl; 366 *pacl = NULL;
395 } 367 goto out_acl;
396
397 *pacl = _nfsv4_to_posix_one(acl, flags);
398 if (IS_ERR(*pacl)) {
399 error = PTR_ERR(*pacl);
400 *pacl = NULL;
401 goto out_acl;
402 }
403 } 368 }
404 369
405try_dpacl: 370 *dpacl = _nfsv4_to_posix_one(dacl, flags);
406 if (dpacl != NULL) { 371 if (IS_ERR(*dpacl)) {
407 if (dacl->naces == 0) { 372 error = PTR_ERR(*dpacl);
408 if (pacl == NULL || *pacl == NULL) 373 *dpacl = NULL;
409 error = -ENODATA;
410 goto out_acl;
411 }
412
413 error = 0;
414 *dpacl = _nfsv4_to_posix_one(dacl, flags);
415 if (IS_ERR(*dpacl)) {
416 error = PTR_ERR(*dpacl);
417 *dpacl = NULL;
418 goto out_acl;
419 }
420 } 374 }
421
422out_acl: 375out_acl:
423 if (error && pacl) { 376 if (error) {
424 posix_acl_release(*pacl); 377 posix_acl_release(*pacl);
425 *pacl = NULL; 378 *pacl = NULL;
426 } 379 }
@@ -429,349 +382,311 @@ out:
429 return error; 382 return error;
430} 383}
431 384
385/*
386 * While processing the NFSv4 ACE, this maintains bitmasks representing
387 * which permission bits have been allowed and which denied to a given
388 * entity: */
389struct posix_ace_state {
390 u32 allow;
391 u32 deny;
392};
393
394struct posix_user_ace_state {
395 uid_t uid;
396 struct posix_ace_state perms;
397};
398
399struct posix_ace_state_array {
400 int n;
401 struct posix_user_ace_state aces[];
402};
403
404/*
405 * While processing the NFSv4 ACE, this maintains the partial permissions
406 * calculated so far: */
407
408struct posix_acl_state {
409 struct posix_ace_state owner;
410 struct posix_ace_state group;
411 struct posix_ace_state other;
412 struct posix_ace_state everyone;
413 struct posix_ace_state mask; /* Deny unused in this case */
414 struct posix_ace_state_array *users;
415 struct posix_ace_state_array *groups;
416};
417
432static int 418static int
433same_who(struct nfs4_ace *a, struct nfs4_ace *b) 419init_state(struct posix_acl_state *state, int cnt)
434{ 420{
435 return a->whotype == b->whotype && 421 int alloc;
436 (a->whotype != NFS4_ACL_WHO_NAMED || a->who == b->who); 422
423 memset(state, 0, sizeof(struct posix_acl_state));
424 /*
425 * In the worst case, each individual acl could be for a distinct
426 * named user or group, but we don't know which, so we allocate
427 * enough space for either:
428 */
429 alloc = sizeof(struct posix_ace_state_array)
430 + cnt*sizeof(struct posix_ace_state);
431 state->users = kzalloc(alloc, GFP_KERNEL);
432 if (!state->users)
433 return -ENOMEM;
434 state->groups = kzalloc(alloc, GFP_KERNEL);
435 if (!state->groups) {
436 kfree(state->users);
437 return -ENOMEM;
438 }
439 return 0;
437} 440}
438 441
439static int 442static void
440complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny, 443free_state(struct posix_acl_state *state) {
441 unsigned int flags) 444 kfree(state->users);
442{ 445 kfree(state->groups);
443 int ignore = 0;
444 if (!(flags & NFS4_ACL_DIR))
445 ignore |= NFS4_ACE_DELETE_CHILD;
446 return MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags),
447 ignore|deny->access_mask) &&
448 allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
449 deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE &&
450 allow->flag == deny->flag &&
451 same_who(allow, deny);
452} 446}
453 447
454static inline int 448static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_state *astate)
455user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
456 struct posix_acl *pacl, struct posix_acl_entry **pace,
457 unsigned int flags)
458{ 449{
459 int error = -EINVAL; 450 state->mask.allow |= astate->allow;
460 struct nfs4_ace *ace, *ace2;
461
462 ace = get_next_v4_ace(p, &n4acl->ace_head);
463 if (ace == NULL)
464 goto out;
465 if (ace2type(ace) != ACL_USER_OBJ)
466 goto out;
467 error = write_pace(ace, pacl, pace, ACL_USER_OBJ, flags);
468 if (error < 0)
469 goto out;
470 error = -EINVAL;
471 ace2 = get_next_v4_ace(p, &n4acl->ace_head);
472 if (ace2 == NULL)
473 goto out;
474 if (!complementary_ace_pair(ace, ace2, flags))
475 goto out;
476 error = 0;
477out:
478 return error;
479} 451}
480 452
481static inline int 453/*
482users_from_v4(struct nfs4_acl *n4acl, struct list_head **p, 454 * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS,
483 struct nfs4_ace **mask_ace, 455 * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate
484 struct posix_acl *pacl, struct posix_acl_entry **pace, 456 * to traditional read/write/execute permissions.
485 unsigned int flags) 457 *
486{ 458 * It's problematic to reject acls that use certain mode bits, because it
487 int error = -EINVAL; 459 * places the burden on users to learn the rules about which bits one
488 struct nfs4_ace *ace, *ace2; 460 * particular server sets, without giving the user a lot of help--we return an
461 * error that could mean any number of different things. To make matters
462 * worse, the problematic bits might be introduced by some application that's
463 * automatically mapping from some other acl model.
464 *
465 * So wherever possible we accept anything, possibly erring on the side of
466 * denying more permissions than necessary.
467 *
468 * However we do reject *explicit* DENY's of a few bits representing
469 * permissions we could never deny:
470 */
489 471
490 ace = get_next_v4_ace(p, &n4acl->ace_head); 472static inline int check_deny(u32 mask, int isowner)
491 if (ace == NULL) 473{
492 goto out; 474 if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL))
493 while (ace2type(ace) == ACL_USER) { 475 return -EINVAL;
494 if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) 476 if (!isowner)
495 goto out; 477 return 0;
496 if (*mask_ace && 478 if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL))
497 !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) 479 return -EINVAL;
498 goto out; 480 return 0;
499 *mask_ace = ace;
500 ace = get_next_v4_ace(p, &n4acl->ace_head);
501 if (ace == NULL)
502 goto out;
503 if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
504 goto out;
505 error = write_pace(ace, pacl, pace, ACL_USER, flags);
506 if (error < 0)
507 goto out;
508 error = -EINVAL;
509 ace2 = get_next_v4_ace(p, &n4acl->ace_head);
510 if (ace2 == NULL)
511 goto out;
512 if (!complementary_ace_pair(ace, ace2, flags))
513 goto out;
514 if ((*mask_ace)->flag != ace2->flag ||
515 !same_who(*mask_ace, ace2))
516 goto out;
517 ace = get_next_v4_ace(p, &n4acl->ace_head);
518 if (ace == NULL)
519 goto out;
520 }
521 error = 0;
522out:
523 return error;
524} 481}
525 482
526static inline int 483static struct posix_acl *
527group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p, 484posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
528 struct nfs4_ace **mask_ace,
529 struct posix_acl *pacl, struct posix_acl_entry **pace,
530 unsigned int flags)
531{ 485{
532 int error = -EINVAL; 486 struct posix_acl_entry *pace;
533 struct nfs4_ace *ace, *ace2; 487 struct posix_acl *pacl;
534 struct ace_container *ac; 488 int nace;
535 struct list_head group_l; 489 int i, error = 0;
536
537 INIT_LIST_HEAD(&group_l);
538 ace = list_entry(*p, struct nfs4_ace, l_ace);
539
540 /* group owner (mask and allow aces) */
541 490
542 if (pacl->a_count != 3) { 491 nace = 4 + state->users->n + state->groups->n;
543 /* then the group owner should be preceded by mask */ 492 pacl = posix_acl_alloc(nace, GFP_KERNEL);
544 if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) 493 if (!pacl)
545 goto out; 494 return ERR_PTR(-ENOMEM);
546 if (*mask_ace &&
547 !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
548 goto out;
549 *mask_ace = ace;
550 ace = get_next_v4_ace(p, &n4acl->ace_head);
551 if (ace == NULL)
552 goto out;
553 495
554 if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace)) 496 pace = pacl->a_entries;
555 goto out; 497 pace->e_tag = ACL_USER_OBJ;
498 error = check_deny(state->owner.deny, 1);
499 if (error)
500 goto out_err;
501 low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
502 pace->e_id = ACL_UNDEFINED_ID;
503
504 for (i=0; i < state->users->n; i++) {
505 pace++;
506 pace->e_tag = ACL_USER;
507 error = check_deny(state->users->aces[i].perms.deny, 0);
508 if (error)
509 goto out_err;
510 low_mode_from_nfs4(state->users->aces[i].perms.allow,
511 &pace->e_perm, flags);
512 pace->e_id = state->users->aces[i].uid;
513 add_to_mask(state, &state->users->aces[i].perms);
556 } 514 }
557 515
558 if (ace2type(ace) != ACL_GROUP_OBJ) 516 pace++;
559 goto out; 517 pace->e_tag = ACL_GROUP_OBJ;
560 518 error = check_deny(state->group.deny, 0);
561 ac = kmalloc(sizeof(*ac), GFP_KERNEL); 519 if (error)
562 error = -ENOMEM; 520 goto out_err;
563 if (ac == NULL) 521 low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
564 goto out; 522 pace->e_id = ACL_UNDEFINED_ID;
565 ac->ace = ace; 523 add_to_mask(state, &state->group);
566 list_add_tail(&ac->ace_l, &group_l); 524
567 525 for (i=0; i < state->groups->n; i++) {
568 error = -EINVAL; 526 pace++;
569 if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) 527 pace->e_tag = ACL_GROUP;
570 goto out; 528 error = check_deny(state->groups->aces[i].perms.deny, 0);
571 529 if (error)
572 error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags); 530 goto out_err;
573 if (error < 0) 531 low_mode_from_nfs4(state->groups->aces[i].perms.allow,
574 goto out; 532 &pace->e_perm, flags);
575 533 pace->e_id = state->groups->aces[i].uid;
576 error = -EINVAL; 534 add_to_mask(state, &state->groups->aces[i].perms);
577 ace = get_next_v4_ace(p, &n4acl->ace_head); 535 }
578 if (ace == NULL)
579 goto out;
580
581 /* groups (mask and allow aces) */
582
583 while (ace2type(ace) == ACL_GROUP) {
584 if (*mask_ace == NULL)
585 goto out;
586
587 if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE ||
588 !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
589 goto out;
590 *mask_ace = ace;
591 536
592 ace = get_next_v4_ace(p, &n4acl->ace_head); 537 pace++;
593 if (ace == NULL) 538 pace->e_tag = ACL_MASK;
594 goto out; 539 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
595 ac = kmalloc(sizeof(*ac), GFP_KERNEL); 540 pace->e_id = ACL_UNDEFINED_ID;
596 error = -ENOMEM;
597 if (ac == NULL)
598 goto out;
599 error = -EINVAL;
600 if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE ||
601 !same_who(ace, *mask_ace))
602 goto out;
603 541
604 ac->ace = ace; 542 pace++;
605 list_add_tail(&ac->ace_l, &group_l); 543 pace->e_tag = ACL_OTHER;
544 error = check_deny(state->other.deny, 0);
545 if (error)
546 goto out_err;
547 low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
548 pace->e_id = ACL_UNDEFINED_ID;
606 549
607 error = write_pace(ace, pacl, pace, ACL_GROUP, flags); 550 return pacl;
608 if (error < 0) 551out_err:
609 goto out; 552 posix_acl_release(pacl);
610 error = -EINVAL; 553 return ERR_PTR(error);
611 ace = get_next_v4_ace(p, &n4acl->ace_head); 554}
612 if (ace == NULL)
613 goto out;
614 }
615 555
616 /* group owner (deny ace) */ 556static inline void allow_bits(struct posix_ace_state *astate, u32 mask)
557{
558 /* Allow all bits in the mask not already denied: */
559 astate->allow |= mask & ~astate->deny;
560}
617 561
618 if (ace2type(ace) != ACL_GROUP_OBJ) 562static inline void deny_bits(struct posix_ace_state *astate, u32 mask)
619 goto out; 563{
620 ac = list_entry(group_l.next, struct ace_container, ace_l); 564 /* Deny all bits in the mask not already allowed: */
621 ace2 = ac->ace; 565 astate->deny |= mask & ~astate->allow;
622 if (!complementary_ace_pair(ace2, ace, flags)) 566}
623 goto out;
624 list_del(group_l.next);
625 kfree(ac);
626 567
627 /* groups (deny aces) */ 568static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid)
569{
570 int i;
628 571
629 while (!list_empty(&group_l)) { 572 for (i = 0; i < a->n; i++)
630 ace = get_next_v4_ace(p, &n4acl->ace_head); 573 if (a->aces[i].uid == uid)
631 if (ace == NULL) 574 return i;
632 goto out; 575 /* Not found: */
633 if (ace2type(ace) != ACL_GROUP) 576 a->n++;
634 goto out; 577 a->aces[i].uid = uid;
635 ac = list_entry(group_l.next, struct ace_container, ace_l); 578 a->aces[i].perms.allow = state->everyone.allow;
636 ace2 = ac->ace; 579 a->aces[i].perms.deny = state->everyone.deny;
637 if (!complementary_ace_pair(ace2, ace, flags))
638 goto out;
639 list_del(group_l.next);
640 kfree(ac);
641 }
642 580
643 ace = get_next_v4_ace(p, &n4acl->ace_head); 581 return i;
644 if (ace == NULL)
645 goto out;
646 if (ace2type(ace) != ACL_OTHER)
647 goto out;
648 error = 0;
649out:
650 while (!list_empty(&group_l)) {
651 ac = list_entry(group_l.next, struct ace_container, ace_l);
652 list_del(group_l.next);
653 kfree(ac);
654 }
655 return error;
656} 582}
657 583
658static inline int 584static void deny_bits_array(struct posix_ace_state_array *a, u32 mask)
659mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
660 struct nfs4_ace **mask_ace,
661 struct posix_acl *pacl, struct posix_acl_entry **pace,
662 unsigned int flags)
663{ 585{
664 int error = -EINVAL; 586 int i;
665 struct nfs4_ace *ace;
666 587
667 ace = list_entry(*p, struct nfs4_ace, l_ace); 588 for (i=0; i < a->n; i++)
668 if (pacl->a_count != 3) { 589 deny_bits(&a->aces[i].perms, mask);
669 if (*mask_ace == NULL)
670 goto out;
671 (*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags);
672 write_pace(*mask_ace, pacl, pace, ACL_MASK, flags);
673 }
674 error = 0;
675out:
676 return error;
677} 590}
678 591
679static inline int 592static void allow_bits_array(struct posix_ace_state_array *a, u32 mask)
680other_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
681 struct posix_acl *pacl, struct posix_acl_entry **pace,
682 unsigned int flags)
683{ 593{
684 int error = -EINVAL; 594 int i;
685 struct nfs4_ace *ace, *ace2;
686 595
687 ace = list_entry(*p, struct nfs4_ace, l_ace); 596 for (i=0; i < a->n; i++)
688 if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) 597 allow_bits(&a->aces[i].perms, mask);
689 goto out;
690 error = write_pace(ace, pacl, pace, ACL_OTHER, flags);
691 if (error < 0)
692 goto out;
693 error = -EINVAL;
694 ace2 = get_next_v4_ace(p, &n4acl->ace_head);
695 if (ace2 == NULL)
696 goto out;
697 if (!complementary_ace_pair(ace, ace2, flags))
698 goto out;
699 error = 0;
700out:
701 return error;
702} 598}
703 599
704static int 600static void process_one_v4_ace(struct posix_acl_state *state,
705calculate_posix_ace_count(struct nfs4_acl *n4acl) 601 struct nfs4_ace *ace)
706{ 602{
707 if (n4acl->naces == 6) /* owner, owner group, and other only */ 603 u32 mask = ace->access_mask;
708 return 3; 604 int i;
709 else { /* Otherwise there must be a mask entry. */ 605
710 /* Also, the remaining entries are for named users and 606 switch (ace2type(ace)) {
711 * groups, and come in threes (mask, allow, deny): */ 607 case ACL_USER_OBJ:
712 if (n4acl->naces < 7) 608 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
713 return -EINVAL; 609 allow_bits(&state->owner, mask);
714 if ((n4acl->naces - 7) % 3) 610 } else {
715 return -EINVAL; 611 deny_bits(&state->owner, mask);
716 return 4 + (n4acl->naces - 7)/3; 612 }
613 break;
614 case ACL_USER:
615 i = find_uid(state, state->users, ace->who);
616 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
617 allow_bits(&state->users->aces[i].perms, mask);
618 } else {
619 deny_bits(&state->users->aces[i].perms, mask);
620 mask = state->users->aces[i].perms.deny;
621 deny_bits(&state->owner, mask);
622 }
623 break;
624 case ACL_GROUP_OBJ:
625 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
626 allow_bits(&state->group, mask);
627 } else {
628 deny_bits(&state->group, mask);
629 mask = state->group.deny;
630 deny_bits(&state->owner, mask);
631 deny_bits(&state->everyone, mask);
632 deny_bits_array(state->users, mask);
633 deny_bits_array(state->groups, mask);
634 }
635 break;
636 case ACL_GROUP:
637 i = find_uid(state, state->groups, ace->who);
638 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
639 allow_bits(&state->groups->aces[i].perms, mask);
640 } else {
641 deny_bits(&state->groups->aces[i].perms, mask);
642 mask = state->groups->aces[i].perms.deny;
643 deny_bits(&state->owner, mask);
644 deny_bits(&state->group, mask);
645 deny_bits(&state->everyone, mask);
646 deny_bits_array(state->users, mask);
647 deny_bits_array(state->groups, mask);
648 }
649 break;
650 case ACL_OTHER:
651 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
652 allow_bits(&state->owner, mask);
653 allow_bits(&state->group, mask);
654 allow_bits(&state->other, mask);
655 allow_bits(&state->everyone, mask);
656 allow_bits_array(state->users, mask);
657 allow_bits_array(state->groups, mask);
658 } else {
659 deny_bits(&state->owner, mask);
660 deny_bits(&state->group, mask);
661 deny_bits(&state->other, mask);
662 deny_bits(&state->everyone, mask);
663 deny_bits_array(state->users, mask);
664 deny_bits_array(state->groups, mask);
665 }
717 } 666 }
718} 667}
719 668
720
721static struct posix_acl * 669static struct posix_acl *
722_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags) 670_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags)
723{ 671{
672 struct posix_acl_state state;
724 struct posix_acl *pacl; 673 struct posix_acl *pacl;
725 int error = -EINVAL, nace = 0; 674 struct nfs4_ace *ace;
726 struct list_head *p; 675 int ret;
727 struct nfs4_ace *mask_ace = NULL;
728 struct posix_acl_entry *pace;
729
730 nace = calculate_posix_ace_count(n4acl);
731 if (nace < 0)
732 goto out_err;
733
734 pacl = posix_acl_alloc(nace, GFP_KERNEL);
735 error = -ENOMEM;
736 if (pacl == NULL)
737 goto out_err;
738
739 pace = &pacl->a_entries[0];
740 p = &n4acl->ace_head;
741
742 error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags);
743 if (error)
744 goto out_acl;
745
746 error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
747 if (error)
748 goto out_acl;
749 676
750 error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace, 677 ret = init_state(&state, n4acl->naces);
751 flags); 678 if (ret)
752 if (error) 679 return ERR_PTR(ret);
753 goto out_acl;
754 680
755 error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags); 681 list_for_each_entry(ace, &n4acl->ace_head, l_ace)
756 if (error) 682 process_one_v4_ace(&state, ace);
757 goto out_acl;
758 error = other_from_v4(n4acl, &p, pacl, &pace, flags);
759 if (error)
760 goto out_acl;
761 683
762 error = -EINVAL; 684 pacl = posix_state_to_acl(&state, flags);
763 if (p->next != &n4acl->ace_head)
764 goto out_acl;
765 if (pace != pacl->a_entries + pacl->a_count)
766 goto out_acl;
767 685
768 sort_pacl(pacl); 686 free_state(&state);
769 687
770 return pacl; 688 if (!IS_ERR(pacl))
771out_acl: 689 sort_pacl(pacl);
772 posix_acl_release(pacl);
773out_err:
774 pacl = ERR_PTR(error);
775 return pacl; 690 return pacl;
776} 691}
777 692
@@ -785,22 +700,41 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
785 list_for_each_safe(h, n, &acl->ace_head) { 700 list_for_each_safe(h, n, &acl->ace_head) {
786 ace = list_entry(h, struct nfs4_ace, l_ace); 701 ace = list_entry(h, struct nfs4_ace, l_ace);
787 702
788 if ((ace->flag & NFS4_INHERITANCE_FLAGS) 703 if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
789 != NFS4_INHERITANCE_FLAGS) 704 ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
790 continue; 705 return -EINVAL;
791 706
792 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, 707 if (ace->flag & ~NFS4_SUPPORTED_FLAGS)
793 ace->access_mask, ace->whotype, ace->who); 708 return -EINVAL;
794 if (error < 0)
795 goto out;
796 709
797 list_del(h); 710 switch (ace->flag & NFS4_INHERITANCE_FLAGS) {
798 kfree(ace); 711 case 0:
799 acl->naces--; 712 /* Leave this ace in the effective acl: */
713 continue;
714 case NFS4_INHERITANCE_FLAGS:
715 /* Add this ace to the default acl and remove it
716 * from the effective acl: */
717 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
718 ace->access_mask, ace->whotype, ace->who);
719 if (error)
720 return error;
721 list_del(h);
722 kfree(ace);
723 acl->naces--;
724 break;
725 case NFS4_INHERITANCE_FLAGS & ~NFS4_ACE_INHERIT_ONLY_ACE:
726 /* Add this ace to the default, but leave it in
727 * the effective acl as well: */
728 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
729 ace->access_mask, ace->whotype, ace->who);
730 if (error)
731 return error;
732 break;
733 default:
734 return -EINVAL;
735 }
800 } 736 }
801 737 return 0;
802out:
803 return error;
804} 738}
805 739
806static short 740static short
@@ -930,23 +864,6 @@ nfs4_acl_write_who(int who, char *p)
930 return -1; 864 return -1;
931} 865}
932 866
933static inline int
934match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who)
935{
936 switch (ace->whotype) {
937 case NFS4_ACL_WHO_NAMED:
938 return who == ace->who;
939 case NFS4_ACL_WHO_OWNER:
940 return who == owner;
941 case NFS4_ACL_WHO_GROUP:
942 return who == group;
943 case NFS4_ACL_WHO_EVERYONE:
944 return 1;
945 default:
946 return 0;
947 }
948}
949
950EXPORT_SYMBOL(nfs4_acl_new); 867EXPORT_SYMBOL(nfs4_acl_new);
951EXPORT_SYMBOL(nfs4_acl_free); 868EXPORT_SYMBOL(nfs4_acl_free);
952EXPORT_SYMBOL(nfs4_acl_add_ace); 869EXPORT_SYMBOL(nfs4_acl_add_ace);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 8583d99ee740..f6ca9fb3fc63 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -131,7 +131,7 @@ xdr_error: \
131#define READ_BUF(nbytes) do { \ 131#define READ_BUF(nbytes) do { \
132 p = xdr_inline_decode(xdr, nbytes); \ 132 p = xdr_inline_decode(xdr, nbytes); \
133 if (!p) { \ 133 if (!p) { \
134 dprintk("NFSD: %s: reply buffer overflowed in line %d.", \ 134 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
135 __FUNCTION__, __LINE__); \ 135 __FUNCTION__, __LINE__); \
136 return -EIO; \ 136 return -EIO; \
137 } \ 137 } \
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ee4eff27aedc..8333db12caca 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -600,7 +600,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_se
600 &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL); 600 &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL);
601 nfs4_unlock_state(); 601 nfs4_unlock_state();
602 if (status) { 602 if (status) {
603 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!"); 603 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
604 return status; 604 return status;
605 } 605 }
606 } 606 }
@@ -646,7 +646,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
646 *p++ = nfssvc_boot.tv_usec; 646 *p++ = nfssvc_boot.tv_usec;
647 647
648 status = nfsd_write(rqstp, current_fh, filp, write->wr_offset, 648 status = nfsd_write(rqstp, current_fh, filp, write->wr_offset,
649 write->wr_vec, write->wr_vlen, write->wr_buflen, 649 rqstp->rq_vec, write->wr_vlen, write->wr_buflen,
650 &write->wr_how_written); 650 &write->wr_how_written);
651 if (filp) 651 if (filp)
652 fput(filp); 652 fput(filp);
@@ -802,13 +802,29 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
802 * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH 802 * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
803 * require a valid current filehandle 803 * require a valid current filehandle
804 */ 804 */
805 if ((!current_fh->fh_dentry) && 805 if (!current_fh->fh_dentry) {
806 !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) || 806 if (!((op->opnum == OP_PUTFH) ||
807 (op->opnum == OP_SETCLIENTID) || 807 (op->opnum == OP_PUTROOTFH) ||
808 (op->opnum == OP_SETCLIENTID_CONFIRM) || 808 (op->opnum == OP_SETCLIENTID) ||
809 (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) || 809 (op->opnum == OP_SETCLIENTID_CONFIRM) ||
810 (op->opnum == OP_RELEASE_LOCKOWNER))) { 810 (op->opnum == OP_RENEW) ||
811 op->status = nfserr_nofilehandle; 811 (op->opnum == OP_RESTOREFH) ||
812 (op->opnum == OP_RELEASE_LOCKOWNER))) {
813 op->status = nfserr_nofilehandle;
814 goto encode_op;
815 }
816 }
817 /* Check must be done at start of each operation, except
818 * for GETATTR and ops not listed as returning NFS4ERR_MOVED
819 */
820 else if (current_fh->fh_export->ex_fslocs.migrated &&
821 !((op->opnum == OP_GETATTR) ||
822 (op->opnum == OP_PUTROOTFH) ||
823 (op->opnum == OP_PUTPUBFH) ||
824 (op->opnum == OP_RENEW) ||
825 (op->opnum == OP_SETCLIENTID) ||
826 (op->opnum == OP_RELEASE_LOCKOWNER))) {
827 op->status = nfserr_moved;
812 goto encode_op; 828 goto encode_op;
813 } 829 }
814 switch (op->opnum) { 830 switch (op->opnum) {
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e35d7e52fdeb..1cbd2e4ee122 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -184,7 +184,7 @@ struct dentry_list_arg {
184 184
185static int 185static int
186nfsd4_build_dentrylist(void *arg, const char *name, int namlen, 186nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
187 loff_t offset, ino_t ino, unsigned int d_type) 187 loff_t offset, u64 ino, unsigned int d_type)
188{ 188{
189 struct dentry_list_arg *dla = arg; 189 struct dentry_list_arg *dla = arg;
190 struct list_head *dentries = &dla->dentries; 190 struct list_head *dentries = &dla->dentries;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5446a0861d1d..41fc241b729a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -60,6 +60,14 @@
60 60
61#define NFSDDBG_FACILITY NFSDDBG_XDR 61#define NFSDDBG_FACILITY NFSDDBG_XDR
62 62
63/*
64 * As per referral draft, the fsid for a referral MUST be different from the fsid of the containing
65 * directory in order to indicate to the client that a filesystem boundary is present
66 * We use a fixed fsid for a referral
67 */
68#define NFS4_REFERRAL_FSID_MAJOR 0x8000000ULL
69#define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL
70
63static int 71static int
64check_filename(char *str, int len, int err) 72check_filename(char *str, int len, int err)
65{ 73{
@@ -198,8 +206,7 @@ static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
198 p = new; 206 p = new;
199 memcpy(p, argp->tmp, nbytes); 207 memcpy(p, argp->tmp, nbytes);
200 } else { 208 } else {
201 if (p != argp->tmpp) 209 BUG_ON(p != argp->tmpp);
202 BUG();
203 argp->tmpp = NULL; 210 argp->tmpp = NULL;
204 } 211 }
205 if (defer_free(argp, kfree, p)) { 212 if (defer_free(argp, kfree, p)) {
@@ -927,26 +934,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
927 printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); 934 printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);
928 goto xdr_error; 935 goto xdr_error;
929 } 936 }
930 write->wr_vec[0].iov_base = p; 937 argp->rqstp->rq_vec[0].iov_base = p;
931 write->wr_vec[0].iov_len = avail; 938 argp->rqstp->rq_vec[0].iov_len = avail;
932 v = 0; 939 v = 0;
933 len = write->wr_buflen; 940 len = write->wr_buflen;
934 while (len > write->wr_vec[v].iov_len) { 941 while (len > argp->rqstp->rq_vec[v].iov_len) {
935 len -= write->wr_vec[v].iov_len; 942 len -= argp->rqstp->rq_vec[v].iov_len;
936 v++; 943 v++;
937 write->wr_vec[v].iov_base = page_address(argp->pagelist[0]); 944 argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]);
938 argp->pagelist++; 945 argp->pagelist++;
939 if (argp->pagelen >= PAGE_SIZE) { 946 if (argp->pagelen >= PAGE_SIZE) {
940 write->wr_vec[v].iov_len = PAGE_SIZE; 947 argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE;
941 argp->pagelen -= PAGE_SIZE; 948 argp->pagelen -= PAGE_SIZE;
942 } else { 949 } else {
943 write->wr_vec[v].iov_len = argp->pagelen; 950 argp->rqstp->rq_vec[v].iov_len = argp->pagelen;
944 argp->pagelen -= len; 951 argp->pagelen -= len;
945 } 952 }
946 } 953 }
947 argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len); 954 argp->end = (u32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len);
948 argp->p = (u32*) (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); 955 argp->p = (u32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
949 write->wr_vec[v].iov_len = len; 956 argp->rqstp->rq_vec[v].iov_len = len;
950 write->wr_vlen = v+1; 957 write->wr_vlen = v+1;
951 958
952 DECODE_TAIL; 959 DECODE_TAIL;
@@ -1224,6 +1231,119 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1224 stateowner->so_replay.rp_buflen); \ 1231 stateowner->so_replay.rp_buflen); \
1225 } } while (0); 1232 } } while (0);
1226 1233
1234/* Encode as an array of strings the string given with components
1235 * seperated @sep.
1236 */
1237static int nfsd4_encode_components(char sep, char *components,
1238 u32 **pp, int *buflen)
1239{
1240 u32 *p = *pp;
1241 u32 *countp = p;
1242 int strlen, count=0;
1243 char *str, *end;
1244
1245 dprintk("nfsd4_encode_components(%s)\n", components);
1246 if ((*buflen -= 4) < 0)
1247 return nfserr_resource;
1248 WRITE32(0); /* We will fill this in with @count later */
1249 end = str = components;
1250 while (*end) {
1251 for (; *end && (*end != sep); end++)
1252 ; /* Point to end of component */
1253 strlen = end - str;
1254 if (strlen) {
1255 if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0)
1256 return nfserr_resource;
1257 WRITE32(strlen);
1258 WRITEMEM(str, strlen);
1259 count++;
1260 }
1261 else
1262 end++;
1263 str = end;
1264 }
1265 *pp = p;
1266 p = countp;
1267 WRITE32(count);
1268 return 0;
1269}
1270
1271/*
1272 * encode a location element of a fs_locations structure
1273 */
1274static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
1275 u32 **pp, int *buflen)
1276{
1277 int status;
1278 u32 *p = *pp;
1279
1280 status = nfsd4_encode_components(':', location->hosts, &p, buflen);
1281 if (status)
1282 return status;
1283 status = nfsd4_encode_components('/', location->path, &p, buflen);
1284 if (status)
1285 return status;
1286 *pp = p;
1287 return 0;
1288}
1289
1290/*
1291 * Return the path to an export point in the pseudo filesystem namespace
1292 * Returned string is safe to use as long as the caller holds a reference
1293 * to @exp.
1294 */
1295static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp)
1296{
1297 struct svc_fh tmp_fh;
1298 char *path, *rootpath;
1299 int stat;
1300
1301 fh_init(&tmp_fh, NFS4_FHSIZE);
1302 stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle);
1303 if (stat)
1304 return ERR_PTR(stat);
1305 rootpath = tmp_fh.fh_export->ex_path;
1306
1307 path = exp->ex_path;
1308
1309 if (strncmp(path, rootpath, strlen(rootpath))) {
1310 printk("nfsd: fs_locations failed;"
1311 "%s is not contained in %s\n", path, rootpath);
1312 return ERR_PTR(-EOPNOTSUPP);
1313 }
1314
1315 return path + strlen(rootpath);
1316}
1317
1318/*
1319 * encode a fs_locations structure
1320 */
1321static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
1322 struct svc_export *exp,
1323 u32 **pp, int *buflen)
1324{
1325 int status, i;
1326 u32 *p = *pp;
1327 struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
1328 char *root = nfsd4_path(rqstp, exp);
1329
1330 if (IS_ERR(root))
1331 return PTR_ERR(root);
1332 status = nfsd4_encode_components('/', root, &p, buflen);
1333 if (status)
1334 return status;
1335 if ((*buflen -= 4) < 0)
1336 return nfserr_resource;
1337 WRITE32(fslocs->locations_count);
1338 for (i=0; i<fslocs->locations_count; i++) {
1339 status = nfsd4_encode_fs_location4(&fslocs->locations[i],
1340 &p, buflen);
1341 if (status)
1342 return status;
1343 }
1344 *pp = p;
1345 return 0;
1346}
1227 1347
1228static u32 nfs4_ftypes[16] = { 1348static u32 nfs4_ftypes[16] = {
1229 NF4BAD, NF4FIFO, NF4CHR, NF4BAD, 1349 NF4BAD, NF4FIFO, NF4CHR, NF4BAD,
@@ -1273,6 +1393,25 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
1273 return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); 1393 return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen);
1274} 1394}
1275 1395
1396#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
1397 FATTR4_WORD0_RDATTR_ERROR)
1398#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
1399
1400static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
1401{
1402 /* As per referral draft: */
1403 if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS ||
1404 *bmval1 & ~WORD1_ABSENT_FS_ATTRS) {
1405 if (*bmval0 & FATTR4_WORD0_RDATTR_ERROR ||
1406 *bmval0 & FATTR4_WORD0_FS_LOCATIONS)
1407 *rdattr_err = NFSERR_MOVED;
1408 else
1409 return nfserr_moved;
1410 }
1411 *bmval0 &= WORD0_ABSENT_FS_ATTRS;
1412 *bmval1 &= WORD1_ABSENT_FS_ATTRS;
1413 return 0;
1414}
1276 1415
1277/* 1416/*
1278 * Note: @fhp can be NULL; in this case, we might have to compose the filehandle 1417 * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
@@ -1295,6 +1434,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1295 u32 *attrlenp; 1434 u32 *attrlenp;
1296 u32 dummy; 1435 u32 dummy;
1297 u64 dummy64; 1436 u64 dummy64;
1437 u32 rdattr_err = 0;
1298 u32 *p = buffer; 1438 u32 *p = buffer;
1299 int status; 1439 int status;
1300 int aclsupport = 0; 1440 int aclsupport = 0;
@@ -1304,6 +1444,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1304 BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); 1444 BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0);
1305 BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); 1445 BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1);
1306 1446
1447 if (exp->ex_fslocs.migrated) {
1448 status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err);
1449 if (status)
1450 goto out;
1451 }
1452
1307 status = vfs_getattr(exp->ex_mnt, dentry, &stat); 1453 status = vfs_getattr(exp->ex_mnt, dentry, &stat);
1308 if (status) 1454 if (status)
1309 goto out_nfserr; 1455 goto out_nfserr;
@@ -1335,6 +1481,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1335 goto out_nfserr; 1481 goto out_nfserr;
1336 } 1482 }
1337 } 1483 }
1484 if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
1485 if (exp->ex_fslocs.locations == NULL) {
1486 bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
1487 }
1488 }
1338 if ((buflen -= 16) < 0) 1489 if ((buflen -= 16) < 0)
1339 goto out_resource; 1490 goto out_resource;
1340 1491
@@ -1344,12 +1495,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1344 attrlenp = p++; /* to be backfilled later */ 1495 attrlenp = p++; /* to be backfilled later */
1345 1496
1346 if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { 1497 if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
1498 u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0;
1347 if ((buflen -= 12) < 0) 1499 if ((buflen -= 12) < 0)
1348 goto out_resource; 1500 goto out_resource;
1501 if (!aclsupport)
1502 word0 &= ~FATTR4_WORD0_ACL;
1503 if (!exp->ex_fslocs.locations)
1504 word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
1349 WRITE32(2); 1505 WRITE32(2);
1350 WRITE32(aclsupport ? 1506 WRITE32(word0);
1351 NFSD_SUPPORTED_ATTRS_WORD0 :
1352 NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL);
1353 WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); 1507 WRITE32(NFSD_SUPPORTED_ATTRS_WORD1);
1354 } 1508 }
1355 if (bmval0 & FATTR4_WORD0_TYPE) { 1509 if (bmval0 & FATTR4_WORD0_TYPE) {
@@ -1403,7 +1557,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1403 if (bmval0 & FATTR4_WORD0_FSID) { 1557 if (bmval0 & FATTR4_WORD0_FSID) {
1404 if ((buflen -= 16) < 0) 1558 if ((buflen -= 16) < 0)
1405 goto out_resource; 1559 goto out_resource;
1406 if (is_fsid(fhp, rqstp->rq_reffh)) { 1560 if (exp->ex_fslocs.migrated) {
1561 WRITE64(NFS4_REFERRAL_FSID_MAJOR);
1562 WRITE64(NFS4_REFERRAL_FSID_MINOR);
1563 } else if (is_fsid(fhp, rqstp->rq_reffh)) {
1407 WRITE64((u64)exp->ex_fsid); 1564 WRITE64((u64)exp->ex_fsid);
1408 WRITE64((u64)0); 1565 WRITE64((u64)0);
1409 } else { 1566 } else {
@@ -1426,7 +1583,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1426 if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { 1583 if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
1427 if ((buflen -= 4) < 0) 1584 if ((buflen -= 4) < 0)
1428 goto out_resource; 1585 goto out_resource;
1429 WRITE32(0); 1586 WRITE32(rdattr_err);
1430 } 1587 }
1431 if (bmval0 & FATTR4_WORD0_ACL) { 1588 if (bmval0 & FATTR4_WORD0_ACL) {
1432 struct nfs4_ace *ace; 1589 struct nfs4_ace *ace;
@@ -1514,6 +1671,13 @@ out_acl:
1514 goto out_resource; 1671 goto out_resource;
1515 WRITE64((u64) statfs.f_files); 1672 WRITE64((u64) statfs.f_files);
1516 } 1673 }
1674 if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
1675 status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen);
1676 if (status == nfserr_resource)
1677 goto out_resource;
1678 if (status)
1679 goto out;
1680 }
1517 if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { 1681 if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
1518 if ((buflen -= 4) < 0) 1682 if ((buflen -= 4) < 0)
1519 goto out_resource; 1683 goto out_resource;
@@ -1537,12 +1701,12 @@ out_acl:
1537 if (bmval0 & FATTR4_WORD0_MAXREAD) { 1701 if (bmval0 & FATTR4_WORD0_MAXREAD) {
1538 if ((buflen -= 8) < 0) 1702 if ((buflen -= 8) < 0)
1539 goto out_resource; 1703 goto out_resource;
1540 WRITE64((u64) NFSSVC_MAXBLKSIZE); 1704 WRITE64((u64) svc_max_payload(rqstp));
1541 } 1705 }
1542 if (bmval0 & FATTR4_WORD0_MAXWRITE) { 1706 if (bmval0 & FATTR4_WORD0_MAXWRITE) {
1543 if ((buflen -= 8) < 0) 1707 if ((buflen -= 8) < 0)
1544 goto out_resource; 1708 goto out_resource;
1545 WRITE64((u64) NFSSVC_MAXBLKSIZE); 1709 WRITE64((u64) svc_max_payload(rqstp));
1546 } 1710 }
1547 if (bmval1 & FATTR4_WORD1_MODE) { 1711 if (bmval1 & FATTR4_WORD1_MODE) {
1548 if ((buflen -= 4) < 0) 1712 if ((buflen -= 4) < 0)
@@ -1846,7 +2010,6 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ge
1846 nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, 2010 nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
1847 resp->p, &buflen, getattr->ga_bmval, 2011 resp->p, &buflen, getattr->ga_bmval,
1848 resp->rqstp); 2012 resp->rqstp);
1849
1850 if (!nfserr) 2013 if (!nfserr)
1851 resp->p += buflen; 2014 resp->p += buflen;
1852 return nfserr; 2015 return nfserr;
@@ -2040,7 +2203,8 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct n
2040} 2203}
2041 2204
2042static int 2205static int
2043nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read *read) 2206nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr,
2207 struct nfsd4_read *read)
2044{ 2208{
2045 u32 eof; 2209 u32 eof;
2046 int v, pn; 2210 int v, pn;
@@ -2055,31 +2219,33 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
2055 2219
2056 RESERVE_SPACE(8); /* eof flag and byte count */ 2220 RESERVE_SPACE(8); /* eof flag and byte count */
2057 2221
2058 maxcount = NFSSVC_MAXBLKSIZE; 2222 maxcount = svc_max_payload(resp->rqstp);
2059 if (maxcount > read->rd_length) 2223 if (maxcount > read->rd_length)
2060 maxcount = read->rd_length; 2224 maxcount = read->rd_length;
2061 2225
2062 len = maxcount; 2226 len = maxcount;
2063 v = 0; 2227 v = 0;
2064 while (len > 0) { 2228 while (len > 0) {
2065 pn = resp->rqstp->rq_resused; 2229 pn = resp->rqstp->rq_resused++;
2066 svc_take_page(resp->rqstp); 2230 resp->rqstp->rq_vec[v].iov_base =
2067 read->rd_iov[v].iov_base = page_address(resp->rqstp->rq_respages[pn]); 2231 page_address(resp->rqstp->rq_respages[pn]);
2068 read->rd_iov[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE; 2232 resp->rqstp->rq_vec[v].iov_len =
2233 len < PAGE_SIZE ? len : PAGE_SIZE;
2069 v++; 2234 v++;
2070 len -= PAGE_SIZE; 2235 len -= PAGE_SIZE;
2071 } 2236 }
2072 read->rd_vlen = v; 2237 read->rd_vlen = v;
2073 2238
2074 nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp, 2239 nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
2075 read->rd_offset, read->rd_iov, read->rd_vlen, 2240 read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
2076 &maxcount); 2241 &maxcount);
2077 2242
2078 if (nfserr == nfserr_symlink) 2243 if (nfserr == nfserr_symlink)
2079 nfserr = nfserr_inval; 2244 nfserr = nfserr_inval;
2080 if (nfserr) 2245 if (nfserr)
2081 return nfserr; 2246 return nfserr;
2082 eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); 2247 eof = (read->rd_offset + maxcount >=
2248 read->rd_fhp->fh_dentry->d_inode->i_size);
2083 2249
2084 WRITE32(eof); 2250 WRITE32(eof);
2085 WRITE32(maxcount); 2251 WRITE32(maxcount);
@@ -2089,7 +2255,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
2089 resp->xbuf->page_len = maxcount; 2255 resp->xbuf->page_len = maxcount;
2090 2256
2091 /* Use rest of head for padding and remaining ops: */ 2257 /* Use rest of head for padding and remaining ops: */
2092 resp->rqstp->rq_restailpage = 0;
2093 resp->xbuf->tail[0].iov_base = p; 2258 resp->xbuf->tail[0].iov_base = p;
2094 resp->xbuf->tail[0].iov_len = 0; 2259 resp->xbuf->tail[0].iov_len = 0;
2095 if (maxcount&3) { 2260 if (maxcount&3) {
@@ -2114,8 +2279,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
2114 if (resp->xbuf->page_len) 2279 if (resp->xbuf->page_len)
2115 return nfserr_resource; 2280 return nfserr_resource;
2116 2281
2117 svc_take_page(resp->rqstp); 2282 page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
2118 page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2119 2283
2120 maxcount = PAGE_SIZE; 2284 maxcount = PAGE_SIZE;
2121 RESERVE_SPACE(4); 2285 RESERVE_SPACE(4);
@@ -2139,7 +2303,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
2139 resp->xbuf->page_len = maxcount; 2303 resp->xbuf->page_len = maxcount;
2140 2304
2141 /* Use rest of head for padding and remaining ops: */ 2305 /* Use rest of head for padding and remaining ops: */
2142 resp->rqstp->rq_restailpage = 0;
2143 resp->xbuf->tail[0].iov_base = p; 2306 resp->xbuf->tail[0].iov_base = p;
2144 resp->xbuf->tail[0].iov_len = 0; 2307 resp->xbuf->tail[0].iov_len = 0;
2145 if (maxcount&3) { 2308 if (maxcount&3) {
@@ -2190,8 +2353,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2190 goto err_no_verf; 2353 goto err_no_verf;
2191 } 2354 }
2192 2355
2193 svc_take_page(resp->rqstp); 2356 page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
2194 page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2195 readdir->common.err = 0; 2357 readdir->common.err = 0;
2196 readdir->buflen = maxcount; 2358 readdir->buflen = maxcount;
2197 readdir->buffer = page; 2359 readdir->buffer = page;
@@ -2216,10 +2378,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2216 p = readdir->buffer; 2378 p = readdir->buffer;
2217 *p++ = 0; /* no more entries */ 2379 *p++ = 0; /* no more entries */
2218 *p++ = htonl(readdir->common.err == nfserr_eof); 2380 *p++ = htonl(readdir->common.err == nfserr_eof);
2219 resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); 2381 resp->xbuf->page_len = ((char*)p) - (char*)page_address(
2382 resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2220 2383
2221 /* Use rest of head for padding and remaining ops: */ 2384 /* Use rest of head for padding and remaining ops: */
2222 resp->rqstp->rq_restailpage = 0;
2223 resp->xbuf->tail[0].iov_base = tailbase; 2385 resp->xbuf->tail[0].iov_base = tailbase;
2224 resp->xbuf->tail[0].iov_len = 0; 2386 resp->xbuf->tail[0].iov_len = 0;
2225 resp->p = resp->xbuf->tail[0].iov_base; 2387 resp->p = resp->xbuf->tail[0].iov_base;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7046ac9cf97f..39aed901514b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -23,10 +23,14 @@
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/init.h> 24#include <linux/init.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/smp_lock.h>
27#include <linux/ctype.h>
26 28
27#include <linux/nfs.h> 29#include <linux/nfs.h>
28#include <linux/nfsd_idmap.h> 30#include <linux/nfsd_idmap.h>
31#include <linux/lockd/bind.h>
29#include <linux/sunrpc/svc.h> 32#include <linux/sunrpc/svc.h>
33#include <linux/sunrpc/svcsock.h>
30#include <linux/nfsd/nfsd.h> 34#include <linux/nfsd/nfsd.h>
31#include <linux/nfsd/cache.h> 35#include <linux/nfsd/cache.h>
32#include <linux/nfsd/xdr.h> 36#include <linux/nfsd/xdr.h>
@@ -35,8 +39,6 @@
35 39
36#include <asm/uaccess.h> 40#include <asm/uaccess.h>
37 41
38unsigned int nfsd_versbits = ~0;
39
40/* 42/*
41 * We have a single directory with 9 nodes in it. 43 * We have a single directory with 9 nodes in it.
42 */ 44 */
@@ -52,7 +54,10 @@ enum {
52 NFSD_List, 54 NFSD_List,
53 NFSD_Fh, 55 NFSD_Fh,
54 NFSD_Threads, 56 NFSD_Threads,
57 NFSD_Pool_Threads,
55 NFSD_Versions, 58 NFSD_Versions,
59 NFSD_Ports,
60 NFSD_MaxBlkSize,
56 /* 61 /*
57 * The below MUST come last. Otherwise we leave a hole in nfsd_files[] 62 * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
58 * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops 63 * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -75,7 +80,10 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size);
75static ssize_t write_getfs(struct file *file, char *buf, size_t size); 80static ssize_t write_getfs(struct file *file, char *buf, size_t size);
76static ssize_t write_filehandle(struct file *file, char *buf, size_t size); 81static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
77static ssize_t write_threads(struct file *file, char *buf, size_t size); 82static ssize_t write_threads(struct file *file, char *buf, size_t size);
83static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
78static ssize_t write_versions(struct file *file, char *buf, size_t size); 84static ssize_t write_versions(struct file *file, char *buf, size_t size);
85static ssize_t write_ports(struct file *file, char *buf, size_t size);
86static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
79#ifdef CONFIG_NFSD_V4 87#ifdef CONFIG_NFSD_V4
80static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 88static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
81static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); 89static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
@@ -91,7 +99,10 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
91 [NFSD_Getfs] = write_getfs, 99 [NFSD_Getfs] = write_getfs,
92 [NFSD_Fh] = write_filehandle, 100 [NFSD_Fh] = write_filehandle,
93 [NFSD_Threads] = write_threads, 101 [NFSD_Threads] = write_threads,
102 [NFSD_Pool_Threads] = write_pool_threads,
94 [NFSD_Versions] = write_versions, 103 [NFSD_Versions] = write_versions,
104 [NFSD_Ports] = write_ports,
105 [NFSD_MaxBlkSize] = write_maxblksize,
95#ifdef CONFIG_NFSD_V4 106#ifdef CONFIG_NFSD_V4
96 [NFSD_Leasetime] = write_leasetime, 107 [NFSD_Leasetime] = write_leasetime,
97 [NFSD_RecoveryDir] = write_recoverydir, 108 [NFSD_RecoveryDir] = write_recoverydir,
@@ -358,6 +369,72 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
358 return strlen(buf); 369 return strlen(buf);
359} 370}
360 371
372extern int nfsd_nrpools(void);
373extern int nfsd_get_nrthreads(int n, int *);
374extern int nfsd_set_nrthreads(int n, int *);
375
376static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
377{
378 /* if size > 0, look for an array of number of threads per node
379 * and apply them then write out number of threads per node as reply
380 */
381 char *mesg = buf;
382 int i;
383 int rv;
384 int len;
385 int npools = nfsd_nrpools();
386 int *nthreads;
387
388 if (npools == 0) {
389 /*
390 * NFS is shut down. The admin can start it by
391 * writing to the threads file but NOT the pool_threads
392 * file, sorry. Report zero threads.
393 */
394 strcpy(buf, "0\n");
395 return strlen(buf);
396 }
397
398 nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
399 if (nthreads == NULL)
400 return -ENOMEM;
401
402 if (size > 0) {
403 for (i = 0; i < npools; i++) {
404 rv = get_int(&mesg, &nthreads[i]);
405 if (rv == -ENOENT)
406 break; /* fewer numbers than pools */
407 if (rv)
408 goto out_free; /* syntax error */
409 rv = -EINVAL;
410 if (nthreads[i] < 0)
411 goto out_free;
412 }
413 rv = nfsd_set_nrthreads(i, nthreads);
414 if (rv)
415 goto out_free;
416 }
417
418 rv = nfsd_get_nrthreads(npools, nthreads);
419 if (rv)
420 goto out_free;
421
422 mesg = buf;
423 size = SIMPLE_TRANSACTION_LIMIT;
424 for (i = 0; i < npools && size > 0; i++) {
425 snprintf(mesg, size, "%d%c", nthreads[i], (i == npools-1 ? '\n' : ' '));
426 len = strlen(mesg);
427 size -= len;
428 mesg += len;
429 }
430
431 return (mesg-buf);
432
433out_free:
434 kfree(nthreads);
435 return rv;
436}
437
361static ssize_t write_versions(struct file *file, char *buf, size_t size) 438static ssize_t write_versions(struct file *file, char *buf, size_t size)
362{ 439{
363 /* 440 /*
@@ -372,6 +449,10 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
372 449
373 if (size>0) { 450 if (size>0) {
374 if (nfsd_serv) 451 if (nfsd_serv)
452 /* Cannot change versions without updating
453 * nfsd_serv->sv_xdrsize, and reallocing
454 * rq_argp and rq_resp
455 */
375 return -EBUSY; 456 return -EBUSY;
376 if (buf[size-1] != '\n') 457 if (buf[size-1] != '\n')
377 return -EINVAL; 458 return -EINVAL;
@@ -390,10 +471,7 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
390 case 2: 471 case 2:
391 case 3: 472 case 3:
392 case 4: 473 case 4:
393 if (sign != '-') 474 nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
394 NFSCTL_VERSET(nfsd_versbits, num);
395 else
396 NFSCTL_VERUNSET(nfsd_versbits, num);
397 break; 475 break;
398 default: 476 default:
399 return -EINVAL; 477 return -EINVAL;
@@ -404,16 +482,15 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
404 /* If all get turned off, turn them back on, as 482 /* If all get turned off, turn them back on, as
405 * having no versions is BAD 483 * having no versions is BAD
406 */ 484 */
407 if ((nfsd_versbits & NFSCTL_VERALL)==0) 485 nfsd_reset_versions();
408 nfsd_versbits = NFSCTL_VERALL;
409 } 486 }
410 /* Now write current state into reply buffer */ 487 /* Now write current state into reply buffer */
411 len = 0; 488 len = 0;
412 sep = ""; 489 sep = "";
413 for (num=2 ; num <= 4 ; num++) 490 for (num=2 ; num <= 4 ; num++)
414 if (NFSCTL_VERISSET(NFSCTL_VERALL, num)) { 491 if (nfsd_vers(num, NFSD_AVAIL)) {
415 len += sprintf(buf+len, "%s%c%d", sep, 492 len += sprintf(buf+len, "%s%c%d", sep,
416 NFSCTL_VERISSET(nfsd_versbits, num)?'+':'-', 493 nfsd_vers(num, NFSD_TEST)?'+':'-',
417 num); 494 num);
418 sep = " "; 495 sep = " ";
419 } 496 }
@@ -421,6 +498,95 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
421 return len; 498 return len;
422} 499}
423 500
501static ssize_t write_ports(struct file *file, char *buf, size_t size)
502{
503 if (size == 0) {
504 int len = 0;
505 lock_kernel();
506 if (nfsd_serv)
507 len = svc_sock_names(buf, nfsd_serv, NULL);
508 unlock_kernel();
509 return len;
510 }
511 /* Either a single 'fd' number is written, in which
512 * case it must be for a socket of a supported family/protocol,
513 * and we use it as an nfsd socket, or
514 * A '-' followed by the 'name' of a socket in which case
515 * we close the socket.
516 */
517 if (isdigit(buf[0])) {
518 char *mesg = buf;
519 int fd;
520 int err;
521 err = get_int(&mesg, &fd);
522 if (err)
523 return -EINVAL;
524 if (fd < 0)
525 return -EINVAL;
526 err = nfsd_create_serv();
527 if (!err) {
528 int proto = 0;
529 err = svc_addsock(nfsd_serv, fd, buf, &proto);
530 if (err >= 0) {
531 err = lockd_up(proto);
532 if (err < 0)
533 svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf);
534 }
535 /* Decrease the count, but don't shutdown the
536 * the service
537 */
538 lock_kernel();
539 nfsd_serv->sv_nrthreads--;
540 unlock_kernel();
541 }
542 return err < 0 ? err : 0;
543 }
544 if (buf[0] == '-') {
545 char *toclose = kstrdup(buf+1, GFP_KERNEL);
546 int len = 0;
547 if (!toclose)
548 return -ENOMEM;
549 lock_kernel();
550 if (nfsd_serv)
551 len = svc_sock_names(buf, nfsd_serv, toclose);
552 unlock_kernel();
553 if (len >= 0)
554 lockd_down();
555 kfree(toclose);
556 return len;
557 }
558 return -EINVAL;
559}
560
561int nfsd_max_blksize;
562
563static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
564{
565 char *mesg = buf;
566 if (size > 0) {
567 int bsize;
568 int rv = get_int(&mesg, &bsize);
569 if (rv)
570 return rv;
571 /* force bsize into allowed range and
572 * required alignment.
573 */
574 if (bsize < 1024)
575 bsize = 1024;
576 if (bsize > NFSSVC_MAXBLKSIZE)
577 bsize = NFSSVC_MAXBLKSIZE;
578 bsize &= ~(1024-1);
579 lock_kernel();
580 if (nfsd_serv && nfsd_serv->sv_nrthreads) {
581 unlock_kernel();
582 return -EBUSY;
583 }
584 nfsd_max_blksize = bsize;
585 unlock_kernel();
586 }
587 return sprintf(buf, "%d\n", nfsd_max_blksize);
588}
589
424#ifdef CONFIG_NFSD_V4 590#ifdef CONFIG_NFSD_V4
425extern time_t nfs4_leasetime(void); 591extern time_t nfs4_leasetime(void);
426 592
@@ -483,7 +649,10 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
483 [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, 649 [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
484 [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, 650 [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
485 [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, 651 [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
652 [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
486 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, 653 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
654 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
655 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
487#ifdef CONFIG_NFSD_V4 656#ifdef CONFIG_NFSD_V4
488 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 657 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
489 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, 658 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 06cd0db0f32b..9ee1dab5d44a 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -146,20 +146,20 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
146 * status, 17 words for fattr, and 1 word for the byte count. 146 * status, 17 words for fattr, and 1 word for the byte count.
147 */ 147 */
148 148
149 if (NFSSVC_MAXBLKSIZE < argp->count) { 149 if (NFSSVC_MAXBLKSIZE_V2 < argp->count) {
150 printk(KERN_NOTICE 150 printk(KERN_NOTICE
151 "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n", 151 "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n",
152 NIPQUAD(rqstp->rq_addr.sin_addr.s_addr), 152 NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
153 ntohs(rqstp->rq_addr.sin_port), 153 ntohs(rqstp->rq_addr.sin_port),
154 argp->count); 154 argp->count);
155 argp->count = NFSSVC_MAXBLKSIZE; 155 argp->count = NFSSVC_MAXBLKSIZE_V2;
156 } 156 }
157 svc_reserve(rqstp, (19<<2) + argp->count + 4); 157 svc_reserve(rqstp, (19<<2) + argp->count + 4);
158 158
159 resp->count = argp->count; 159 resp->count = argp->count;
160 nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, 160 nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
161 argp->offset, 161 argp->offset,
162 argp->vec, argp->vlen, 162 rqstp->rq_vec, argp->vlen,
163 &resp->count); 163 &resp->count);
164 164
165 if (nfserr) return nfserr; 165 if (nfserr) return nfserr;
@@ -185,7 +185,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
185 185
186 nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, 186 nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
187 argp->offset, 187 argp->offset,
188 argp->vec, argp->vlen, 188 rqstp->rq_vec, argp->vlen,
189 argp->len, 189 argp->len,
190 &stable); 190 &stable);
191 return nfsd_return_attrs(nfserr, resp); 191 return nfsd_return_attrs(nfserr, resp);
@@ -225,7 +225,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
225 nfserr = nfserr_exist; 225 nfserr = nfserr_exist;
226 if (isdotent(argp->name, argp->len)) 226 if (isdotent(argp->name, argp->len))
227 goto done; 227 goto done;
228 fh_lock(dirfhp); 228 fh_lock_nested(dirfhp, I_MUTEX_PARENT);
229 dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); 229 dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
230 if (IS_ERR(dchild)) { 230 if (IS_ERR(dchild)) {
231 nfserr = nfserrno(PTR_ERR(dchild)); 231 nfserr = nfserrno(PTR_ERR(dchild));
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = {
553 PROC(none, void, void, none, RC_NOCACHE, ST), 553 PROC(none, void, void, none, RC_NOCACHE, ST),
554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), 554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), 555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4), 556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4),
557 PROC(none, void, void, none, RC_NOCACHE, ST), 557 PROC(none, void, void, none, RC_NOCACHE, ST),
558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), 558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), 559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ec1decf29bab..6fa6340a5fb8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -57,12 +57,6 @@ static atomic_t nfsd_busy;
57static unsigned long nfsd_last_call; 57static unsigned long nfsd_last_call;
58static DEFINE_SPINLOCK(nfsd_call_lock); 58static DEFINE_SPINLOCK(nfsd_call_lock);
59 59
60struct nfsd_list {
61 struct list_head list;
62 struct task_struct *task;
63};
64static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list);
65
66#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 60#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
67static struct svc_stat nfsd_acl_svcstats; 61static struct svc_stat nfsd_acl_svcstats;
68static struct svc_version * nfsd_acl_version[] = { 62static struct svc_version * nfsd_acl_version[] = {
@@ -117,6 +111,32 @@ struct svc_program nfsd_program = {
117 111
118}; 112};
119 113
114int nfsd_vers(int vers, enum vers_op change)
115{
116 if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
117 return -1;
118 switch(change) {
119 case NFSD_SET:
120 nfsd_versions[vers] = nfsd_version[vers];
121 break;
122#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
123 if (vers < NFSD_ACL_NRVERS)
124 nfsd_acl_version[vers] = nfsd_acl_version[vers];
125#endif
126 case NFSD_CLEAR:
127 nfsd_versions[vers] = NULL;
128#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
129 if (vers < NFSD_ACL_NRVERS)
130 nfsd_acl_version[vers] = NULL;
131#endif
132 break;
133 case NFSD_TEST:
134 return nfsd_versions[vers] != NULL;
135 case NFSD_AVAIL:
136 return nfsd_version[vers] != NULL;
137 }
138 return 0;
139}
120/* 140/*
121 * Maximum number of nfsd processes 141 * Maximum number of nfsd processes
122 */ 142 */
@@ -130,16 +150,192 @@ int nfsd_nrthreads(void)
130 return nfsd_serv->sv_nrthreads; 150 return nfsd_serv->sv_nrthreads;
131} 151}
132 152
153static int killsig; /* signal that was used to kill last nfsd */
154static void nfsd_last_thread(struct svc_serv *serv)
155{
156 /* When last nfsd thread exits we need to do some clean-up */
157 struct svc_sock *svsk;
158 list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
159 lockd_down();
160 nfsd_serv = NULL;
161 nfsd_racache_shutdown();
162 nfs4_state_shutdown();
163
164 printk(KERN_WARNING "nfsd: last server has exited\n");
165 if (killsig != SIG_NOCLEAN) {
166 printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
167 nfsd_export_flush();
168 }
169}
170
171void nfsd_reset_versions(void)
172{
173 int found_one = 0;
174 int i;
175
176 for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
177 if (nfsd_program.pg_vers[i])
178 found_one = 1;
179 }
180
181 if (!found_one) {
182 for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
183 nfsd_program.pg_vers[i] = nfsd_version[i];
184#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
185 for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
186 nfsd_acl_program.pg_vers[i] =
187 nfsd_acl_version[i];
188#endif
189 }
190}
191
192int nfsd_create_serv(void)
193{
194 int err = 0;
195 lock_kernel();
196 if (nfsd_serv) {
197 svc_get(nfsd_serv);
198 unlock_kernel();
199 return 0;
200 }
201 if (nfsd_max_blksize == 0) {
202 /* choose a suitable default */
203 struct sysinfo i;
204 si_meminfo(&i);
205 /* Aim for 1/4096 of memory per thread
206 * This gives 1MB on 4Gig machines
207 * But only uses 32K on 128M machines.
208 * Bottom out at 8K on 32M and smaller.
209 * Of course, this is only a default.
210 */
211 nfsd_max_blksize = NFSSVC_MAXBLKSIZE;
212 i.totalram <<= PAGE_SHIFT - 12;
213 while (nfsd_max_blksize > i.totalram &&
214 nfsd_max_blksize >= 8*1024*2)
215 nfsd_max_blksize /= 2;
216 }
217
218 atomic_set(&nfsd_busy, 0);
219 nfsd_serv = svc_create_pooled(&nfsd_program,
220 NFSD_BUFSIZE - NFSSVC_MAXBLKSIZE + nfsd_max_blksize,
221 nfsd_last_thread,
222 nfsd, SIG_NOCLEAN, THIS_MODULE);
223 if (nfsd_serv == NULL)
224 err = -ENOMEM;
225 unlock_kernel();
226 do_gettimeofday(&nfssvc_boot); /* record boot time */
227 return err;
228}
229
230static int nfsd_init_socks(int port)
231{
232 int error;
233 if (!list_empty(&nfsd_serv->sv_permsocks))
234 return 0;
235
236 error = lockd_up(IPPROTO_UDP);
237 if (error >= 0) {
238 error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
239 if (error < 0)
240 lockd_down();
241 }
242 if (error < 0)
243 return error;
244
245#ifdef CONFIG_NFSD_TCP
246 error = lockd_up(IPPROTO_TCP);
247 if (error >= 0) {
248 error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
249 if (error < 0)
250 lockd_down();
251 }
252 if (error < 0)
253 return error;
254#endif
255 return 0;
256}
257
258int nfsd_nrpools(void)
259{
260 if (nfsd_serv == NULL)
261 return 0;
262 else
263 return nfsd_serv->sv_nrpools;
264}
265
266int nfsd_get_nrthreads(int n, int *nthreads)
267{
268 int i = 0;
269
270 if (nfsd_serv != NULL) {
271 for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++)
272 nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads;
273 }
274
275 return 0;
276}
277
278int nfsd_set_nrthreads(int n, int *nthreads)
279{
280 int i = 0;
281 int tot = 0;
282 int err = 0;
283
284 if (nfsd_serv == NULL || n <= 0)
285 return 0;
286
287 if (n > nfsd_serv->sv_nrpools)
288 n = nfsd_serv->sv_nrpools;
289
290 /* enforce a global maximum number of threads */
291 tot = 0;
292 for (i = 0; i < n; i++) {
293 if (nthreads[i] > NFSD_MAXSERVS)
294 nthreads[i] = NFSD_MAXSERVS;
295 tot += nthreads[i];
296 }
297 if (tot > NFSD_MAXSERVS) {
298 /* total too large: scale down requested numbers */
299 for (i = 0; i < n && tot > 0; i++) {
300 int new = nthreads[i] * NFSD_MAXSERVS / tot;
301 tot -= (nthreads[i] - new);
302 nthreads[i] = new;
303 }
304 for (i = 0; i < n && tot > 0; i++) {
305 nthreads[i]--;
306 tot--;
307 }
308 }
309
310 /*
311 * There must always be a thread in pool 0; the admin
312 * can't shut down NFS completely using pool_threads.
313 */
314 if (nthreads[0] == 0)
315 nthreads[0] = 1;
316
317 /* apply the new numbers */
318 lock_kernel();
319 svc_get(nfsd_serv);
320 for (i = 0; i < n; i++) {
321 err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
322 nthreads[i]);
323 if (err)
324 break;
325 }
326 svc_destroy(nfsd_serv);
327 unlock_kernel();
328
329 return err;
330}
331
133int 332int
134nfsd_svc(unsigned short port, int nrservs) 333nfsd_svc(unsigned short port, int nrservs)
135{ 334{
136 int error; 335 int error;
137 int none_left, found_one, i;
138 struct list_head *victim;
139 336
140 lock_kernel(); 337 lock_kernel();
141 dprintk("nfsd: creating service: vers 0x%x\n", 338 dprintk("nfsd: creating service\n");
142 nfsd_versbits);
143 error = -EINVAL; 339 error = -EINVAL;
144 if (nrservs <= 0) 340 if (nrservs <= 0)
145 nrservs = 0; 341 nrservs = 0;
@@ -153,91 +349,20 @@ nfsd_svc(unsigned short port, int nrservs)
153 error = nfs4_state_start(); 349 error = nfs4_state_start();
154 if (error<0) 350 if (error<0)
155 goto out; 351 goto out;
156 if (!nfsd_serv) {
157 /*
158 * Use the nfsd_ctlbits to define which
159 * versions that will be advertised.
160 * If nfsd_ctlbits doesn't list any version,
161 * export them all.
162 */
163 found_one = 0;
164
165 for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
166 if (NFSCTL_VERISSET(nfsd_versbits, i)) {
167 nfsd_program.pg_vers[i] = nfsd_version[i];
168 found_one = 1;
169 } else
170 nfsd_program.pg_vers[i] = NULL;
171 }
172 352
173 if (!found_one) { 353 nfsd_reset_versions();
174 for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
175 nfsd_program.pg_vers[i] = nfsd_version[i];
176 }
177 354
355 error = nfsd_create_serv();
178 356
179#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 357 if (error)
180 found_one = 0; 358 goto out;
181 359 error = nfsd_init_socks(port);
182 for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) { 360 if (error)
183 if (NFSCTL_VERISSET(nfsd_versbits, i)) { 361 goto failure;
184 nfsd_acl_program.pg_vers[i] =
185 nfsd_acl_version[i];
186 found_one = 1;
187 } else
188 nfsd_acl_program.pg_vers[i] = NULL;
189 }
190
191 if (!found_one) {
192 for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
193 nfsd_acl_program.pg_vers[i] =
194 nfsd_acl_version[i];
195 }
196#endif
197
198 atomic_set(&nfsd_busy, 0);
199 error = -ENOMEM;
200 nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE);
201 if (nfsd_serv == NULL)
202 goto out;
203 error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
204 if (error < 0)
205 goto failure;
206 362
207#ifdef CONFIG_NFSD_TCP 363 error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
208 error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
209 if (error < 0)
210 goto failure;
211#endif
212 do_gettimeofday(&nfssvc_boot); /* record boot time */
213 } else
214 nfsd_serv->sv_nrthreads++;
215 nrservs -= (nfsd_serv->sv_nrthreads-1);
216 while (nrservs > 0) {
217 nrservs--;
218 __module_get(THIS_MODULE);
219 error = svc_create_thread(nfsd, nfsd_serv);
220 if (error < 0) {
221 module_put(THIS_MODULE);
222 break;
223 }
224 }
225 victim = nfsd_list.next;
226 while (nrservs < 0 && victim != &nfsd_list) {
227 struct nfsd_list *nl =
228 list_entry(victim,struct nfsd_list, list);
229 victim = victim->next;
230 send_sig(SIG_NOCLEAN, nl->task, 1);
231 nrservs++;
232 }
233 failure: 364 failure:
234 none_left = (nfsd_serv->sv_nrthreads == 1);
235 svc_destroy(nfsd_serv); /* Release server */ 365 svc_destroy(nfsd_serv); /* Release server */
236 if (none_left) {
237 nfsd_serv = NULL;
238 nfsd_racache_shutdown();
239 nfs4_state_shutdown();
240 }
241 out: 366 out:
242 unlock_kernel(); 367 unlock_kernel();
243 return error; 368 return error;
@@ -270,10 +395,8 @@ update_thread_usage(int busy_threads)
270static void 395static void
271nfsd(struct svc_rqst *rqstp) 396nfsd(struct svc_rqst *rqstp)
272{ 397{
273 struct svc_serv *serv = rqstp->rq_server;
274 struct fs_struct *fsp; 398 struct fs_struct *fsp;
275 int err; 399 int err;
276 struct nfsd_list me;
277 sigset_t shutdown_mask, allowed_mask; 400 sigset_t shutdown_mask, allowed_mask;
278 401
279 /* Lock module and set up kernel thread */ 402 /* Lock module and set up kernel thread */
@@ -297,10 +420,7 @@ nfsd(struct svc_rqst *rqstp)
297 420
298 nfsdstats.th_cnt++; 421 nfsdstats.th_cnt++;
299 422
300 lockd_up(); /* start lockd */ 423 rqstp->rq_task = current;
301
302 me.task = current;
303 list_add(&me.list, &nfsd_list);
304 424
305 unlock_kernel(); 425 unlock_kernel();
306 426
@@ -322,8 +442,7 @@ nfsd(struct svc_rqst *rqstp)
322 * Find a socket with data available and call its 442 * Find a socket with data available and call its
323 * recvfrom routine. 443 * recvfrom routine.
324 */ 444 */
325 while ((err = svc_recv(serv, rqstp, 445 while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
326 60*60*HZ)) == -EAGAIN)
327 ; 446 ;
328 if (err < 0) 447 if (err < 0)
329 break; 448 break;
@@ -336,7 +455,7 @@ nfsd(struct svc_rqst *rqstp)
336 /* Process request with signals blocked. */ 455 /* Process request with signals blocked. */
337 sigprocmask(SIG_SETMASK, &allowed_mask, NULL); 456 sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
338 457
339 svc_process(serv, rqstp); 458 svc_process(rqstp);
340 459
341 /* Unlock export hash tables */ 460 /* Unlock export hash tables */
342 exp_readunlock(); 461 exp_readunlock();
@@ -353,29 +472,13 @@ nfsd(struct svc_rqst *rqstp)
353 if (sigismember(&current->pending.signal, signo) && 472 if (sigismember(&current->pending.signal, signo) &&
354 !sigismember(&current->blocked, signo)) 473 !sigismember(&current->blocked, signo))
355 break; 474 break;
356 err = signo; 475 killsig = signo;
357 } 476 }
358 /* Clear signals before calling lockd_down() and svc_exit_thread() */ 477 /* Clear signals before calling svc_exit_thread() */
359 flush_signals(current); 478 flush_signals(current);
360 479
361 lock_kernel(); 480 lock_kernel();
362 481
363 /* Release lockd */
364 lockd_down();
365
366 /* Check if this is last thread */
367 if (serv->sv_nrthreads==1) {
368
369 printk(KERN_WARNING "nfsd: last server has exited\n");
370 if (err != SIG_NOCLEAN) {
371 printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
372 nfsd_export_flush();
373 }
374 nfsd_serv = NULL;
375 nfsd_racache_shutdown(); /* release read-ahead cache */
376 nfs4_state_shutdown();
377 }
378 list_del(&me.list);
379 nfsdstats.th_cnt --; 482 nfsdstats.th_cnt --;
380 483
381out: 484out:
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index e3a0797dd56b..1135c0d14557 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nfsd/xdr.c 2 * linux/fs/nfsd/nfsxdr.c
3 * 3 *
4 * XDR support for nfsd 4 * XDR support for nfsd
5 * 5 *
@@ -254,19 +254,18 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
254 len = args->count = ntohl(*p++); 254 len = args->count = ntohl(*p++);
255 p++; /* totalcount - unused */ 255 p++; /* totalcount - unused */
256 256
257 if (len > NFSSVC_MAXBLKSIZE) 257 if (len > NFSSVC_MAXBLKSIZE_V2)
258 len = NFSSVC_MAXBLKSIZE; 258 len = NFSSVC_MAXBLKSIZE_V2;
259 259
260 /* set up somewhere to store response. 260 /* set up somewhere to store response.
261 * We take pages, put them on reslist and include in iovec 261 * We take pages, put them on reslist and include in iovec
262 */ 262 */
263 v=0; 263 v=0;
264 while (len > 0) { 264 while (len > 0) {
265 pn=rqstp->rq_resused; 265 pn = rqstp->rq_resused++;
266 svc_take_page(rqstp); 266 rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
267 args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); 267 rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
268 args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; 268 len -= rqstp->rq_vec[v].iov_len;
269 len -= args->vec[v].iov_len;
270 v++; 269 v++;
271 } 270 }
272 args->vlen = v; 271 args->vlen = v;
@@ -286,21 +285,21 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
286 args->offset = ntohl(*p++); /* offset */ 285 args->offset = ntohl(*p++); /* offset */
287 p++; /* totalcount */ 286 p++; /* totalcount */
288 len = args->len = ntohl(*p++); 287 len = args->len = ntohl(*p++);
289 args->vec[0].iov_base = (void*)p; 288 rqstp->rq_vec[0].iov_base = (void*)p;
290 args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - 289 rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
291 (((void*)p) - rqstp->rq_arg.head[0].iov_base); 290 (((void*)p) - rqstp->rq_arg.head[0].iov_base);
292 if (len > NFSSVC_MAXBLKSIZE) 291 if (len > NFSSVC_MAXBLKSIZE_V2)
293 len = NFSSVC_MAXBLKSIZE; 292 len = NFSSVC_MAXBLKSIZE_V2;
294 v = 0; 293 v = 0;
295 while (len > args->vec[v].iov_len) { 294 while (len > rqstp->rq_vec[v].iov_len) {
296 len -= args->vec[v].iov_len; 295 len -= rqstp->rq_vec[v].iov_len;
297 v++; 296 v++;
298 args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); 297 rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
299 args->vec[v].iov_len = PAGE_SIZE; 298 rqstp->rq_vec[v].iov_len = PAGE_SIZE;
300 } 299 }
301 args->vec[v].iov_len = len; 300 rqstp->rq_vec[v].iov_len = len;
302 args->vlen = v+1; 301 args->vlen = v+1;
303 return args->vec[0].iov_len > 0; 302 return rqstp->rq_vec[0].iov_len > 0;
304} 303}
305 304
306int 305int
@@ -333,8 +332,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinka
333{ 332{
334 if (!(p = decode_fh(p, &args->fh))) 333 if (!(p = decode_fh(p, &args->fh)))
335 return 0; 334 return 0;
336 svc_take_page(rqstp); 335 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
337 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
338 336
339 return xdr_argsize_check(rqstp, p); 337 return xdr_argsize_check(rqstp, p);
340} 338}
@@ -375,8 +373,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
375 if (args->count > PAGE_SIZE) 373 if (args->count > PAGE_SIZE)
376 args->count = PAGE_SIZE; 374 args->count = PAGE_SIZE;
377 375
378 svc_take_page(rqstp); 376 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
379 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
380 377
381 return xdr_argsize_check(rqstp, p); 378 return xdr_argsize_check(rqstp, p);
382} 379}
@@ -416,7 +413,6 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
416 rqstp->rq_res.page_len = resp->len; 413 rqstp->rq_res.page_len = resp->len;
417 if (resp->len & 3) { 414 if (resp->len & 3) {
418 /* need to pad the tail */ 415 /* need to pad the tail */
419 rqstp->rq_restailpage = 0;
420 rqstp->rq_res.tail[0].iov_base = p; 416 rqstp->rq_res.tail[0].iov_base = p;
421 *p = 0; 417 *p = 0;
422 rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); 418 rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
@@ -436,7 +432,6 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
436 rqstp->rq_res.page_len = resp->count; 432 rqstp->rq_res.page_len = resp->count;
437 if (resp->count & 3) { 433 if (resp->count & 3) {
438 /* need to pad the tail */ 434 /* need to pad the tail */
439 rqstp->rq_restailpage = 0;
440 rqstp->rq_res.tail[0].iov_base = p; 435 rqstp->rq_res.tail[0].iov_base = p;
441 *p = 0; 436 *p = 0;
442 rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); 437 rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
@@ -463,7 +458,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p,
463{ 458{
464 struct kstatfs *stat = &resp->stats; 459 struct kstatfs *stat = &resp->stats;
465 460
466 *p++ = htonl(NFSSVC_MAXBLKSIZE); /* max transfer size */ 461 *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */
467 *p++ = htonl(stat->f_bsize); 462 *p++ = htonl(stat->f_bsize);
468 *p++ = htonl(stat->f_blocks); 463 *p++ = htonl(stat->f_blocks);
469 *p++ = htonl(stat->f_bfree); 464 *p++ = htonl(stat->f_bfree);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c9e3b5a8fe07..1141bd29e4e3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -54,6 +54,7 @@
54#include <linux/nfsd_idmap.h> 54#include <linux/nfsd_idmap.h>
55#include <linux/security.h> 55#include <linux/security.h>
56#endif /* CONFIG_NFSD_V4 */ 56#endif /* CONFIG_NFSD_V4 */
57#include <linux/jhash.h>
57 58
58#include <asm/uaccess.h> 59#include <asm/uaccess.h>
59 60
@@ -81,10 +82,19 @@ struct raparms {
81 dev_t p_dev; 82 dev_t p_dev;
82 int p_set; 83 int p_set;
83 struct file_ra_state p_ra; 84 struct file_ra_state p_ra;
85 unsigned int p_hindex;
84}; 86};
85 87
88struct raparm_hbucket {
89 struct raparms *pb_head;
90 spinlock_t pb_lock;
91} ____cacheline_aligned_in_smp;
92
86static struct raparms * raparml; 93static struct raparms * raparml;
87static struct raparms * raparm_cache; 94#define RAPARM_HASH_BITS 4
95#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
96#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
97static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
88 98
89/* 99/*
90 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 100 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -437,13 +447,11 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
437 } else if (error < 0) 447 } else if (error < 0)
438 goto out_nfserr; 448 goto out_nfserr;
439 449
440 if (pacl) { 450 error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
441 error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); 451 if (error < 0)
442 if (error < 0) 452 goto out_nfserr;
443 goto out_nfserr;
444 }
445 453
446 if (dpacl) { 454 if (S_ISDIR(inode->i_mode)) {
447 error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); 455 error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
448 if (error < 0) 456 if (error < 0)
449 goto out_nfserr; 457 goto out_nfserr;
@@ -743,16 +751,20 @@ nfsd_sync_dir(struct dentry *dp)
743 * Obtain the readahead parameters for the file 751 * Obtain the readahead parameters for the file
744 * specified by (dev, ino). 752 * specified by (dev, ino).
745 */ 753 */
746static DEFINE_SPINLOCK(ra_lock);
747 754
748static inline struct raparms * 755static inline struct raparms *
749nfsd_get_raparms(dev_t dev, ino_t ino) 756nfsd_get_raparms(dev_t dev, ino_t ino)
750{ 757{
751 struct raparms *ra, **rap, **frap = NULL; 758 struct raparms *ra, **rap, **frap = NULL;
752 int depth = 0; 759 int depth = 0;
760 unsigned int hash;
761 struct raparm_hbucket *rab;
753 762
754 spin_lock(&ra_lock); 763 hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
755 for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { 764 rab = &raparm_hash[hash];
765
766 spin_lock(&rab->pb_lock);
767 for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
756 if (ra->p_ino == ino && ra->p_dev == dev) 768 if (ra->p_ino == ino && ra->p_dev == dev)
757 goto found; 769 goto found;
758 depth++; 770 depth++;
@@ -761,7 +773,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
761 } 773 }
762 depth = nfsdstats.ra_size*11/10; 774 depth = nfsdstats.ra_size*11/10;
763 if (!frap) { 775 if (!frap) {
764 spin_unlock(&ra_lock); 776 spin_unlock(&rab->pb_lock);
765 return NULL; 777 return NULL;
766 } 778 }
767 rap = frap; 779 rap = frap;
@@ -769,15 +781,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
769 ra->p_dev = dev; 781 ra->p_dev = dev;
770 ra->p_ino = ino; 782 ra->p_ino = ino;
771 ra->p_set = 0; 783 ra->p_set = 0;
784 ra->p_hindex = hash;
772found: 785found:
773 if (rap != &raparm_cache) { 786 if (rap != &rab->pb_head) {
774 *rap = ra->p_next; 787 *rap = ra->p_next;
775 ra->p_next = raparm_cache; 788 ra->p_next = rab->pb_head;
776 raparm_cache = ra; 789 rab->pb_head = ra;
777 } 790 }
778 ra->p_count++; 791 ra->p_count++;
779 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; 792 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
780 spin_unlock(&ra_lock); 793 spin_unlock(&rab->pb_lock);
781 return ra; 794 return ra;
782} 795}
783 796
@@ -791,22 +804,26 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
791{ 804{
792 unsigned long count = desc->count; 805 unsigned long count = desc->count;
793 struct svc_rqst *rqstp = desc->arg.data; 806 struct svc_rqst *rqstp = desc->arg.data;
807 struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
794 808
795 if (size > count) 809 if (size > count)
796 size = count; 810 size = count;
797 811
798 if (rqstp->rq_res.page_len == 0) { 812 if (rqstp->rq_res.page_len == 0) {
799 get_page(page); 813 get_page(page);
800 rqstp->rq_respages[rqstp->rq_resused++] = page; 814 put_page(*pp);
815 *pp = page;
816 rqstp->rq_resused++;
801 rqstp->rq_res.page_base = offset; 817 rqstp->rq_res.page_base = offset;
802 rqstp->rq_res.page_len = size; 818 rqstp->rq_res.page_len = size;
803 } else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) { 819 } else if (page != pp[-1]) {
804 get_page(page); 820 get_page(page);
805 rqstp->rq_respages[rqstp->rq_resused++] = page; 821 put_page(*pp);
822 *pp = page;
823 rqstp->rq_resused++;
806 rqstp->rq_res.page_len += size; 824 rqstp->rq_res.page_len += size;
807 } else { 825 } else
808 rqstp->rq_res.page_len += size; 826 rqstp->rq_res.page_len += size;
809 }
810 827
811 desc->count = count - size; 828 desc->count = count - size;
812 desc->written += size; 829 desc->written += size;
@@ -837,7 +854,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
837 file->f_ra = ra->p_ra; 854 file->f_ra = ra->p_ra;
838 855
839 if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { 856 if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
840 svc_pushback_unused_pages(rqstp); 857 rqstp->rq_resused = 1;
841 err = file->f_op->sendfile(file, &offset, *count, 858 err = file->f_op->sendfile(file, &offset, *count,
842 nfsd_read_actor, rqstp); 859 nfsd_read_actor, rqstp);
843 } else { 860 } else {
@@ -849,11 +866,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
849 866
850 /* Write back readahead params */ 867 /* Write back readahead params */
851 if (ra) { 868 if (ra) {
852 spin_lock(&ra_lock); 869 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
870 spin_lock(&rab->pb_lock);
853 ra->p_ra = file->f_ra; 871 ra->p_ra = file->f_ra;
854 ra->p_set = 1; 872 ra->p_set = 1;
855 ra->p_count--; 873 ra->p_count--;
856 spin_unlock(&ra_lock); 874 spin_unlock(&rab->pb_lock);
857 } 875 }
858 876
859 if (err >= 0) { 877 if (err >= 0) {
@@ -1114,7 +1132,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1114 */ 1132 */
1115 if (!resfhp->fh_dentry) { 1133 if (!resfhp->fh_dentry) {
1116 /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ 1134 /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
1117 fh_lock(fhp); 1135 fh_lock_nested(fhp, I_MUTEX_PARENT);
1118 dchild = lookup_one_len(fname, dentry, flen); 1136 dchild = lookup_one_len(fname, dentry, flen);
1119 err = PTR_ERR(dchild); 1137 err = PTR_ERR(dchild);
1120 if (IS_ERR(dchild)) 1138 if (IS_ERR(dchild))
@@ -1240,7 +1258,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1240 err = nfserr_notdir; 1258 err = nfserr_notdir;
1241 if(!dirp->i_op || !dirp->i_op->lookup) 1259 if(!dirp->i_op || !dirp->i_op->lookup)
1242 goto out; 1260 goto out;
1243 fh_lock(fhp); 1261 fh_lock_nested(fhp, I_MUTEX_PARENT);
1244 1262
1245 /* 1263 /*
1246 * Compose the response file handle. 1264 * Compose the response file handle.
@@ -1494,7 +1512,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1494 if (isdotent(name, len)) 1512 if (isdotent(name, len))
1495 goto out; 1513 goto out;
1496 1514
1497 fh_lock(ffhp); 1515 fh_lock_nested(ffhp, I_MUTEX_PARENT);
1498 ddir = ffhp->fh_dentry; 1516 ddir = ffhp->fh_dentry;
1499 dirp = ddir->d_inode; 1517 dirp = ddir->d_inode;
1500 1518
@@ -1644,7 +1662,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1644 if (err) 1662 if (err)
1645 goto out; 1663 goto out;
1646 1664
1647 fh_lock(fhp); 1665 fh_lock_nested(fhp, I_MUTEX_PARENT);
1648 dentry = fhp->fh_dentry; 1666 dentry = fhp->fh_dentry;
1649 dirp = dentry->d_inode; 1667 dirp = dentry->d_inode;
1650 1668
@@ -1829,11 +1847,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
1829void 1847void
1830nfsd_racache_shutdown(void) 1848nfsd_racache_shutdown(void)
1831{ 1849{
1832 if (!raparm_cache) 1850 if (!raparml)
1833 return; 1851 return;
1834 dprintk("nfsd: freeing readahead buffers.\n"); 1852 dprintk("nfsd: freeing readahead buffers.\n");
1835 kfree(raparml); 1853 kfree(raparml);
1836 raparm_cache = raparml = NULL; 1854 raparml = NULL;
1837} 1855}
1838/* 1856/*
1839 * Initialize readahead param cache 1857 * Initialize readahead param cache
@@ -1842,19 +1860,31 @@ int
1842nfsd_racache_init(int cache_size) 1860nfsd_racache_init(int cache_size)
1843{ 1861{
1844 int i; 1862 int i;
1863 int j = 0;
1864 int nperbucket;
1865
1845 1866
1846 if (raparm_cache) 1867 if (raparml)
1847 return 0; 1868 return 0;
1869 if (cache_size < 2*RAPARM_HASH_SIZE)
1870 cache_size = 2*RAPARM_HASH_SIZE;
1848 raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL); 1871 raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
1849 1872
1850 if (raparml != NULL) { 1873 if (raparml != NULL) {
1851 dprintk("nfsd: allocating %d readahead buffers.\n", 1874 dprintk("nfsd: allocating %d readahead buffers.\n",
1852 cache_size); 1875 cache_size);
1876 for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
1877 raparm_hash[i].pb_head = NULL;
1878 spin_lock_init(&raparm_hash[i].pb_lock);
1879 }
1880 nperbucket = cache_size >> RAPARM_HASH_BITS;
1853 memset(raparml, 0, sizeof(struct raparms) * cache_size); 1881 memset(raparml, 0, sizeof(struct raparms) * cache_size);
1854 for (i = 0; i < cache_size - 1; i++) { 1882 for (i = 0; i < cache_size - 1; i++) {
1855 raparml[i].p_next = raparml + i + 1; 1883 if (i % nperbucket == 0)
1884 raparm_hash[j++].pb_head = raparml + i;
1885 if (i % nperbucket < nperbucket-1)
1886 raparml[i].p_next = raparml + i + 1;
1856 } 1887 }
1857 raparm_cache = raparml;
1858 } else { 1888 } else {
1859 printk(KERN_WARNING 1889 printk(KERN_WARNING
1860 "nfsd: Could not allocate memory read-ahead cache.\n"); 1890 "nfsd: Could not allocate memory read-ahead cache.\n");
diff --git a/fs/nls/nls_ascii.c b/fs/nls/nls_ascii.c
index b83381c07ad6..6993faea28ac 100644
--- a/fs/nls/nls_ascii.c
+++ b/fs/nls/nls_ascii.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_ascii.c 2 * linux/fs/nls/nls_ascii.c
3 * 3 *
4 * Charset ascii translation tables. 4 * Charset ascii translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 9de6b495f112..7dfdab98729b 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_base.c 2 * linux/fs/nls/nls_base.c
3 * 3 *
4 * Native language support--charsets and unicode translations. 4 * Native language support--charsets and unicode translations.
5 * By Gordon Chaffee 1996, 1997 5 * By Gordon Chaffee 1996, 1997
@@ -163,8 +163,6 @@ int register_nls(struct nls_table * nls)
163{ 163{
164 struct nls_table ** tmp = &tables; 164 struct nls_table ** tmp = &tables;
165 165
166 if (!nls)
167 return -EINVAL;
168 if (nls->next) 166 if (nls->next)
169 return -EBUSY; 167 return -EBUSY;
170 168
diff --git a/fs/nls/nls_cp1250.c b/fs/nls/nls_cp1250.c
index 32e78cf95180..570aa69846a0 100644
--- a/fs/nls/nls_cp1250.c
+++ b/fs/nls/nls_cp1250.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp1250.c 2 * linux/fs/nls/nls_cp1250.c
3 * 3 *
4 * Charset cp1250 translation tables. 4 * Charset cp1250 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp1251.c b/fs/nls/nls_cp1251.c
index cb41c8ae4486..f114afa069db 100644
--- a/fs/nls/nls_cp1251.c
+++ b/fs/nls/nls_cp1251.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp1251.c 2 * linux/fs/nls/nls_cp1251.c
3 * 3 *
4 * Charset cp1251 translation tables. 4 * Charset cp1251 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp1255.c b/fs/nls/nls_cp1255.c
index efdeefee5346..e57f2cbf5bc0 100644
--- a/fs/nls/nls_cp1255.c
+++ b/fs/nls/nls_cp1255.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp1255.c 2 * linux/fs/nls/nls_cp1255.c
3 * 3 *
4 * Charset cp1255 translation tables. 4 * Charset cp1255 translation tables.
5 * The Unicode to charset table has only exact mappings. 5 * The Unicode to charset table has only exact mappings.
diff --git a/fs/nls/nls_cp437.c b/fs/nls/nls_cp437.c
index 5c4a1cd685dd..d41930ce4a44 100644
--- a/fs/nls/nls_cp437.c
+++ b/fs/nls/nls_cp437.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp437.c 2 * linux/fs/nls/nls_cp437.c
3 * 3 *
4 * Charset cp437 translation tables. 4 * Charset cp437 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp737.c b/fs/nls/nls_cp737.c
index e8b3ca8462e7..d21f8790aa19 100644
--- a/fs/nls/nls_cp737.c
+++ b/fs/nls/nls_cp737.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp737.c 2 * linux/fs/nls/nls_cp737.c
3 * 3 *
4 * Charset cp737 translation tables. 4 * Charset cp737 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp775.c b/fs/nls/nls_cp775.c
index bdb290ea523a..c97714c38a90 100644
--- a/fs/nls/nls_cp775.c
+++ b/fs/nls/nls_cp775.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp775.c 2 * linux/fs/nls/nls_cp775.c
3 * 3 *
4 * Charset cp775 translation tables. 4 * Charset cp775 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp850.c b/fs/nls/nls_cp850.c
index 25deaa4c8648..843b7d975ba2 100644
--- a/fs/nls/nls_cp850.c
+++ b/fs/nls/nls_cp850.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp850.c 2 * linux/fs/nls/nls_cp850.c
3 * 3 *
4 * Charset cp850 translation tables. 4 * Charset cp850 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp852.c b/fs/nls/nls_cp852.c
index b822a7b6b970..83cfd844d5ca 100644
--- a/fs/nls/nls_cp852.c
+++ b/fs/nls/nls_cp852.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp852.c 2 * linux/fs/nls/nls_cp852.c
3 * 3 *
4 * Charset cp852 translation tables. 4 * Charset cp852 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp855.c b/fs/nls/nls_cp855.c
index e8641b7a8b27..9190b7b574ff 100644
--- a/fs/nls/nls_cp855.c
+++ b/fs/nls/nls_cp855.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp855.c 2 * linux/fs/nls/nls_cp855.c
3 * 3 *
4 * Charset cp855 translation tables. 4 * Charset cp855 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp857.c b/fs/nls/nls_cp857.c
index 7ba589ef8cc0..ef3d36db8082 100644
--- a/fs/nls/nls_cp857.c
+++ b/fs/nls/nls_cp857.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp857.c 2 * linux/fs/nls/nls_cp857.c
3 * 3 *
4 * Charset cp857 translation tables. 4 * Charset cp857 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp860.c b/fs/nls/nls_cp860.c
index 3b9e49ce8c80..7e2fb6645893 100644
--- a/fs/nls/nls_cp860.c
+++ b/fs/nls/nls_cp860.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp860.c 2 * linux/fs/nls/nls_cp860.c
3 * 3 *
4 * Charset cp860 translation tables. 4 * Charset cp860 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp861.c b/fs/nls/nls_cp861.c
index 959ff64ee971..66d8d808ccf1 100644
--- a/fs/nls/nls_cp861.c
+++ b/fs/nls/nls_cp861.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp861.c 2 * linux/fs/nls/nls_cp861.c
3 * 3 *
4 * Charset cp861 translation tables. 4 * Charset cp861 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp862.c b/fs/nls/nls_cp862.c
index b96928f5a023..360ba388485f 100644
--- a/fs/nls/nls_cp862.c
+++ b/fs/nls/nls_cp862.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp862.c 2 * linux/fs/nls/nls_cp862.c
3 * 3 *
4 * Charset cp862 translation tables. 4 * Charset cp862 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp863.c b/fs/nls/nls_cp863.c
index baa6e0eab1d6..656a93113e37 100644
--- a/fs/nls/nls_cp863.c
+++ b/fs/nls/nls_cp863.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp863.c 2 * linux/fs/nls/nls_cp863.c
3 * 3 *
4 * Charset cp863 translation tables. 4 * Charset cp863 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp864.c b/fs/nls/nls_cp864.c
index f4dabb037dfe..01ca7309753e 100644
--- a/fs/nls/nls_cp864.c
+++ b/fs/nls/nls_cp864.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp864.c 2 * linux/fs/nls/nls_cp864.c
3 * 3 *
4 * Charset cp864 translation tables. 4 * Charset cp864 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp865.c b/fs/nls/nls_cp865.c
index 4caeafae32c2..5ba6ee13e109 100644
--- a/fs/nls/nls_cp865.c
+++ b/fs/nls/nls_cp865.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp865.c 2 * linux/fs/nls/nls_cp865.c
3 * 3 *
4 * Charset cp865 translation tables. 4 * Charset cp865 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp866.c b/fs/nls/nls_cp866.c
index f2b4a9a293fb..c5f82221c9fe 100644
--- a/fs/nls/nls_cp866.c
+++ b/fs/nls/nls_cp866.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp866.c 2 * linux/fs/nls/nls_cp866.c
3 * 3 *
4 * Charset cp866 translation tables. 4 * Charset cp866 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp869.c b/fs/nls/nls_cp869.c
index 12b436f4eca1..8d4015124d11 100644
--- a/fs/nls/nls_cp869.c
+++ b/fs/nls/nls_cp869.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp869.c 2 * linux/fs/nls/nls_cp869.c
3 * 3 *
4 * Charset cp869 translation tables. 4 * Charset cp869 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp874.c b/fs/nls/nls_cp874.c
index b5766a01703a..df042052c2db 100644
--- a/fs/nls/nls_cp874.c
+++ b/fs/nls/nls_cp874.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp874.c 2 * linux/fs/nls/nls_cp874.c
3 * 3 *
4 * Charset cp874 translation tables. 4 * Charset cp874 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_cp932.c b/fs/nls/nls_cp932.c
index 2c1a17cdcd24..2a9ccf3bc7ef 100644
--- a/fs/nls/nls_cp932.c
+++ b/fs/nls/nls_cp932.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp932.c 2 * linux/fs/nls/nls_cp932.c
3 * 3 *
4 * Charset cp932 translation tables. 4 * Charset cp932 translation tables.
5 * This translation table was generated automatically, the 5 * This translation table was generated automatically, the
diff --git a/fs/nls/nls_cp936.c b/fs/nls/nls_cp936.c
index ef4cef464aba..046fde8170ea 100644
--- a/fs/nls/nls_cp936.c
+++ b/fs/nls/nls_cp936.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp936.c 2 * linux/fs/nls/nls_cp936.c
3 * 3 *
4 * Charset cp936 translation tables. 4 * Charset cp936 translation tables.
5 * This translation table was generated automatically, the 5 * This translation table was generated automatically, the
diff --git a/fs/nls/nls_cp949.c b/fs/nls/nls_cp949.c
index 4351ae21d897..92ae19372f0f 100644
--- a/fs/nls/nls_cp949.c
+++ b/fs/nls/nls_cp949.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp949.c 2 * linux/fs/nls/nls_cp949.c
3 * 3 *
4 * Charset cp949 translation tables. 4 * Charset cp949 translation tables.
5 * This translation table was generated automatically, the 5 * This translation table was generated automatically, the
diff --git a/fs/nls/nls_cp950.c b/fs/nls/nls_cp950.c
index 8167a2858879..5665945fb88c 100644
--- a/fs/nls/nls_cp950.c
+++ b/fs/nls/nls_cp950.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_cp950.c 2 * linux/fs/nls/nls_cp950.c
3 * 3 *
4 * Charset cp950 translation tables. 4 * Charset cp950 translation tables.
5 * This translation table was generated automatically, the 5 * This translation table was generated automatically, the
diff --git a/fs/nls/nls_euc-jp.c b/fs/nls/nls_euc-jp.c
index 06640c3e4021..73293511578b 100644
--- a/fs/nls/nls_euc-jp.c
+++ b/fs/nls/nls_euc-jp.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_euc-jp.c 2 * linux/fs/nls/nls_euc-jp.c
3 * 3 *
4 * Added `OSF/JVC Recommended Code Set Conversion Specification 4 * Added `OSF/JVC Recommended Code Set Conversion Specification
5 * between Japanese EUC and Shift-JIS' support: <hirofumi@mail.parknet.co.jp> 5 * between Japanese EUC and Shift-JIS' support: <hirofumi@mail.parknet.co.jp>
diff --git a/fs/nls/nls_iso8859-1.c b/fs/nls/nls_iso8859-1.c
index 70a2c1956723..2483c3c6c1c1 100644
--- a/fs/nls/nls_iso8859-1.c
+++ b/fs/nls/nls_iso8859-1.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-1.c 2 * linux/fs/nls/nls_iso8859-1.c
3 * 3 *
4 * Charset iso8859-1 translation tables. 4 * Charset iso8859-1 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_iso8859-13.c b/fs/nls/nls_iso8859-13.c
index 4547035f21a3..7b8721d74368 100644
--- a/fs/nls/nls_iso8859-13.c
+++ b/fs/nls/nls_iso8859-13.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-13.c 2 * linux/fs/nls/nls_iso8859-13.c
3 * 3 *
4 * Charset iso8859-13 translation tables. 4 * Charset iso8859-13 translation tables.
5 * The Unicode to charset table has only exact mappings. 5 * The Unicode to charset table has only exact mappings.
diff --git a/fs/nls/nls_iso8859-14.c b/fs/nls/nls_iso8859-14.c
index 13628d0dd3a9..2e895e638dba 100644
--- a/fs/nls/nls_iso8859-14.c
+++ b/fs/nls/nls_iso8859-14.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-14.c 2 * linux/fs/nls/nls_iso8859-14.c
3 * 3 *
4 * Charset iso8859-14 translation tables. 4 * Charset iso8859-14 translation tables.
5 * 5 *
diff --git a/fs/nls/nls_iso8859-15.c b/fs/nls/nls_iso8859-15.c
index 88b924bf7e18..5c91592779fe 100644
--- a/fs/nls/nls_iso8859-15.c
+++ b/fs/nls/nls_iso8859-15.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-15.c 2 * linux/fs/nls/nls_iso8859-15.c
3 * 3 *
4 * Charset iso8859-15 translation tables. 4 * Charset iso8859-15 translation tables.
5 * The Unicode to charset table has only exact mappings. 5 * The Unicode to charset table has only exact mappings.
diff --git a/fs/nls/nls_iso8859-2.c b/fs/nls/nls_iso8859-2.c
index 372528a6c40c..892d38fe9530 100644
--- a/fs/nls/nls_iso8859-2.c
+++ b/fs/nls/nls_iso8859-2.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-2.c 2 * linux/fs/nls/nls_iso8859-2.c
3 * 3 *
4 * Charset iso8859-2 translation tables. 4 * Charset iso8859-2 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_iso8859-3.c b/fs/nls/nls_iso8859-3.c
index 81b45a234369..49317bcdb4be 100644
--- a/fs/nls/nls_iso8859-3.c
+++ b/fs/nls/nls_iso8859-3.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-3.c 2 * linux/fs/nls/nls_iso8859-3.c
3 * 3 *
4 * Charset iso8859-3 translation tables. 4 * Charset iso8859-3 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_iso8859-4.c b/fs/nls/nls_iso8859-4.c
index 101b87f5a49b..9f3b9368c2cf 100644
--- a/fs/nls/nls_iso8859-4.c
+++ b/fs/nls/nls_iso8859-4.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-4.c 2 * linux/fs/nls/nls_iso8859-4.c
3 * 3 *
4 * Charset iso8859-4 translation tables. 4 * Charset iso8859-4 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_iso8859-5.c b/fs/nls/nls_iso8859-5.c
index 83b0084de5eb..001a2bb132ce 100644
--- a/fs/nls/nls_iso8859-5.c
+++ b/fs/nls/nls_iso8859-5.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-5.c 2 * linux/fs/nls/nls_iso8859-5.c
3 * 3 *
4 * Charset iso8859-5 translation tables. 4 * Charset iso8859-5 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_iso8859-6.c b/fs/nls/nls_iso8859-6.c
index 0c519d65f55b..8cec03d66088 100644
--- a/fs/nls/nls_iso8859-6.c
+++ b/fs/nls/nls_iso8859-6.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-6.c 2 * linux/fs/nls/nls_iso8859-6.c
3 * 3 *
4 * Charset iso8859-6 translation tables. 4 * Charset iso8859-6 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_iso8859-7.c b/fs/nls/nls_iso8859-7.c
index bd0854625acf..1be707d5ac31 100644
--- a/fs/nls/nls_iso8859-7.c
+++ b/fs/nls/nls_iso8859-7.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-7.c 2 * linux/fs/nls/nls_iso8859-7.c
3 * 3 *
4 * Charset iso8859-7 translation tables. 4 * Charset iso8859-7 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_iso8859-9.c b/fs/nls/nls_iso8859-9.c
index 988eff791c06..8c0146f73834 100644
--- a/fs/nls/nls_iso8859-9.c
+++ b/fs/nls/nls_iso8859-9.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_iso8859-9.c 2 * linux/fs/nls/nls_iso8859-9.c
3 * 3 *
4 * Charset iso8859-9 translation tables. 4 * Charset iso8859-9 translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_koi8-r.c b/fs/nls/nls_koi8-r.c
index 0ad22c249796..fefbe0807265 100644
--- a/fs/nls/nls_koi8-r.c
+++ b/fs/nls/nls_koi8-r.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_koi8-r.c 2 * linux/fs/nls/nls_koi8-r.c
3 * 3 *
4 * Charset koi8-r translation tables. 4 * Charset koi8-r translation tables.
5 * Generated automatically from the Unicode and charset 5 * Generated automatically from the Unicode and charset
diff --git a/fs/nls/nls_koi8-ru.c b/fs/nls/nls_koi8-ru.c
index 5db83efe27c6..e7bc1d75c78c 100644
--- a/fs/nls/nls_koi8-ru.c
+++ b/fs/nls/nls_koi8-ru.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_koi8-ru.c 2 * linux/fs/nls/nls_koi8-ru.c
3 * 3 *
4 * Charset koi8-ru translation based on charset koi8-u. 4 * Charset koi8-ru translation based on charset koi8-u.
5 * The Unicode to charset table has only exact mappings. 5 * The Unicode to charset table has only exact mappings.
diff --git a/fs/nls/nls_koi8-u.c b/fs/nls/nls_koi8-u.c
index 9d30fd61cf46..015070211f22 100644
--- a/fs/nls/nls_koi8-u.c
+++ b/fs/nls/nls_koi8-u.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/nls_koi8-u.c 2 * linux/fs/nls/nls_koi8-u.c
3 * 3 *
4 * Charset koi8-u translation tables. 4 * Charset koi8-u translation tables.
5 * The Unicode to charset table has only exact mappings. 5 * The Unicode to charset table has only exact mappings.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index c0e554971df0..25e917fb4739 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -162,7 +162,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
162 int g; 162 int g;
163 struct fdtable *fdt = NULL; 163 struct fdtable *fdt = NULL;
164 164
165 read_lock(&tasklist_lock); 165 rcu_read_lock();
166 buffer += sprintf(buffer, 166 buffer += sprintf(buffer,
167 "State:\t%s\n" 167 "State:\t%s\n"
168 "SleepAVG:\t%lu%%\n" 168 "SleepAVG:\t%lu%%\n"
@@ -174,14 +174,13 @@ static inline char * task_state(struct task_struct *p, char *buffer)
174 "Gid:\t%d\t%d\t%d\t%d\n", 174 "Gid:\t%d\t%d\t%d\t%d\n",
175 get_task_state(p), 175 get_task_state(p),
176 (p->sleep_avg/1024)*100/(1020000000/1024), 176 (p->sleep_avg/1024)*100/(1020000000/1024),
177 p->tgid, 177 p->tgid, p->pid,
178 p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, 178 pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
179 pid_alive(p) && p->ptrace ? p->parent->pid : 0, 179 pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
180 p->uid, p->euid, p->suid, p->fsuid, 180 p->uid, p->euid, p->suid, p->fsuid,
181 p->gid, p->egid, p->sgid, p->fsgid); 181 p->gid, p->egid, p->sgid, p->fsgid);
182 read_unlock(&tasklist_lock); 182
183 task_lock(p); 183 task_lock(p);
184 rcu_read_lock();
185 if (p->files) 184 if (p->files)
186 fdt = files_fdtable(p->files); 185 fdt = files_fdtable(p->files);
187 buffer += sprintf(buffer, 186 buffer += sprintf(buffer,
@@ -244,6 +243,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
244 243
245static inline char * task_sig(struct task_struct *p, char *buffer) 244static inline char * task_sig(struct task_struct *p, char *buffer)
246{ 245{
246 unsigned long flags;
247 sigset_t pending, shpending, blocked, ignored, caught; 247 sigset_t pending, shpending, blocked, ignored, caught;
248 int num_threads = 0; 248 int num_threads = 0;
249 unsigned long qsize = 0; 249 unsigned long qsize = 0;
@@ -255,10 +255,8 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
255 sigemptyset(&ignored); 255 sigemptyset(&ignored);
256 sigemptyset(&caught); 256 sigemptyset(&caught);
257 257
258 /* Gather all the data with the appropriate locks held */ 258 rcu_read_lock();
259 read_lock(&tasklist_lock); 259 if (lock_task_sighand(p, &flags)) {
260 if (p->sighand) {
261 spin_lock_irq(&p->sighand->siglock);
262 pending = p->pending.signal; 260 pending = p->pending.signal;
263 shpending = p->signal->shared_pending.signal; 261 shpending = p->signal->shared_pending.signal;
264 blocked = p->blocked; 262 blocked = p->blocked;
@@ -266,9 +264,9 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
266 num_threads = atomic_read(&p->signal->count); 264 num_threads = atomic_read(&p->signal->count);
267 qsize = atomic_read(&p->user->sigpending); 265 qsize = atomic_read(&p->user->sigpending);
268 qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; 266 qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
269 spin_unlock_irq(&p->sighand->siglock); 267 unlock_task_sighand(p, &flags);
270 } 268 }
271 read_unlock(&tasklist_lock); 269 rcu_read_unlock();
272 270
273 buffer += sprintf(buffer, "Threads:\t%d\n", num_threads); 271 buffer += sprintf(buffer, "Threads:\t%d\n", num_threads);
274 buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim); 272 buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim);
@@ -322,7 +320,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
322 sigset_t sigign, sigcatch; 320 sigset_t sigign, sigcatch;
323 char state; 321 char state;
324 int res; 322 int res;
325 pid_t ppid, pgid = -1, sid = -1; 323 pid_t ppid = 0, pgid = -1, sid = -1;
326 int num_threads = 0; 324 int num_threads = 0;
327 struct mm_struct *mm; 325 struct mm_struct *mm;
328 unsigned long long start_time; 326 unsigned long long start_time;
@@ -330,8 +328,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
330 unsigned long min_flt = 0, maj_flt = 0; 328 unsigned long min_flt = 0, maj_flt = 0;
331 cputime_t cutime, cstime, utime, stime; 329 cputime_t cutime, cstime, utime, stime;
332 unsigned long rsslim = 0; 330 unsigned long rsslim = 0;
333 struct task_struct *t;
334 char tcomm[sizeof(task->comm)]; 331 char tcomm[sizeof(task->comm)];
332 unsigned long flags;
335 333
336 state = *get_task_state(task); 334 state = *get_task_state(task);
337 vsize = eip = esp = 0; 335 vsize = eip = esp = 0;
@@ -349,15 +347,33 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
349 cutime = cstime = utime = stime = cputime_zero; 347 cutime = cstime = utime = stime = cputime_zero;
350 348
351 mutex_lock(&tty_mutex); 349 mutex_lock(&tty_mutex);
352 read_lock(&tasklist_lock); 350 rcu_read_lock();
353 if (task->sighand) { 351 if (lock_task_sighand(task, &flags)) {
354 spin_lock_irq(&task->sighand->siglock); 352 struct signal_struct *sig = task->signal;
355 num_threads = atomic_read(&task->signal->count); 353 struct tty_struct *tty = sig->tty;
354
355 if (tty) {
356 /*
357 * sig->tty is not stable, but tty_mutex
358 * protects us from release_dev(tty)
359 */
360 barrier();
361 tty_pgrp = tty->pgrp;
362 tty_nr = new_encode_dev(tty_devnum(tty));
363 }
364
365 num_threads = atomic_read(&sig->count);
356 collect_sigign_sigcatch(task, &sigign, &sigcatch); 366 collect_sigign_sigcatch(task, &sigign, &sigcatch);
357 367
368 cmin_flt = sig->cmin_flt;
369 cmaj_flt = sig->cmaj_flt;
370 cutime = sig->cutime;
371 cstime = sig->cstime;
372 rsslim = sig->rlim[RLIMIT_RSS].rlim_cur;
373
358 /* add up live thread stats at the group level */ 374 /* add up live thread stats at the group level */
359 if (whole) { 375 if (whole) {
360 t = task; 376 struct task_struct *t = task;
361 do { 377 do {
362 min_flt += t->min_flt; 378 min_flt += t->min_flt;
363 maj_flt += t->maj_flt; 379 maj_flt += t->maj_flt;
@@ -365,31 +381,20 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
365 stime = cputime_add(stime, t->stime); 381 stime = cputime_add(stime, t->stime);
366 t = next_thread(t); 382 t = next_thread(t);
367 } while (t != task); 383 } while (t != task);
368 }
369 384
370 spin_unlock_irq(&task->sighand->siglock); 385 min_flt += sig->min_flt;
371 } 386 maj_flt += sig->maj_flt;
372 if (task->signal) { 387 utime = cputime_add(utime, sig->utime);
373 if (task->signal->tty) { 388 stime = cputime_add(stime, sig->stime);
374 tty_pgrp = task->signal->tty->pgrp;
375 tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
376 } 389 }
390
391 sid = sig->session;
377 pgid = process_group(task); 392 pgid = process_group(task);
378 sid = task->signal->session; 393 ppid = rcu_dereference(task->real_parent)->tgid;
379 cmin_flt = task->signal->cmin_flt; 394
380 cmaj_flt = task->signal->cmaj_flt; 395 unlock_task_sighand(task, &flags);
381 cutime = task->signal->cutime;
382 cstime = task->signal->cstime;
383 rsslim = task->signal->rlim[RLIMIT_RSS].rlim_cur;
384 if (whole) {
385 min_flt += task->signal->min_flt;
386 maj_flt += task->signal->maj_flt;
387 utime = cputime_add(utime, task->signal->utime);
388 stime = cputime_add(stime, task->signal->stime);
389 }
390 } 396 }
391 ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; 397 rcu_read_unlock();
392 read_unlock(&tasklist_lock);
393 mutex_unlock(&tty_mutex); 398 mutex_unlock(&tty_mutex);
394 399
395 if (!whole || num_threads<2) 400 if (!whole || num_threads<2)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 89c20d9d50bf..82da55b5cffe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -71,6 +71,7 @@
71#include <linux/cpuset.h> 71#include <linux/cpuset.h>
72#include <linux/audit.h> 72#include <linux/audit.h>
73#include <linux/poll.h> 73#include <linux/poll.h>
74#include <linux/nsproxy.h>
74#include "internal.h" 75#include "internal.h"
75 76
76/* NOTE: 77/* NOTE:
@@ -83,262 +84,44 @@
83 * in /proc for a task before it execs a suid executable. 84 * in /proc for a task before it execs a suid executable.
84 */ 85 */
85 86
86/*
87 * For hysterical raisins we keep the same inumbers as in the old procfs.
88 * Feel free to change the macro below - just keep the range distinct from
89 * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
90 * As soon as we'll get a separate superblock we will be able to forget
91 * about magical ranges too.
92 */
93
94#define fake_ino(pid,ino) (((pid)<<16)|(ino))
95
96enum pid_directory_inos {
97 PROC_TGID_INO = 2,
98 PROC_TGID_TASK,
99 PROC_TGID_STATUS,
100 PROC_TGID_MEM,
101#ifdef CONFIG_SECCOMP
102 PROC_TGID_SECCOMP,
103#endif
104 PROC_TGID_CWD,
105 PROC_TGID_ROOT,
106 PROC_TGID_EXE,
107 PROC_TGID_FD,
108 PROC_TGID_ENVIRON,
109 PROC_TGID_AUXV,
110 PROC_TGID_CMDLINE,
111 PROC_TGID_STAT,
112 PROC_TGID_STATM,
113 PROC_TGID_MAPS,
114 PROC_TGID_NUMA_MAPS,
115 PROC_TGID_MOUNTS,
116 PROC_TGID_MOUNTSTATS,
117 PROC_TGID_WCHAN,
118#ifdef CONFIG_MMU
119 PROC_TGID_SMAPS,
120#endif
121#ifdef CONFIG_SCHEDSTATS
122 PROC_TGID_SCHEDSTAT,
123#endif
124#ifdef CONFIG_CPUSETS
125 PROC_TGID_CPUSET,
126#endif
127#ifdef CONFIG_SECURITY
128 PROC_TGID_ATTR,
129 PROC_TGID_ATTR_CURRENT,
130 PROC_TGID_ATTR_PREV,
131 PROC_TGID_ATTR_EXEC,
132 PROC_TGID_ATTR_FSCREATE,
133 PROC_TGID_ATTR_KEYCREATE,
134 PROC_TGID_ATTR_SOCKCREATE,
135#endif
136#ifdef CONFIG_AUDITSYSCALL
137 PROC_TGID_LOGINUID,
138#endif
139 PROC_TGID_OOM_SCORE,
140 PROC_TGID_OOM_ADJUST,
141 PROC_TID_INO,
142 PROC_TID_STATUS,
143 PROC_TID_MEM,
144#ifdef CONFIG_SECCOMP
145 PROC_TID_SECCOMP,
146#endif
147 PROC_TID_CWD,
148 PROC_TID_ROOT,
149 PROC_TID_EXE,
150 PROC_TID_FD,
151 PROC_TID_ENVIRON,
152 PROC_TID_AUXV,
153 PROC_TID_CMDLINE,
154 PROC_TID_STAT,
155 PROC_TID_STATM,
156 PROC_TID_MAPS,
157 PROC_TID_NUMA_MAPS,
158 PROC_TID_MOUNTS,
159 PROC_TID_MOUNTSTATS,
160 PROC_TID_WCHAN,
161#ifdef CONFIG_MMU
162 PROC_TID_SMAPS,
163#endif
164#ifdef CONFIG_SCHEDSTATS
165 PROC_TID_SCHEDSTAT,
166#endif
167#ifdef CONFIG_CPUSETS
168 PROC_TID_CPUSET,
169#endif
170#ifdef CONFIG_SECURITY
171 PROC_TID_ATTR,
172 PROC_TID_ATTR_CURRENT,
173 PROC_TID_ATTR_PREV,
174 PROC_TID_ATTR_EXEC,
175 PROC_TID_ATTR_FSCREATE,
176 PROC_TID_ATTR_KEYCREATE,
177 PROC_TID_ATTR_SOCKCREATE,
178#endif
179#ifdef CONFIG_AUDITSYSCALL
180 PROC_TID_LOGINUID,
181#endif
182 PROC_TID_OOM_SCORE,
183 PROC_TID_OOM_ADJUST,
184
185 /* Add new entries before this */
186 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
187};
188 87
189/* Worst case buffer size needed for holding an integer. */ 88/* Worst case buffer size needed for holding an integer. */
190#define PROC_NUMBUF 10 89#define PROC_NUMBUF 10
191 90
192struct pid_entry { 91struct pid_entry {
193 int type;
194 int len; 92 int len;
195 char *name; 93 char *name;
196 mode_t mode; 94 mode_t mode;
95 struct inode_operations *iop;
96 struct file_operations *fop;
97 union proc_op op;
197}; 98};
198 99
199#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} 100#define NOD(NAME, MODE, IOP, FOP, OP) { \
200 101 .len = sizeof(NAME) - 1, \
201static struct pid_entry tgid_base_stuff[] = { 102 .name = (NAME), \
202 E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), 103 .mode = MODE, \
203 E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 104 .iop = IOP, \
204 E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR), 105 .fop = FOP, \
205 E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR), 106 .op = OP, \
206 E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO),
207 E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO),
208 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO),
209 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO),
210 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO),
211#ifdef CONFIG_NUMA
212 E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
213#endif
214 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
215#ifdef CONFIG_SECCOMP
216 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
217#endif
218 E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
219 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
220 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
221 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
222 E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
223#ifdef CONFIG_MMU
224 E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO),
225#endif
226#ifdef CONFIG_SECURITY
227 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
228#endif
229#ifdef CONFIG_KALLSYMS
230 E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO),
231#endif
232#ifdef CONFIG_SCHEDSTATS
233 E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
234#endif
235#ifdef CONFIG_CPUSETS
236 E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO),
237#endif
238 E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
239 E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
240#ifdef CONFIG_AUDITSYSCALL
241 E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
242#endif
243 {0,0,NULL,0}
244};
245static struct pid_entry tid_base_stuff[] = {
246 E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR),
247 E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR),
248 E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR),
249 E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO),
250 E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO),
251 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO),
252 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO),
253 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO),
254#ifdef CONFIG_NUMA
255 E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
256#endif
257 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
258#ifdef CONFIG_SECCOMP
259 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
260#endif
261 E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
262 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO),
263 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO),
264 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
265#ifdef CONFIG_MMU
266 E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO),
267#endif
268#ifdef CONFIG_SECURITY
269 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
270#endif
271#ifdef CONFIG_KALLSYMS
272 E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO),
273#endif
274#ifdef CONFIG_SCHEDSTATS
275 E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
276#endif
277#ifdef CONFIG_CPUSETS
278 E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO),
279#endif
280 E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
281 E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
282#ifdef CONFIG_AUDITSYSCALL
283 E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
284#endif
285 {0,0,NULL,0}
286};
287
288#ifdef CONFIG_SECURITY
289static struct pid_entry tgid_attr_stuff[] = {
290 E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO),
291 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
292 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
293 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
294 E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
295 E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
296 {0,0,NULL,0}
297};
298static struct pid_entry tid_attr_stuff[] = {
299 E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO),
300 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
301 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
302 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
303 E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
304 E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
305 {0,0,NULL,0}
306};
307#endif
308
309#undef E
310
311static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
312{
313 struct task_struct *task = get_proc_task(inode);
314 struct files_struct *files = NULL;
315 struct file *file;
316 int fd = proc_fd(inode);
317
318 if (task) {
319 files = get_files_struct(task);
320 put_task_struct(task);
321 }
322 if (files) {
323 /*
324 * We are not taking a ref to the file structure, so we must
325 * hold ->file_lock.
326 */
327 spin_lock(&files->file_lock);
328 file = fcheck_files(files, fd);
329 if (file) {
330 *mnt = mntget(file->f_vfsmnt);
331 *dentry = dget(file->f_dentry);
332 spin_unlock(&files->file_lock);
333 put_files_struct(files);
334 return 0;
335 }
336 spin_unlock(&files->file_lock);
337 put_files_struct(files);
338 }
339 return -ENOENT;
340} 107}
341 108
109#define DIR(NAME, MODE, OTYPE) \
110 NOD(NAME, (S_IFDIR|(MODE)), \
111 &proc_##OTYPE##_inode_operations, &proc_##OTYPE##_operations, \
112 {} )
113#define LNK(NAME, OTYPE) \
114 NOD(NAME, (S_IFLNK|S_IRWXUGO), \
115 &proc_pid_link_inode_operations, NULL, \
116 { .proc_get_link = &proc_##OTYPE##_link } )
117#define REG(NAME, MODE, OTYPE) \
118 NOD(NAME, (S_IFREG|(MODE)), NULL, \
119 &proc_##OTYPE##_operations, {})
120#define INF(NAME, MODE, OTYPE) \
121 NOD(NAME, (S_IFREG|(MODE)), \
122 NULL, &proc_info_file_operations, \
123 { .proc_read = &proc_##OTYPE } )
124
342static struct fs_struct *get_fs_struct(struct task_struct *task) 125static struct fs_struct *get_fs_struct(struct task_struct *task)
343{ 126{
344 struct fs_struct *fs; 127 struct fs_struct *fs;
@@ -587,7 +370,7 @@ static int mounts_open(struct inode *inode, struct file *file)
587 370
588 if (task) { 371 if (task) {
589 task_lock(task); 372 task_lock(task);
590 namespace = task->namespace; 373 namespace = task->nsproxy->namespace;
591 if (namespace) 374 if (namespace)
592 get_namespace(namespace); 375 get_namespace(namespace);
593 task_unlock(task); 376 task_unlock(task);
@@ -658,7 +441,7 @@ static int mountstats_open(struct inode *inode, struct file *file)
658 441
659 if (task) { 442 if (task) {
660 task_lock(task); 443 task_lock(task);
661 namespace = task->namespace; 444 namespace = task->nsproxy->namespace;
662 if (namespace) 445 if (namespace)
663 get_namespace(namespace); 446 get_namespace(namespace);
664 task_unlock(task); 447 task_unlock(task);
@@ -1137,143 +920,6 @@ static struct inode_operations proc_pid_link_inode_operations = {
1137 .setattr = proc_setattr, 920 .setattr = proc_setattr,
1138}; 921};
1139 922
1140static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1141{
1142 struct dentry *dentry = filp->f_dentry;
1143 struct inode *inode = dentry->d_inode;
1144 struct task_struct *p = get_proc_task(inode);
1145 unsigned int fd, tid, ino;
1146 int retval;
1147 char buf[PROC_NUMBUF];
1148 struct files_struct * files;
1149 struct fdtable *fdt;
1150
1151 retval = -ENOENT;
1152 if (!p)
1153 goto out_no_task;
1154 retval = 0;
1155 tid = p->pid;
1156
1157 fd = filp->f_pos;
1158 switch (fd) {
1159 case 0:
1160 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
1161 goto out;
1162 filp->f_pos++;
1163 case 1:
1164 ino = parent_ino(dentry);
1165 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1166 goto out;
1167 filp->f_pos++;
1168 default:
1169 files = get_files_struct(p);
1170 if (!files)
1171 goto out;
1172 rcu_read_lock();
1173 fdt = files_fdtable(files);
1174 for (fd = filp->f_pos-2;
1175 fd < fdt->max_fds;
1176 fd++, filp->f_pos++) {
1177 unsigned int i,j;
1178
1179 if (!fcheck_files(files, fd))
1180 continue;
1181 rcu_read_unlock();
1182
1183 j = PROC_NUMBUF;
1184 i = fd;
1185 do {
1186 j--;
1187 buf[j] = '0' + (i % 10);
1188 i /= 10;
1189 } while (i);
1190
1191 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
1192 if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
1193 rcu_read_lock();
1194 break;
1195 }
1196 rcu_read_lock();
1197 }
1198 rcu_read_unlock();
1199 put_files_struct(files);
1200 }
1201out:
1202 put_task_struct(p);
1203out_no_task:
1204 return retval;
1205}
1206
1207static int proc_pident_readdir(struct file *filp,
1208 void *dirent, filldir_t filldir,
1209 struct pid_entry *ents, unsigned int nents)
1210{
1211 int i;
1212 int pid;
1213 struct dentry *dentry = filp->f_dentry;
1214 struct inode *inode = dentry->d_inode;
1215 struct task_struct *task = get_proc_task(inode);
1216 struct pid_entry *p;
1217 ino_t ino;
1218 int ret;
1219
1220 ret = -ENOENT;
1221 if (!task)
1222 goto out;
1223
1224 ret = 0;
1225 pid = task->pid;
1226 put_task_struct(task);
1227 i = filp->f_pos;
1228 switch (i) {
1229 case 0:
1230 ino = inode->i_ino;
1231 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
1232 goto out;
1233 i++;
1234 filp->f_pos++;
1235 /* fall through */
1236 case 1:
1237 ino = parent_ino(dentry);
1238 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
1239 goto out;
1240 i++;
1241 filp->f_pos++;
1242 /* fall through */
1243 default:
1244 i -= 2;
1245 if (i >= nents) {
1246 ret = 1;
1247 goto out;
1248 }
1249 p = ents + i;
1250 while (p->name) {
1251 if (filldir(dirent, p->name, p->len, filp->f_pos,
1252 fake_ino(pid, p->type), p->mode >> 12) < 0)
1253 goto out;
1254 filp->f_pos++;
1255 p++;
1256 }
1257 }
1258
1259 ret = 1;
1260out:
1261 return ret;
1262}
1263
1264static int proc_tgid_base_readdir(struct file * filp,
1265 void * dirent, filldir_t filldir)
1266{
1267 return proc_pident_readdir(filp,dirent,filldir,
1268 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
1269}
1270
1271static int proc_tid_base_readdir(struct file * filp,
1272 void * dirent, filldir_t filldir)
1273{
1274 return proc_pident_readdir(filp,dirent,filldir,
1275 tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
1276}
1277 923
1278/* building an inode */ 924/* building an inode */
1279 925
@@ -1293,13 +939,13 @@ static int task_dumpable(struct task_struct *task)
1293} 939}
1294 940
1295 941
1296static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino) 942static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1297{ 943{
1298 struct inode * inode; 944 struct inode * inode;
1299 struct proc_inode *ei; 945 struct proc_inode *ei;
1300 946
1301 /* We need a new inode */ 947 /* We need a new inode */
1302 948
1303 inode = new_inode(sb); 949 inode = new_inode(sb);
1304 if (!inode) 950 if (!inode)
1305 goto out; 951 goto out;
@@ -1307,13 +953,12 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1307 /* Common stuff */ 953 /* Common stuff */
1308 ei = PROC_I(inode); 954 ei = PROC_I(inode);
1309 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 955 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1310 inode->i_ino = fake_ino(task->pid, ino);
1311 inode->i_op = &proc_def_inode_operations; 956 inode->i_op = &proc_def_inode_operations;
1312 957
1313 /* 958 /*
1314 * grab the reference to task. 959 * grab the reference to task.
1315 */ 960 */
1316 ei->pid = get_pid(task->pids[PIDTYPE_PID].pid); 961 ei->pid = get_task_pid(task, PIDTYPE_PID);
1317 if (!ei->pid) 962 if (!ei->pid)
1318 goto out_unlock; 963 goto out_unlock;
1319 964
@@ -1333,6 +978,27 @@ out_unlock:
1333 return NULL; 978 return NULL;
1334} 979}
1335 980
981static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
982{
983 struct inode *inode = dentry->d_inode;
984 struct task_struct *task;
985 generic_fillattr(inode, stat);
986
987 rcu_read_lock();
988 stat->uid = 0;
989 stat->gid = 0;
990 task = pid_task(proc_pid(inode), PIDTYPE_PID);
991 if (task) {
992 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
993 task_dumpable(task)) {
994 stat->uid = task->euid;
995 stat->gid = task->egid;
996 }
997 }
998 rcu_read_unlock();
999 return 0;
1000}
1001
1336/* dentry stuff */ 1002/* dentry stuff */
1337 1003
1338/* 1004/*
@@ -1372,25 +1038,130 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1372 return 0; 1038 return 0;
1373} 1039}
1374 1040
1375static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1041static int pid_delete_dentry(struct dentry * dentry)
1376{ 1042{
1377 struct inode *inode = dentry->d_inode; 1043 /* Is the task we represent dead?
1378 struct task_struct *task; 1044 * If so, then don't put the dentry on the lru list,
1379 generic_fillattr(inode, stat); 1045 * kill it immediately.
1046 */
1047 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1048}
1049
1050static struct dentry_operations pid_dentry_operations =
1051{
1052 .d_revalidate = pid_revalidate,
1053 .d_delete = pid_delete_dentry,
1054};
1055
1056/* Lookups */
1057
1058typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct task_struct *, void *);
1059
1060/*
1061 * Fill a directory entry.
1062 *
1063 * If possible create the dcache entry and derive our inode number and
1064 * file type from dcache entry.
1065 *
1066 * Since all of the proc inode numbers are dynamically generated, the inode
1067 * numbers do not exist until the inode is cache. This means creating the
1068 * the dcache entry in readdir is necessary to keep the inode numbers
1069 * reported by readdir in sync with the inode numbers reported
1070 * by stat.
1071 */
1072static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1073 char *name, int len,
1074 instantiate_t instantiate, struct task_struct *task, void *ptr)
1075{
1076 struct dentry *child, *dir = filp->f_dentry;
1077 struct inode *inode;
1078 struct qstr qname;
1079 ino_t ino = 0;
1080 unsigned type = DT_UNKNOWN;
1081
1082 qname.name = name;
1083 qname.len = len;
1084 qname.hash = full_name_hash(name, len);
1085
1086 child = d_lookup(dir, &qname);
1087 if (!child) {
1088 struct dentry *new;
1089 new = d_alloc(dir, &qname);
1090 if (new) {
1091 child = instantiate(dir->d_inode, new, task, ptr);
1092 if (child)
1093 dput(new);
1094 else
1095 child = new;
1096 }
1097 }
1098 if (!child || IS_ERR(child) || !child->d_inode)
1099 goto end_instantiate;
1100 inode = child->d_inode;
1101 if (inode) {
1102 ino = inode->i_ino;
1103 type = inode->i_mode >> 12;
1104 }
1105 dput(child);
1106end_instantiate:
1107 if (!ino)
1108 ino = find_inode_number(dir, &qname);
1109 if (!ino)
1110 ino = 1;
1111 return filldir(dirent, name, len, filp->f_pos, ino, type);
1112}
1113
1114static unsigned name_to_int(struct dentry *dentry)
1115{
1116 const char *name = dentry->d_name.name;
1117 int len = dentry->d_name.len;
1118 unsigned n = 0;
1119
1120 if (len > 1 && *name == '0')
1121 goto out;
1122 while (len-- > 0) {
1123 unsigned c = *name++ - '0';
1124 if (c > 9)
1125 goto out;
1126 if (n >= (~0U-9)/10)
1127 goto out;
1128 n *= 10;
1129 n += c;
1130 }
1131 return n;
1132out:
1133 return ~0U;
1134}
1135
1136static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
1137{
1138 struct task_struct *task = get_proc_task(inode);
1139 struct files_struct *files = NULL;
1140 struct file *file;
1141 int fd = proc_fd(inode);
1380 1142
1381 rcu_read_lock();
1382 stat->uid = 0;
1383 stat->gid = 0;
1384 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1385 if (task) { 1143 if (task) {
1386 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1144 files = get_files_struct(task);
1387 task_dumpable(task)) { 1145 put_task_struct(task);
1388 stat->uid = task->euid; 1146 }
1389 stat->gid = task->egid; 1147 if (files) {
1148 /*
1149 * We are not taking a ref to the file structure, so we must
1150 * hold ->file_lock.
1151 */
1152 spin_lock(&files->file_lock);
1153 file = fcheck_files(files, fd);
1154 if (file) {
1155 *mnt = mntget(file->f_vfsmnt);
1156 *dentry = dget(file->f_dentry);
1157 spin_unlock(&files->file_lock);
1158 put_files_struct(files);
1159 return 0;
1390 } 1160 }
1161 spin_unlock(&files->file_lock);
1162 put_files_struct(files);
1391 } 1163 }
1392 rcu_read_unlock(); 1164 return -ENOENT;
1393 return 0;
1394} 1165}
1395 1166
1396static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1167static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -1428,75 +1199,30 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1428 return 0; 1199 return 0;
1429} 1200}
1430 1201
1431static int pid_delete_dentry(struct dentry * dentry)
1432{
1433 /* Is the task we represent dead?
1434 * If so, then don't put the dentry on the lru list,
1435 * kill it immediately.
1436 */
1437 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1438}
1439
1440static struct dentry_operations tid_fd_dentry_operations = 1202static struct dentry_operations tid_fd_dentry_operations =
1441{ 1203{
1442 .d_revalidate = tid_fd_revalidate, 1204 .d_revalidate = tid_fd_revalidate,
1443 .d_delete = pid_delete_dentry, 1205 .d_delete = pid_delete_dentry,
1444}; 1206};
1445 1207
1446static struct dentry_operations pid_dentry_operations = 1208static struct dentry *proc_fd_instantiate(struct inode *dir,
1447{ 1209 struct dentry *dentry, struct task_struct *task, void *ptr)
1448 .d_revalidate = pid_revalidate,
1449 .d_delete = pid_delete_dentry,
1450};
1451
1452/* Lookups */
1453
1454static unsigned name_to_int(struct dentry *dentry)
1455{
1456 const char *name = dentry->d_name.name;
1457 int len = dentry->d_name.len;
1458 unsigned n = 0;
1459
1460 if (len > 1 && *name == '0')
1461 goto out;
1462 while (len-- > 0) {
1463 unsigned c = *name++ - '0';
1464 if (c > 9)
1465 goto out;
1466 if (n >= (~0U-9)/10)
1467 goto out;
1468 n *= 10;
1469 n += c;
1470 }
1471 return n;
1472out:
1473 return ~0U;
1474}
1475
1476/* SMP-safe */
1477static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1478{ 1210{
1479 struct task_struct *task = get_proc_task(dir); 1211 unsigned fd = *(unsigned *)ptr;
1480 unsigned fd = name_to_int(dentry); 1212 struct file *file;
1481 struct dentry *result = ERR_PTR(-ENOENT); 1213 struct files_struct *files;
1482 struct file * file; 1214 struct inode *inode;
1483 struct files_struct * files; 1215 struct proc_inode *ei;
1484 struct inode *inode; 1216 struct dentry *error = ERR_PTR(-ENOENT);
1485 struct proc_inode *ei;
1486
1487 if (!task)
1488 goto out_no_task;
1489 if (fd == ~0U)
1490 goto out;
1491 1217
1492 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1218 inode = proc_pid_make_inode(dir->i_sb, task);
1493 if (!inode) 1219 if (!inode)
1494 goto out; 1220 goto out;
1495 ei = PROC_I(inode); 1221 ei = PROC_I(inode);
1496 ei->fd = fd; 1222 ei->fd = fd;
1497 files = get_files_struct(task); 1223 files = get_files_struct(task);
1498 if (!files) 1224 if (!files)
1499 goto out_unlock; 1225 goto out_iput;
1500 inode->i_mode = S_IFLNK; 1226 inode->i_mode = S_IFLNK;
1501 1227
1502 /* 1228 /*
@@ -1506,13 +1232,14 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1506 spin_lock(&files->file_lock); 1232 spin_lock(&files->file_lock);
1507 file = fcheck_files(files, fd); 1233 file = fcheck_files(files, fd);
1508 if (!file) 1234 if (!file)
1509 goto out_unlock2; 1235 goto out_unlock;
1510 if (file->f_mode & 1) 1236 if (file->f_mode & 1)
1511 inode->i_mode |= S_IRUSR | S_IXUSR; 1237 inode->i_mode |= S_IRUSR | S_IXUSR;
1512 if (file->f_mode & 2) 1238 if (file->f_mode & 2)
1513 inode->i_mode |= S_IWUSR | S_IXUSR; 1239 inode->i_mode |= S_IWUSR | S_IXUSR;
1514 spin_unlock(&files->file_lock); 1240 spin_unlock(&files->file_lock);
1515 put_files_struct(files); 1241 put_files_struct(files);
1242
1516 inode->i_op = &proc_pid_link_inode_operations; 1243 inode->i_op = &proc_pid_link_inode_operations;
1517 inode->i_size = 64; 1244 inode->i_size = 64;
1518 ei->op.proc_get_link = proc_fd_link; 1245 ei->op.proc_get_link = proc_fd_link;
@@ -1520,34 +1247,106 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1520 d_add(dentry, inode); 1247 d_add(dentry, inode);
1521 /* Close the race of the process dying before we return the dentry */ 1248 /* Close the race of the process dying before we return the dentry */
1522 if (tid_fd_revalidate(dentry, NULL)) 1249 if (tid_fd_revalidate(dentry, NULL))
1523 result = NULL; 1250 error = NULL;
1524out:
1525 put_task_struct(task);
1526out_no_task:
1527 return result;
1528 1251
1529out_unlock2: 1252 out:
1253 return error;
1254out_unlock:
1530 spin_unlock(&files->file_lock); 1255 spin_unlock(&files->file_lock);
1531 put_files_struct(files); 1256 put_files_struct(files);
1532out_unlock: 1257out_iput:
1533 iput(inode); 1258 iput(inode);
1534 goto out; 1259 goto out;
1535} 1260}
1536 1261
1537static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1262static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1538static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1263{
1539static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 1264 struct task_struct *task = get_proc_task(dir);
1265 unsigned fd = name_to_int(dentry);
1266 struct dentry *result = ERR_PTR(-ENOENT);
1267
1268 if (!task)
1269 goto out_no_task;
1270 if (fd == ~0U)
1271 goto out;
1272
1273 result = proc_fd_instantiate(dir, dentry, task, &fd);
1274out:
1275 put_task_struct(task);
1276out_no_task:
1277 return result;
1278}
1279
1280static int proc_fd_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1281 struct task_struct *task, int fd)
1282{
1283 char name[PROC_NUMBUF];
1284 int len = snprintf(name, sizeof(name), "%d", fd);
1285 return proc_fill_cache(filp, dirent, filldir, name, len,
1286 proc_fd_instantiate, task, &fd);
1287}
1288
1289static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1290{
1291 struct dentry *dentry = filp->f_dentry;
1292 struct inode *inode = dentry->d_inode;
1293 struct task_struct *p = get_proc_task(inode);
1294 unsigned int fd, tid, ino;
1295 int retval;
1296 struct files_struct * files;
1297 struct fdtable *fdt;
1298
1299 retval = -ENOENT;
1300 if (!p)
1301 goto out_no_task;
1302 retval = 0;
1303 tid = p->pid;
1304
1305 fd = filp->f_pos;
1306 switch (fd) {
1307 case 0:
1308 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
1309 goto out;
1310 filp->f_pos++;
1311 case 1:
1312 ino = parent_ino(dentry);
1313 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1314 goto out;
1315 filp->f_pos++;
1316 default:
1317 files = get_files_struct(p);
1318 if (!files)
1319 goto out;
1320 rcu_read_lock();
1321 fdt = files_fdtable(files);
1322 for (fd = filp->f_pos-2;
1323 fd < fdt->max_fds;
1324 fd++, filp->f_pos++) {
1325
1326 if (!fcheck_files(files, fd))
1327 continue;
1328 rcu_read_unlock();
1329
1330 if (proc_fd_fill_cache(filp, dirent, filldir, p, fd) < 0) {
1331 rcu_read_lock();
1332 break;
1333 }
1334 rcu_read_lock();
1335 }
1336 rcu_read_unlock();
1337 put_files_struct(files);
1338 }
1339out:
1340 put_task_struct(p);
1341out_no_task:
1342 return retval;
1343}
1540 1344
1541static struct file_operations proc_fd_operations = { 1345static struct file_operations proc_fd_operations = {
1542 .read = generic_read_dir, 1346 .read = generic_read_dir,
1543 .readdir = proc_readfd, 1347 .readdir = proc_readfd,
1544}; 1348};
1545 1349
1546static struct file_operations proc_task_operations = {
1547 .read = generic_read_dir,
1548 .readdir = proc_task_readdir,
1549};
1550
1551/* 1350/*
1552 * proc directories can do almost nothing.. 1351 * proc directories can do almost nothing..
1553 */ 1352 */
@@ -1556,11 +1355,137 @@ static struct inode_operations proc_fd_inode_operations = {
1556 .setattr = proc_setattr, 1355 .setattr = proc_setattr,
1557}; 1356};
1558 1357
1559static struct inode_operations proc_task_inode_operations = { 1358static struct dentry *proc_pident_instantiate(struct inode *dir,
1560 .lookup = proc_task_lookup, 1359 struct dentry *dentry, struct task_struct *task, void *ptr)
1561 .getattr = proc_task_getattr, 1360{
1562 .setattr = proc_setattr, 1361 struct pid_entry *p = ptr;
1563}; 1362 struct inode *inode;
1363 struct proc_inode *ei;
1364 struct dentry *error = ERR_PTR(-EINVAL);
1365
1366 inode = proc_pid_make_inode(dir->i_sb, task);
1367 if (!inode)
1368 goto out;
1369
1370 ei = PROC_I(inode);
1371 inode->i_mode = p->mode;
1372 if (S_ISDIR(inode->i_mode))
1373 inode->i_nlink = 2; /* Use getattr to fix if necessary */
1374 if (p->iop)
1375 inode->i_op = p->iop;
1376 if (p->fop)
1377 inode->i_fop = p->fop;
1378 ei->op = p->op;
1379 dentry->d_op = &pid_dentry_operations;
1380 d_add(dentry, inode);
1381 /* Close the race of the process dying before we return the dentry */
1382 if (pid_revalidate(dentry, NULL))
1383 error = NULL;
1384out:
1385 return error;
1386}
1387
1388static struct dentry *proc_pident_lookup(struct inode *dir,
1389 struct dentry *dentry,
1390 struct pid_entry *ents,
1391 unsigned int nents)
1392{
1393 struct inode *inode;
1394 struct dentry *error;
1395 struct task_struct *task = get_proc_task(dir);
1396 struct pid_entry *p, *last;
1397
1398 error = ERR_PTR(-ENOENT);
1399 inode = NULL;
1400
1401 if (!task)
1402 goto out_no_task;
1403
1404 /*
1405 * Yes, it does not scale. And it should not. Don't add
1406 * new entries into /proc/<tgid>/ without very good reasons.
1407 */
1408 last = &ents[nents - 1];
1409 for (p = ents; p <= last; p++) {
1410 if (p->len != dentry->d_name.len)
1411 continue;
1412 if (!memcmp(dentry->d_name.name, p->name, p->len))
1413 break;
1414 }
1415 if (p > last)
1416 goto out;
1417
1418 error = proc_pident_instantiate(dir, dentry, task, p);
1419out:
1420 put_task_struct(task);
1421out_no_task:
1422 return error;
1423}
1424
1425static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1426 struct task_struct *task, struct pid_entry *p)
1427{
1428 return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
1429 proc_pident_instantiate, task, p);
1430}
1431
1432static int proc_pident_readdir(struct file *filp,
1433 void *dirent, filldir_t filldir,
1434 struct pid_entry *ents, unsigned int nents)
1435{
1436 int i;
1437 int pid;
1438 struct dentry *dentry = filp->f_dentry;
1439 struct inode *inode = dentry->d_inode;
1440 struct task_struct *task = get_proc_task(inode);
1441 struct pid_entry *p, *last;
1442 ino_t ino;
1443 int ret;
1444
1445 ret = -ENOENT;
1446 if (!task)
1447 goto out_no_task;
1448
1449 ret = 0;
1450 pid = task->pid;
1451 i = filp->f_pos;
1452 switch (i) {
1453 case 0:
1454 ino = inode->i_ino;
1455 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
1456 goto out;
1457 i++;
1458 filp->f_pos++;
1459 /* fall through */
1460 case 1:
1461 ino = parent_ino(dentry);
1462 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
1463 goto out;
1464 i++;
1465 filp->f_pos++;
1466 /* fall through */
1467 default:
1468 i -= 2;
1469 if (i >= nents) {
1470 ret = 1;
1471 goto out;
1472 }
1473 p = ents + i;
1474 last = &ents[nents - 1];
1475 while (p <= last) {
1476 if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
1477 goto out;
1478 filp->f_pos++;
1479 p++;
1480 }
1481 }
1482
1483 ret = 1;
1484out:
1485 put_task_struct(task);
1486out_no_task:
1487 return ret;
1488}
1564 1489
1565#ifdef CONFIG_SECURITY 1490#ifdef CONFIG_SECURITY
1566static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 1491static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
@@ -1581,8 +1506,8 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1581 if (!(page = __get_free_page(GFP_KERNEL))) 1506 if (!(page = __get_free_page(GFP_KERNEL)))
1582 goto out; 1507 goto out;
1583 1508
1584 length = security_getprocattr(task, 1509 length = security_getprocattr(task,
1585 (char*)file->f_dentry->d_name.name, 1510 (char*)file->f_dentry->d_name.name,
1586 (void*)page, count); 1511 (void*)page, count);
1587 if (length >= 0) 1512 if (length >= 0)
1588 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1513 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
@@ -1595,17 +1520,17 @@ out_no_task:
1595 1520
1596static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 1521static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1597 size_t count, loff_t *ppos) 1522 size_t count, loff_t *ppos)
1598{ 1523{
1599 struct inode * inode = file->f_dentry->d_inode; 1524 struct inode * inode = file->f_dentry->d_inode;
1600 char *page; 1525 char *page;
1601 ssize_t length; 1526 ssize_t length;
1602 struct task_struct *task = get_proc_task(inode); 1527 struct task_struct *task = get_proc_task(inode);
1603 1528
1604 length = -ESRCH; 1529 length = -ESRCH;
1605 if (!task) 1530 if (!task)
1606 goto out_no_task; 1531 goto out_no_task;
1607 if (count > PAGE_SIZE) 1532 if (count > PAGE_SIZE)
1608 count = PAGE_SIZE; 1533 count = PAGE_SIZE;
1609 1534
1610 /* No partial writes. */ 1535 /* No partial writes. */
1611 length = -EINVAL; 1536 length = -EINVAL;
@@ -1613,16 +1538,16 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1613 goto out; 1538 goto out;
1614 1539
1615 length = -ENOMEM; 1540 length = -ENOMEM;
1616 page = (char*)__get_free_page(GFP_USER); 1541 page = (char*)__get_free_page(GFP_USER);
1617 if (!page) 1542 if (!page)
1618 goto out; 1543 goto out;
1619 1544
1620 length = -EFAULT; 1545 length = -EFAULT;
1621 if (copy_from_user(page, buf, count)) 1546 if (copy_from_user(page, buf, count))
1622 goto out_free; 1547 goto out_free;
1623 1548
1624 length = security_setprocattr(task, 1549 length = security_setprocattr(task,
1625 (char*)file->f_dentry->d_name.name, 1550 (char*)file->f_dentry->d_name.name,
1626 (void*)page, count); 1551 (void*)page, count);
1627out_free: 1552out_free:
1628 free_page((unsigned long) page); 1553 free_page((unsigned long) page);
@@ -1630,330 +1555,263 @@ out:
1630 put_task_struct(task); 1555 put_task_struct(task);
1631out_no_task: 1556out_no_task:
1632 return length; 1557 return length;
1633} 1558}
1634 1559
1635static struct file_operations proc_pid_attr_operations = { 1560static struct file_operations proc_pid_attr_operations = {
1636 .read = proc_pid_attr_read, 1561 .read = proc_pid_attr_read,
1637 .write = proc_pid_attr_write, 1562 .write = proc_pid_attr_write,
1638}; 1563};
1639 1564
1640static struct file_operations proc_tid_attr_operations; 1565static struct pid_entry attr_dir_stuff[] = {
1641static struct inode_operations proc_tid_attr_inode_operations; 1566 REG("current", S_IRUGO|S_IWUGO, pid_attr),
1642static struct file_operations proc_tgid_attr_operations; 1567 REG("prev", S_IRUGO, pid_attr),
1643static struct inode_operations proc_tgid_attr_inode_operations; 1568 REG("exec", S_IRUGO|S_IWUGO, pid_attr),
1569 REG("fscreate", S_IRUGO|S_IWUGO, pid_attr),
1570 REG("keycreate", S_IRUGO|S_IWUGO, pid_attr),
1571 REG("sockcreate", S_IRUGO|S_IWUGO, pid_attr),
1572};
1573
1574static int proc_attr_dir_readdir(struct file * filp,
1575 void * dirent, filldir_t filldir)
1576{
1577 return proc_pident_readdir(filp,dirent,filldir,
1578 attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
1579}
1580
1581static struct file_operations proc_attr_dir_operations = {
1582 .read = generic_read_dir,
1583 .readdir = proc_attr_dir_readdir,
1584};
1585
1586static struct dentry *proc_attr_dir_lookup(struct inode *dir,
1587 struct dentry *dentry, struct nameidata *nd)
1588{
1589 return proc_pident_lookup(dir, dentry,
1590 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
1591}
1592
1593static struct inode_operations proc_attr_dir_inode_operations = {
1594 .lookup = proc_attr_dir_lookup,
1595 .getattr = pid_getattr,
1596 .setattr = proc_setattr,
1597};
1598
1644#endif 1599#endif
1645 1600
1646/* SMP-safe */ 1601/*
1647static struct dentry *proc_pident_lookup(struct inode *dir, 1602 * /proc/self:
1648 struct dentry *dentry, 1603 */
1649 struct pid_entry *ents) 1604static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1605 int buflen)
1606{
1607 char tmp[PROC_NUMBUF];
1608 sprintf(tmp, "%d", current->tgid);
1609 return vfs_readlink(dentry,buffer,buflen,tmp);
1610}
1611
1612static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1650{ 1613{
1614 char tmp[PROC_NUMBUF];
1615 sprintf(tmp, "%d", current->tgid);
1616 return ERR_PTR(vfs_follow_link(nd,tmp));
1617}
1618
1619static struct inode_operations proc_self_inode_operations = {
1620 .readlink = proc_self_readlink,
1621 .follow_link = proc_self_follow_link,
1622};
1623
1624/*
1625 * proc base
1626 *
1627 * These are the directory entries in the root directory of /proc
1628 * that properly belong to the /proc filesystem, as they describe
1629 * describe something that is process related.
1630 */
1631static struct pid_entry proc_base_stuff[] = {
1632 NOD("self", S_IFLNK|S_IRWXUGO,
1633 &proc_self_inode_operations, NULL, {}),
1634};
1635
1636/*
1637 * Exceptional case: normally we are not allowed to unhash a busy
1638 * directory. In this case, however, we can do it - no aliasing problems
1639 * due to the way we treat inodes.
1640 */
1641static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
1642{
1643 struct inode *inode = dentry->d_inode;
1644 struct task_struct *task = get_proc_task(inode);
1645 if (task) {
1646 put_task_struct(task);
1647 return 1;
1648 }
1649 d_drop(dentry);
1650 return 0;
1651}
1652
1653static struct dentry_operations proc_base_dentry_operations =
1654{
1655 .d_revalidate = proc_base_revalidate,
1656 .d_delete = pid_delete_dentry,
1657};
1658
1659static struct dentry *proc_base_instantiate(struct inode *dir,
1660 struct dentry *dentry, struct task_struct *task, void *ptr)
1661{
1662 struct pid_entry *p = ptr;
1651 struct inode *inode; 1663 struct inode *inode;
1664 struct proc_inode *ei;
1665 struct dentry *error = ERR_PTR(-EINVAL);
1666
1667 /* Allocate the inode */
1668 error = ERR_PTR(-ENOMEM);
1669 inode = new_inode(dir->i_sb);
1670 if (!inode)
1671 goto out;
1672
1673 /* Initialize the inode */
1674 ei = PROC_I(inode);
1675 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1676
1677 /*
1678 * grab the reference to the task.
1679 */
1680 ei->pid = get_task_pid(task, PIDTYPE_PID);
1681 if (!ei->pid)
1682 goto out_iput;
1683
1684 inode->i_uid = 0;
1685 inode->i_gid = 0;
1686 inode->i_mode = p->mode;
1687 if (S_ISDIR(inode->i_mode))
1688 inode->i_nlink = 2;
1689 if (S_ISLNK(inode->i_mode))
1690 inode->i_size = 64;
1691 if (p->iop)
1692 inode->i_op = p->iop;
1693 if (p->fop)
1694 inode->i_fop = p->fop;
1695 ei->op = p->op;
1696 dentry->d_op = &proc_base_dentry_operations;
1697 d_add(dentry, inode);
1698 error = NULL;
1699out:
1700 return error;
1701out_iput:
1702 iput(inode);
1703 goto out;
1704}
1705
1706static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
1707{
1652 struct dentry *error; 1708 struct dentry *error;
1653 struct task_struct *task = get_proc_task(dir); 1709 struct task_struct *task = get_proc_task(dir);
1654 struct pid_entry *p; 1710 struct pid_entry *p, *last;
1655 struct proc_inode *ei;
1656 1711
1657 error = ERR_PTR(-ENOENT); 1712 error = ERR_PTR(-ENOENT);
1658 inode = NULL;
1659 1713
1660 if (!task) 1714 if (!task)
1661 goto out_no_task; 1715 goto out_no_task;
1662 1716
1663 for (p = ents; p->name; p++) { 1717 /* Lookup the directory entry */
1718 last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
1719 for (p = proc_base_stuff; p <= last; p++) {
1664 if (p->len != dentry->d_name.len) 1720 if (p->len != dentry->d_name.len)
1665 continue; 1721 continue;
1666 if (!memcmp(dentry->d_name.name, p->name, p->len)) 1722 if (!memcmp(dentry->d_name.name, p->name, p->len))
1667 break; 1723 break;
1668 } 1724 }
1669 if (!p->name) 1725 if (p > last)
1670 goto out; 1726 goto out;
1671 1727
1672 error = ERR_PTR(-EINVAL); 1728 error = proc_base_instantiate(dir, dentry, task, p);
1673 inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1674 if (!inode)
1675 goto out;
1676 1729
1677 ei = PROC_I(inode); 1730out:
1678 inode->i_mode = p->mode; 1731 put_task_struct(task);
1679 /* 1732out_no_task:
1680 * Yes, it does not scale. And it should not. Don't add 1733 return error;
1681 * new entries into /proc/<tgid>/ without very good reasons. 1734}
1682 */ 1735
1683 switch(p->type) { 1736static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1684 case PROC_TGID_TASK: 1737 struct task_struct *task, struct pid_entry *p)
1685 inode->i_nlink = 2; 1738{
1686 inode->i_op = &proc_task_inode_operations; 1739 return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
1687 inode->i_fop = &proc_task_operations; 1740 proc_base_instantiate, task, p);
1688 break; 1741}
1689 case PROC_TID_FD: 1742
1690 case PROC_TGID_FD: 1743/*
1691 inode->i_nlink = 2; 1744 * Thread groups
1692 inode->i_op = &proc_fd_inode_operations; 1745 */
1693 inode->i_fop = &proc_fd_operations; 1746static struct file_operations proc_task_operations;
1694 break; 1747static struct inode_operations proc_task_inode_operations;
1695 case PROC_TID_EXE: 1748
1696 case PROC_TGID_EXE: 1749static struct pid_entry tgid_base_stuff[] = {
1697 inode->i_op = &proc_pid_link_inode_operations; 1750 DIR("task", S_IRUGO|S_IXUGO, task),
1698 ei->op.proc_get_link = proc_exe_link; 1751 DIR("fd", S_IRUSR|S_IXUSR, fd),
1699 break; 1752 INF("environ", S_IRUSR, pid_environ),
1700 case PROC_TID_CWD: 1753 INF("auxv", S_IRUSR, pid_auxv),
1701 case PROC_TGID_CWD: 1754 INF("status", S_IRUGO, pid_status),
1702 inode->i_op = &proc_pid_link_inode_operations; 1755 INF("cmdline", S_IRUGO, pid_cmdline),
1703 ei->op.proc_get_link = proc_cwd_link; 1756 INF("stat", S_IRUGO, tgid_stat),
1704 break; 1757 INF("statm", S_IRUGO, pid_statm),
1705 case PROC_TID_ROOT: 1758 REG("maps", S_IRUGO, maps),
1706 case PROC_TGID_ROOT:
1707 inode->i_op = &proc_pid_link_inode_operations;
1708 ei->op.proc_get_link = proc_root_link;
1709 break;
1710 case PROC_TID_ENVIRON:
1711 case PROC_TGID_ENVIRON:
1712 inode->i_fop = &proc_info_file_operations;
1713 ei->op.proc_read = proc_pid_environ;
1714 break;
1715 case PROC_TID_AUXV:
1716 case PROC_TGID_AUXV:
1717 inode->i_fop = &proc_info_file_operations;
1718 ei->op.proc_read = proc_pid_auxv;
1719 break;
1720 case PROC_TID_STATUS:
1721 case PROC_TGID_STATUS:
1722 inode->i_fop = &proc_info_file_operations;
1723 ei->op.proc_read = proc_pid_status;
1724 break;
1725 case PROC_TID_STAT:
1726 inode->i_fop = &proc_info_file_operations;
1727 ei->op.proc_read = proc_tid_stat;
1728 break;
1729 case PROC_TGID_STAT:
1730 inode->i_fop = &proc_info_file_operations;
1731 ei->op.proc_read = proc_tgid_stat;
1732 break;
1733 case PROC_TID_CMDLINE:
1734 case PROC_TGID_CMDLINE:
1735 inode->i_fop = &proc_info_file_operations;
1736 ei->op.proc_read = proc_pid_cmdline;
1737 break;
1738 case PROC_TID_STATM:
1739 case PROC_TGID_STATM:
1740 inode->i_fop = &proc_info_file_operations;
1741 ei->op.proc_read = proc_pid_statm;
1742 break;
1743 case PROC_TID_MAPS:
1744 case PROC_TGID_MAPS:
1745 inode->i_fop = &proc_maps_operations;
1746 break;
1747#ifdef CONFIG_NUMA 1759#ifdef CONFIG_NUMA
1748 case PROC_TID_NUMA_MAPS: 1760 REG("numa_maps", S_IRUGO, numa_maps),
1749 case PROC_TGID_NUMA_MAPS:
1750 inode->i_fop = &proc_numa_maps_operations;
1751 break;
1752#endif 1761#endif
1753 case PROC_TID_MEM: 1762 REG("mem", S_IRUSR|S_IWUSR, mem),
1754 case PROC_TGID_MEM:
1755 inode->i_fop = &proc_mem_operations;
1756 break;
1757#ifdef CONFIG_SECCOMP 1763#ifdef CONFIG_SECCOMP
1758 case PROC_TID_SECCOMP: 1764 REG("seccomp", S_IRUSR|S_IWUSR, seccomp),
1759 case PROC_TGID_SECCOMP: 1765#endif
1760 inode->i_fop = &proc_seccomp_operations; 1766 LNK("cwd", cwd),
1761 break; 1767 LNK("root", root),
1762#endif /* CONFIG_SECCOMP */ 1768 LNK("exe", exe),
1763 case PROC_TID_MOUNTS: 1769 REG("mounts", S_IRUGO, mounts),
1764 case PROC_TGID_MOUNTS: 1770 REG("mountstats", S_IRUSR, mountstats),
1765 inode->i_fop = &proc_mounts_operations;
1766 break;
1767#ifdef CONFIG_MMU 1771#ifdef CONFIG_MMU
1768 case PROC_TID_SMAPS: 1772 REG("smaps", S_IRUGO, smaps),
1769 case PROC_TGID_SMAPS:
1770 inode->i_fop = &proc_smaps_operations;
1771 break;
1772#endif 1773#endif
1773 case PROC_TID_MOUNTSTATS:
1774 case PROC_TGID_MOUNTSTATS:
1775 inode->i_fop = &proc_mountstats_operations;
1776 break;
1777#ifdef CONFIG_SECURITY 1774#ifdef CONFIG_SECURITY
1778 case PROC_TID_ATTR: 1775 DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
1779 inode->i_nlink = 2;
1780 inode->i_op = &proc_tid_attr_inode_operations;
1781 inode->i_fop = &proc_tid_attr_operations;
1782 break;
1783 case PROC_TGID_ATTR:
1784 inode->i_nlink = 2;
1785 inode->i_op = &proc_tgid_attr_inode_operations;
1786 inode->i_fop = &proc_tgid_attr_operations;
1787 break;
1788 case PROC_TID_ATTR_CURRENT:
1789 case PROC_TGID_ATTR_CURRENT:
1790 case PROC_TID_ATTR_PREV:
1791 case PROC_TGID_ATTR_PREV:
1792 case PROC_TID_ATTR_EXEC:
1793 case PROC_TGID_ATTR_EXEC:
1794 case PROC_TID_ATTR_FSCREATE:
1795 case PROC_TGID_ATTR_FSCREATE:
1796 case PROC_TID_ATTR_KEYCREATE:
1797 case PROC_TGID_ATTR_KEYCREATE:
1798 case PROC_TID_ATTR_SOCKCREATE:
1799 case PROC_TGID_ATTR_SOCKCREATE:
1800 inode->i_fop = &proc_pid_attr_operations;
1801 break;
1802#endif 1776#endif
1803#ifdef CONFIG_KALLSYMS 1777#ifdef CONFIG_KALLSYMS
1804 case PROC_TID_WCHAN: 1778 INF("wchan", S_IRUGO, pid_wchan),
1805 case PROC_TGID_WCHAN:
1806 inode->i_fop = &proc_info_file_operations;
1807 ei->op.proc_read = proc_pid_wchan;
1808 break;
1809#endif 1779#endif
1810#ifdef CONFIG_SCHEDSTATS 1780#ifdef CONFIG_SCHEDSTATS
1811 case PROC_TID_SCHEDSTAT: 1781 INF("schedstat", S_IRUGO, pid_schedstat),
1812 case PROC_TGID_SCHEDSTAT:
1813 inode->i_fop = &proc_info_file_operations;
1814 ei->op.proc_read = proc_pid_schedstat;
1815 break;
1816#endif 1782#endif
1817#ifdef CONFIG_CPUSETS 1783#ifdef CONFIG_CPUSETS
1818 case PROC_TID_CPUSET: 1784 REG("cpuset", S_IRUGO, cpuset),
1819 case PROC_TGID_CPUSET:
1820 inode->i_fop = &proc_cpuset_operations;
1821 break;
1822#endif 1785#endif
1823 case PROC_TID_OOM_SCORE: 1786 INF("oom_score", S_IRUGO, oom_score),
1824 case PROC_TGID_OOM_SCORE: 1787 REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
1825 inode->i_fop = &proc_info_file_operations;
1826 ei->op.proc_read = proc_oom_score;
1827 break;
1828 case PROC_TID_OOM_ADJUST:
1829 case PROC_TGID_OOM_ADJUST:
1830 inode->i_fop = &proc_oom_adjust_operations;
1831 break;
1832#ifdef CONFIG_AUDITSYSCALL 1788#ifdef CONFIG_AUDITSYSCALL
1833 case PROC_TID_LOGINUID: 1789 REG("loginuid", S_IWUSR|S_IRUGO, loginuid),
1834 case PROC_TGID_LOGINUID:
1835 inode->i_fop = &proc_loginuid_operations;
1836 break;
1837#endif 1790#endif
1838 default:
1839 printk("procfs: impossible type (%d)",p->type);
1840 iput(inode);
1841 error = ERR_PTR(-EINVAL);
1842 goto out;
1843 }
1844 dentry->d_op = &pid_dentry_operations;
1845 d_add(dentry, inode);
1846 /* Close the race of the process dying before we return the dentry */
1847 if (pid_revalidate(dentry, NULL))
1848 error = NULL;
1849out:
1850 put_task_struct(task);
1851out_no_task:
1852 return error;
1853}
1854
1855static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1856 return proc_pident_lookup(dir, dentry, tgid_base_stuff);
1857}
1858
1859static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1860 return proc_pident_lookup(dir, dentry, tid_base_stuff);
1861}
1862
1863static struct file_operations proc_tgid_base_operations = {
1864 .read = generic_read_dir,
1865 .readdir = proc_tgid_base_readdir,
1866}; 1791};
1867 1792
1868static struct file_operations proc_tid_base_operations = { 1793static int proc_tgid_base_readdir(struct file * filp,
1869 .read = generic_read_dir,
1870 .readdir = proc_tid_base_readdir,
1871};
1872
1873static struct inode_operations proc_tgid_base_inode_operations = {
1874 .lookup = proc_tgid_base_lookup,
1875 .getattr = pid_getattr,
1876 .setattr = proc_setattr,
1877};
1878
1879static struct inode_operations proc_tid_base_inode_operations = {
1880 .lookup = proc_tid_base_lookup,
1881 .getattr = pid_getattr,
1882 .setattr = proc_setattr,
1883};
1884
1885#ifdef CONFIG_SECURITY
1886static int proc_tgid_attr_readdir(struct file * filp,
1887 void * dirent, filldir_t filldir)
1888{
1889 return proc_pident_readdir(filp,dirent,filldir,
1890 tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff));
1891}
1892
1893static int proc_tid_attr_readdir(struct file * filp,
1894 void * dirent, filldir_t filldir) 1794 void * dirent, filldir_t filldir)
1895{ 1795{
1896 return proc_pident_readdir(filp,dirent,filldir, 1796 return proc_pident_readdir(filp,dirent,filldir,
1897 tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); 1797 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
1898} 1798}
1899 1799
1900static struct file_operations proc_tgid_attr_operations = { 1800static struct file_operations proc_tgid_base_operations = {
1901 .read = generic_read_dir,
1902 .readdir = proc_tgid_attr_readdir,
1903};
1904
1905static struct file_operations proc_tid_attr_operations = {
1906 .read = generic_read_dir, 1801 .read = generic_read_dir,
1907 .readdir = proc_tid_attr_readdir, 1802 .readdir = proc_tgid_base_readdir,
1908}; 1803};
1909 1804
1910static struct dentry *proc_tgid_attr_lookup(struct inode *dir, 1805static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1911 struct dentry *dentry, struct nameidata *nd) 1806 return proc_pident_lookup(dir, dentry,
1912{ 1807 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
1913 return proc_pident_lookup(dir, dentry, tgid_attr_stuff);
1914}
1915
1916static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1917 struct dentry *dentry, struct nameidata *nd)
1918{
1919 return proc_pident_lookup(dir, dentry, tid_attr_stuff);
1920} 1808}
1921 1809
1922static struct inode_operations proc_tgid_attr_inode_operations = { 1810static struct inode_operations proc_tgid_base_inode_operations = {
1923 .lookup = proc_tgid_attr_lookup, 1811 .lookup = proc_tgid_base_lookup,
1924 .getattr = pid_getattr,
1925 .setattr = proc_setattr,
1926};
1927
1928static struct inode_operations proc_tid_attr_inode_operations = {
1929 .lookup = proc_tid_attr_lookup,
1930 .getattr = pid_getattr, 1812 .getattr = pid_getattr,
1931 .setattr = proc_setattr, 1813 .setattr = proc_setattr,
1932}; 1814};
1933#endif
1934
1935/*
1936 * /proc/self:
1937 */
1938static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1939 int buflen)
1940{
1941 char tmp[PROC_NUMBUF];
1942 sprintf(tmp, "%d", current->tgid);
1943 return vfs_readlink(dentry,buffer,buflen,tmp);
1944}
1945
1946static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1947{
1948 char tmp[PROC_NUMBUF];
1949 sprintf(tmp, "%d", current->tgid);
1950 return ERR_PTR(vfs_follow_link(nd,tmp));
1951}
1952
1953static struct inode_operations proc_self_inode_operations = {
1954 .readlink = proc_self_readlink,
1955 .follow_link = proc_self_follow_link,
1956};
1957 1815
1958/** 1816/**
1959 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 1817 * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
@@ -2022,54 +1880,23 @@ out:
2022 return; 1880 return;
2023} 1881}
2024 1882
2025/* SMP-safe */ 1883struct dentry *proc_pid_instantiate(struct inode *dir,
2026struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1884 struct dentry * dentry, struct task_struct *task, void *ptr)
2027{ 1885{
2028 struct dentry *result = ERR_PTR(-ENOENT); 1886 struct dentry *error = ERR_PTR(-ENOENT);
2029 struct task_struct *task;
2030 struct inode *inode; 1887 struct inode *inode;
2031 struct proc_inode *ei;
2032 unsigned tgid;
2033
2034 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
2035 inode = new_inode(dir->i_sb);
2036 if (!inode)
2037 return ERR_PTR(-ENOMEM);
2038 ei = PROC_I(inode);
2039 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2040 inode->i_ino = fake_ino(0, PROC_TGID_INO);
2041 ei->pde = NULL;
2042 inode->i_mode = S_IFLNK|S_IRWXUGO;
2043 inode->i_uid = inode->i_gid = 0;
2044 inode->i_size = 64;
2045 inode->i_op = &proc_self_inode_operations;
2046 d_add(dentry, inode);
2047 return NULL;
2048 }
2049 tgid = name_to_int(dentry);
2050 if (tgid == ~0U)
2051 goto out;
2052 1888
2053 rcu_read_lock(); 1889 inode = proc_pid_make_inode(dir->i_sb, task);
2054 task = find_task_by_pid(tgid);
2055 if (task)
2056 get_task_struct(task);
2057 rcu_read_unlock();
2058 if (!task)
2059 goto out;
2060
2061 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
2062 if (!inode) 1890 if (!inode)
2063 goto out_put_task; 1891 goto out;
2064 1892
2065 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 1893 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2066 inode->i_op = &proc_tgid_base_inode_operations; 1894 inode->i_op = &proc_tgid_base_inode_operations;
2067 inode->i_fop = &proc_tgid_base_operations; 1895 inode->i_fop = &proc_tgid_base_operations;
2068 inode->i_flags|=S_IMMUTABLE; 1896 inode->i_flags|=S_IMMUTABLE;
2069#ifdef CONFIG_SECURITY
2070 inode->i_nlink = 5;
2071#else
2072 inode->i_nlink = 4; 1897 inode->i_nlink = 4;
1898#ifdef CONFIG_SECURITY
1899 inode->i_nlink += 1;
2073#endif 1900#endif
2074 1901
2075 dentry->d_op = &pid_dentry_operations; 1902 dentry->d_op = &pid_dentry_operations;
@@ -2077,179 +1904,251 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2077 d_add(dentry, inode); 1904 d_add(dentry, inode);
2078 /* Close the race of the process dying before we return the dentry */ 1905 /* Close the race of the process dying before we return the dentry */
2079 if (pid_revalidate(dentry, NULL)) 1906 if (pid_revalidate(dentry, NULL))
2080 result = NULL; 1907 error = NULL;
2081
2082out_put_task:
2083 put_task_struct(task);
2084out: 1908out:
2085 return result; 1909 return error;
2086} 1910}
2087 1911
2088/* SMP-safe */ 1912struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2089static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2090{ 1913{
2091 struct dentry *result = ERR_PTR(-ENOENT); 1914 struct dentry *result = ERR_PTR(-ENOENT);
2092 struct task_struct *task; 1915 struct task_struct *task;
2093 struct task_struct *leader = get_proc_task(dir); 1916 unsigned tgid;
2094 struct inode *inode;
2095 unsigned tid;
2096 1917
2097 if (!leader) 1918 result = proc_base_lookup(dir, dentry);
2098 goto out_no_task; 1919 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
1920 goto out;
2099 1921
2100 tid = name_to_int(dentry); 1922 tgid = name_to_int(dentry);
2101 if (tid == ~0U) 1923 if (tgid == ~0U)
2102 goto out; 1924 goto out;
2103 1925
2104 rcu_read_lock(); 1926 rcu_read_lock();
2105 task = find_task_by_pid(tid); 1927 task = find_task_by_pid(tgid);
2106 if (task) 1928 if (task)
2107 get_task_struct(task); 1929 get_task_struct(task);
2108 rcu_read_unlock(); 1930 rcu_read_unlock();
2109 if (!task) 1931 if (!task)
2110 goto out; 1932 goto out;
2111 if (leader->tgid != task->tgid)
2112 goto out_drop_task;
2113
2114 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
2115
2116
2117 if (!inode)
2118 goto out_drop_task;
2119 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2120 inode->i_op = &proc_tid_base_inode_operations;
2121 inode->i_fop = &proc_tid_base_operations;
2122 inode->i_flags|=S_IMMUTABLE;
2123#ifdef CONFIG_SECURITY
2124 inode->i_nlink = 4;
2125#else
2126 inode->i_nlink = 3;
2127#endif
2128
2129 dentry->d_op = &pid_dentry_operations;
2130
2131 d_add(dentry, inode);
2132 /* Close the race of the process dying before we return the dentry */
2133 if (pid_revalidate(dentry, NULL))
2134 result = NULL;
2135 1933
2136out_drop_task: 1934 result = proc_pid_instantiate(dir, dentry, task, NULL);
2137 put_task_struct(task); 1935 put_task_struct(task);
2138out: 1936out:
2139 put_task_struct(leader);
2140out_no_task:
2141 return result; 1937 return result;
2142} 1938}
2143 1939
2144/* 1940/*
2145 * Find the first tgid to return to user space. 1941 * Find the first task with tgid >= tgid
2146 *
2147 * Usually this is just whatever follows &init_task, but if the users
2148 * buffer was too small to hold the full list or there was a seek into
2149 * the middle of the directory we have more work to do.
2150 *
2151 * In the case of a short read we start with find_task_by_pid.
2152 * 1942 *
2153 * In the case of a seek we start with &init_task and walk nr
2154 * threads past it.
2155 */ 1943 */
2156static struct task_struct *first_tgid(int tgid, unsigned int nr) 1944static struct task_struct *next_tgid(unsigned int tgid)
2157{ 1945{
2158 struct task_struct *pos; 1946 struct task_struct *task;
2159 rcu_read_lock(); 1947 struct pid *pid;
2160 if (tgid && nr) {
2161 pos = find_task_by_pid(tgid);
2162 if (pos && thread_group_leader(pos))
2163 goto found;
2164 }
2165 /* If nr exceeds the number of processes get out quickly */
2166 pos = NULL;
2167 if (nr && nr >= nr_processes())
2168 goto done;
2169 1948
2170 /* If we haven't found our starting place yet start with 1949 rcu_read_lock();
2171 * the init_task and walk nr tasks forward. 1950retry:
2172 */ 1951 task = NULL;
2173 for (pos = next_task(&init_task); nr > 0; --nr) { 1952 pid = find_ge_pid(tgid);
2174 pos = next_task(pos); 1953 if (pid) {
2175 if (pos == &init_task) { 1954 tgid = pid->nr + 1;
2176 pos = NULL; 1955 task = pid_task(pid, PIDTYPE_PID);
2177 goto done; 1956 /* What we want to know is if the pid we have found is the
2178 } 1957 * pid of a thread_group_leader. Testing for task
1958 * being a thread_group_leader is the obvious thing
1959 * to do but there is a window when it fails, due to
1960 * the pid transfer logic in de_thread.
1961 *
1962 * So we perform the straightforward test of seeing
1963 * if the pid we have found is the pid of a thread
1964 * group leader, and don't worry if the task we have
1965 * found doesn't happen to be a thread group leader.
1966 * As we don't care in the case of readdir.
1967 */
1968 if (!task || !has_group_leader_pid(task))
1969 goto retry;
1970 get_task_struct(task);
2179 } 1971 }
2180found:
2181 get_task_struct(pos);
2182done:
2183 rcu_read_unlock(); 1972 rcu_read_unlock();
2184 return pos; 1973 return task;
2185} 1974}
2186 1975
2187/* 1976#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
2188 * Find the next task in the task list. 1977
2189 * Return NULL if we loop or there is any error. 1978static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
2190 * 1979 struct task_struct *task, int tgid)
2191 * The reference to the input task_struct is released.
2192 */
2193static struct task_struct *next_tgid(struct task_struct *start)
2194{ 1980{
2195 struct task_struct *pos; 1981 char name[PROC_NUMBUF];
2196 rcu_read_lock(); 1982 int len = snprintf(name, sizeof(name), "%d", tgid);
2197 pos = start; 1983 return proc_fill_cache(filp, dirent, filldir, name, len,
2198 if (pid_alive(start)) 1984 proc_pid_instantiate, task, NULL);
2199 pos = next_task(start);
2200 if (pid_alive(pos) && (pos != &init_task)) {
2201 get_task_struct(pos);
2202 goto done;
2203 }
2204 pos = NULL;
2205done:
2206 rcu_read_unlock();
2207 put_task_struct(start);
2208 return pos;
2209} 1985}
2210 1986
2211/* for the /proc/ directory itself, after non-process stuff has been done */ 1987/* for the /proc/ directory itself, after non-process stuff has been done */
2212int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 1988int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2213{ 1989{
2214 char buf[PROC_NUMBUF];
2215 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 1990 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
1991 struct task_struct *reaper = get_proc_task(filp->f_dentry->d_inode);
2216 struct task_struct *task; 1992 struct task_struct *task;
2217 int tgid; 1993 int tgid;
2218 1994
2219 if (!nr) { 1995 if (!reaper)
2220 ino_t ino = fake_ino(0,PROC_TGID_INO); 1996 goto out_no_task;
2221 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) 1997
2222 return 0; 1998 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
2223 filp->f_pos++; 1999 struct pid_entry *p = &proc_base_stuff[nr];
2224 nr++; 2000 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
2001 goto out;
2225 } 2002 }
2226 nr -= 1;
2227 2003
2228 /* f_version caches the tgid value that the last readdir call couldn't 2004 tgid = filp->f_pos - TGID_OFFSET;
2229 * return. lseek aka telldir automagically resets f_version to 0. 2005 for (task = next_tgid(tgid);
2230 */
2231 tgid = filp->f_version;
2232 filp->f_version = 0;
2233 for (task = first_tgid(tgid, nr);
2234 task; 2006 task;
2235 task = next_tgid(task), filp->f_pos++) { 2007 put_task_struct(task), task = next_tgid(tgid + 1)) {
2236 int len;
2237 ino_t ino;
2238 tgid = task->pid; 2008 tgid = task->pid;
2239 len = snprintf(buf, sizeof(buf), "%d", tgid); 2009 filp->f_pos = tgid + TGID_OFFSET;
2240 ino = fake_ino(tgid, PROC_TGID_INO); 2010 if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) {
2241 if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) {
2242 /* returning this tgid failed, save it as the first
2243 * pid for the next readir call */
2244 filp->f_version = tgid;
2245 put_task_struct(task); 2011 put_task_struct(task);
2246 break; 2012 goto out;
2247 } 2013 }
2248 } 2014 }
2015 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
2016out:
2017 put_task_struct(reaper);
2018out_no_task:
2249 return 0; 2019 return 0;
2250} 2020}
2251 2021
2252/* 2022/*
2023 * Tasks
2024 */
2025static struct pid_entry tid_base_stuff[] = {
2026 DIR("fd", S_IRUSR|S_IXUSR, fd),
2027 INF("environ", S_IRUSR, pid_environ),
2028 INF("auxv", S_IRUSR, pid_auxv),
2029 INF("status", S_IRUGO, pid_status),
2030 INF("cmdline", S_IRUGO, pid_cmdline),
2031 INF("stat", S_IRUGO, tid_stat),
2032 INF("statm", S_IRUGO, pid_statm),
2033 REG("maps", S_IRUGO, maps),
2034#ifdef CONFIG_NUMA
2035 REG("numa_maps", S_IRUGO, numa_maps),
2036#endif
2037 REG("mem", S_IRUSR|S_IWUSR, mem),
2038#ifdef CONFIG_SECCOMP
2039 REG("seccomp", S_IRUSR|S_IWUSR, seccomp),
2040#endif
2041 LNK("cwd", cwd),
2042 LNK("root", root),
2043 LNK("exe", exe),
2044 REG("mounts", S_IRUGO, mounts),
2045#ifdef CONFIG_MMU
2046 REG("smaps", S_IRUGO, smaps),
2047#endif
2048#ifdef CONFIG_SECURITY
2049 DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
2050#endif
2051#ifdef CONFIG_KALLSYMS
2052 INF("wchan", S_IRUGO, pid_wchan),
2053#endif
2054#ifdef CONFIG_SCHEDSTATS
2055 INF("schedstat", S_IRUGO, pid_schedstat),
2056#endif
2057#ifdef CONFIG_CPUSETS
2058 REG("cpuset", S_IRUGO, cpuset),
2059#endif
2060 INF("oom_score", S_IRUGO, oom_score),
2061 REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
2062#ifdef CONFIG_AUDITSYSCALL
2063 REG("loginuid", S_IWUSR|S_IRUGO, loginuid),
2064#endif
2065};
2066
2067static int proc_tid_base_readdir(struct file * filp,
2068 void * dirent, filldir_t filldir)
2069{
2070 return proc_pident_readdir(filp,dirent,filldir,
2071 tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
2072}
2073
2074static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
2075 return proc_pident_lookup(dir, dentry,
2076 tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
2077}
2078
2079static struct file_operations proc_tid_base_operations = {
2080 .read = generic_read_dir,
2081 .readdir = proc_tid_base_readdir,
2082};
2083
2084static struct inode_operations proc_tid_base_inode_operations = {
2085 .lookup = proc_tid_base_lookup,
2086 .getattr = pid_getattr,
2087 .setattr = proc_setattr,
2088};
2089
2090static struct dentry *proc_task_instantiate(struct inode *dir,
2091 struct dentry *dentry, struct task_struct *task, void *ptr)
2092{
2093 struct dentry *error = ERR_PTR(-ENOENT);
2094 struct inode *inode;
2095 inode = proc_pid_make_inode(dir->i_sb, task);
2096
2097 if (!inode)
2098 goto out;
2099 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2100 inode->i_op = &proc_tid_base_inode_operations;
2101 inode->i_fop = &proc_tid_base_operations;
2102 inode->i_flags|=S_IMMUTABLE;
2103 inode->i_nlink = 3;
2104#ifdef CONFIG_SECURITY
2105 inode->i_nlink += 1;
2106#endif
2107
2108 dentry->d_op = &pid_dentry_operations;
2109
2110 d_add(dentry, inode);
2111 /* Close the race of the process dying before we return the dentry */
2112 if (pid_revalidate(dentry, NULL))
2113 error = NULL;
2114out:
2115 return error;
2116}
2117
2118static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2119{
2120 struct dentry *result = ERR_PTR(-ENOENT);
2121 struct task_struct *task;
2122 struct task_struct *leader = get_proc_task(dir);
2123 unsigned tid;
2124
2125 if (!leader)
2126 goto out_no_task;
2127
2128 tid = name_to_int(dentry);
2129 if (tid == ~0U)
2130 goto out;
2131
2132 rcu_read_lock();
2133 task = find_task_by_pid(tid);
2134 if (task)
2135 get_task_struct(task);
2136 rcu_read_unlock();
2137 if (!task)
2138 goto out;
2139 if (leader->tgid != task->tgid)
2140 goto out_drop_task;
2141
2142 result = proc_task_instantiate(dir, dentry, task, NULL);
2143out_drop_task:
2144 put_task_struct(task);
2145out:
2146 put_task_struct(leader);
2147out_no_task:
2148 return result;
2149}
2150
2151/*
2253 * Find the first tid of a thread group to return to user space. 2152 * Find the first tid of a thread group to return to user space.
2254 * 2153 *
2255 * Usually this is just the thread group leader, but if the users 2154 * Usually this is just the thread group leader, but if the users
@@ -2318,10 +2217,18 @@ static struct task_struct *next_tid(struct task_struct *start)
2318 return pos; 2217 return pos;
2319} 2218}
2320 2219
2220static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
2221 struct task_struct *task, int tid)
2222{
2223 char name[PROC_NUMBUF];
2224 int len = snprintf(name, sizeof(name), "%d", tid);
2225 return proc_fill_cache(filp, dirent, filldir, name, len,
2226 proc_task_instantiate, task, NULL);
2227}
2228
2321/* for the /proc/TGID/task/ directories */ 2229/* for the /proc/TGID/task/ directories */
2322static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 2230static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
2323{ 2231{
2324 char buf[PROC_NUMBUF];
2325 struct dentry *dentry = filp->f_dentry; 2232 struct dentry *dentry = filp->f_dentry;
2326 struct inode *inode = dentry->d_inode; 2233 struct inode *inode = dentry->d_inode;
2327 struct task_struct *leader = get_proc_task(inode); 2234 struct task_struct *leader = get_proc_task(inode);
@@ -2358,11 +2265,8 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
2358 for (task = first_tid(leader, tid, pos - 2); 2265 for (task = first_tid(leader, tid, pos - 2);
2359 task; 2266 task;
2360 task = next_tid(task), pos++) { 2267 task = next_tid(task), pos++) {
2361 int len;
2362 tid = task->pid; 2268 tid = task->pid;
2363 len = snprintf(buf, sizeof(buf), "%d", tid); 2269 if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
2364 ino = fake_ino(tid, PROC_TID_INO);
2365 if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) {
2366 /* returning this tid failed, save it as the first 2270 /* returning this tid failed, save it as the first
2367 * pid for the next readdir call */ 2271 * pid for the next readdir call */
2368 filp->f_version = tid; 2272 filp->f_version = tid;
@@ -2392,3 +2296,14 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
2392 2296
2393 return 0; 2297 return 0;
2394} 2298}
2299
2300static struct inode_operations proc_task_inode_operations = {
2301 .lookup = proc_task_lookup,
2302 .getattr = proc_task_getattr,
2303 .setattr = proc_setattr,
2304};
2305
2306static struct file_operations proc_task_operations = {
2307 .read = generic_read_dir,
2308 .readdir = proc_task_readdir,
2309};
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 66bc425f2f3d..8d88e58ed5cc 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -45,6 +45,7 @@
45#include <linux/sysrq.h> 45#include <linux/sysrq.h>
46#include <linux/vmalloc.h> 46#include <linux/vmalloc.h>
47#include <linux/crash_dump.h> 47#include <linux/crash_dump.h>
48#include <linux/pspace.h>
48#include <asm/uaccess.h> 49#include <asm/uaccess.h>
49#include <asm/pgtable.h> 50#include <asm/pgtable.h>
50#include <asm/io.h> 51#include <asm/io.h>
@@ -91,7 +92,7 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
91 LOAD_INT(a), LOAD_FRAC(a), 92 LOAD_INT(a), LOAD_FRAC(a),
92 LOAD_INT(b), LOAD_FRAC(b), 93 LOAD_INT(b), LOAD_FRAC(b),
93 LOAD_INT(c), LOAD_FRAC(c), 94 LOAD_INT(c), LOAD_FRAC(c),
94 nr_running(), nr_threads, last_pid); 95 nr_running(), nr_threads, init_pspace.last_pid);
95 return proc_calc_metrics(page, start, off, count, eof, len); 96 return proc_calc_metrics(page, start, off, count, eof, len);
96} 97}
97 98
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 8901c65caca8..ffe66c38488b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -16,6 +16,7 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/smp_lock.h> 18#include <linux/smp_lock.h>
19#include <linux/mount.h>
19 20
20#include "internal.h" 21#include "internal.h"
21 22
@@ -28,6 +29,17 @@ struct proc_dir_entry *proc_sys_root;
28static int proc_get_sb(struct file_system_type *fs_type, 29static int proc_get_sb(struct file_system_type *fs_type,
29 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 30 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
30{ 31{
32 if (proc_mnt) {
33 /* Seed the root directory with a pid so it doesn't need
34 * to be special in base.c. I would do this earlier but
35 * the only task alive when /proc is mounted the first time
36 * is the init_task and it doesn't have any pids.
37 */
38 struct proc_inode *ei;
39 ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
40 if (!ei->pid)
41 ei->pid = find_get_pid(1);
42 }
31 return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); 43 return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
32} 44}
33 45
diff --git a/fs/readdir.c b/fs/readdir.c
index b6109329b607..bff3ee58e2f8 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -69,20 +69,24 @@ struct readdir_callback {
69}; 69};
70 70
71static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, 71static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset,
72 ino_t ino, unsigned int d_type) 72 u64 ino, unsigned int d_type)
73{ 73{
74 struct readdir_callback * buf = (struct readdir_callback *) __buf; 74 struct readdir_callback * buf = (struct readdir_callback *) __buf;
75 struct old_linux_dirent __user * dirent; 75 struct old_linux_dirent __user * dirent;
76 unsigned long d_ino;
76 77
77 if (buf->result) 78 if (buf->result)
78 return -EINVAL; 79 return -EINVAL;
80 d_ino = ino;
81 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
82 return -EOVERFLOW;
79 buf->result++; 83 buf->result++;
80 dirent = buf->dirent; 84 dirent = buf->dirent;
81 if (!access_ok(VERIFY_WRITE, dirent, 85 if (!access_ok(VERIFY_WRITE, dirent,
82 (unsigned long)(dirent->d_name + namlen + 1) - 86 (unsigned long)(dirent->d_name + namlen + 1) -
83 (unsigned long)dirent)) 87 (unsigned long)dirent))
84 goto efault; 88 goto efault;
85 if ( __put_user(ino, &dirent->d_ino) || 89 if ( __put_user(d_ino, &dirent->d_ino) ||
86 __put_user(offset, &dirent->d_offset) || 90 __put_user(offset, &dirent->d_offset) ||
87 __put_user(namlen, &dirent->d_namlen) || 91 __put_user(namlen, &dirent->d_namlen) ||
88 __copy_to_user(dirent->d_name, name, namlen) || 92 __copy_to_user(dirent->d_name, name, namlen) ||
@@ -138,22 +142,26 @@ struct getdents_callback {
138}; 142};
139 143
140static int filldir(void * __buf, const char * name, int namlen, loff_t offset, 144static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
141 ino_t ino, unsigned int d_type) 145 u64 ino, unsigned int d_type)
142{ 146{
143 struct linux_dirent __user * dirent; 147 struct linux_dirent __user * dirent;
144 struct getdents_callback * buf = (struct getdents_callback *) __buf; 148 struct getdents_callback * buf = (struct getdents_callback *) __buf;
149 unsigned long d_ino;
145 int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); 150 int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 2);
146 151
147 buf->error = -EINVAL; /* only used if we fail.. */ 152 buf->error = -EINVAL; /* only used if we fail.. */
148 if (reclen > buf->count) 153 if (reclen > buf->count)
149 return -EINVAL; 154 return -EINVAL;
155 d_ino = ino;
156 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
157 return -EOVERFLOW;
150 dirent = buf->previous; 158 dirent = buf->previous;
151 if (dirent) { 159 if (dirent) {
152 if (__put_user(offset, &dirent->d_off)) 160 if (__put_user(offset, &dirent->d_off))
153 goto efault; 161 goto efault;
154 } 162 }
155 dirent = buf->current_dir; 163 dirent = buf->current_dir;
156 if (__put_user(ino, &dirent->d_ino)) 164 if (__put_user(d_ino, &dirent->d_ino))
157 goto efault; 165 goto efault;
158 if (__put_user(reclen, &dirent->d_reclen)) 166 if (__put_user(reclen, &dirent->d_reclen))
159 goto efault; 167 goto efault;
@@ -222,7 +230,7 @@ struct getdents_callback64 {
222}; 230};
223 231
224static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, 232static int filldir64(void * __buf, const char * name, int namlen, loff_t offset,
225 ino_t ino, unsigned int d_type) 233 u64 ino, unsigned int d_type)
226{ 234{
227 struct linux_dirent64 __user *dirent; 235 struct linux_dirent64 __user *dirent;
228 struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; 236 struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 41f24369e47a..c093642fb983 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -38,8 +38,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
38 int err; 38 int err;
39 int jbegin_failure = 0; 39 int jbegin_failure = 0;
40 40
41 if (!S_ISREG(inode->i_mode)) 41 BUG_ON(!S_ISREG(inode->i_mode));
42 BUG();
43 42
44 /* fast out for when nothing needs to be done */ 43 /* fast out for when nothing needs to be done */
45 if ((atomic_read(&inode->i_count) > 1 || 44 if ((atomic_read(&inode->i_count) > 1 ||
@@ -125,8 +124,7 @@ static int reiserfs_sync_file(struct file *p_s_filp,
125 int n_err; 124 int n_err;
126 int barrier_done; 125 int barrier_done;
127 126
128 if (!S_ISREG(p_s_inode->i_mode)) 127 BUG_ON(!S_ISREG(p_s_inode->i_mode));
129 BUG();
130 n_err = sync_mapping_buffers(p_s_inode->i_mapping); 128 n_err = sync_mapping_buffers(p_s_inode->i_mapping);
131 reiserfs_write_lock(p_s_inode->i_sb); 129 reiserfs_write_lock(p_s_inode->i_sb);
132 barrier_done = reiserfs_commit_for_inode(p_s_inode); 130 barrier_done = reiserfs_commit_for_inode(p_s_inode);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 7e5a2f5ebeb0..9c69bcacad22 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1780,7 +1780,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1780 err = -EDQUOT; 1780 err = -EDQUOT;
1781 goto out_end_trans; 1781 goto out_end_trans;
1782 } 1782 }
1783 if (!dir || !dir->i_nlink) { 1783 if (!dir->i_nlink) {
1784 err = -EPERM; 1784 err = -EPERM;
1785 goto out_bad_inode; 1785 goto out_bad_inode;
1786 } 1786 }
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index 7a88adbceef6..b9b423b22a8b 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -75,8 +75,7 @@ static int sd_create_vi(struct virtual_node *vn,
75static int sd_check_left(struct virtual_item *vi, int free, 75static int sd_check_left(struct virtual_item *vi, int free,
76 int start_skip, int end_skip) 76 int start_skip, int end_skip)
77{ 77{
78 if (start_skip || end_skip) 78 BUG_ON(start_skip || end_skip);
79 BUG();
80 return -1; 79 return -1;
81} 80}
82 81
@@ -87,8 +86,7 @@ static int sd_check_right(struct virtual_item *vi, int free)
87 86
88static int sd_part_size(struct virtual_item *vi, int first, int count) 87static int sd_part_size(struct virtual_item *vi, int first, int count)
89{ 88{
90 if (count) 89 BUG_ON(count);
91 BUG();
92 return 0; 90 return 0;
93} 91}
94 92
@@ -476,8 +474,7 @@ static int direntry_create_vi(struct virtual_node *vn,
476 474
477 vi->vi_index = TYPE_DIRENTRY; 475 vi->vi_index = TYPE_DIRENTRY;
478 476
479 if (!(vi->vi_ih) || !vi->vi_item) 477 BUG_ON(!(vi->vi_ih) || !vi->vi_item);
480 BUG();
481 478
482 dir_u->flags = 0; 479 dir_u->flags = 0;
483 if (le_ih_k_offset(vi->vi_ih) == DOT_OFFSET) 480 if (le_ih_k_offset(vi->vi_ih) == DOT_OFFSET)
@@ -575,8 +572,7 @@ static int direntry_check_right(struct virtual_item *vi, int free)
575 free -= dir_u->entry_sizes[i]; 572 free -= dir_u->entry_sizes[i];
576 entries++; 573 entries++;
577 } 574 }
578 if (entries == dir_u->entry_count) 575 BUG_ON(entries == dir_u->entry_count);
579 BUG();
580 576
581 /* "." and ".." can not be separated from each other */ 577 /* "." and ".." can not be separated from each other */
582 if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) 578 if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e6b5ccf23f15..ad8cbc49883a 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -718,8 +718,7 @@ static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
718 spinlock_t * lock, void (fn) (struct buffer_chunk *)) 718 spinlock_t * lock, void (fn) (struct buffer_chunk *))
719{ 719{
720 int ret = 0; 720 int ret = 0;
721 if (chunk->nr >= CHUNK_SIZE) 721 BUG_ON(chunk->nr >= CHUNK_SIZE);
722 BUG();
723 chunk->bh[chunk->nr++] = bh; 722 chunk->bh[chunk->nr++] = bh;
724 if (chunk->nr >= CHUNK_SIZE) { 723 if (chunk->nr >= CHUNK_SIZE) {
725 ret = 1; 724 ret = 1;
@@ -788,8 +787,7 @@ static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
788 /* buffer must be locked for __add_jh, should be able to have 787 /* buffer must be locked for __add_jh, should be able to have
789 * two adds at the same time 788 * two adds at the same time
790 */ 789 */
791 if (bh->b_private) 790 BUG_ON(bh->b_private);
792 BUG();
793 jh->bh = bh; 791 jh->bh = bh;
794 bh->b_private = jh; 792 bh->b_private = jh;
795 } 793 }
@@ -2967,8 +2965,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2967 int retval; 2965 int retval;
2968 2966
2969 reiserfs_check_lock_depth(p_s_sb, "journal_begin"); 2967 reiserfs_check_lock_depth(p_s_sb, "journal_begin");
2970 if (nblocks > journal->j_trans_max) 2968 BUG_ON(nblocks > journal->j_trans_max);
2971 BUG();
2972 2969
2973 PROC_INFO_INC(p_s_sb, journal.journal_being); 2970 PROC_INFO_INC(p_s_sb, journal.journal_being);
2974 /* set here for journal_join */ 2971 /* set here for journal_join */
@@ -3084,9 +3081,8 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3084 if (reiserfs_transaction_running(s)) { 3081 if (reiserfs_transaction_running(s)) {
3085 th = current->journal_info; 3082 th = current->journal_info;
3086 th->t_refcount++; 3083 th->t_refcount++;
3087 if (th->t_refcount < 2) { 3084 BUG_ON(th->t_refcount < 2);
3088 BUG(); 3085
3089 }
3090 return th; 3086 return th;
3091 } 3087 }
3092 th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); 3088 th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
@@ -3126,9 +3122,7 @@ static int journal_join(struct reiserfs_transaction_handle *th,
3126 ** pointer 3122 ** pointer
3127 */ 3123 */
3128 th->t_handle_save = cur_th; 3124 th->t_handle_save = cur_th;
3129 if (cur_th && cur_th->t_refcount > 1) { 3125 BUG_ON(cur_th && cur_th->t_refcount > 1);
3130 BUG();
3131 }
3132 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); 3126 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN);
3133} 3127}
3134 3128
@@ -3141,9 +3135,7 @@ int journal_join_abort(struct reiserfs_transaction_handle *th,
3141 ** pointer 3135 ** pointer
3142 */ 3136 */
3143 th->t_handle_save = cur_th; 3137 th->t_handle_save = cur_th;
3144 if (cur_th && cur_th->t_refcount > 1) { 3138 BUG_ON(cur_th && cur_th->t_refcount > 1);
3145 BUG();
3146 }
3147 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); 3139 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT);
3148} 3140}
3149 3141
@@ -3178,8 +3170,7 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3178 current->journal_info = th; 3170 current->journal_info = th;
3179 } 3171 }
3180 ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG); 3172 ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG);
3181 if (current->journal_info != th) 3173 BUG_ON(current->journal_info != th);
3182 BUG();
3183 3174
3184 /* I guess this boils down to being the reciprocal of clm-2100 above. 3175 /* I guess this boils down to being the reciprocal of clm-2100 above.
3185 * If do_journal_begin_r fails, we need to put it back, since journal_end 3176 * If do_journal_begin_r fails, we need to put it back, since journal_end
@@ -3324,8 +3315,7 @@ int journal_end(struct reiserfs_transaction_handle *th,
3324 /* we aren't allowed to close a nested transaction on a different 3315 /* we aren't allowed to close a nested transaction on a different
3325 ** filesystem from the one in the task struct 3316 ** filesystem from the one in the task struct
3326 */ 3317 */
3327 if (cur_th->t_super != th->t_super) 3318 BUG_ON(cur_th->t_super != th->t_super);
3328 BUG();
3329 3319
3330 if (th != cur_th) { 3320 if (th != cur_th) {
3331 memcpy(current->journal_info, th, sizeof(*th)); 3321 memcpy(current->journal_info, th, sizeof(*th));
@@ -3444,9 +3434,7 @@ int journal_end_sync(struct reiserfs_transaction_handle *th,
3444 3434
3445 BUG_ON(!th->t_trans_id); 3435 BUG_ON(!th->t_trans_id);
3446 /* you can sync while nested, very, very bad */ 3436 /* you can sync while nested, very, very bad */
3447 if (th->t_refcount > 1) { 3437 BUG_ON(th->t_refcount > 1);
3448 BUG();
3449 }
3450 if (journal->j_len == 0) { 3438 if (journal->j_len == 0) {
3451 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 3439 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
3452 1); 3440 1);
@@ -3556,9 +3544,8 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3556 ** will be dealt with by next transaction that actually writes something, but should be taken 3544 ** will be dealt with by next transaction that actually writes something, but should be taken
3557 ** care of in this trans 3545 ** care of in this trans
3558 */ 3546 */
3559 if (journal->j_len == 0) { 3547 BUG_ON(journal->j_len == 0);
3560 BUG(); 3548
3561 }
3562 /* if wcount > 0, and we are called to with flush or commit_now, 3549 /* if wcount > 0, and we are called to with flush or commit_now,
3563 ** we wait on j_join_wait. We will wake up when the last writer has 3550 ** we wait on j_join_wait. We will wake up when the last writer has
3564 ** finished the transaction, and started it on its way to the disk. 3551 ** finished the transaction, and started it on its way to the disk.
@@ -3592,9 +3579,8 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3592 unlock_journal(p_s_sb); 3579 unlock_journal(p_s_sb);
3593 } 3580 }
3594 } 3581 }
3595 if (journal->j_trans_id == trans_id) { 3582 BUG_ON(journal->j_trans_id == trans_id);
3596 BUG(); 3583
3597 }
3598 if (commit_now 3584 if (commit_now
3599 && journal_list_still_alive(p_s_sb, trans_id) 3585 && journal_list_still_alive(p_s_sb, trans_id)
3600 && wait_on_commit) { 3586 && wait_on_commit) {
@@ -4074,9 +4060,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4074 set_commit_trans_len(commit, journal->j_len); 4060 set_commit_trans_len(commit, journal->j_len);
4075 4061
4076 /* special check in case all buffers in the journal were marked for not logging */ 4062 /* special check in case all buffers in the journal were marked for not logging */
4077 if (journal->j_len == 0) { 4063 BUG_ON(journal->j_len == 0);
4078 BUG();
4079 }
4080 4064
4081 /* we're about to dirty all the log blocks, mark the description block 4065 /* we're about to dirty all the log blocks, mark the description block
4082 * dirty now too. Don't mark the commit block dirty until all the 4066 * dirty now too. Don't mark the commit block dirty until all the
@@ -4173,8 +4157,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4173 journal, jl, &jl->j_tail_bh_list); 4157 journal, jl, &jl->j_tail_bh_list);
4174 lock_kernel(); 4158 lock_kernel();
4175 } 4159 }
4176 if (!list_empty(&jl->j_tail_bh_list)) 4160 BUG_ON(!list_empty(&jl->j_tail_bh_list));
4177 BUG();
4178 up(&jl->j_commit_lock); 4161 up(&jl->j_commit_lock);
4179 4162
4180 /* honor the flush wishes from the caller, simple commits can 4163 /* honor the flush wishes from the caller, simple commits can
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 16e9cff8f15d..abde1edc2235 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -67,8 +67,7 @@ inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de)
67{ 67{
68 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; 68 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
69 69
70 if (de->de_entry_num >= ih_entry_count(de->de_ih)) 70 BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih));
71 BUG();
72 71
73 de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num); 72 de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num);
74 de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0); 73 de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0);
@@ -80,8 +79,7 @@ inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de)
80// what entry points to 79// what entry points to
81static inline void set_de_object_key(struct reiserfs_dir_entry *de) 80static inline void set_de_object_key(struct reiserfs_dir_entry *de)
82{ 81{
83 if (de->de_entry_num >= ih_entry_count(de->de_ih)) 82 BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih));
84 BUG();
85 de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num])); 83 de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num]));
86 de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num])); 84 de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num]));
87} 85}
@@ -90,8 +88,7 @@ static inline void store_de_entry_key(struct reiserfs_dir_entry *de)
90{ 88{
91 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; 89 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
92 90
93 if (de->de_entry_num >= ih_entry_count(de->de_ih)) 91 BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih));
94 BUG();
95 92
96 /* store key of the found entry */ 93 /* store key of the found entry */
97 de->de_entry_key.version = KEY_FORMAT_3_5; 94 de->de_entry_key.version = KEY_FORMAT_3_5;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 8b9b13127136..5240abe1a709 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1476,9 +1476,7 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1476 int n_block_size = p_s_sb->s_blocksize; 1476 int n_block_size = p_s_sb->s_blocksize;
1477 int cut_bytes; 1477 int cut_bytes;
1478 BUG_ON(!th->t_trans_id); 1478 BUG_ON(!th->t_trans_id);
1479 1479 BUG_ON(n_new_file_size != p_s_inode->i_size);
1480 if (n_new_file_size != p_s_inode->i_size)
1481 BUG();
1482 1480
1483 /* the page being sent in could be NULL if there was an i/o error 1481 /* the page being sent in could be NULL if there was an i/o error
1484 ** reading in the last block. The user will hit problems trying to 1482 ** reading in the last block. The user will hit problems trying to
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d935fb9394e3..7bdb0ed443e1 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -773,7 +773,7 @@ int reiserfs_xattr_del(struct inode *inode, const char *name)
773 773
774static int 774static int
775reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen, 775reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen,
776 loff_t offset, ino_t ino, unsigned int d_type) 776 loff_t offset, u64 ino, unsigned int d_type)
777{ 777{
778 struct dentry *xadir = (struct dentry *)buf; 778 struct dentry *xadir = (struct dentry *)buf;
779 779
@@ -851,7 +851,7 @@ struct reiserfs_chown_buf {
851/* XXX: If there is a better way to do this, I'd love to hear about it */ 851/* XXX: If there is a better way to do this, I'd love to hear about it */
852static int 852static int
853reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen, 853reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen,
854 loff_t offset, ino_t ino, unsigned int d_type) 854 loff_t offset, u64 ino, unsigned int d_type)
855{ 855{
856 struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf; 856 struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf;
857 struct dentry *xafile, *xadir = chown_buf->xadir; 857 struct dentry *xafile, *xadir = chown_buf->xadir;
@@ -1036,7 +1036,7 @@ struct reiserfs_listxattr_buf {
1036 1036
1037static int 1037static int
1038reiserfs_listxattr_filler(void *buf, const char *name, int namelen, 1038reiserfs_listxattr_filler(void *buf, const char *name, int namelen,
1039 loff_t offset, ino_t ino, unsigned int d_type) 1039 loff_t offset, u64 ino, unsigned int d_type)
1040{ 1040{
1041 struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf; 1041 struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf;
1042 int len = 0; 1042 int len = 0;
diff --git a/fs/stat.c b/fs/stat.c
index 60a31d5e5966..bca07eb2003c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -140,6 +140,8 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
140 memset(&tmp, 0, sizeof(struct __old_kernel_stat)); 140 memset(&tmp, 0, sizeof(struct __old_kernel_stat));
141 tmp.st_dev = old_encode_dev(stat->dev); 141 tmp.st_dev = old_encode_dev(stat->dev);
142 tmp.st_ino = stat->ino; 142 tmp.st_ino = stat->ino;
143 if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
144 return -EOVERFLOW;
143 tmp.st_mode = stat->mode; 145 tmp.st_mode = stat->mode;
144 tmp.st_nlink = stat->nlink; 146 tmp.st_nlink = stat->nlink;
145 if (tmp.st_nlink != stat->nlink) 147 if (tmp.st_nlink != stat->nlink)
@@ -210,6 +212,8 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
210 tmp.st_dev = new_encode_dev(stat->dev); 212 tmp.st_dev = new_encode_dev(stat->dev);
211#endif 213#endif
212 tmp.st_ino = stat->ino; 214 tmp.st_ino = stat->ino;
215 if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
216 return -EOVERFLOW;
213 tmp.st_mode = stat->mode; 217 tmp.st_mode = stat->mode;
214 tmp.st_nlink = stat->nlink; 218 tmp.st_nlink = stat->nlink;
215 if (tmp.st_nlink != stat->nlink) 219 if (tmp.st_nlink != stat->nlink)
@@ -347,6 +351,8 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf)
347 tmp.st_rdev = huge_encode_dev(stat->rdev); 351 tmp.st_rdev = huge_encode_dev(stat->rdev);
348#endif 352#endif
349 tmp.st_ino = stat->ino; 353 tmp.st_ino = stat->ino;
354 if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
355 return -EOVERFLOW;
350#ifdef STAT64_HAS_BROKEN_ST_INO 356#ifdef STAT64_HAS_BROKEN_ST_INO
351 tmp.__st_ino = stat->ino; 357 tmp.__st_ino = stat->ino;
352#endif 358#endif
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index cf3786625bfa..146f1dedec84 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -157,8 +157,8 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
157 if ((retval = fill_read_buffer(file->f_dentry,buffer))) 157 if ((retval = fill_read_buffer(file->f_dentry,buffer)))
158 goto out; 158 goto out;
159 } 159 }
160 pr_debug("%s: count = %d, ppos = %lld, buf = %s\n", 160 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
161 __FUNCTION__,count,*ppos,buffer->page); 161 __FUNCTION__, count, *ppos, buffer->page);
162 retval = flush_read_buffer(buffer,buf,count,ppos); 162 retval = flush_read_buffer(buffer,buf,count,ppos);
163out: 163out:
164 up(&buffer->sem); 164 up(&buffer->sem);
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 36fbeccdc722..c75f68361e33 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -53,8 +53,7 @@ cmn_err(register int level, char *fmt, ...)
53 va_end(ap); 53 va_end(ap);
54 spin_unlock_irqrestore(&xfs_err_lock,flags); 54 spin_unlock_irqrestore(&xfs_err_lock,flags);
55 55
56 if (level == CE_PANIC) 56 BUG_ON(level == CE_PANIC);
57 BUG();
58} 57}
59 58
60void 59void
@@ -72,8 +71,7 @@ icmn_err(register int level, char *fmt, va_list ap)
72 strcat(message, "\n"); 71 strcat(message, "\n");
73 spin_unlock_irqrestore(&xfs_err_lock,flags); 72 spin_unlock_irqrestore(&xfs_err_lock,flags);
74 printk("%s%s", err_level[level], message); 73 printk("%s%s", err_level[level], message);
75 if (level == CE_PANIC) 74 BUG_ON(level == CE_PANIC);
76 BUG();
77} 75}
78 76
79void 77void