diff options
Diffstat (limited to 'fs')
162 files changed, 10244 insertions, 2809 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index fa64867d6ed6..599de54451af 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -74,11 +74,11 @@ config EXT3_FS | |||
74 | tristate "Ext3 journalling file system support" | 74 | tristate "Ext3 journalling file system support" |
75 | select JBD | 75 | select JBD |
76 | help | 76 | help |
77 | This is the journaling version of the Second extended file system | 77 | This is the journalling version of the Second extended file system |
78 | (often called ext3), the de facto standard Linux file system | 78 | (often called ext3), the de facto standard Linux file system |
79 | (method to organize files on a storage device) for hard disks. | 79 | (method to organize files on a storage device) for hard disks. |
80 | 80 | ||
81 | The journaling code included in this driver means you do not have | 81 | The journalling code included in this driver means you do not have |
82 | to run e2fsck (file system checker) on your file systems after a | 82 | to run e2fsck (file system checker) on your file systems after a |
83 | crash. The journal keeps track of any changes that were being made | 83 | crash. The journal keeps track of any changes that were being made |
84 | at the time the system crashed, and can ensure that your file system | 84 | at the time the system crashed, and can ensure that your file system |
@@ -143,7 +143,7 @@ config EXT3_FS_SECURITY | |||
143 | config JBD | 143 | config JBD |
144 | tristate | 144 | tristate |
145 | help | 145 | help |
146 | This is a generic journaling layer for block devices. It is | 146 | This is a generic journalling layer for block devices. It is |
147 | currently used by the ext3 and OCFS2 file systems, but it could | 147 | currently used by the ext3 and OCFS2 file systems, but it could |
148 | also be used to add journal support to other file systems or block | 148 | also be used to add journal support to other file systems or block |
149 | devices such as RAID or LVM. | 149 | devices such as RAID or LVM. |
@@ -183,7 +183,7 @@ config REISERFS_FS | |||
183 | tristate "Reiserfs support" | 183 | tristate "Reiserfs support" |
184 | help | 184 | help |
185 | Stores not just filenames but the files themselves in a balanced | 185 | Stores not just filenames but the files themselves in a balanced |
186 | tree. Uses journaling. | 186 | tree. Uses journalling. |
187 | 187 | ||
188 | Balanced trees are more efficient than traditional file system | 188 | Balanced trees are more efficient than traditional file system |
189 | architectural foundations. | 189 | architectural foundations. |
@@ -996,6 +996,18 @@ config AFFS_FS | |||
996 | To compile this file system support as a module, choose M here: the | 996 | To compile this file system support as a module, choose M here: the |
997 | module will be called affs. If unsure, say N. | 997 | module will be called affs. If unsure, say N. |
998 | 998 | ||
999 | config ECRYPT_FS | ||
1000 | tristate "eCrypt filesystem layer support (EXPERIMENTAL)" | ||
1001 | depends on EXPERIMENTAL && KEYS && CRYPTO | ||
1002 | help | ||
1003 | Encrypted filesystem that operates on the VFS layer. See | ||
1004 | <file:Documentation/ecryptfs.txt> to learn more about | ||
1005 | eCryptfs. Userspace components are required and can be | ||
1006 | obtained from <http://ecryptfs.sf.net>. | ||
1007 | |||
1008 | To compile this file system support as a module, choose M here: the | ||
1009 | module will be called ecryptfs. | ||
1010 | |||
999 | config HFS_FS | 1011 | config HFS_FS |
1000 | tristate "Apple Macintosh file system support (EXPERIMENTAL)" | 1012 | tristate "Apple Macintosh file system support (EXPERIMENTAL)" |
1001 | depends on BLOCK && EXPERIMENTAL | 1013 | depends on BLOCK && EXPERIMENTAL |
@@ -1033,7 +1045,7 @@ config BEFS_FS | |||
1033 | on files and directories, and database-like indeces on selected | 1045 | on files and directories, and database-like indeces on selected |
1034 | attributes. (Also note that this driver doesn't make those features | 1046 | attributes. (Also note that this driver doesn't make those features |
1035 | available at this time). It is a 64 bit filesystem, so it supports | 1047 | available at this time). It is a 64 bit filesystem, so it supports |
1036 | extremly large volumes and files. | 1048 | extremely large volumes and files. |
1037 | 1049 | ||
1038 | If you use this filesystem, you should also say Y to at least one | 1050 | If you use this filesystem, you should also say Y to at least one |
1039 | of the NLS (native language support) options below. | 1051 | of the NLS (native language support) options below. |
@@ -1091,7 +1103,7 @@ config JFFS_FS | |||
1091 | tristate "Journalling Flash File System (JFFS) support" | 1103 | tristate "Journalling Flash File System (JFFS) support" |
1092 | depends on MTD && BLOCK | 1104 | depends on MTD && BLOCK |
1093 | help | 1105 | help |
1094 | JFFS is the Journaling Flash File System developed by Axis | 1106 | JFFS is the Journalling Flash File System developed by Axis |
1095 | Communications in Sweden, aimed at providing a crash/powerdown-safe | 1107 | Communications in Sweden, aimed at providing a crash/powerdown-safe |
1096 | file system for disk-less embedded devices. Further information is | 1108 | file system for disk-less embedded devices. Further information is |
1097 | available at (<http://developer.axis.com/software/jffs/>). | 1109 | available at (<http://developer.axis.com/software/jffs/>). |
@@ -1261,7 +1273,7 @@ config JFFS2_CMODE_NONE | |||
1261 | config JFFS2_CMODE_PRIORITY | 1273 | config JFFS2_CMODE_PRIORITY |
1262 | bool "priority" | 1274 | bool "priority" |
1263 | help | 1275 | help |
1264 | Tries the compressors in a predefinied order and chooses the first | 1276 | Tries the compressors in a predefined order and chooses the first |
1265 | successful one. | 1277 | successful one. |
1266 | 1278 | ||
1267 | config JFFS2_CMODE_SIZE | 1279 | config JFFS2_CMODE_SIZE |
@@ -1366,7 +1378,7 @@ config SYSV_FS | |||
1366 | 1378 | ||
1367 | If you have floppies or hard disk partitions like that, it is likely | 1379 | If you have floppies or hard disk partitions like that, it is likely |
1368 | that they contain binaries from those other Unix systems; in order | 1380 | that they contain binaries from those other Unix systems; in order |
1369 | to run these binaries, you will want to install linux-abi which is a | 1381 | to run these binaries, you will want to install linux-abi which is |
1370 | a set of kernel modules that lets you run SCO, Xenix, Wyse, | 1382 | a set of kernel modules that lets you run SCO, Xenix, Wyse, |
1371 | UnixWare, Dell Unix and System V programs under Linux. It is | 1383 | UnixWare, Dell Unix and System V programs under Linux. It is |
1372 | available via FTP (user: ftp) from | 1384 | available via FTP (user: ftp) from |
@@ -1951,7 +1963,7 @@ config AFS_FS | |||
1951 | If you say Y here, you will get an experimental Andrew File System | 1963 | If you say Y here, you will get an experimental Andrew File System |
1952 | driver. It currently only supports unsecured read-only AFS access. | 1964 | driver. It currently only supports unsecured read-only AFS access. |
1953 | 1965 | ||
1954 | See <file:Documentation/filesystems/afs.txt> for more intormation. | 1966 | See <file:Documentation/filesystems/afs.txt> for more information. |
1955 | 1967 | ||
1956 | If unsure, say N. | 1968 | If unsure, say N. |
1957 | 1969 | ||
diff --git a/fs/Makefile b/fs/Makefile index 215f70378177..df614eacee86 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -76,6 +76,7 @@ obj-$(CONFIG_BFS_FS) += bfs/ | |||
76 | obj-$(CONFIG_ISO9660_FS) += isofs/ | 76 | obj-$(CONFIG_ISO9660_FS) += isofs/ |
77 | obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ | 77 | obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ |
78 | obj-$(CONFIG_HFS_FS) += hfs/ | 78 | obj-$(CONFIG_HFS_FS) += hfs/ |
79 | obj-$(CONFIG_ECRYPT_FS) += ecryptfs/ | ||
79 | obj-$(CONFIG_VXFS_FS) += freevxfs/ | 80 | obj-$(CONFIG_VXFS_FS) += freevxfs/ |
80 | obj-$(CONFIG_NFS_FS) += nfs/ | 81 | obj-$(CONFIG_NFS_FS) += nfs/ |
81 | obj-$(CONFIG_EXPORTFS) += exportfs/ | 82 | obj-$(CONFIG_EXPORTFS) += exportfs/ |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2fc99877cb0d..cf8a2cb28505 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -30,7 +30,7 @@ static int afs_dir_readdir(struct file *file, void *dirent, filldir_t filldir); | |||
30 | static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); | 30 | static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); |
31 | static int afs_d_delete(struct dentry *dentry); | 31 | static int afs_d_delete(struct dentry *dentry); |
32 | static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, | 32 | static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, |
33 | loff_t fpos, ino_t ino, unsigned dtype); | 33 | loff_t fpos, u64 ino, unsigned dtype); |
34 | 34 | ||
35 | const struct file_operations afs_dir_file_operations = { | 35 | const struct file_operations afs_dir_file_operations = { |
36 | .open = afs_dir_open, | 36 | .open = afs_dir_open, |
@@ -409,7 +409,7 @@ static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir) | |||
409 | * uniquifier through dtype | 409 | * uniquifier through dtype |
410 | */ | 410 | */ |
411 | static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, | 411 | static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, |
412 | loff_t fpos, ino_t ino, unsigned dtype) | 412 | loff_t fpos, u64 ino, unsigned dtype) |
413 | { | 413 | { |
414 | struct afs_dir_lookup_cookie *cookie = _cookie; | 414 | struct afs_dir_lookup_cookie *cookie = _cookie; |
415 | 415 | ||
@@ -675,7 +675,7 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) | |||
675 | } | 675 | } |
676 | 676 | ||
677 | if (!(iocb->ki_retried & 0xff)) { | 677 | if (!(iocb->ki_retried & 0xff)) { |
678 | pr_debug("%ld retry: %d of %d\n", iocb->ki_retried, | 678 | pr_debug("%ld retry: %zd of %zd\n", iocb->ki_retried, |
679 | iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); | 679 | iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); |
680 | } | 680 | } |
681 | 681 | ||
@@ -1008,7 +1008,7 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2) | |||
1008 | 1008 | ||
1009 | pr_debug("added to ring %p at [%lu]\n", iocb, tail); | 1009 | pr_debug("added to ring %p at [%lu]\n", iocb, tail); |
1010 | 1010 | ||
1011 | pr_debug("%ld retries: %d of %d\n", iocb->ki_retried, | 1011 | pr_debug("%ld retries: %zd of %zd\n", iocb->ki_retried, |
1012 | iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); | 1012 | iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); |
1013 | put_rq: | 1013 | put_rq: |
1014 | /* everything turned out well, dispose of the aiocb. */ | 1014 | /* everything turned out well, dispose of the aiocb. */ |
diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h index 9095518e918d..63ef1e18fb84 100644 --- a/fs/befs/befs_fs_types.h +++ b/fs/befs/befs_fs_types.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * include/linux/befs_fs_types.h | 2 | * fs/befs/befs_fs_types.h |
3 | * | 3 | * |
4 | * Copyright (C) 2001 Will Dyson (will@cs.earlham.edu) | 4 | * Copyright (C) 2001 Will Dyson (will@cs.earlham.edu) |
5 | * | 5 | * |
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 32b5d625ce9c..5bcdaaf4eae0 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/personality.h> | 29 | #include <linux/personality.h> |
30 | #include <linux/init.h> | 30 | #include <linux/init.h> |
31 | 31 | ||
32 | #include <asm/a.out.h> | ||
32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
33 | #include <asm/pgtable.h> | 34 | #include <asm/pgtable.h> |
34 | 35 | ||
@@ -194,6 +195,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
194 | unsigned long som_entry; | 195 | unsigned long som_entry; |
195 | struct som_hdr *som_ex; | 196 | struct som_hdr *som_ex; |
196 | struct som_exec_auxhdr *hpuxhdr; | 197 | struct som_exec_auxhdr *hpuxhdr; |
198 | struct files_struct *files; | ||
197 | 199 | ||
198 | /* Get the exec-header */ | 200 | /* Get the exec-header */ |
199 | som_ex = (struct som_hdr *) bprm->buf; | 201 | som_ex = (struct som_hdr *) bprm->buf; |
@@ -208,15 +210,27 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
208 | size = som_ex->aux_header_size; | 210 | size = som_ex->aux_header_size; |
209 | if (size > SOM_PAGESIZE) | 211 | if (size > SOM_PAGESIZE) |
210 | goto out; | 212 | goto out; |
211 | hpuxhdr = (struct som_exec_auxhdr *) kmalloc(size, GFP_KERNEL); | 213 | hpuxhdr = kmalloc(size, GFP_KERNEL); |
212 | if (!hpuxhdr) | 214 | if (!hpuxhdr) |
213 | goto out; | 215 | goto out; |
214 | 216 | ||
215 | retval = kernel_read(bprm->file, som_ex->aux_header_location, | 217 | retval = kernel_read(bprm->file, som_ex->aux_header_location, |
216 | (char *) hpuxhdr, size); | 218 | (char *) hpuxhdr, size); |
219 | if (retval != size) { | ||
220 | if (retval >= 0) | ||
221 | retval = -EIO; | ||
222 | goto out_free; | ||
223 | } | ||
224 | |||
225 | files = current->files; /* Refcounted so ok */ | ||
226 | retval = unshare_files(); | ||
217 | if (retval < 0) | 227 | if (retval < 0) |
218 | goto out_free; | 228 | goto out_free; |
219 | #error "Fix security hole before enabling me" | 229 | if (files == current->files) { |
230 | put_files_struct(files); | ||
231 | files = NULL; | ||
232 | } | ||
233 | |||
220 | retval = get_unused_fd(); | 234 | retval = get_unused_fd(); |
221 | if (retval < 0) | 235 | if (retval < 0) |
222 | goto out_free; | 236 | goto out_free; |
diff --git a/fs/cifs/README b/fs/cifs/README index 5f0e1bd64fee..432e515431c4 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -269,7 +269,7 @@ A partial list of the supported mount options follows: | |||
269 | (gid) mount option is specified. For the uid (gid) of newly | 269 | (gid) mount option is specified. For the uid (gid) of newly |
270 | created files and directories, ie files created since | 270 | created files and directories, ie files created since |
271 | the last mount of the server share, the expected uid | 271 | the last mount of the server share, the expected uid |
272 | (gid) is cached as as long as the inode remains in | 272 | (gid) is cached as long as the inode remains in |
273 | memory on the client. Also note that permission | 273 | memory on the client. Also note that permission |
274 | checks (authorization checks) on accesses to a file occur | 274 | checks (authorization checks) on accesses to a file occur |
275 | at the server, but there are cases in which an administrator | 275 | at the server, but there are cases in which an administrator |
@@ -375,7 +375,7 @@ A partial list of the supported mount options follows: | |||
375 | the local process on newly created files, directories, and | 375 | the local process on newly created files, directories, and |
376 | devices (create, mkdir, mknod). If the CIFS Unix Extensions | 376 | devices (create, mkdir, mknod). If the CIFS Unix Extensions |
377 | are not negotiated, for newly created files and directories | 377 | are not negotiated, for newly created files and directories |
378 | instead of using the default uid and gid specified on the | 378 | instead of using the default uid and gid specified on |
379 | the mount, cache the new file's uid and gid locally which means | 379 | the mount, cache the new file's uid and gid locally which means |
380 | that the uid for the file can change when the inode is | 380 | that the uid for the file can change when the inode is |
381 | reloaded (or the user remounts the share). | 381 | reloaded (or the user remounts the share). |
@@ -440,7 +440,7 @@ A partial list of the supported mount options follows: | |||
440 | create device files and fifos in a format compatible with | 440 | create device files and fifos in a format compatible with |
441 | Services for Unix (SFU). In addition retrieve bits 10-12 | 441 | Services for Unix (SFU). In addition retrieve bits 10-12 |
442 | of the mode via the SETFILEBITS extended attribute (as | 442 | of the mode via the SETFILEBITS extended attribute (as |
443 | SFU does). In the future the bottom 9 bits of the mode | 443 | SFU does). In the future the bottom 9 bits of the |
444 | mode also will be emulated using queries of the security | 444 | mode also will be emulated using queries of the security |
445 | descriptor (ACL). | 445 | descriptor (ACL). |
446 | sign Must use packet signing (helps avoid unwanted data modification | 446 | sign Must use packet signing (helps avoid unwanted data modification |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0e9ba0b9d71e..c78762051da4 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -772,12 +772,12 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) | |||
772 | separator[1] = 0; | 772 | separator[1] = 0; |
773 | 773 | ||
774 | memset(vol->source_rfc1001_name,0x20,15); | 774 | memset(vol->source_rfc1001_name,0x20,15); |
775 | for(i=0;i < strnlen(system_utsname.nodename,15);i++) { | 775 | for(i=0;i < strnlen(utsname()->nodename,15);i++) { |
776 | /* does not have to be a perfect mapping since the field is | 776 | /* does not have to be a perfect mapping since the field is |
777 | informational, only used for servers that do not support | 777 | informational, only used for servers that do not support |
778 | port 445 and it can be overridden at mount time */ | 778 | port 445 and it can be overridden at mount time */ |
779 | vol->source_rfc1001_name[i] = | 779 | vol->source_rfc1001_name[i] = |
780 | toupper(system_utsname.nodename[i]); | 780 | toupper(utsname()->nodename[i]); |
781 | } | 781 | } |
782 | vol->source_rfc1001_name[15] = 0; | 782 | vol->source_rfc1001_name[15] = 0; |
783 | /* null target name indicates to use *SMBSERVR default called name | 783 | /* null target name indicates to use *SMBSERVR default called name |
@@ -2153,7 +2153,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2153 | 32, nls_codepage); | 2153 | 32, nls_codepage); |
2154 | bcc_ptr += 2 * bytes_returned; | 2154 | bcc_ptr += 2 * bytes_returned; |
2155 | bytes_returned = | 2155 | bytes_returned = |
2156 | cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, | 2156 | cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, |
2157 | 32, nls_codepage); | 2157 | 32, nls_codepage); |
2158 | bcc_ptr += 2 * bytes_returned; | 2158 | bcc_ptr += 2 * bytes_returned; |
2159 | bcc_ptr += 2; | 2159 | bcc_ptr += 2; |
@@ -2180,8 +2180,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2180 | } | 2180 | } |
2181 | strcpy(bcc_ptr, "Linux version "); | 2181 | strcpy(bcc_ptr, "Linux version "); |
2182 | bcc_ptr += strlen("Linux version "); | 2182 | bcc_ptr += strlen("Linux version "); |
2183 | strcpy(bcc_ptr, system_utsname.release); | 2183 | strcpy(bcc_ptr, utsname()->release); |
2184 | bcc_ptr += strlen(system_utsname.release) + 1; | 2184 | bcc_ptr += strlen(utsname()->release) + 1; |
2185 | strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); | 2185 | strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); |
2186 | bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; | 2186 | bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; |
2187 | } | 2187 | } |
@@ -2445,7 +2445,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
2445 | 32, nls_codepage); | 2445 | 32, nls_codepage); |
2446 | bcc_ptr += 2 * bytes_returned; | 2446 | bcc_ptr += 2 * bytes_returned; |
2447 | bytes_returned = | 2447 | bytes_returned = |
2448 | cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32, | 2448 | cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32, |
2449 | nls_codepage); | 2449 | nls_codepage); |
2450 | bcc_ptr += 2 * bytes_returned; | 2450 | bcc_ptr += 2 * bytes_returned; |
2451 | bcc_ptr += 2; /* null terminate Linux version */ | 2451 | bcc_ptr += 2; /* null terminate Linux version */ |
@@ -2462,8 +2462,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
2462 | } else { /* ASCII */ | 2462 | } else { /* ASCII */ |
2463 | strcpy(bcc_ptr, "Linux version "); | 2463 | strcpy(bcc_ptr, "Linux version "); |
2464 | bcc_ptr += strlen("Linux version "); | 2464 | bcc_ptr += strlen("Linux version "); |
2465 | strcpy(bcc_ptr, system_utsname.release); | 2465 | strcpy(bcc_ptr, utsname()->release); |
2466 | bcc_ptr += strlen(system_utsname.release) + 1; | 2466 | bcc_ptr += strlen(utsname()->release) + 1; |
2467 | strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); | 2467 | strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); |
2468 | bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; | 2468 | bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; |
2469 | bcc_ptr++; /* empty domain field */ | 2469 | bcc_ptr++; /* empty domain field */ |
@@ -2836,7 +2836,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2836 | 32, nls_codepage); | 2836 | 32, nls_codepage); |
2837 | bcc_ptr += 2 * bytes_returned; | 2837 | bcc_ptr += 2 * bytes_returned; |
2838 | bytes_returned = | 2838 | bytes_returned = |
2839 | cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32, | 2839 | cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32, |
2840 | nls_codepage); | 2840 | nls_codepage); |
2841 | bcc_ptr += 2 * bytes_returned; | 2841 | bcc_ptr += 2 * bytes_returned; |
2842 | bcc_ptr += 2; /* null term version string */ | 2842 | bcc_ptr += 2; /* null term version string */ |
@@ -2888,8 +2888,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2888 | 2888 | ||
2889 | strcpy(bcc_ptr, "Linux version "); | 2889 | strcpy(bcc_ptr, "Linux version "); |
2890 | bcc_ptr += strlen("Linux version "); | 2890 | bcc_ptr += strlen("Linux version "); |
2891 | strcpy(bcc_ptr, system_utsname.release); | 2891 | strcpy(bcc_ptr, utsname()->release); |
2892 | bcc_ptr += strlen(system_utsname.release) + 1; | 2892 | bcc_ptr += strlen(utsname()->release) + 1; |
2893 | strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); | 2893 | strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); |
2894 | bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; | 2894 | bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; |
2895 | bcc_ptr++; /* null domain */ | 2895 | bcc_ptr++; /* null domain */ |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d1705ab8136e..22b4c35dcfe3 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -111,7 +111,7 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, | |||
111 | bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32, | 111 | bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32, |
112 | nls_cp); | 112 | nls_cp); |
113 | bcc_ptr += 2 * bytes_ret; | 113 | bcc_ptr += 2 * bytes_ret; |
114 | bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, | 114 | bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release, |
115 | 32, nls_cp); | 115 | 32, nls_cp); |
116 | bcc_ptr += 2 * bytes_ret; | 116 | bcc_ptr += 2 * bytes_ret; |
117 | bcc_ptr += 2; /* trailing null */ | 117 | bcc_ptr += 2; /* trailing null */ |
@@ -158,8 +158,8 @@ static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, | |||
158 | 158 | ||
159 | strcpy(bcc_ptr, "Linux version "); | 159 | strcpy(bcc_ptr, "Linux version "); |
160 | bcc_ptr += strlen("Linux version "); | 160 | bcc_ptr += strlen("Linux version "); |
161 | strcpy(bcc_ptr, system_utsname.release); | 161 | strcpy(bcc_ptr, init_utsname()->release); |
162 | bcc_ptr += strlen(system_utsname.release) + 1; | 162 | bcc_ptr += strlen(init_utsname()->release) + 1; |
163 | 163 | ||
164 | strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); | 164 | strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); |
165 | bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; | 165 | bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; |
diff --git a/fs/compat.c b/fs/compat.c index 13fb08d096c4..4d3fbcb2ddb1 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -56,8 +56,6 @@ | |||
56 | 56 | ||
57 | int compat_log = 1; | 57 | int compat_log = 1; |
58 | 58 | ||
59 | extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); | ||
60 | |||
61 | int compat_printk(const char *fmt, ...) | 59 | int compat_printk(const char *fmt, ...) |
62 | { | 60 | { |
63 | va_list ap; | 61 | va_list ap; |
@@ -916,20 +914,24 @@ struct compat_readdir_callback { | |||
916 | }; | 914 | }; |
917 | 915 | ||
918 | static int compat_fillonedir(void *__buf, const char *name, int namlen, | 916 | static int compat_fillonedir(void *__buf, const char *name, int namlen, |
919 | loff_t offset, ino_t ino, unsigned int d_type) | 917 | loff_t offset, u64 ino, unsigned int d_type) |
920 | { | 918 | { |
921 | struct compat_readdir_callback *buf = __buf; | 919 | struct compat_readdir_callback *buf = __buf; |
922 | struct compat_old_linux_dirent __user *dirent; | 920 | struct compat_old_linux_dirent __user *dirent; |
921 | compat_ulong_t d_ino; | ||
923 | 922 | ||
924 | if (buf->result) | 923 | if (buf->result) |
925 | return -EINVAL; | 924 | return -EINVAL; |
925 | d_ino = ino; | ||
926 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | ||
927 | return -EOVERFLOW; | ||
926 | buf->result++; | 928 | buf->result++; |
927 | dirent = buf->dirent; | 929 | dirent = buf->dirent; |
928 | if (!access_ok(VERIFY_WRITE, dirent, | 930 | if (!access_ok(VERIFY_WRITE, dirent, |
929 | (unsigned long)(dirent->d_name + namlen + 1) - | 931 | (unsigned long)(dirent->d_name + namlen + 1) - |
930 | (unsigned long)dirent)) | 932 | (unsigned long)dirent)) |
931 | goto efault; | 933 | goto efault; |
932 | if ( __put_user(ino, &dirent->d_ino) || | 934 | if ( __put_user(d_ino, &dirent->d_ino) || |
933 | __put_user(offset, &dirent->d_offset) || | 935 | __put_user(offset, &dirent->d_offset) || |
934 | __put_user(namlen, &dirent->d_namlen) || | 936 | __put_user(namlen, &dirent->d_namlen) || |
935 | __copy_to_user(dirent->d_name, name, namlen) || | 937 | __copy_to_user(dirent->d_name, name, namlen) || |
@@ -980,22 +982,26 @@ struct compat_getdents_callback { | |||
980 | }; | 982 | }; |
981 | 983 | ||
982 | static int compat_filldir(void *__buf, const char *name, int namlen, | 984 | static int compat_filldir(void *__buf, const char *name, int namlen, |
983 | loff_t offset, ino_t ino, unsigned int d_type) | 985 | loff_t offset, u64 ino, unsigned int d_type) |
984 | { | 986 | { |
985 | struct compat_linux_dirent __user * dirent; | 987 | struct compat_linux_dirent __user * dirent; |
986 | struct compat_getdents_callback *buf = __buf; | 988 | struct compat_getdents_callback *buf = __buf; |
989 | compat_ulong_t d_ino; | ||
987 | int reclen = COMPAT_ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); | 990 | int reclen = COMPAT_ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); |
988 | 991 | ||
989 | buf->error = -EINVAL; /* only used if we fail.. */ | 992 | buf->error = -EINVAL; /* only used if we fail.. */ |
990 | if (reclen > buf->count) | 993 | if (reclen > buf->count) |
991 | return -EINVAL; | 994 | return -EINVAL; |
995 | d_ino = ino; | ||
996 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | ||
997 | return -EOVERFLOW; | ||
992 | dirent = buf->previous; | 998 | dirent = buf->previous; |
993 | if (dirent) { | 999 | if (dirent) { |
994 | if (__put_user(offset, &dirent->d_off)) | 1000 | if (__put_user(offset, &dirent->d_off)) |
995 | goto efault; | 1001 | goto efault; |
996 | } | 1002 | } |
997 | dirent = buf->current_dir; | 1003 | dirent = buf->current_dir; |
998 | if (__put_user(ino, &dirent->d_ino)) | 1004 | if (__put_user(d_ino, &dirent->d_ino)) |
999 | goto efault; | 1005 | goto efault; |
1000 | if (__put_user(reclen, &dirent->d_reclen)) | 1006 | if (__put_user(reclen, &dirent->d_reclen)) |
1001 | goto efault; | 1007 | goto efault; |
@@ -1066,7 +1072,7 @@ struct compat_getdents_callback64 { | |||
1066 | }; | 1072 | }; |
1067 | 1073 | ||
1068 | static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset, | 1074 | static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset, |
1069 | ino_t ino, unsigned int d_type) | 1075 | u64 ino, unsigned int d_type) |
1070 | { | 1076 | { |
1071 | struct linux_dirent64 __user *dirent; | 1077 | struct linux_dirent64 __user *dirent; |
1072 | struct compat_getdents_callback64 *buf = __buf; | 1078 | struct compat_getdents_callback64 *buf = __buf; |
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 85105e50f7db..e6d5754a715e 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
@@ -137,8 +137,8 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp | |||
137 | if ((retval = fill_read_buffer(file->f_dentry,buffer))) | 137 | if ((retval = fill_read_buffer(file->f_dentry,buffer))) |
138 | goto out; | 138 | goto out; |
139 | } | 139 | } |
140 | pr_debug("%s: count = %d, ppos = %lld, buf = %s\n", | 140 | pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", |
141 | __FUNCTION__,count,*ppos,buffer->page); | 141 | __FUNCTION__, count, *ppos, buffer->page); |
142 | retval = flush_read_buffer(buffer,buf,count,ppos); | 142 | retval = flush_read_buffer(buffer,buf,count,ppos); |
143 | out: | 143 | out: |
144 | up(&buffer->sem); | 144 | up(&buffer->sem); |
diff --git a/fs/dcache.c b/fs/dcache.c index fc2faa44f8d1..2355bddad8de 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -291,9 +291,9 @@ struct dentry * dget_locked(struct dentry *dentry) | |||
291 | * it can be unhashed only if it has no children, or if it is the root | 291 | * it can be unhashed only if it has no children, or if it is the root |
292 | * of a filesystem. | 292 | * of a filesystem. |
293 | * | 293 | * |
294 | * If the inode has a DCACHE_DISCONNECTED alias, then prefer | 294 | * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer |
295 | * any other hashed alias over that one unless @want_discon is set, | 295 | * any other hashed alias over that one unless @want_discon is set, |
296 | * in which case only return a DCACHE_DISCONNECTED alias. | 296 | * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. |
297 | */ | 297 | */ |
298 | 298 | ||
299 | static struct dentry * __d_find_alias(struct inode *inode, int want_discon) | 299 | static struct dentry * __d_find_alias(struct inode *inode, int want_discon) |
@@ -309,7 +309,8 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon) | |||
309 | prefetch(next); | 309 | prefetch(next); |
310 | alias = list_entry(tmp, struct dentry, d_alias); | 310 | alias = list_entry(tmp, struct dentry, d_alias); |
311 | if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { | 311 | if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { |
312 | if (alias->d_flags & DCACHE_DISCONNECTED) | 312 | if (IS_ROOT(alias) && |
313 | (alias->d_flags & DCACHE_DISCONNECTED)) | ||
313 | discon_alias = alias; | 314 | discon_alias = alias; |
314 | else if (!want_discon) { | 315 | else if (!want_discon) { |
315 | __dget_locked(alias); | 316 | __dget_locked(alias); |
@@ -1004,7 +1005,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
1004 | { | 1005 | { |
1005 | struct dentry *new = NULL; | 1006 | struct dentry *new = NULL; |
1006 | 1007 | ||
1007 | if (inode) { | 1008 | if (inode && S_ISDIR(inode->i_mode)) { |
1008 | spin_lock(&dcache_lock); | 1009 | spin_lock(&dcache_lock); |
1009 | new = __d_find_alias(inode, 1); | 1010 | new = __d_find_alias(inode, 1); |
1010 | if (new) { | 1011 | if (new) { |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index ecf3da9edf21..e77676df6713 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -252,7 +252,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir); | |||
252 | * | 252 | * |
253 | * This function removes a file or directory in debugfs that was previously | 253 | * This function removes a file or directory in debugfs that was previously |
254 | * created with a call to another debugfs function (like | 254 | * created with a call to another debugfs function (like |
255 | * debufs_create_file() or variants thereof.) | 255 | * debugfs_create_file() or variants thereof.) |
256 | * | 256 | * |
257 | * This function is required to be called in order for the file to be | 257 | * This function is required to be called in order for the file to be |
258 | * removed, no automatic cleanup of files will happen when a module is | 258 | * removed, no automatic cleanup of files will happen when a module is |
diff --git a/fs/dnotify.c b/fs/dnotify.c index f932591df5a4..2b0442db67e0 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c | |||
@@ -92,7 +92,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
92 | prev = &odn->dn_next; | 92 | prev = &odn->dn_next; |
93 | } | 93 | } |
94 | 94 | ||
95 | error = f_setown(filp, current->pid, 0); | 95 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); |
96 | if (error) | 96 | if (error) |
97 | goto out_free; | 97 | goto out_free; |
98 | 98 | ||
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile new file mode 100644 index 000000000000..ca6562451eeb --- /dev/null +++ b/fs/ecryptfs/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | # | ||
2 | # Makefile for the Linux 2.6 eCryptfs | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o | ||
6 | |||
7 | ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o debug.o | ||
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c new file mode 100644 index 000000000000..ed35a9712fa1 --- /dev/null +++ b/fs/ecryptfs/crypto.c | |||
@@ -0,0 +1,1659 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * | ||
4 | * Copyright (C) 1997-2004 Erez Zadok | ||
5 | * Copyright (C) 2001-2004 Stony Brook University | ||
6 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
7 | * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> | ||
8 | * Michael C. Thompson <mcthomps@us.ibm.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License as | ||
12 | * published by the Free Software Foundation; either version 2 of the | ||
13 | * License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
23 | * 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | #include <linux/mount.h> | ||
28 | #include <linux/pagemap.h> | ||
29 | #include <linux/random.h> | ||
30 | #include <linux/compiler.h> | ||
31 | #include <linux/key.h> | ||
32 | #include <linux/namei.h> | ||
33 | #include <linux/crypto.h> | ||
34 | #include <linux/file.h> | ||
35 | #include <linux/scatterlist.h> | ||
36 | #include "ecryptfs_kernel.h" | ||
37 | |||
38 | static int | ||
39 | ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | ||
40 | struct page *dst_page, int dst_offset, | ||
41 | struct page *src_page, int src_offset, int size, | ||
42 | unsigned char *iv); | ||
43 | static int | ||
44 | ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | ||
45 | struct page *dst_page, int dst_offset, | ||
46 | struct page *src_page, int src_offset, int size, | ||
47 | unsigned char *iv); | ||
48 | |||
49 | /** | ||
50 | * ecryptfs_to_hex | ||
51 | * @dst: Buffer to take hex character representation of contents of | ||
52 | * src; must be at least of size (src_size * 2) | ||
53 | * @src: Buffer to be converted to a hex string representation | |
54 | * @src_size: number of bytes to convert | ||
55 | */ | ||
56 | void ecryptfs_to_hex(char *dst, char *src, size_t src_size) | ||
57 | { | ||
58 | int x; | ||
59 | |||
60 | for (x = 0; x < src_size; x++) | ||
61 | sprintf(&dst[x * 2], "%.2x", (unsigned char)src[x]); | ||
62 | } | ||
63 | |||
64 | /** | ||
65 | * ecryptfs_from_hex | ||
66 | * @dst: Buffer to take the bytes from src hex; must be at least of | ||
67 | * size (src_size / 2) | ||
68 | * @src: Buffer to be converted from a hex string representation to raw value | |
69 | * @dst_size: size of dst buffer, or number of hex characters pairs to convert | ||
70 | */ | ||
71 | void ecryptfs_from_hex(char *dst, char *src, int dst_size) | ||
72 | { | ||
73 | int x; | ||
74 | char tmp[3] = { 0, }; | ||
75 | |||
76 | for (x = 0; x < dst_size; x++) { | ||
77 | tmp[0] = src[x * 2]; | ||
78 | tmp[1] = src[x * 2 + 1]; | ||
79 | dst[x] = (unsigned char)simple_strtol(tmp, NULL, 16); | ||
80 | } | ||
81 | } | ||
82 | |||
83 | /** | ||
84 | * ecryptfs_calculate_md5 - calculates the md5 of @src | ||
85 | * @dst: Pointer to 16 bytes of allocated memory | ||
86 | * @crypt_stat: Pointer to crypt_stat struct for the current inode | ||
87 | * @src: Data to be md5'd | ||
88 | * @len: Length of @src | ||
89 | * | ||
90 | * Uses the allocated crypto context that crypt_stat references to | ||
91 | * generate the MD5 sum of the contents of src. | ||
92 | */ | ||
93 | static int ecryptfs_calculate_md5(char *dst, | ||
94 | struct ecryptfs_crypt_stat *crypt_stat, | ||
95 | char *src, int len) | ||
96 | { | ||
97 | int rc = 0; | ||
98 | struct scatterlist sg; | ||
99 | |||
100 | mutex_lock(&crypt_stat->cs_md5_tfm_mutex); | ||
101 | sg_init_one(&sg, (u8 *)src, len); | ||
102 | if (!crypt_stat->md5_tfm) { | ||
103 | crypt_stat->md5_tfm = | ||
104 | crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP); | ||
105 | if (!crypt_stat->md5_tfm) { | ||
106 | rc = -ENOMEM; | ||
107 | ecryptfs_printk(KERN_ERR, "Error attempting to " | ||
108 | "allocate crypto context\n"); | ||
109 | goto out; | ||
110 | } | ||
111 | } | ||
112 | crypto_digest_init(crypt_stat->md5_tfm); | ||
113 | crypto_digest_update(crypt_stat->md5_tfm, &sg, 1); | ||
114 | crypto_digest_final(crypt_stat->md5_tfm, dst); | ||
115 | mutex_unlock(&crypt_stat->cs_md5_tfm_mutex); | ||
116 | out: | ||
117 | return rc; | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * ecryptfs_derive_iv | ||
122 | * @iv: destination for the derived iv value | |
123 | * @crypt_stat: Pointer to crypt_stat struct for the current inode | ||
124 | * @offset: Offset of the page whose iv we are to derive | |
125 | * | ||
126 | * Generate the initialization vector from the given root IV and page | ||
127 | * offset. | ||
128 | * | ||
129 | * Returns zero on success; non-zero on error. | ||
130 | */ | ||
131 | static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, | ||
132 | pgoff_t offset) | ||
133 | { | ||
134 | int rc = 0; | ||
135 | char dst[MD5_DIGEST_SIZE]; | ||
136 | char src[ECRYPTFS_MAX_IV_BYTES + 16]; | ||
137 | |||
138 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
139 | ecryptfs_printk(KERN_DEBUG, "root iv:\n"); | ||
140 | ecryptfs_dump_hex(crypt_stat->root_iv, crypt_stat->iv_bytes); | ||
141 | } | ||
142 | /* TODO: It is probably secure to just cast the least | ||
143 | * significant bits of the root IV into an unsigned long and | ||
144 | * add the offset to that rather than go through all this | ||
145 | * hashing business. -Halcrow */ | ||
146 | memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes); | ||
147 | memset((src + crypt_stat->iv_bytes), 0, 16); | ||
148 | snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset); | ||
149 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
150 | ecryptfs_printk(KERN_DEBUG, "source:\n"); | ||
151 | ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16)); | ||
152 | } | ||
153 | rc = ecryptfs_calculate_md5(dst, crypt_stat, src, | ||
154 | (crypt_stat->iv_bytes + 16)); | ||
155 | if (rc) { | ||
156 | ecryptfs_printk(KERN_WARNING, "Error attempting to compute " | ||
157 | "MD5 while generating IV for a page\n"); | ||
158 | goto out; | ||
159 | } | ||
160 | memcpy(iv, dst, crypt_stat->iv_bytes); | ||
161 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
162 | ecryptfs_printk(KERN_DEBUG, "derived iv:\n"); | ||
163 | ecryptfs_dump_hex(iv, crypt_stat->iv_bytes); | ||
164 | } | ||
165 | out: | ||
166 | return rc; | ||
167 | } | ||
168 | |||
169 | /** | ||
170 | * ecryptfs_init_crypt_stat | ||
171 | * @crypt_stat: Pointer to the crypt_stat struct to initialize. | ||
172 | * | ||
173 | * Initialize the crypt_stat structure. | ||
174 | */ | ||
175 | void | ||
176 | ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) | ||
177 | { | ||
178 | memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); | ||
179 | mutex_init(&crypt_stat->cs_mutex); | ||
180 | mutex_init(&crypt_stat->cs_tfm_mutex); | ||
181 | mutex_init(&crypt_stat->cs_md5_tfm_mutex); | ||
182 | ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED); | ||
183 | } | ||
184 | |||
185 | /** | ||
186 | * ecryptfs_destruct_crypt_stat | ||
187 | * @crypt_stat: Pointer to the crypt_stat struct to destruct. | |
188 | * | ||
189 | * Releases all memory associated with a crypt_stat struct. | ||
190 | */ | ||
191 | void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) | ||
192 | { | ||
193 | if (crypt_stat->tfm) | ||
194 | crypto_free_tfm(crypt_stat->tfm); | ||
195 | if (crypt_stat->md5_tfm) | ||
196 | crypto_free_tfm(crypt_stat->md5_tfm); | ||
197 | memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); | ||
198 | } | ||
199 | |||
200 | void ecryptfs_destruct_mount_crypt_stat( | ||
201 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat) | ||
202 | { | ||
203 | if (mount_crypt_stat->global_auth_tok_key) | ||
204 | key_put(mount_crypt_stat->global_auth_tok_key); | ||
205 | if (mount_crypt_stat->global_key_tfm) | ||
206 | crypto_free_tfm(mount_crypt_stat->global_key_tfm); | ||
207 | memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat)); | ||
208 | } | ||
209 | |||
210 | /** | ||
211 | * virt_to_scatterlist | ||
212 | * @addr: Virtual address | ||
213 | * @size: Size of data; should be an even multiple of the block size | ||
214 | * @sg: Pointer to scatterlist array; set to NULL to obtain only | ||
215 | * the number of scatterlist structs required in array | ||
216 | * @sg_size: Max array size | ||
217 | * | ||
218 | * Fills in a scatterlist array with page references for a passed | ||
219 | * virtual address. | ||
220 | * | ||
221 | * Returns the number of scatterlist structs in array used | ||
222 | */ | ||
223 | int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, | ||
224 | int sg_size) | ||
225 | { | ||
226 | int i = 0; | ||
227 | struct page *pg; | ||
228 | int offset; | ||
229 | int remainder_of_page; | ||
230 | |||
231 | while (size > 0 && i < sg_size) { | ||
232 | pg = virt_to_page(addr); | ||
233 | offset = offset_in_page(addr); | ||
234 | if (sg) { | ||
235 | sg[i].page = pg; | ||
236 | sg[i].offset = offset; | ||
237 | } | ||
238 | remainder_of_page = PAGE_CACHE_SIZE - offset; | ||
239 | if (size >= remainder_of_page) { | ||
240 | if (sg) | ||
241 | sg[i].length = remainder_of_page; | ||
242 | addr += remainder_of_page; | ||
243 | size -= remainder_of_page; | ||
244 | } else { | ||
245 | if (sg) | ||
246 | sg[i].length = size; | ||
247 | addr += size; | ||
248 | size = 0; | ||
249 | } | ||
250 | i++; | ||
251 | } | ||
252 | if (size > 0) | ||
253 | return -ENOMEM; | ||
254 | return i; | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * encrypt_scatterlist | ||
259 | * @crypt_stat: Pointer to the crypt_stat struct supplying the cipher context and key | |
260 | * @dest_sg: Destination of encrypted data | ||
261 | * @src_sg: Data to be encrypted | ||
262 | * @size: Length of data to be encrypted | ||
263 | * @iv: iv to use during encryption | ||
264 | * | ||
265 | * Returns zero on success; negative value on error | |
266 | */ | ||
267 | static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, | ||
268 | struct scatterlist *dest_sg, | ||
269 | struct scatterlist *src_sg, int size, | ||
270 | unsigned char *iv) | ||
271 | { | ||
272 | int rc = 0; | ||
273 | |||
274 | BUG_ON(!crypt_stat || !crypt_stat->tfm | ||
275 | || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, | ||
276 | ECRYPTFS_STRUCT_INITIALIZED)); | ||
277 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
278 | ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n", | ||
279 | crypt_stat->key_size); | ||
280 | ecryptfs_dump_hex(crypt_stat->key, | ||
281 | crypt_stat->key_size); | ||
282 | } | ||
283 | /* Consider doing this once, when the file is opened */ | ||
284 | mutex_lock(&crypt_stat->cs_tfm_mutex); | ||
285 | rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key, | ||
286 | crypt_stat->key_size); | ||
287 | if (rc) { | ||
288 | ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", | ||
289 | rc); | ||
290 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
291 | rc = -EINVAL; | ||
292 | goto out; | ||
293 | } | ||
294 | ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size); | ||
295 | crypto_cipher_encrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, iv); | ||
296 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
297 | out: | ||
298 | return rc; | ||
299 | } | ||
300 | |||
301 | static void | ||
302 | ecryptfs_extent_to_lwr_pg_idx_and_offset(unsigned long *lower_page_idx, | ||
303 | int *byte_offset, | ||
304 | struct ecryptfs_crypt_stat *crypt_stat, | ||
305 | unsigned long extent_num) | ||
306 | { | ||
307 | unsigned long lower_extent_num; | ||
308 | int extents_occupied_by_headers_at_front; | ||
309 | int bytes_occupied_by_headers_at_front; | ||
310 | int extent_offset; | ||
311 | int extents_per_page; | ||
312 | |||
313 | bytes_occupied_by_headers_at_front = | ||
314 | ( crypt_stat->header_extent_size | ||
315 | * crypt_stat->num_header_extents_at_front ); | ||
316 | extents_occupied_by_headers_at_front = | ||
317 | ( bytes_occupied_by_headers_at_front | ||
318 | / crypt_stat->extent_size ); | ||
319 | lower_extent_num = extents_occupied_by_headers_at_front + extent_num; | ||
320 | extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; | ||
321 | (*lower_page_idx) = lower_extent_num / extents_per_page; | ||
322 | extent_offset = lower_extent_num % extents_per_page; | ||
323 | (*byte_offset) = extent_offset * crypt_stat->extent_size; | ||
324 | ecryptfs_printk(KERN_DEBUG, " * crypt_stat->header_extent_size = " | ||
325 | "[%d]\n", crypt_stat->header_extent_size); | ||
326 | ecryptfs_printk(KERN_DEBUG, " * crypt_stat->" | ||
327 | "num_header_extents_at_front = [%d]\n", | ||
328 | crypt_stat->num_header_extents_at_front); | ||
329 | ecryptfs_printk(KERN_DEBUG, " * extents_occupied_by_headers_at_" | ||
330 | "front = [%d]\n", extents_occupied_by_headers_at_front); | ||
331 | ecryptfs_printk(KERN_DEBUG, " * lower_extent_num = [0x%.16x]\n", | ||
332 | lower_extent_num); | ||
333 | ecryptfs_printk(KERN_DEBUG, " * extents_per_page = [%d]\n", | ||
334 | extents_per_page); | ||
335 | ecryptfs_printk(KERN_DEBUG, " * (*lower_page_idx) = [0x%.16x]\n", | ||
336 | (*lower_page_idx)); | ||
337 | ecryptfs_printk(KERN_DEBUG, " * extent_offset = [%d]\n", | ||
338 | extent_offset); | ||
339 | ecryptfs_printk(KERN_DEBUG, " * (*byte_offset) = [%d]\n", | ||
340 | (*byte_offset)); | ||
341 | } | ||
342 | |||
343 | static int ecryptfs_write_out_page(struct ecryptfs_page_crypt_context *ctx, | ||
344 | struct page *lower_page, | ||
345 | struct inode *lower_inode, | ||
346 | int byte_offset_in_page, int bytes_to_write) | ||
347 | { | ||
348 | int rc = 0; | ||
349 | |||
350 | if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) { | ||
351 | rc = ecryptfs_commit_lower_page(lower_page, lower_inode, | ||
352 | ctx->param.lower_file, | ||
353 | byte_offset_in_page, | ||
354 | bytes_to_write); | ||
355 | if (rc) { | ||
356 | ecryptfs_printk(KERN_ERR, "Error calling lower " | ||
357 | "commit; rc = [%d]\n", rc); | ||
358 | goto out; | ||
359 | } | ||
360 | } else { | ||
361 | rc = ecryptfs_writepage_and_release_lower_page(lower_page, | ||
362 | lower_inode, | ||
363 | ctx->param.wbc); | ||
364 | if (rc) { | ||
365 | ecryptfs_printk(KERN_ERR, "Error calling lower " | ||
366 | "writepage(); rc = [%d]\n", rc); | ||
367 | goto out; | ||
368 | } | ||
369 | } | ||
370 | out: | ||
371 | return rc; | ||
372 | } | ||
373 | |||
374 | static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx, | ||
375 | struct page **lower_page, | ||
376 | struct inode *lower_inode, | ||
377 | unsigned long lower_page_idx, | ||
378 | int byte_offset_in_page) | ||
379 | { | ||
380 | int rc = 0; | ||
381 | |||
382 | if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) { | ||
383 | /* TODO: Limit this to only the data extents that are | ||
384 | * needed */ | ||
385 | rc = ecryptfs_get_lower_page(lower_page, lower_inode, | ||
386 | ctx->param.lower_file, | ||
387 | lower_page_idx, | ||
388 | byte_offset_in_page, | ||
389 | (PAGE_CACHE_SIZE | ||
390 | - byte_offset_in_page)); | ||
391 | if (rc) { | ||
392 | ecryptfs_printk( | ||
393 | KERN_ERR, "Error attempting to grab, map, " | ||
394 | "and prepare_write lower page with index " | ||
395 | "[0x%.16x]; rc = [%d]\n", lower_page_idx, rc); | ||
396 | goto out; | ||
397 | } | ||
398 | } else { | ||
399 | rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, | ||
400 | lower_inode, | ||
401 | lower_page_idx); | ||
402 | if (rc) { | ||
403 | ecryptfs_printk( | ||
404 | KERN_ERR, "Error attempting to grab and map " | ||
405 | "lower page with index [0x%.16x]; rc = [%d]\n", | ||
406 | lower_page_idx, rc); | ||
407 | goto out; | ||
408 | } | ||
409 | } | ||
410 | out: | ||
411 | return rc; | ||
412 | } | ||
413 | |||
414 | /** | ||
415 | * ecryptfs_encrypt_page | ||
416 | * @ctx: The context of the page | ||
417 | * | ||
418 | * Encrypt an eCryptfs page. This is done on a per-extent basis. Note | ||
419 | * that eCryptfs pages may straddle the lower pages -- for instance, | ||
420 | * if the file was created on a machine with an 8K page size | ||
421 | * (resulting in an 8K header), and then the file is copied onto a | ||
422 | * host with a 32K page size, then when reading page 0 of the eCryptfs | ||
423 | * file, 24K of page 0 of the lower file will be read and decrypted, | ||
424 | * and then 8K of page 1 of the lower file will be read and decrypted. | ||
425 | * | ||
426 | * The actual operations performed on each page depends on the | ||
427 | * contents of the ecryptfs_page_crypt_context struct. | ||
428 | * | ||
429 | * Returns zero on success; negative on error | ||
430 | */ | ||
431 | int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx) | ||
432 | { | ||
433 | char extent_iv[ECRYPTFS_MAX_IV_BYTES]; | ||
434 | unsigned long base_extent; | ||
435 | unsigned long extent_offset = 0; | ||
436 | unsigned long lower_page_idx = 0; | ||
437 | unsigned long prior_lower_page_idx = 0; | ||
438 | struct page *lower_page; | ||
439 | struct inode *lower_inode; | ||
440 | struct ecryptfs_inode_info *inode_info; | ||
441 | struct ecryptfs_crypt_stat *crypt_stat; | ||
442 | int rc = 0; | ||
443 | int lower_byte_offset = 0; | ||
444 | int orig_byte_offset = 0; | ||
445 | int num_extents_per_page; | ||
446 | #define ECRYPTFS_PAGE_STATE_UNREAD 0 | ||
447 | #define ECRYPTFS_PAGE_STATE_READ 1 | ||
448 | #define ECRYPTFS_PAGE_STATE_MODIFIED 2 | ||
449 | #define ECRYPTFS_PAGE_STATE_WRITTEN 3 | ||
450 | int page_state; | ||
451 | |||
452 | lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host); | ||
453 | inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host); | ||
454 | crypt_stat = &inode_info->crypt_stat; | ||
455 | if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) { | ||
456 | rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode, | ||
457 | ctx->param.lower_file); | ||
458 | if (rc) | ||
459 | ecryptfs_printk(KERN_ERR, "Error attempting to copy " | ||
460 | "page at index [0x%.16x]\n", | ||
461 | ctx->page->index); | ||
462 | goto out; | ||
463 | } | ||
464 | num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; | ||
465 | base_extent = (ctx->page->index * num_extents_per_page); | ||
466 | page_state = ECRYPTFS_PAGE_STATE_UNREAD; | ||
467 | while (extent_offset < num_extents_per_page) { | ||
468 | ecryptfs_extent_to_lwr_pg_idx_and_offset( | ||
469 | &lower_page_idx, &lower_byte_offset, crypt_stat, | ||
470 | (base_extent + extent_offset)); | ||
471 | if (prior_lower_page_idx != lower_page_idx | ||
472 | && page_state == ECRYPTFS_PAGE_STATE_MODIFIED) { | ||
473 | rc = ecryptfs_write_out_page(ctx, lower_page, | ||
474 | lower_inode, | ||
475 | orig_byte_offset, | ||
476 | (PAGE_CACHE_SIZE | ||
477 | - orig_byte_offset)); | ||
478 | if (rc) { | ||
479 | ecryptfs_printk(KERN_ERR, "Error attempting " | ||
480 | "to write out page; rc = [%d]" | ||
481 | "\n", rc); | ||
482 | goto out; | ||
483 | } | ||
484 | page_state = ECRYPTFS_PAGE_STATE_WRITTEN; | ||
485 | } | ||
486 | if (page_state == ECRYPTFS_PAGE_STATE_UNREAD | ||
487 | || page_state == ECRYPTFS_PAGE_STATE_WRITTEN) { | ||
488 | rc = ecryptfs_read_in_page(ctx, &lower_page, | ||
489 | lower_inode, lower_page_idx, | ||
490 | lower_byte_offset); | ||
491 | if (rc) { | ||
492 | ecryptfs_printk(KERN_ERR, "Error attempting " | ||
493 | "to read in lower page with " | ||
494 | "index [0x%.16x]; rc = [%d]\n", | ||
495 | lower_page_idx, rc); | ||
496 | goto out; | ||
497 | } | ||
498 | orig_byte_offset = lower_byte_offset; | ||
499 | prior_lower_page_idx = lower_page_idx; | ||
500 | page_state = ECRYPTFS_PAGE_STATE_READ; | ||
501 | } | ||
502 | BUG_ON(!(page_state == ECRYPTFS_PAGE_STATE_MODIFIED | ||
503 | || page_state == ECRYPTFS_PAGE_STATE_READ)); | ||
504 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, | ||
505 | (base_extent + extent_offset)); | ||
506 | if (rc) { | ||
507 | ecryptfs_printk(KERN_ERR, "Error attempting to " | ||
508 | "derive IV for extent [0x%.16x]; " | ||
509 | "rc = [%d]\n", | ||
510 | (base_extent + extent_offset), rc); | ||
511 | goto out; | ||
512 | } | ||
513 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
514 | ecryptfs_printk(KERN_DEBUG, "Encrypting extent " | ||
515 | "with iv:\n"); | ||
516 | ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); | ||
517 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " | ||
518 | "encryption:\n"); | ||
519 | ecryptfs_dump_hex((char *) | ||
520 | (page_address(ctx->page) | ||
521 | + (extent_offset | ||
522 | * crypt_stat->extent_size)), 8); | ||
523 | } | ||
524 | rc = ecryptfs_encrypt_page_offset( | ||
525 | crypt_stat, lower_page, lower_byte_offset, ctx->page, | ||
526 | (extent_offset * crypt_stat->extent_size), | ||
527 | crypt_stat->extent_size, extent_iv); | ||
528 | ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; " | ||
529 | "rc = [%d]\n", | ||
530 | (base_extent + extent_offset), rc); | ||
531 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
532 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " | ||
533 | "encryption:\n"); | ||
534 | ecryptfs_dump_hex((char *)(page_address(lower_page) | ||
535 | + lower_byte_offset), 8); | ||
536 | } | ||
537 | page_state = ECRYPTFS_PAGE_STATE_MODIFIED; | ||
538 | extent_offset++; | ||
539 | } | ||
540 | BUG_ON(orig_byte_offset != 0); | ||
541 | rc = ecryptfs_write_out_page(ctx, lower_page, lower_inode, 0, | ||
542 | (lower_byte_offset | ||
543 | + crypt_stat->extent_size)); | ||
544 | if (rc) { | ||
545 | ecryptfs_printk(KERN_ERR, "Error attempting to write out " | ||
546 | "page; rc = [%d]\n", rc); | ||
547 | goto out; | ||
548 | } | ||
549 | out: | ||
550 | return rc; | ||
551 | } | ||
552 | |||
553 | /** | ||
554 | * ecryptfs_decrypt_page | ||
555 | * @file: The ecryptfs file | ||
556 | * @page: The page in ecryptfs to decrypt | ||
557 | * | ||
558 | * Decrypt an eCryptfs page. This is done on a per-extent basis. Note | ||
559 | * that eCryptfs pages may straddle the lower pages -- for instance, | ||
560 | * if the file was created on a machine with an 8K page size | ||
561 | * (resulting in an 8K header), and then the file is copied onto a | ||
562 | * host with a 32K page size, then when reading page 0 of the eCryptfs | ||
563 | * file, 24K of page 0 of the lower file will be read and decrypted, | ||
564 | * and then 8K of page 1 of the lower file will be read and decrypted. | ||
565 | * | ||
566 | * Returns zero on success; negative on error | ||
567 | */ | ||
568 | int ecryptfs_decrypt_page(struct file *file, struct page *page) | ||
569 | { | ||
570 | char extent_iv[ECRYPTFS_MAX_IV_BYTES]; | ||
571 | unsigned long base_extent; | ||
572 | unsigned long extent_offset = 0; | ||
573 | unsigned long lower_page_idx = 0; | ||
574 | unsigned long prior_lower_page_idx = 0; | ||
575 | struct page *lower_page; | ||
576 | char *lower_page_virt = NULL; | ||
577 | struct inode *lower_inode; | ||
578 | struct ecryptfs_crypt_stat *crypt_stat; | ||
579 | int rc = 0; | ||
580 | int byte_offset; | ||
581 | int num_extents_per_page; | ||
582 | int page_state; | ||
583 | |||
584 | crypt_stat = &(ecryptfs_inode_to_private( | ||
585 | page->mapping->host)->crypt_stat); | ||
586 | lower_inode = ecryptfs_inode_to_lower(page->mapping->host); | ||
587 | if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) { | ||
588 | rc = ecryptfs_do_readpage(file, page, page->index); | ||
589 | if (rc) | ||
590 | ecryptfs_printk(KERN_ERR, "Error attempting to copy " | ||
591 | "page at index [0x%.16x]\n", | ||
592 | page->index); | ||
593 | goto out; | ||
594 | } | ||
595 | num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; | ||
596 | base_extent = (page->index * num_extents_per_page); | ||
597 | lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache, | ||
598 | SLAB_KERNEL); | ||
599 | if (!lower_page_virt) { | ||
600 | rc = -ENOMEM; | ||
601 | ecryptfs_printk(KERN_ERR, "Error getting page for encrypted " | ||
602 | "lower page(s)\n"); | ||
603 | goto out; | ||
604 | } | ||
605 | lower_page = virt_to_page(lower_page_virt); | ||
606 | page_state = ECRYPTFS_PAGE_STATE_UNREAD; | ||
607 | while (extent_offset < num_extents_per_page) { | ||
608 | ecryptfs_extent_to_lwr_pg_idx_and_offset( | ||
609 | &lower_page_idx, &byte_offset, crypt_stat, | ||
610 | (base_extent + extent_offset)); | ||
611 | if (prior_lower_page_idx != lower_page_idx | ||
612 | || page_state == ECRYPTFS_PAGE_STATE_UNREAD) { | ||
613 | rc = ecryptfs_do_readpage(file, lower_page, | ||
614 | lower_page_idx); | ||
615 | if (rc) { | ||
616 | ecryptfs_printk(KERN_ERR, "Error reading " | ||
617 | "lower encrypted page; rc = " | ||
618 | "[%d]\n", rc); | ||
619 | goto out; | ||
620 | } | ||
621 | prior_lower_page_idx = lower_page_idx; | ||
622 | page_state = ECRYPTFS_PAGE_STATE_READ; | ||
623 | } | ||
624 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, | ||
625 | (base_extent + extent_offset)); | ||
626 | if (rc) { | ||
627 | ecryptfs_printk(KERN_ERR, "Error attempting to " | ||
628 | "derive IV for extent [0x%.16x]; rc = " | ||
629 | "[%d]\n", | ||
630 | (base_extent + extent_offset), rc); | ||
631 | goto out; | ||
632 | } | ||
633 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
634 | ecryptfs_printk(KERN_DEBUG, "Decrypting extent " | ||
635 | "with iv:\n"); | ||
636 | ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); | ||
637 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " | ||
638 | "decryption:\n"); | ||
639 | ecryptfs_dump_hex((lower_page_virt + byte_offset), 8); | ||
640 | } | ||
641 | rc = ecryptfs_decrypt_page_offset(crypt_stat, page, | ||
642 | (extent_offset | ||
643 | * crypt_stat->extent_size), | ||
644 | lower_page, byte_offset, | ||
645 | crypt_stat->extent_size, | ||
646 | extent_iv); | ||
647 | if (rc != crypt_stat->extent_size) { | ||
648 | ecryptfs_printk(KERN_ERR, "Error attempting to " | ||
649 | "decrypt extent [0x%.16x]\n", | ||
650 | (base_extent + extent_offset)); | ||
651 | goto out; | ||
652 | } | ||
653 | rc = 0; | ||
654 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
655 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " | ||
656 | "decryption:\n"); | ||
657 | ecryptfs_dump_hex((char *)(page_address(page) | ||
658 | + byte_offset), 8); | ||
659 | } | ||
660 | extent_offset++; | ||
661 | } | ||
662 | out: | ||
663 | if (lower_page_virt) | ||
664 | kmem_cache_free(ecryptfs_lower_page_cache, lower_page_virt); | ||
665 | return rc; | ||
666 | } | ||
667 | |||
668 | /** | ||
669 | * decrypt_scatterlist | ||
670 | * | ||
671 | * Returns the number of bytes decrypted; negative value on error | ||
672 | */ | ||
673 | static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, | ||
674 | struct scatterlist *dest_sg, | ||
675 | struct scatterlist *src_sg, int size, | ||
676 | unsigned char *iv) | ||
677 | { | ||
678 | int rc = 0; | ||
679 | |||
680 | /* Consider doing this once, when the file is opened */ | ||
681 | mutex_lock(&crypt_stat->cs_tfm_mutex); | ||
682 | rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key, | ||
683 | crypt_stat->key_size); | ||
684 | if (rc) { | ||
685 | ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", | ||
686 | rc); | ||
687 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
688 | rc = -EINVAL; | ||
689 | goto out; | ||
690 | } | ||
691 | ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size); | ||
692 | rc = crypto_cipher_decrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, | ||
693 | iv); | ||
694 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
695 | if (rc) { | ||
696 | ecryptfs_printk(KERN_ERR, "Error decrypting; rc = [%d]\n", | ||
697 | rc); | ||
698 | goto out; | ||
699 | } | ||
700 | rc = size; | ||
701 | out: | ||
702 | return rc; | ||
703 | } | ||
704 | |||
705 | /** | ||
706 | * ecryptfs_encrypt_page_offset | ||
707 | * | ||
708 | * Returns zero on success; negative value on error | |
709 | */ | ||
710 | static int | ||
711 | ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | ||
712 | struct page *dst_page, int dst_offset, | ||
713 | struct page *src_page, int src_offset, int size, | ||
714 | unsigned char *iv) | ||
715 | { | ||
716 | struct scatterlist src_sg, dst_sg; | ||
717 | |||
718 | src_sg.page = src_page; | ||
719 | src_sg.offset = src_offset; | ||
720 | src_sg.length = size; | ||
721 | dst_sg.page = dst_page; | ||
722 | dst_sg.offset = dst_offset; | ||
723 | dst_sg.length = size; | ||
724 | return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); | ||
725 | } | ||
726 | |||
727 | /** | ||
728 | * ecryptfs_decrypt_page_offset | ||
729 | * | ||
730 | * Returns the number of bytes decrypted | ||
731 | */ | ||
732 | static int | ||
733 | ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | ||
734 | struct page *dst_page, int dst_offset, | ||
735 | struct page *src_page, int src_offset, int size, | ||
736 | unsigned char *iv) | ||
737 | { | ||
738 | struct scatterlist src_sg, dst_sg; | ||
739 | |||
740 | src_sg.page = src_page; | ||
741 | src_sg.offset = src_offset; | ||
742 | src_sg.length = size; | ||
743 | dst_sg.page = dst_page; | ||
744 | dst_sg.offset = dst_offset; | ||
745 | dst_sg.length = size; | ||
746 | return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); | ||
747 | } | ||
748 | |||
749 | #define ECRYPTFS_MAX_SCATTERLIST_LEN 4 | ||
750 | |||
751 | /** | ||
752 | * ecryptfs_init_crypt_ctx | ||
753 | * @crypt_stat: Uninitialized crypt stats structure | |
754 | * | ||
755 | * Initialize the crypto context. | ||
756 | * | ||
757 | * TODO: Performance: Keep a cache of initialized cipher contexts; | ||
758 | * only init if needed | ||
759 | */ | ||
760 | int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) | ||
761 | { | ||
762 | int rc = -EINVAL; | ||
763 | |||
764 | if (!crypt_stat->cipher) { | ||
765 | ecryptfs_printk(KERN_ERR, "No cipher specified\n"); | ||
766 | goto out; | ||
767 | } | ||
768 | ecryptfs_printk(KERN_DEBUG, | ||
769 | "Initializing cipher [%s]; strlen = [%d]; " | ||
770 | "key_size_bits = [%d]\n", | ||
771 | crypt_stat->cipher, (int)strlen(crypt_stat->cipher), | ||
772 | crypt_stat->key_size << 3); | ||
773 | if (crypt_stat->tfm) { | ||
774 | rc = 0; | ||
775 | goto out; | ||
776 | } | ||
777 | mutex_lock(&crypt_stat->cs_tfm_mutex); | ||
778 | crypt_stat->tfm = crypto_alloc_tfm(crypt_stat->cipher, | ||
779 | ECRYPTFS_DEFAULT_CHAINING_MODE | ||
780 | | CRYPTO_TFM_REQ_WEAK_KEY); | ||
781 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
782 | if (!crypt_stat->tfm) { | ||
783 | ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): " | ||
784 | "Error initializing cipher [%s]\n", | ||
785 | crypt_stat->cipher); | ||
786 | goto out; | ||
787 | } | ||
788 | rc = 0; | ||
789 | out: | ||
790 | return rc; | ||
791 | } | ||
792 | |||
793 | static void set_extent_mask_and_shift(struct ecryptfs_crypt_stat *crypt_stat) | ||
794 | { | ||
795 | int extent_size_tmp; | ||
796 | |||
797 | crypt_stat->extent_mask = 0xFFFFFFFF; | ||
798 | crypt_stat->extent_shift = 0; | ||
799 | if (crypt_stat->extent_size == 0) | ||
800 | return; | ||
801 | extent_size_tmp = crypt_stat->extent_size; | ||
802 | while ((extent_size_tmp & 0x01) == 0) { | ||
803 | extent_size_tmp >>= 1; | ||
804 | crypt_stat->extent_mask <<= 1; | ||
805 | crypt_stat->extent_shift++; | ||
806 | } | ||
807 | } | ||
808 | |||
809 | void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat) | ||
810 | { | ||
811 | /* Default values; may be overwritten as we are parsing the | ||
812 | * packets. */ | ||
813 | crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; | ||
814 | set_extent_mask_and_shift(crypt_stat); | ||
815 | crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; | ||
816 | if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) { | ||
817 | crypt_stat->header_extent_size = | ||
818 | ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; | ||
819 | } else | ||
820 | crypt_stat->header_extent_size = PAGE_CACHE_SIZE; | ||
821 | crypt_stat->num_header_extents_at_front = 1; | ||
822 | } | ||
823 | |||
824 | /** | ||
825 | * ecryptfs_compute_root_iv | ||
826 | * @crypt_stats | ||
827 | * | ||
828 | * On error, sets the root IV to all 0's. | ||
829 | */ | ||
830 | int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat) | ||
831 | { | ||
832 | int rc = 0; | ||
833 | char dst[MD5_DIGEST_SIZE]; | ||
834 | |||
835 | BUG_ON(crypt_stat->iv_bytes > MD5_DIGEST_SIZE); | ||
836 | BUG_ON(crypt_stat->iv_bytes <= 0); | ||
837 | if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID)) { | ||
838 | rc = -EINVAL; | ||
839 | ecryptfs_printk(KERN_WARNING, "Session key not valid; " | ||
840 | "cannot generate root IV\n"); | ||
841 | goto out; | ||
842 | } | ||
843 | rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key, | ||
844 | crypt_stat->key_size); | ||
845 | if (rc) { | ||
846 | ecryptfs_printk(KERN_WARNING, "Error attempting to compute " | ||
847 | "MD5 while generating root IV\n"); | ||
848 | goto out; | ||
849 | } | ||
850 | memcpy(crypt_stat->root_iv, dst, crypt_stat->iv_bytes); | ||
851 | out: | ||
852 | if (rc) { | ||
853 | memset(crypt_stat->root_iv, 0, crypt_stat->iv_bytes); | ||
854 | ECRYPTFS_SET_FLAG(crypt_stat->flags, | ||
855 | ECRYPTFS_SECURITY_WARNING); | ||
856 | } | ||
857 | return rc; | ||
858 | } | ||
859 | |||
860 | static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat) | ||
861 | { | ||
862 | get_random_bytes(crypt_stat->key, crypt_stat->key_size); | ||
863 | ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); | ||
864 | ecryptfs_compute_root_iv(crypt_stat); | ||
865 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
866 | ecryptfs_printk(KERN_DEBUG, "Generated new session key:\n"); | ||
867 | ecryptfs_dump_hex(crypt_stat->key, | ||
868 | crypt_stat->key_size); | ||
869 | } | ||
870 | } | ||
871 | |||
872 | /** | ||
873 | * ecryptfs_set_default_crypt_stat_vals | ||
874 | * @crypt_stat | ||
875 | * | ||
876 | * Default values in the event that policy does not override them. | ||
877 | */ | ||
878 | static void ecryptfs_set_default_crypt_stat_vals( | ||
879 | struct ecryptfs_crypt_stat *crypt_stat, | ||
880 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat) | ||
881 | { | ||
882 | ecryptfs_set_default_sizes(crypt_stat); | ||
883 | strcpy(crypt_stat->cipher, ECRYPTFS_DEFAULT_CIPHER); | ||
884 | crypt_stat->key_size = ECRYPTFS_DEFAULT_KEY_BYTES; | ||
885 | ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); | ||
886 | crypt_stat->file_version = ECRYPTFS_FILE_VERSION; | ||
887 | crypt_stat->mount_crypt_stat = mount_crypt_stat; | ||
888 | } | ||
889 | |||
890 | /** | ||
891 | * ecryptfs_new_file_context | ||
892 | * @ecryptfs_dentry | ||
893 | * | ||
894 | * If the crypto context for the file has not yet been established, | ||
895 | * this is where we do that. Establishing a new crypto context | ||
896 | * involves the following decisions: | ||
897 | * - What cipher to use? | ||
898 | * - What set of authentication tokens to use? | ||
899 | * Here we just worry about getting enough information into the | ||
900 | * authentication tokens so that we know that they are available. | ||
901 | * We associate the available authentication tokens with the new file | ||
902 | * via the set of signatures in the crypt_stat struct. Later, when | ||
903 | * the headers are actually written out, we may again defer to | ||
904 | * userspace to perform the encryption of the session key; for the | ||
905 | * foreseeable future, this will be the case with public key packets. | ||
906 | * | ||
907 | * Returns zero on success; non-zero otherwise | ||
908 | */ | ||
909 | /* Associate an authentication token(s) with the file */ | ||
910 | int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry) | ||
911 | { | ||
912 | int rc = 0; | ||
913 | struct ecryptfs_crypt_stat *crypt_stat = | ||
914 | &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; | ||
915 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = | ||
916 | &ecryptfs_superblock_to_private( | ||
917 | ecryptfs_dentry->d_sb)->mount_crypt_stat; | ||
918 | int cipher_name_len; | ||
919 | |||
920 | ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat); | ||
921 | /* See if there are mount crypt options */ | ||
922 | if (mount_crypt_stat->global_auth_tok) { | ||
923 | ecryptfs_printk(KERN_DEBUG, "Initializing context for new " | ||
924 | "file using mount_crypt_stat\n"); | ||
925 | ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); | ||
926 | ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); | ||
927 | memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++], | ||
928 | mount_crypt_stat->global_auth_tok_sig, | ||
929 | ECRYPTFS_SIG_SIZE_HEX); | ||
930 | cipher_name_len = | ||
931 | strlen(mount_crypt_stat->global_default_cipher_name); | ||
932 | memcpy(crypt_stat->cipher, | ||
933 | mount_crypt_stat->global_default_cipher_name, | ||
934 | cipher_name_len); | ||
935 | crypt_stat->cipher[cipher_name_len] = '\0'; | ||
936 | crypt_stat->key_size = | ||
937 | mount_crypt_stat->global_default_cipher_key_size; | ||
938 | ecryptfs_generate_new_key(crypt_stat); | ||
939 | } else | ||
940 | /* We should not encounter this scenario since we | ||
941 | * should detect lack of global_auth_tok at mount time | ||
942 | * TODO: Applies to 0.1 release only; remove in future | ||
943 | * release */ | ||
944 | BUG(); | ||
945 | rc = ecryptfs_init_crypt_ctx(crypt_stat); | ||
946 | if (rc) | ||
947 | ecryptfs_printk(KERN_ERR, "Error initializing cryptographic " | ||
948 | "context for cipher [%s]: rc = [%d]\n", | ||
949 | crypt_stat->cipher, rc); | ||
950 | return rc; | ||
951 | } | ||
952 | |||
953 | /** | ||
954 | * contains_ecryptfs_marker - check for the ecryptfs marker | ||
955 | * @data: The data block in which to check | ||
956 | * | ||
957 | * Returns one if marker found; zero if not found | ||
958 | */ | ||
959 | int contains_ecryptfs_marker(char *data) | ||
960 | { | ||
961 | u32 m_1, m_2; | ||
962 | |||
963 | memcpy(&m_1, data, 4); | ||
964 | m_1 = be32_to_cpu(m_1); | ||
965 | memcpy(&m_2, (data + 4), 4); | ||
966 | m_2 = be32_to_cpu(m_2); | ||
967 | if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2) | ||
968 | return 1; | ||
969 | ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; " | ||
970 | "MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2, | ||
971 | MAGIC_ECRYPTFS_MARKER); | ||
972 | ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = " | ||
973 | "[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER)); | ||
974 | return 0; | ||
975 | } | ||
976 | |||
977 | struct ecryptfs_flag_map_elem { | ||
978 | u32 file_flag; | ||
979 | u32 local_flag; | ||
980 | }; | ||
981 | |||
982 | /* Add support for additional flags by adding elements here. */ | ||
983 | static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = { | ||
984 | {0x00000001, ECRYPTFS_ENABLE_HMAC}, | ||
985 | {0x00000002, ECRYPTFS_ENCRYPTED} | ||
986 | }; | ||
987 | |||
988 | /** | ||
989 | * ecryptfs_process_flags | ||
990 | * @crypt_stat | ||
991 | * @page_virt: Source data to be parsed | ||
992 | * @bytes_read: Updated with the number of bytes read | ||
993 | * | ||
994 | * Returns zero on success; non-zero if the flag set is invalid | ||
995 | */ | ||
996 | static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat, | ||
997 | char *page_virt, int *bytes_read) | ||
998 | { | ||
999 | int rc = 0; | ||
1000 | int i; | ||
1001 | u32 flags; | ||
1002 | |||
1003 | memcpy(&flags, page_virt, 4); | ||
1004 | flags = be32_to_cpu(flags); | ||
1005 | for (i = 0; i < ((sizeof(ecryptfs_flag_map) | ||
1006 | / sizeof(struct ecryptfs_flag_map_elem))); i++) | ||
1007 | if (flags & ecryptfs_flag_map[i].file_flag) { | ||
1008 | ECRYPTFS_SET_FLAG(crypt_stat->flags, | ||
1009 | ecryptfs_flag_map[i].local_flag); | ||
1010 | } else | ||
1011 | ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, | ||
1012 | ecryptfs_flag_map[i].local_flag); | ||
1013 | /* Version is in top 8 bits of the 32-bit flag vector */ | ||
1014 | crypt_stat->file_version = ((flags >> 24) & 0xFF); | ||
1015 | (*bytes_read) = 4; | ||
1016 | return rc; | ||
1017 | } | ||
1018 | |||
1019 | /** | ||
1020 | * write_ecryptfs_marker | ||
1021 | * @page_virt: The pointer to in a page to begin writing the marker | ||
1022 | * @written: Number of bytes written | ||
1023 | * | ||
1024 | * Marker = 0x3c81b7f5 | ||
1025 | */ | ||
1026 | static void write_ecryptfs_marker(char *page_virt, size_t *written) | ||
1027 | { | ||
1028 | u32 m_1, m_2; | ||
1029 | |||
1030 | get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); | ||
1031 | m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER); | ||
1032 | m_1 = cpu_to_be32(m_1); | ||
1033 | memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); | ||
1034 | m_2 = cpu_to_be32(m_2); | ||
1035 | memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2, | ||
1036 | (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); | ||
1037 | (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; | ||
1038 | } | ||
1039 | |||
1040 | static void | ||
1041 | write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, | ||
1042 | size_t *written) | ||
1043 | { | ||
1044 | u32 flags = 0; | ||
1045 | int i; | ||
1046 | |||
1047 | for (i = 0; i < ((sizeof(ecryptfs_flag_map) | ||
1048 | / sizeof(struct ecryptfs_flag_map_elem))); i++) | ||
1049 | if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, | ||
1050 | ecryptfs_flag_map[i].local_flag)) | ||
1051 | flags |= ecryptfs_flag_map[i].file_flag; | ||
1052 | /* Version is in top 8 bits of the 32-bit flag vector */ | ||
1053 | flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000); | ||
1054 | flags = cpu_to_be32(flags); | ||
1055 | memcpy(page_virt, &flags, 4); | ||
1056 | (*written) = 4; | ||
1057 | } | ||
1058 | |||
1059 | struct ecryptfs_cipher_code_str_map_elem { | ||
1060 | char cipher_str[16]; | ||
1061 | u16 cipher_code; | ||
1062 | }; | ||
1063 | |||
1064 | /* Add support for additional ciphers by adding elements here. The | ||
1065 | * cipher_code is whatever OpenPGP applicatoins use to identify the | ||
1066 | * ciphers. List in order of probability. */ | ||
1067 | static struct ecryptfs_cipher_code_str_map_elem | ||
1068 | ecryptfs_cipher_code_str_map[] = { | ||
1069 | {"aes",RFC2440_CIPHER_AES_128 }, | ||
1070 | {"blowfish", RFC2440_CIPHER_BLOWFISH}, | ||
1071 | {"des3_ede", RFC2440_CIPHER_DES3_EDE}, | ||
1072 | {"cast5", RFC2440_CIPHER_CAST_5}, | ||
1073 | {"twofish", RFC2440_CIPHER_TWOFISH}, | ||
1074 | {"cast6", RFC2440_CIPHER_CAST_6}, | ||
1075 | {"aes", RFC2440_CIPHER_AES_192}, | ||
1076 | {"aes", RFC2440_CIPHER_AES_256} | ||
1077 | }; | ||
1078 | |||
1079 | /** | ||
1080 | * ecryptfs_code_for_cipher_string | ||
1081 | * @str: The string representing the cipher name | ||
1082 | * | ||
1083 | * Returns zero on no match, or the cipher code on match | ||
1084 | */ | ||
1085 | u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat) | ||
1086 | { | ||
1087 | int i; | ||
1088 | u16 code = 0; | ||
1089 | struct ecryptfs_cipher_code_str_map_elem *map = | ||
1090 | ecryptfs_cipher_code_str_map; | ||
1091 | |||
1092 | if (strcmp(crypt_stat->cipher, "aes") == 0) { | ||
1093 | switch (crypt_stat->key_size) { | ||
1094 | case 16: | ||
1095 | code = RFC2440_CIPHER_AES_128; | ||
1096 | break; | ||
1097 | case 24: | ||
1098 | code = RFC2440_CIPHER_AES_192; | ||
1099 | break; | ||
1100 | case 32: | ||
1101 | code = RFC2440_CIPHER_AES_256; | ||
1102 | } | ||
1103 | } else { | ||
1104 | for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++) | ||
1105 | if (strcmp(crypt_stat->cipher, map[i].cipher_str) == 0){ | ||
1106 | code = map[i].cipher_code; | ||
1107 | break; | ||
1108 | } | ||
1109 | } | ||
1110 | return code; | ||
1111 | } | ||
1112 | |||
1113 | /** | ||
1114 | * ecryptfs_cipher_code_to_string | ||
1115 | * @str: Destination to write out the cipher name | ||
1116 | * @cipher_code: The code to convert to cipher name string | ||
1117 | * | ||
1118 | * Returns zero on success | ||
1119 | */ | ||
1120 | int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code) | ||
1121 | { | ||
1122 | int rc = 0; | ||
1123 | int i; | ||
1124 | |||
1125 | str[0] = '\0'; | ||
1126 | for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++) | ||
1127 | if (cipher_code == ecryptfs_cipher_code_str_map[i].cipher_code) | ||
1128 | strcpy(str, ecryptfs_cipher_code_str_map[i].cipher_str); | ||
1129 | if (str[0] == '\0') { | ||
1130 | ecryptfs_printk(KERN_WARNING, "Cipher code not recognized: " | ||
1131 | "[%d]\n", cipher_code); | ||
1132 | rc = -EINVAL; | ||
1133 | } | ||
1134 | return rc; | ||
1135 | } | ||
1136 | |||
1137 | /** | ||
1138 | * ecryptfs_read_header_region | ||
1139 | * @data | ||
1140 | * @dentry | ||
1141 | * @nd | ||
1142 | * | ||
1143 | * Returns zero on success; non-zero otherwise | ||
1144 | */ | ||
1145 | int ecryptfs_read_header_region(char *data, struct dentry *dentry, | ||
1146 | struct vfsmount *mnt) | ||
1147 | { | ||
1148 | struct file *file; | ||
1149 | mm_segment_t oldfs; | ||
1150 | int rc; | ||
1151 | |||
1152 | mnt = mntget(mnt); | ||
1153 | file = dentry_open(dentry, mnt, O_RDONLY); | ||
1154 | if (IS_ERR(file)) { | ||
1155 | ecryptfs_printk(KERN_DEBUG, "Error opening file to " | ||
1156 | "read header region\n"); | ||
1157 | mntput(mnt); | ||
1158 | rc = PTR_ERR(file); | ||
1159 | goto out; | ||
1160 | } | ||
1161 | file->f_pos = 0; | ||
1162 | oldfs = get_fs(); | ||
1163 | set_fs(get_ds()); | ||
1164 | /* For releases 0.1 and 0.2, all of the header information | ||
1165 | * fits in the first data extent-sized region. */ | ||
1166 | rc = file->f_op->read(file, (char __user *)data, | ||
1167 | ECRYPTFS_DEFAULT_EXTENT_SIZE, &file->f_pos); | ||
1168 | set_fs(oldfs); | ||
1169 | fput(file); | ||
1170 | rc = 0; | ||
1171 | out: | ||
1172 | return rc; | ||
1173 | } | ||
1174 | |||
1175 | static void | ||
1176 | write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat, | ||
1177 | size_t *written) | ||
1178 | { | ||
1179 | u32 header_extent_size; | ||
1180 | u16 num_header_extents_at_front; | ||
1181 | |||
1182 | header_extent_size = (u32)crypt_stat->header_extent_size; | ||
1183 | num_header_extents_at_front = | ||
1184 | (u16)crypt_stat->num_header_extents_at_front; | ||
1185 | header_extent_size = cpu_to_be32(header_extent_size); | ||
1186 | memcpy(virt, &header_extent_size, 4); | ||
1187 | virt += 4; | ||
1188 | num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front); | ||
1189 | memcpy(virt, &num_header_extents_at_front, 2); | ||
1190 | (*written) = 6; | ||
1191 | } | ||
1192 | |||
/* Slab caches for header buffers. Users clear PAGE_CACHE_SIZE bytes
 * of each object, so objects are presumably at least that large --
 * TODO confirm against the cache creation code, which is not in this
 * section. */
struct kmem_cache *ecryptfs_header_cache_0; /* used by ecryptfs_write_headers() */
struct kmem_cache *ecryptfs_header_cache_1; /* used by ecryptfs_read_headers() */
struct kmem_cache *ecryptfs_header_cache_2; /* no user visible in this section */
1196 | |||
1197 | /** | ||
1198 | * ecryptfs_write_headers_virt | ||
1199 | * @page_virt | ||
1200 | * @crypt_stat | ||
1201 | * @ecryptfs_dentry | ||
1202 | * | ||
1203 | * Format version: 1 | ||
1204 | * | ||
1205 | * Header Extent: | ||
1206 | * Octets 0-7: Unencrypted file size (big-endian) | ||
1207 | * Octets 8-15: eCryptfs special marker | ||
1208 | * Octets 16-19: Flags | ||
1209 | * Octet 16: File format version number (between 0 and 255) | ||
1210 | * Octets 17-18: Reserved | ||
1211 | * Octet 19: Bit 1 (lsb): Reserved | ||
1212 | * Bit 2: Encrypted? | ||
1213 | * Bits 3-8: Reserved | ||
1214 | * Octets 20-23: Header extent size (big-endian) | ||
1215 | * Octets 24-25: Number of header extents at front of file | ||
1216 | * (big-endian) | ||
1217 | * Octet 26: Begin RFC 2440 authentication token packet set | ||
1218 | * Data Extent 0: | ||
1219 | * Lower data (CBC encrypted) | ||
1220 | * Data Extent 1: | ||
1221 | * Lower data (CBC encrypted) | ||
1222 | * ... | ||
1223 | * | ||
1224 | * Returns zero on success | ||
1225 | */ | ||
1226 | int ecryptfs_write_headers_virt(char *page_virt, | ||
1227 | struct ecryptfs_crypt_stat *crypt_stat, | ||
1228 | struct dentry *ecryptfs_dentry) | ||
1229 | { | ||
1230 | int rc; | ||
1231 | size_t written; | ||
1232 | size_t offset; | ||
1233 | |||
1234 | offset = ECRYPTFS_FILE_SIZE_BYTES; | ||
1235 | write_ecryptfs_marker((page_virt + offset), &written); | ||
1236 | offset += written; | ||
1237 | write_ecryptfs_flags((page_virt + offset), crypt_stat, &written); | ||
1238 | offset += written; | ||
1239 | write_header_metadata((page_virt + offset), crypt_stat, &written); | ||
1240 | offset += written; | ||
1241 | rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat, | ||
1242 | ecryptfs_dentry, &written, | ||
1243 | PAGE_CACHE_SIZE - offset); | ||
1244 | if (rc) | ||
1245 | ecryptfs_printk(KERN_WARNING, "Error generating key packet " | ||
1246 | "set; rc = [%d]\n", rc); | ||
1247 | return rc; | ||
1248 | } | ||
1249 | |||
1250 | /** | ||
1251 | * ecryptfs_write_headers | ||
1252 | * @lower_file: The lower file struct, which was returned from dentry_open | ||
1253 | * | ||
1254 | * Write the file headers out. This will likely involve a userspace | ||
1255 | * callout, in which the session key is encrypted with one or more | ||
1256 | * public keys and/or the passphrase necessary to do the encryption is | ||
1257 | * retrieved via a prompt. Exactly what happens at this point should | ||
1258 | * be policy-dependent. | ||
1259 | * | ||
1260 | * Returns zero on success; non-zero on error | ||
1261 | */ | ||
1262 | int ecryptfs_write_headers(struct dentry *ecryptfs_dentry, | ||
1263 | struct file *lower_file) | ||
1264 | { | ||
1265 | mm_segment_t oldfs; | ||
1266 | struct ecryptfs_crypt_stat *crypt_stat; | ||
1267 | char *page_virt; | ||
1268 | int current_header_page; | ||
1269 | int header_pages; | ||
1270 | int rc = 0; | ||
1271 | |||
1272 | crypt_stat = &ecryptfs_inode_to_private( | ||
1273 | ecryptfs_dentry->d_inode)->crypt_stat; | ||
1274 | if (likely(ECRYPTFS_CHECK_FLAG(crypt_stat->flags, | ||
1275 | ECRYPTFS_ENCRYPTED))) { | ||
1276 | if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, | ||
1277 | ECRYPTFS_KEY_VALID)) { | ||
1278 | ecryptfs_printk(KERN_DEBUG, "Key is " | ||
1279 | "invalid; bailing out\n"); | ||
1280 | rc = -EINVAL; | ||
1281 | goto out; | ||
1282 | } | ||
1283 | } else { | ||
1284 | rc = -EINVAL; | ||
1285 | ecryptfs_printk(KERN_WARNING, | ||
1286 | "Called with crypt_stat->encrypted == 0\n"); | ||
1287 | goto out; | ||
1288 | } | ||
1289 | /* Released in this function */ | ||
1290 | page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, SLAB_USER); | ||
1291 | if (!page_virt) { | ||
1292 | ecryptfs_printk(KERN_ERR, "Out of memory\n"); | ||
1293 | rc = -ENOMEM; | ||
1294 | goto out; | ||
1295 | } | ||
1296 | memset(page_virt, 0, PAGE_CACHE_SIZE); | ||
1297 | rc = ecryptfs_write_headers_virt(page_virt, crypt_stat, | ||
1298 | ecryptfs_dentry); | ||
1299 | if (unlikely(rc)) { | ||
1300 | ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n"); | ||
1301 | memset(page_virt, 0, PAGE_CACHE_SIZE); | ||
1302 | goto out_free; | ||
1303 | } | ||
1304 | ecryptfs_printk(KERN_DEBUG, | ||
1305 | "Writing key packet set to underlying file\n"); | ||
1306 | lower_file->f_pos = 0; | ||
1307 | oldfs = get_fs(); | ||
1308 | set_fs(get_ds()); | ||
1309 | ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->" | ||
1310 | "write() w/ header page; lower_file->f_pos = " | ||
1311 | "[0x%.16x]\n", lower_file->f_pos); | ||
1312 | lower_file->f_op->write(lower_file, (char __user *)page_virt, | ||
1313 | PAGE_CACHE_SIZE, &lower_file->f_pos); | ||
1314 | header_pages = ((crypt_stat->header_extent_size | ||
1315 | * crypt_stat->num_header_extents_at_front) | ||
1316 | / PAGE_CACHE_SIZE); | ||
1317 | memset(page_virt, 0, PAGE_CACHE_SIZE); | ||
1318 | current_header_page = 1; | ||
1319 | while (current_header_page < header_pages) { | ||
1320 | ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->" | ||
1321 | "write() w/ zero'd page; lower_file->f_pos = " | ||
1322 | "[0x%.16x]\n", lower_file->f_pos); | ||
1323 | lower_file->f_op->write(lower_file, (char __user *)page_virt, | ||
1324 | PAGE_CACHE_SIZE, &lower_file->f_pos); | ||
1325 | current_header_page++; | ||
1326 | } | ||
1327 | set_fs(oldfs); | ||
1328 | ecryptfs_printk(KERN_DEBUG, | ||
1329 | "Done writing key packet set to underlying file.\n"); | ||
1330 | out_free: | ||
1331 | kmem_cache_free(ecryptfs_header_cache_0, page_virt); | ||
1332 | out: | ||
1333 | return rc; | ||
1334 | } | ||
1335 | |||
1336 | static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, | ||
1337 | char *virt, int *bytes_read) | ||
1338 | { | ||
1339 | int rc = 0; | ||
1340 | u32 header_extent_size; | ||
1341 | u16 num_header_extents_at_front; | ||
1342 | |||
1343 | memcpy(&header_extent_size, virt, 4); | ||
1344 | header_extent_size = be32_to_cpu(header_extent_size); | ||
1345 | virt += 4; | ||
1346 | memcpy(&num_header_extents_at_front, virt, 2); | ||
1347 | num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front); | ||
1348 | crypt_stat->header_extent_size = (int)header_extent_size; | ||
1349 | crypt_stat->num_header_extents_at_front = | ||
1350 | (int)num_header_extents_at_front; | ||
1351 | (*bytes_read) = 6; | ||
1352 | if ((crypt_stat->header_extent_size | ||
1353 | * crypt_stat->num_header_extents_at_front) | ||
1354 | < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) { | ||
1355 | rc = -EINVAL; | ||
1356 | ecryptfs_printk(KERN_WARNING, "Invalid header extent size: " | ||
1357 | "[%d]\n", crypt_stat->header_extent_size); | ||
1358 | } | ||
1359 | return rc; | ||
1360 | } | ||
1361 | |||
1362 | /** | ||
1363 | * set_default_header_data | ||
1364 | * | ||
1365 | * For version 0 file format; this function is only for backwards | ||
1366 | * compatibility for files created with the prior versions of | ||
1367 | * eCryptfs. | ||
1368 | */ | ||
1369 | static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) | ||
1370 | { | ||
1371 | crypt_stat->header_extent_size = 4096; | ||
1372 | crypt_stat->num_header_extents_at_front = 1; | ||
1373 | } | ||
1374 | |||
1375 | /** | ||
1376 | * ecryptfs_read_headers_virt | ||
1377 | * | ||
1378 | * Read/parse the header data. The header format is detailed in the | ||
1379 | * comment block for the ecryptfs_write_headers_virt() function. | ||
1380 | * | ||
1381 | * Returns zero on success | ||
1382 | */ | ||
1383 | static int ecryptfs_read_headers_virt(char *page_virt, | ||
1384 | struct ecryptfs_crypt_stat *crypt_stat, | ||
1385 | struct dentry *ecryptfs_dentry) | ||
1386 | { | ||
1387 | int rc = 0; | ||
1388 | int offset; | ||
1389 | int bytes_read; | ||
1390 | |||
1391 | ecryptfs_set_default_sizes(crypt_stat); | ||
1392 | crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private( | ||
1393 | ecryptfs_dentry->d_sb)->mount_crypt_stat; | ||
1394 | offset = ECRYPTFS_FILE_SIZE_BYTES; | ||
1395 | rc = contains_ecryptfs_marker(page_virt + offset); | ||
1396 | if (rc == 0) { | ||
1397 | rc = -EINVAL; | ||
1398 | goto out; | ||
1399 | } | ||
1400 | offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; | ||
1401 | rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset), | ||
1402 | &bytes_read); | ||
1403 | if (rc) { | ||
1404 | ecryptfs_printk(KERN_WARNING, "Error processing flags\n"); | ||
1405 | goto out; | ||
1406 | } | ||
1407 | if (crypt_stat->file_version > ECRYPTFS_SUPPORTED_FILE_VERSION) { | ||
1408 | ecryptfs_printk(KERN_WARNING, "File version is [%d]; only " | ||
1409 | "file version [%d] is supported by this " | ||
1410 | "version of eCryptfs\n", | ||
1411 | crypt_stat->file_version, | ||
1412 | ECRYPTFS_SUPPORTED_FILE_VERSION); | ||
1413 | rc = -EINVAL; | ||
1414 | goto out; | ||
1415 | } | ||
1416 | offset += bytes_read; | ||
1417 | if (crypt_stat->file_version >= 1) { | ||
1418 | rc = parse_header_metadata(crypt_stat, (page_virt + offset), | ||
1419 | &bytes_read); | ||
1420 | if (rc) { | ||
1421 | ecryptfs_printk(KERN_WARNING, "Error reading header " | ||
1422 | "metadata; rc = [%d]\n", rc); | ||
1423 | } | ||
1424 | offset += bytes_read; | ||
1425 | } else | ||
1426 | set_default_header_data(crypt_stat); | ||
1427 | rc = ecryptfs_parse_packet_set(crypt_stat, (page_virt + offset), | ||
1428 | ecryptfs_dentry); | ||
1429 | out: | ||
1430 | return rc; | ||
1431 | } | ||
1432 | |||
1433 | /** | ||
1434 | * ecryptfs_read_headers | ||
1435 | * | ||
1436 | * Returns zero if valid headers found and parsed; non-zero otherwise | ||
1437 | */ | ||
1438 | int ecryptfs_read_headers(struct dentry *ecryptfs_dentry, | ||
1439 | struct file *lower_file) | ||
1440 | { | ||
1441 | int rc = 0; | ||
1442 | char *page_virt = NULL; | ||
1443 | mm_segment_t oldfs; | ||
1444 | ssize_t bytes_read; | ||
1445 | struct ecryptfs_crypt_stat *crypt_stat = | ||
1446 | &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; | ||
1447 | |||
1448 | /* Read the first page from the underlying file */ | ||
1449 | page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, SLAB_USER); | ||
1450 | if (!page_virt) { | ||
1451 | rc = -ENOMEM; | ||
1452 | ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n"); | ||
1453 | goto out; | ||
1454 | } | ||
1455 | lower_file->f_pos = 0; | ||
1456 | oldfs = get_fs(); | ||
1457 | set_fs(get_ds()); | ||
1458 | bytes_read = lower_file->f_op->read(lower_file, | ||
1459 | (char __user *)page_virt, | ||
1460 | ECRYPTFS_DEFAULT_EXTENT_SIZE, | ||
1461 | &lower_file->f_pos); | ||
1462 | set_fs(oldfs); | ||
1463 | if (bytes_read != ECRYPTFS_DEFAULT_EXTENT_SIZE) { | ||
1464 | rc = -EINVAL; | ||
1465 | goto out; | ||
1466 | } | ||
1467 | rc = ecryptfs_read_headers_virt(page_virt, crypt_stat, | ||
1468 | ecryptfs_dentry); | ||
1469 | if (rc) { | ||
1470 | ecryptfs_printk(KERN_DEBUG, "Valid eCryptfs headers not " | ||
1471 | "found\n"); | ||
1472 | rc = -EINVAL; | ||
1473 | } | ||
1474 | out: | ||
1475 | if (page_virt) { | ||
1476 | memset(page_virt, 0, PAGE_CACHE_SIZE); | ||
1477 | kmem_cache_free(ecryptfs_header_cache_1, page_virt); | ||
1478 | } | ||
1479 | return rc; | ||
1480 | } | ||
1481 | |||
1482 | /** | ||
1483 | * ecryptfs_encode_filename - converts a plaintext file name to cipher text | ||
1484 | * @crypt_stat: The crypt_stat struct associated with the file anem to encode | ||
1485 | * @name: The plaintext name | ||
1486 | * @length: The length of the plaintext | ||
1487 | * @encoded_name: The encypted name | ||
1488 | * | ||
1489 | * Encrypts and encodes a filename into something that constitutes a | ||
1490 | * valid filename for a filesystem, with printable characters. | ||
1491 | * | ||
1492 | * We assume that we have a properly initialized crypto context, | ||
1493 | * pointed to by crypt_stat->tfm. | ||
1494 | * | ||
1495 | * TODO: Implement filename decoding and decryption here, in place of | ||
1496 | * memcpy. We are keeping the framework around for now to (1) | ||
1497 | * facilitate testing of the components needed to implement filename | ||
1498 | * encryption and (2) to provide a code base from which other | ||
1499 | * developers in the community can easily implement this feature. | ||
1500 | * | ||
1501 | * Returns the length of encoded filename; negative if error | ||
1502 | */ | ||
1503 | int | ||
1504 | ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat, | ||
1505 | const char *name, int length, char **encoded_name) | ||
1506 | { | ||
1507 | int error = 0; | ||
1508 | |||
1509 | (*encoded_name) = kmalloc(length + 2, GFP_KERNEL); | ||
1510 | if (!(*encoded_name)) { | ||
1511 | error = -ENOMEM; | ||
1512 | goto out; | ||
1513 | } | ||
1514 | /* TODO: Filename encryption is a scheduled feature for a | ||
1515 | * future version of eCryptfs. This function is here only for | ||
1516 | * the purpose of providing a framework for other developers | ||
1517 | * to easily implement filename encryption. Hint: Replace this | ||
1518 | * memcpy() with a call to encrypt and encode the | ||
1519 | * filename, the set the length accordingly. */ | ||
1520 | memcpy((void *)(*encoded_name), (void *)name, length); | ||
1521 | (*encoded_name)[length] = '\0'; | ||
1522 | error = length + 1; | ||
1523 | out: | ||
1524 | return error; | ||
1525 | } | ||
1526 | |||
1527 | /** | ||
1528 | * ecryptfs_decode_filename - converts the cipher text name to plaintext | ||
1529 | * @crypt_stat: The crypt_stat struct associated with the file | ||
1530 | * @name: The filename in cipher text | ||
1531 | * @length: The length of the cipher text name | ||
1532 | * @decrypted_name: The plaintext name | ||
1533 | * | ||
1534 | * Decodes and decrypts the filename. | ||
1535 | * | ||
1536 | * We assume that we have a properly initialized crypto context, | ||
1537 | * pointed to by crypt_stat->tfm. | ||
1538 | * | ||
1539 | * TODO: Implement filename decoding and decryption here, in place of | ||
1540 | * memcpy. We are keeping the framework around for now to (1) | ||
1541 | * facilitate testing of the components needed to implement filename | ||
1542 | * encryption and (2) to provide a code base from which other | ||
1543 | * developers in the community can easily implement this feature. | ||
1544 | * | ||
1545 | * Returns the length of decoded filename; negative if error | ||
1546 | */ | ||
1547 | int | ||
1548 | ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, | ||
1549 | const char *name, int length, char **decrypted_name) | ||
1550 | { | ||
1551 | int error = 0; | ||
1552 | |||
1553 | (*decrypted_name) = kmalloc(length + 2, GFP_KERNEL); | ||
1554 | if (!(*decrypted_name)) { | ||
1555 | error = -ENOMEM; | ||
1556 | goto out; | ||
1557 | } | ||
1558 | /* TODO: Filename encryption is a scheduled feature for a | ||
1559 | * future version of eCryptfs. This function is here only for | ||
1560 | * the purpose of providing a framework for other developers | ||
1561 | * to easily implement filename encryption. Hint: Replace this | ||
1562 | * memcpy() with a call to decode and decrypt the | ||
1563 | * filename, the set the length accordingly. */ | ||
1564 | memcpy((void *)(*decrypted_name), (void *)name, length); | ||
1565 | (*decrypted_name)[length + 1] = '\0'; /* Only for convenience | ||
1566 | * in printing out the | ||
1567 | * string in debug | ||
1568 | * messages */ | ||
1569 | error = length; | ||
1570 | out: | ||
1571 | return error; | ||
1572 | } | ||
1573 | |||
1574 | /** | ||
1575 | * ecryptfs_process_cipher - Perform cipher initialization. | ||
1576 | * @tfm: Crypto context set by this function | ||
1577 | * @key_tfm: Crypto context for key material, set by this function | ||
1578 | * @cipher_name: Name of the cipher. | ||
1579 | * @key_size: Size of the key in bytes. | ||
1580 | * | ||
1581 | * Returns zero on success. Any crypto_tfm structs allocated here | ||
1582 | * should be released by other functions, such as on a superblock put | ||
1583 | * event, regardless of whether this function succeeds for fails. | ||
1584 | */ | ||
1585 | int | ||
1586 | ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm, | ||
1587 | char *cipher_name, size_t key_size) | ||
1588 | { | ||
1589 | char dummy_key[ECRYPTFS_MAX_KEY_BYTES]; | ||
1590 | int rc; | ||
1591 | |||
1592 | *tfm = *key_tfm = NULL; | ||
1593 | if (key_size > ECRYPTFS_MAX_KEY_BYTES) { | ||
1594 | rc = -EINVAL; | ||
1595 | printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum " | ||
1596 | "allowable is [%d]\n", key_size, ECRYPTFS_MAX_KEY_BYTES); | ||
1597 | goto out; | ||
1598 | } | ||
1599 | *tfm = crypto_alloc_tfm(cipher_name, (ECRYPTFS_DEFAULT_CHAINING_MODE | ||
1600 | | CRYPTO_TFM_REQ_WEAK_KEY)); | ||
1601 | if (!(*tfm)) { | ||
1602 | rc = -EINVAL; | ||
1603 | printk(KERN_ERR "Unable to allocate crypto cipher with name " | ||
1604 | "[%s]\n", cipher_name); | ||
1605 | goto out; | ||
1606 | } | ||
1607 | *key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY); | ||
1608 | if (!(*key_tfm)) { | ||
1609 | rc = -EINVAL; | ||
1610 | printk(KERN_ERR "Unable to allocate crypto cipher with name " | ||
1611 | "[%s]\n", cipher_name); | ||
1612 | goto out; | ||
1613 | } | ||
1614 | if (key_size < crypto_tfm_alg_min_keysize(*tfm)) { | ||
1615 | rc = -EINVAL; | ||
1616 | printk(KERN_ERR "Request key size is [%Zd]; minimum key size " | ||
1617 | "supported by cipher [%s] is [%d]\n", key_size, | ||
1618 | cipher_name, crypto_tfm_alg_min_keysize(*tfm)); | ||
1619 | goto out; | ||
1620 | } | ||
1621 | if (key_size < crypto_tfm_alg_min_keysize(*key_tfm)) { | ||
1622 | rc = -EINVAL; | ||
1623 | printk(KERN_ERR "Request key size is [%Zd]; minimum key size " | ||
1624 | "supported by cipher [%s] is [%d]\n", key_size, | ||
1625 | cipher_name, crypto_tfm_alg_min_keysize(*key_tfm)); | ||
1626 | goto out; | ||
1627 | } | ||
1628 | if (key_size > crypto_tfm_alg_max_keysize(*tfm)) { | ||
1629 | rc = -EINVAL; | ||
1630 | printk(KERN_ERR "Request key size is [%Zd]; maximum key size " | ||
1631 | "supported by cipher [%s] is [%d]\n", key_size, | ||
1632 | cipher_name, crypto_tfm_alg_min_keysize(*tfm)); | ||
1633 | goto out; | ||
1634 | } | ||
1635 | if (key_size > crypto_tfm_alg_max_keysize(*key_tfm)) { | ||
1636 | rc = -EINVAL; | ||
1637 | printk(KERN_ERR "Request key size is [%Zd]; maximum key size " | ||
1638 | "supported by cipher [%s] is [%d]\n", key_size, | ||
1639 | cipher_name, crypto_tfm_alg_min_keysize(*key_tfm)); | ||
1640 | goto out; | ||
1641 | } | ||
1642 | get_random_bytes(dummy_key, key_size); | ||
1643 | rc = crypto_cipher_setkey(*tfm, dummy_key, key_size); | ||
1644 | if (rc) { | ||
1645 | printk(KERN_ERR "Error attempting to set key of size [%Zd] for " | ||
1646 | "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc); | ||
1647 | rc = -EINVAL; | ||
1648 | goto out; | ||
1649 | } | ||
1650 | rc = crypto_cipher_setkey(*key_tfm, dummy_key, key_size); | ||
1651 | if (rc) { | ||
1652 | printk(KERN_ERR "Error attempting to set key of size [%Zd] for " | ||
1653 | "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc); | ||
1654 | rc = -EINVAL; | ||
1655 | goto out; | ||
1656 | } | ||
1657 | out: | ||
1658 | return rc; | ||
1659 | } | ||
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c new file mode 100644 index 000000000000..61f8e894284f --- /dev/null +++ b/fs/ecryptfs/debug.c | |||
@@ -0,0 +1,123 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * Functions only useful for debugging. | ||
4 | * | ||
5 | * Copyright (C) 2006 International Business Machines Corp. | ||
6 | * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as | ||
10 | * published by the Free Software Foundation; either version 2 of the | ||
11 | * License, or (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, but | ||
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
21 | * 02111-1307, USA. | ||
22 | */ | ||
23 | |||
24 | #include "ecryptfs_kernel.h" | ||
25 | |||
26 | /** | ||
27 | * ecryptfs_dump_auth_tok - debug function to print auth toks | ||
28 | * | ||
29 | * This function will print the contents of an ecryptfs authentication | ||
30 | * token. | ||
31 | */ | ||
32 | void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok) | ||
33 | { | ||
34 | char salt[ECRYPTFS_SALT_SIZE * 2 + 1]; | ||
35 | char sig[ECRYPTFS_SIG_SIZE_HEX + 1]; | ||
36 | |||
37 | ecryptfs_printk(KERN_DEBUG, "Auth tok at mem loc [%p]:\n", | ||
38 | auth_tok); | ||
39 | if (ECRYPTFS_CHECK_FLAG(auth_tok->flags, ECRYPTFS_PRIVATE_KEY)) { | ||
40 | ecryptfs_printk(KERN_DEBUG, " * private key type\n"); | ||
41 | ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT " | ||
42 | "IN ECRYPTFS VERSION 0.1)\n"); | ||
43 | } else { | ||
44 | ecryptfs_printk(KERN_DEBUG, " * passphrase type\n"); | ||
45 | ecryptfs_to_hex(salt, auth_tok->token.password.salt, | ||
46 | ECRYPTFS_SALT_SIZE); | ||
47 | salt[ECRYPTFS_SALT_SIZE * 2] = '\0'; | ||
48 | ecryptfs_printk(KERN_DEBUG, " * salt = [%s]\n", salt); | ||
49 | if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags, | ||
50 | ECRYPTFS_PERSISTENT_PASSWORD)) { | ||
51 | ecryptfs_printk(KERN_DEBUG, " * persistent\n"); | ||
52 | } | ||
53 | memcpy(sig, auth_tok->token.password.signature, | ||
54 | ECRYPTFS_SIG_SIZE_HEX); | ||
55 | sig[ECRYPTFS_SIG_SIZE_HEX] = '\0'; | ||
56 | ecryptfs_printk(KERN_DEBUG, " * signature = [%s]\n", sig); | ||
57 | } | ||
58 | ecryptfs_printk(KERN_DEBUG, " * session_key.flags = [0x%x]\n", | ||
59 | auth_tok->session_key.flags); | ||
60 | if (auth_tok->session_key.flags | ||
61 | & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT) | ||
62 | ecryptfs_printk(KERN_DEBUG, | ||
63 | " * Userspace decrypt request set\n"); | ||
64 | if (auth_tok->session_key.flags | ||
65 | & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT) | ||
66 | ecryptfs_printk(KERN_DEBUG, | ||
67 | " * Userspace encrypt request set\n"); | ||
68 | if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_DECRYPTED_KEY) { | ||
69 | ecryptfs_printk(KERN_DEBUG, " * Contains decrypted key\n"); | ||
70 | ecryptfs_printk(KERN_DEBUG, | ||
71 | " * session_key.decrypted_key_size = [0x%x]\n", | ||
72 | auth_tok->session_key.decrypted_key_size); | ||
73 | ecryptfs_printk(KERN_DEBUG, " * Decrypted session key " | ||
74 | "dump:\n"); | ||
75 | if (ecryptfs_verbosity > 0) | ||
76 | ecryptfs_dump_hex(auth_tok->session_key.decrypted_key, | ||
77 | ECRYPTFS_DEFAULT_KEY_BYTES); | ||
78 | } | ||
79 | if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_ENCRYPTED_KEY) { | ||
80 | ecryptfs_printk(KERN_DEBUG, " * Contains encrypted key\n"); | ||
81 | ecryptfs_printk(KERN_DEBUG, | ||
82 | " * session_key.encrypted_key_size = [0x%x]\n", | ||
83 | auth_tok->session_key.encrypted_key_size); | ||
84 | ecryptfs_printk(KERN_DEBUG, " * Encrypted session key " | ||
85 | "dump:\n"); | ||
86 | if (ecryptfs_verbosity > 0) | ||
87 | ecryptfs_dump_hex(auth_tok->session_key.encrypted_key, | ||
88 | auth_tok->session_key. | ||
89 | encrypted_key_size); | ||
90 | } | ||
91 | } | ||
92 | |||
93 | /** | ||
94 | * ecryptfs_dump_hex - debug hex printer | ||
95 | * @data: string of bytes to be printed | ||
96 | * @bytes: number of bytes to print | ||
97 | * | ||
98 | * Dump hexadecimal representation of char array | ||
99 | */ | ||
100 | void ecryptfs_dump_hex(char *data, int bytes) | ||
101 | { | ||
102 | int i = 0; | ||
103 | int add_newline = 1; | ||
104 | |||
105 | if (ecryptfs_verbosity < 1) | ||
106 | return; | ||
107 | if (bytes != 0) { | ||
108 | printk(KERN_DEBUG "0x%.2x.", (unsigned char)data[i]); | ||
109 | i++; | ||
110 | } | ||
111 | while (i < bytes) { | ||
112 | printk("0x%.2x.", (unsigned char)data[i]); | ||
113 | i++; | ||
114 | if (i % 16 == 0) { | ||
115 | printk("\n"); | ||
116 | add_newline = 0; | ||
117 | } else | ||
118 | add_newline = 1; | ||
119 | } | ||
120 | if (add_newline) | ||
121 | printk("\n"); | ||
122 | } | ||
123 | |||
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c new file mode 100644 index 000000000000..f0d2a433242b --- /dev/null +++ b/fs/ecryptfs/dentry.c | |||
@@ -0,0 +1,87 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * | ||
4 | * Copyright (C) 1997-2003 Erez Zadok | ||
5 | * Copyright (C) 2001-2003 Stony Brook University | ||
6 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
7 | * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License as | ||
11 | * published by the Free Software Foundation; either version 2 of the | ||
12 | * License, or (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
22 | * 02111-1307, USA. | ||
23 | */ | ||
24 | |||
25 | #include <linux/dcache.h> | ||
26 | #include <linux/namei.h> | ||
27 | #include "ecryptfs_kernel.h" | ||
28 | |||
29 | /** | ||
30 | * ecryptfs_d_revalidate - revalidate an ecryptfs dentry | ||
31 | * @dentry: The ecryptfs dentry | ||
32 | * @nd: The associated nameidata | ||
33 | * | ||
34 | * Called when the VFS needs to revalidate a dentry. This | ||
35 | * is called whenever a name lookup finds a dentry in the | ||
36 | * dcache. Most filesystems leave this as NULL, because all their | ||
37 | * dentries in the dcache are valid. | ||
38 | * | ||
39 | * Returns 1 if valid, 0 otherwise. | ||
40 | * | ||
41 | */ | ||
42 | static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
43 | { | ||
44 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
45 | struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); | ||
46 | struct dentry *dentry_save; | ||
47 | struct vfsmount *vfsmount_save; | ||
48 | int rc = 1; | ||
49 | |||
50 | if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) | ||
51 | goto out; | ||
52 | dentry_save = nd->dentry; | ||
53 | vfsmount_save = nd->mnt; | ||
54 | nd->dentry = lower_dentry; | ||
55 | nd->mnt = lower_mnt; | ||
56 | rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); | ||
57 | nd->dentry = dentry_save; | ||
58 | nd->mnt = vfsmount_save; | ||
59 | out: | ||
60 | return rc; | ||
61 | } | ||
62 | |||
/* Slab cache that ecryptfs_d_release() returns dentry private data
 * (struct ecryptfs_dentry_info) to; declared extern in
 * ecryptfs_kernel.h. */
struct kmem_cache *ecryptfs_dentry_info_cache;
64 | |||
65 | /** | ||
66 | * ecryptfs_d_release | ||
67 | * @dentry: The ecryptfs dentry | ||
68 | * | ||
69 | * Called when a dentry is really deallocated. | ||
70 | */ | ||
71 | static void ecryptfs_d_release(struct dentry *dentry) | ||
72 | { | ||
73 | struct dentry *lower_dentry; | ||
74 | |||
75 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
76 | if (ecryptfs_dentry_to_private(dentry)) | ||
77 | kmem_cache_free(ecryptfs_dentry_info_cache, | ||
78 | ecryptfs_dentry_to_private(dentry)); | ||
79 | if (lower_dentry) | ||
80 | dput(lower_dentry); | ||
81 | return; | ||
82 | } | ||
83 | |||
/* Dentry operations for eCryptfs dentries: revalidation is delegated
 * to the lower dentry, and release frees the per-dentry private data. */
struct dentry_operations ecryptfs_dops = {
	.d_revalidate = ecryptfs_d_revalidate,
	.d_release = ecryptfs_d_release,
};
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h new file mode 100644 index 000000000000..872c9958531a --- /dev/null +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -0,0 +1,482 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * Kernel declarations. | ||
4 | * | ||
5 | * Copyright (C) 1997-2003 Erez Zadok | ||
6 | * Copyright (C) 2001-2003 Stony Brook University | ||
7 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
8 | * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License as | ||
12 | * published by the Free Software Foundation; either version 2 of the | ||
13 | * License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
23 | * 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef ECRYPTFS_KERNEL_H | ||
27 | #define ECRYPTFS_KERNEL_H | ||
28 | |||
29 | #include <keys/user-type.h> | ||
30 | #include <linux/fs.h> | ||
31 | #include <linux/scatterlist.h> | ||
32 | |||
/* Version verification for shared data structures w/ userspace */
#define ECRYPTFS_VERSION_MAJOR 0x00
#define ECRYPTFS_VERSION_MINOR 0x04
#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x01
/* These flags indicate which features are supported by the kernel
 * module; userspace tools such as the mount helper read
 * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
 * how to behave. */
#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001
#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002
#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
#define ECRYPTFS_VERSIONING_POLICY 0x00000008
/* The mask includes only the passphrase and plaintext-passthrough
 * bits; the pubkey and policy bits are defined but not included. */
#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
				  | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH)
47 | |||
/* Size limits, in bytes, for passphrases, salts, signatures, keys,
 * IVs and extents. The *_HEX sizes are twice the raw size (two hex
 * digits per byte). */
#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
#define ECRYPTFS_SALT_SIZE 8
#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
/* The original signature size is only for what is stored on disk; all
 * in-memory representations are expanded hex, so that they are better
 * suited to being passed around or referenced on the command line */
#define ECRYPTFS_SIG_SIZE 8
#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
#define ECRYPTFS_MAX_KEY_BYTES 64
#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
#define ECRYPTFS_DEFAULT_IV_BYTES 16
#define ECRYPTFS_FILE_VERSION 0x01
#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192
#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
65 | |||
/* Numeric cipher codes; going by the names these are presumably the
 * symmetric-key algorithm identifiers from RFC 2440 -- TODO confirm,
 * in particular the CAST_6 value. */
#define RFC2440_CIPHER_DES3_EDE 0x02
#define RFC2440_CIPHER_CAST_5 0x03
#define RFC2440_CIPHER_BLOWFISH 0x04
#define RFC2440_CIPHER_AES_128 0x07
#define RFC2440_CIPHER_AES_192 0x08
#define RFC2440_CIPHER_AES_256 0x09
#define RFC2440_CIPHER_TWOFISH 0x0a
#define RFC2440_CIPHER_CAST_6 0x0b
74 | |||
/* Bit-flag helpers. Both arguments are parenthesised so that an
 * expression argument (e.g. "a | b") is treated as a single operand
 * instead of being re-associated by operator precedence.
 * SET and CLEAR modify flag_bit_vector in place; CHECK evaluates to
 * the masked bits (non-zero when any bit of flag is set). */
#define ECRYPTFS_SET_FLAG(flag_bit_vector, flag) ((flag_bit_vector) |= (flag))
#define ECRYPTFS_CLEAR_FLAG(flag_bit_vector, flag) ((flag_bit_vector) &= ~(flag))
#define ECRYPTFS_CHECK_FLAG(flag_bit_vector, flag) ((flag_bit_vector) & (flag))
78 | |||
/**
 * For convenience, we may need to pass around the encrypted session
 * key between kernel and userspace because the authentication token
 * may not be extractable. For example, the TPM may not release the
 * private key, instead requiring the encrypted data and returning the
 * decrypted data.
 */
struct ecryptfs_session_key {
/* Bit values for the flags field below */
#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
	u32 flags;
	u32 encrypted_key_size; /* presumably bytes used in encrypted_key */
	u32 decrypted_key_size; /* presumably bytes used in decrypted_key */
	u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
	u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
};
97 | |||
/* Passphrase-type authentication token payload */
struct ecryptfs_password {
	u32 password_bytes;
	s32 hash_algo;
	u32 hash_iterations;
	u32 session_key_encryption_key_bytes;
/* Bit values for the flags field below */
#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
	u32 flags;
	/* Iterated-hash concatenation of salt and passphrase */
	u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
	/* The signature is always in expanded hex; the extra byte is
	 * presumably room for a terminating NUL -- TODO confirm */
	u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
	/* Raw salt bytes (not hex) */
	u8 salt[ECRYPTFS_SALT_SIZE];
};
112 | |||
/* Kinds of authentication token: ECRYPTFS_PASSWORD is 0 and
 * ECRYPTFS_PRIVATE_KEY is 1 */
enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
114 | |||
/* May be a password or a private key. The struct is packed, so the
 * compiler inserts no padding between members. */
struct ecryptfs_auth_tok {
	u16 version; /* 8-bit major and 8-bit minor */
	u16 token_type;
	u32 flags;
	struct ecryptfs_session_key session_key;
	u8 reserved[32];
	union {
		struct ecryptfs_password password;
		/* Private key is in future eCryptfs releases */
	} token;
} __attribute__ ((packed));
127 | |||
/* Debug dump of an authentication token, and conversion helpers
 * between raw bytes and their hex representation. */
void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
131 | |||
/* An encrypted key together with a type tag and a raw
 * (ECRYPTFS_SIG_SIZE-byte) signature; presumably identifies which
 * authentication token encrypted the key -- TODO confirm. */
struct ecryptfs_key_record {
	unsigned char type;
	size_t enc_key_size;
	unsigned char sig[ECRYPTFS_SIG_SIZE];
	unsigned char enc_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
};
138 | |||
/* List node holding a pointer to an authentication token */
struct ecryptfs_auth_tok_list {
	struct ecryptfs_auth_tok *auth_tok;
	struct list_head list;
};
143 | |||
/* Forward declarations; both structs are defined further down */
struct ecryptfs_crypt_stat;
struct ecryptfs_mount_crypt_stat;
146 | |||
/* Argument bundle for ecryptfs_encrypt_page(). Which member of param
 * is meaningful presumably depends on mode -- TODO confirm. */
struct ecryptfs_page_crypt_context {
	struct page *page;
/* Values for the mode field below */
#define ECRYPTFS_PREPARE_COMMIT_MODE 0
#define ECRYPTFS_WRITEPAGE_MODE 1
	unsigned int mode;
	union {
		struct file *lower_file;
		struct writeback_control *wbc;
	} param;
};
157 | |||
158 | static inline struct ecryptfs_auth_tok * | ||
159 | ecryptfs_get_key_payload_data(struct key *key) | ||
160 | { | ||
161 | return (struct ecryptfs_auth_tok *) | ||
162 | (((struct user_key_payload*)key->payload.data)->data); | ||
163 | } | ||
164 | |||
/* Filesystem-wide constants: superblock magic, limits, the header
 * marker, default cipher parameters and packet type tags. */
#define ECRYPTFS_SUPER_MAGIC 0xf15f
#define ECRYPTFS_MAX_KEYSET_SIZE 1024
#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
#define ECRYPTFS_MAX_NUM_KEYSIGS 2 /* TODO: Make this a linked list */
#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */
#define ECRYPTFS_SALT_BYTES 2
#define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5
#define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */
#define ECRYPTFS_FILE_SIZE_BYTES 8
#define ECRYPTFS_DEFAULT_CIPHER "aes"
#define ECRYPTFS_DEFAULT_KEY_BYTES 16
#define ECRYPTFS_DEFAULT_CHAINING_MODE CRYPTO_TFM_MODE_CBC
#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
#define MD5_DIGEST_SIZE 16
181 | |||
/**
 * This is the primary struct associated with each encrypted file.
 *
 * TODO: cache align/pack?
 */
struct ecryptfs_crypt_stat {
/* Bit values for the flags field below */
#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001
#define ECRYPTFS_POLICY_APPLIED 0x00000002
#define ECRYPTFS_NEW_FILE 0x00000004
#define ECRYPTFS_ENCRYPTED 0x00000008
#define ECRYPTFS_SECURITY_WARNING 0x00000010
#define ECRYPTFS_ENABLE_HMAC 0x00000020
#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040
#define ECRYPTFS_KEY_VALID 0x00000080
	u32 flags;
	unsigned int file_version;
	size_t iv_bytes;
	size_t num_keysigs;
	size_t header_extent_size;
	size_t num_header_extents_at_front;
	size_t extent_size; /* Data extent size; default is 4096 */
	size_t key_size;
	size_t extent_shift;
	unsigned int extent_mask;
	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
	struct crypto_tfm *tfm;
	struct crypto_tfm *md5_tfm; /* Crypto context for generating
				     * the initialization vectors */
	unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
	unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
	unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
	/* NOTE: each entry is exactly ECRYPTFS_SIG_SIZE_HEX bytes, with
	 * no room for a terminating NUL */
	unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX];
	/* Going by the names, the first two mutexes presumably serialize
	 * use of tfm and md5_tfm, and cs_mutex the struct as a whole --
	 * TODO confirm against the users */
	struct mutex cs_tfm_mutex;
	struct mutex cs_md5_tfm_mutex;
	struct mutex cs_mutex;
};
218 | |||
/* inode private data. The VFS inode is embedded, so the private data
 * is recovered from an inode pointer with container_of(). */
struct ecryptfs_inode_info {
	struct inode vfs_inode;
	struct inode *wii_inode; /* the lower inode */
	struct ecryptfs_crypt_stat crypt_stat;
};
225 | |||
/* dentry private data. Each dentry must keep track of a lower
 * vfsmount too. Stored in dentry->d_fsdata. */
struct ecryptfs_dentry_info {
	struct dentry *wdi_dentry; /* the lower dentry */
	struct vfsmount *lower_mnt;
	struct ecryptfs_crypt_stat *crypt_stat;
};
233 | |||
/**
 * This struct is to enable a mount-wide passphrase/salt combo. This
 * is more or less a stopgap to provide similar functionality to other
 * crypto filesystems like EncFS or CFS until full policy support is
 * implemented in eCryptfs.
 */
struct ecryptfs_mount_crypt_stat {
	/* Pointers to memory we do not own, do not free these */
	/* NOTE(review): the comment above presumably refers to the
	 * pointer members further down, not to the flag -- confirm */
#define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001
	u32 flags;
	struct ecryptfs_auth_tok *global_auth_tok;
	struct key *global_auth_tok_key;
	size_t global_default_cipher_key_size;
	struct crypto_tfm *global_key_tfm;
	struct mutex global_key_tfm_mutex;
	/* Both buffers below have one byte beyond the maximum content
	 * size, presumably for a terminating NUL */
	unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
						 + 1];
	unsigned char global_auth_tok_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
};
253 | |||
/* superblock private data. Stored in sb->s_fs_info. */
struct ecryptfs_sb_info {
	struct super_block *wsi_sb; /* the lower superblock */
	struct ecryptfs_mount_crypt_stat mount_crypt_stat;
};
259 | |||
/* file private data. Stored in file->private_data. */
struct ecryptfs_file_info {
	struct file *wfi_file; /* the lower file */
	struct ecryptfs_crypt_stat *crypt_stat;
};
265 | |||
/* auth_tok <=> encrypted_session_key mappings. Each list item embeds
 * a full authentication token rather than pointing at one. */
struct ecryptfs_auth_tok_list_item {
	unsigned char encrypted_session_key[ECRYPTFS_MAX_KEY_BYTES];
	struct list_head list;
	struct ecryptfs_auth_tok auth_tok;
};
272 | |||
273 | static inline struct ecryptfs_file_info * | ||
274 | ecryptfs_file_to_private(struct file *file) | ||
275 | { | ||
276 | return (struct ecryptfs_file_info *)file->private_data; | ||
277 | } | ||
278 | |||
279 | static inline void | ||
280 | ecryptfs_set_file_private(struct file *file, | ||
281 | struct ecryptfs_file_info *file_info) | ||
282 | { | ||
283 | file->private_data = file_info; | ||
284 | } | ||
285 | |||
286 | static inline struct file *ecryptfs_file_to_lower(struct file *file) | ||
287 | { | ||
288 | return ((struct ecryptfs_file_info *)file->private_data)->wfi_file; | ||
289 | } | ||
290 | |||
291 | static inline void | ||
292 | ecryptfs_set_file_lower(struct file *file, struct file *lower_file) | ||
293 | { | ||
294 | ((struct ecryptfs_file_info *)file->private_data)->wfi_file = | ||
295 | lower_file; | ||
296 | } | ||
297 | |||
298 | static inline struct ecryptfs_inode_info * | ||
299 | ecryptfs_inode_to_private(struct inode *inode) | ||
300 | { | ||
301 | return container_of(inode, struct ecryptfs_inode_info, vfs_inode); | ||
302 | } | ||
303 | |||
304 | static inline struct inode *ecryptfs_inode_to_lower(struct inode *inode) | ||
305 | { | ||
306 | return ecryptfs_inode_to_private(inode)->wii_inode; | ||
307 | } | ||
308 | |||
309 | static inline void | ||
310 | ecryptfs_set_inode_lower(struct inode *inode, struct inode *lower_inode) | ||
311 | { | ||
312 | ecryptfs_inode_to_private(inode)->wii_inode = lower_inode; | ||
313 | } | ||
314 | |||
315 | static inline struct ecryptfs_sb_info * | ||
316 | ecryptfs_superblock_to_private(struct super_block *sb) | ||
317 | { | ||
318 | return (struct ecryptfs_sb_info *)sb->s_fs_info; | ||
319 | } | ||
320 | |||
321 | static inline void | ||
322 | ecryptfs_set_superblock_private(struct super_block *sb, | ||
323 | struct ecryptfs_sb_info *sb_info) | ||
324 | { | ||
325 | sb->s_fs_info = sb_info; | ||
326 | } | ||
327 | |||
328 | static inline struct super_block * | ||
329 | ecryptfs_superblock_to_lower(struct super_block *sb) | ||
330 | { | ||
331 | return ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb; | ||
332 | } | ||
333 | |||
334 | static inline void | ||
335 | ecryptfs_set_superblock_lower(struct super_block *sb, | ||
336 | struct super_block *lower_sb) | ||
337 | { | ||
338 | ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb = lower_sb; | ||
339 | } | ||
340 | |||
341 | static inline struct ecryptfs_dentry_info * | ||
342 | ecryptfs_dentry_to_private(struct dentry *dentry) | ||
343 | { | ||
344 | return (struct ecryptfs_dentry_info *)dentry->d_fsdata; | ||
345 | } | ||
346 | |||
347 | static inline void | ||
348 | ecryptfs_set_dentry_private(struct dentry *dentry, | ||
349 | struct ecryptfs_dentry_info *dentry_info) | ||
350 | { | ||
351 | dentry->d_fsdata = dentry_info; | ||
352 | } | ||
353 | |||
354 | static inline struct dentry * | ||
355 | ecryptfs_dentry_to_lower(struct dentry *dentry) | ||
356 | { | ||
357 | return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry; | ||
358 | } | ||
359 | |||
360 | static inline void | ||
361 | ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry) | ||
362 | { | ||
363 | ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry = | ||
364 | lower_dentry; | ||
365 | } | ||
366 | |||
367 | static inline struct vfsmount * | ||
368 | ecryptfs_dentry_to_lower_mnt(struct dentry *dentry) | ||
369 | { | ||
370 | return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt; | ||
371 | } | ||
372 | |||
373 | static inline void | ||
374 | ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) | ||
375 | { | ||
376 | ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt = | ||
377 | lower_mnt; | ||
378 | } | ||
379 | |||
/* Log through __ecryptfs_printk(), prefixing the message with the
 * name of the calling function. @type is a log-level string literal
 * (e.g. KERN_DEBUG) that is pasted in front of the format string.
 *
 * The expansion deliberately does not end in a semicolon: the caller
 * supplies it, so that "if (x) ecryptfs_printk(...); else ..." parses
 * as a single statement followed by its else branch. */
#define ecryptfs_printk(type, fmt, arg...) \
	__ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg)
void __ecryptfs_printk(const char *fmt, ...);
383 | |||
/* Operation tables defined in the other eCryptfs source files */
extern const struct file_operations ecryptfs_main_fops;
extern const struct file_operations ecryptfs_dir_fops;
extern struct inode_operations ecryptfs_main_iops;
extern struct inode_operations ecryptfs_dir_iops;
extern struct inode_operations ecryptfs_symlink_iops;
extern struct super_operations ecryptfs_sops;
extern struct dentry_operations ecryptfs_dops;
extern struct address_space_operations ecryptfs_aops;
/* Debug verbosity; values above zero enable hex dumps of key material */
extern int ecryptfs_verbosity;

/* Slab caches for the eCryptfs private data structures */
extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
extern struct kmem_cache *ecryptfs_file_info_cache;
extern struct kmem_cache *ecryptfs_dentry_info_cache;
extern struct kmem_cache *ecryptfs_inode_info_cache;
extern struct kmem_cache *ecryptfs_sb_info_cache;
extern struct kmem_cache *ecryptfs_header_cache_0;
extern struct kmem_cache *ecryptfs_header_cache_1;
extern struct kmem_cache *ecryptfs_header_cache_2;
extern struct kmem_cache *ecryptfs_lower_page_cache;
403 | |||
/* Prototypes for functions shared between the eCryptfs source files.
 * See the definitions for the contract of each function. */
int ecryptfs_interpose(struct dentry *hidden_dentry,
		       struct dentry *this_dentry, struct super_block *sb,
		       int flag);
int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
/* Filename encode/decode currently just copy the name (filename
 * encryption is not implemented yet); the result is kmalloc()ed */
int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
			     const char *name, int length,
			     char **decrypted_name);
int ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
			     const char *name, int length,
			     char **encoded_name);
struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src);
void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src);
void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src);
void ecryptfs_dump_hex(char *data, int bytes);
int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
			int sg_size);
int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat);
void ecryptfs_rotate_iv(unsigned char *iv);
void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
void ecryptfs_destruct_mount_crypt_stat(
	struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
int ecryptfs_write_inode_size_to_header(struct file *lower_file,
					struct inode *lower_inode,
					struct inode *inode);
int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
			    struct file *lower_file,
			    unsigned long lower_page_index, int byte_offset,
			    int region_bytes);
int
ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
			   struct file *lower_file, int byte_offset,
			   int region_size);
int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
				struct file *lower_file);
int ecryptfs_do_readpage(struct file *file, struct page *page,
			 pgoff_t lower_page_index);
int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
				     char **lower_virt,
				     struct inode *lower_inode,
				     unsigned long lower_page_index);
int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
					      struct inode *lower_inode,
					      struct writeback_control *wbc);
int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx);
int ecryptfs_decrypt_page(struct file *file, struct page *page);
int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
			   struct file *lower_file);
int ecryptfs_write_headers_virt(char *page_virt,
				struct ecryptfs_crypt_stat *crypt_stat,
				struct dentry *ecryptfs_dentry);
int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
			  struct file *lower_file);
int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
int contains_ecryptfs_marker(char *data);
int ecryptfs_read_header_region(char *data, struct dentry *dentry,
				struct vfsmount *mnt);
u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat);
int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code);
void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat);
int ecryptfs_generate_key_packet_set(char *dest_base,
				     struct ecryptfs_crypt_stat *crypt_stat,
				     struct dentry *ecryptfs_dentry,
				     size_t *len, size_t max);
int process_request_key_err(long err_code);
int
ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
			  unsigned char *src, struct dentry *ecryptfs_dentry);
int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
/* Allocates both crypto contexts for cipher_name and validates
 * key_size against them; returns zero on success */
int
ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
			char *cipher_name, size_t key_size);
int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
481 | |||
482 | #endif /* #ifndef ECRYPTFS_KERNEL_H */ | ||
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c new file mode 100644 index 000000000000..c8550c9f9cd2 --- /dev/null +++ b/fs/ecryptfs/file.c | |||
@@ -0,0 +1,440 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * | ||
4 | * Copyright (C) 1997-2004 Erez Zadok | ||
5 | * Copyright (C) 2001-2004 Stony Brook University | ||
6 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
7 | * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> | ||
8 | * Michael C. Thompson <mcthomps@us.ibm.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License as | ||
12 | * published by the Free Software Foundation; either version 2 of the | ||
13 | * License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
23 | * 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/file.h> | ||
27 | #include <linux/poll.h> | ||
28 | #include <linux/mount.h> | ||
29 | #include <linux/pagemap.h> | ||
30 | #include <linux/security.h> | ||
31 | #include <linux/smp_lock.h> | ||
32 | #include <linux/compat.h> | ||
33 | #include "ecryptfs_kernel.h" | ||
34 | |||
35 | /** | ||
36 | * ecryptfs_llseek | ||
37 | * @file: File we are seeking in | ||
38 | * @offset: The offset to seek to | ||
39 | * @origin: 2 - offset from i_size; 1 - offset from f_pos | ||
40 | * | ||
41 | * Returns the position we have seeked to, or negative on error | ||
42 | */ | ||
43 | static loff_t ecryptfs_llseek(struct file *file, loff_t offset, int origin) | ||
44 | { | ||
45 | loff_t rv; | ||
46 | loff_t new_end_pos; | ||
47 | int rc; | ||
48 | int expanding_file = 0; | ||
49 | struct inode *inode = file->f_mapping->host; | ||
50 | |||
51 | /* If our offset is past the end of our file, we're going to | ||
52 | * need to grow it so we have a valid length of 0's */ | ||
53 | new_end_pos = offset; | ||
54 | switch (origin) { | ||
55 | case 2: | ||
56 | new_end_pos += i_size_read(inode); | ||
57 | expanding_file = 1; | ||
58 | break; | ||
59 | case 1: | ||
60 | new_end_pos += file->f_pos; | ||
61 | if (new_end_pos > i_size_read(inode)) { | ||
62 | ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) " | ||
63 | "> i_size_read(inode)(=[0x%.16x])\n", | ||
64 | new_end_pos, i_size_read(inode)); | ||
65 | expanding_file = 1; | ||
66 | } | ||
67 | break; | ||
68 | default: | ||
69 | if (new_end_pos > i_size_read(inode)) { | ||
70 | ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) " | ||
71 | "> i_size_read(inode)(=[0x%.16x])\n", | ||
72 | new_end_pos, i_size_read(inode)); | ||
73 | expanding_file = 1; | ||
74 | } | ||
75 | } | ||
76 | ecryptfs_printk(KERN_DEBUG, "new_end_pos = [0x%.16x]\n", new_end_pos); | ||
77 | if (expanding_file) { | ||
78 | rc = ecryptfs_truncate(file->f_dentry, new_end_pos); | ||
79 | if (rc) { | ||
80 | rv = rc; | ||
81 | ecryptfs_printk(KERN_ERR, "Error on attempt to " | ||
82 | "truncate to (higher) offset [0x%.16x];" | ||
83 | " rc = [%d]\n", new_end_pos, rc); | ||
84 | goto out; | ||
85 | } | ||
86 | } | ||
87 | rv = generic_file_llseek(file, offset, origin); | ||
88 | out: | ||
89 | return rv; | ||
90 | } | ||
91 | |||
92 | /** | ||
93 | * ecryptfs_read_update_atime | ||
94 | * | ||
95 | * generic_file_read updates the atime of upper layer inode. But, it | ||
96 | * doesn't give us a chance to update the atime of the lower layer | ||
97 | * inode. This function is a wrapper to generic_file_read. It | ||
98 | * updates the atime of the lower level inode if generic_file_read | ||
99 | * returns without any errors. This is to be used only for file reads. | ||
100 | * The function to be used for directory reads is ecryptfs_read. | ||
101 | */ | ||
102 | static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, | ||
103 | const struct iovec *iov, | ||
104 | unsigned long nr_segs, loff_t pos) | ||
105 | { | ||
106 | int rc; | ||
107 | struct dentry *lower_dentry; | ||
108 | struct vfsmount *lower_vfsmount; | ||
109 | struct file *file = iocb->ki_filp; | ||
110 | |||
111 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); | ||
112 | /* | ||
113 | * Even though this is a async interface, we need to wait | ||
114 | * for IO to finish to update atime | ||
115 | */ | ||
116 | if (-EIOCBQUEUED == rc) | ||
117 | rc = wait_on_sync_kiocb(iocb); | ||
118 | if (rc >= 0) { | ||
119 | lower_dentry = ecryptfs_dentry_to_lower(file->f_dentry); | ||
120 | lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_dentry); | ||
121 | touch_atime(lower_vfsmount, lower_dentry); | ||
122 | } | ||
123 | return rc; | ||
124 | } | ||
125 | |||
126 | struct ecryptfs_getdents_callback { | ||
127 | void *dirent; | ||
128 | struct dentry *dentry; | ||
129 | filldir_t filldir; | ||
130 | int err; | ||
131 | int filldir_called; | ||
132 | int entries_written; | ||
133 | }; | ||
134 | |||
135 | /* Inspired by generic filldir in fs/readir.c */ | ||
136 | static int | ||
137 | ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, | ||
138 | u64 ino, unsigned int d_type) | ||
139 | { | ||
140 | struct ecryptfs_crypt_stat *crypt_stat; | ||
141 | struct ecryptfs_getdents_callback *buf = | ||
142 | (struct ecryptfs_getdents_callback *)dirent; | ||
143 | int rc; | ||
144 | int decoded_length; | ||
145 | char *decoded_name; | ||
146 | |||
147 | crypt_stat = ecryptfs_dentry_to_private(buf->dentry)->crypt_stat; | ||
148 | buf->filldir_called++; | ||
149 | decoded_length = ecryptfs_decode_filename(crypt_stat, name, namelen, | ||
150 | &decoded_name); | ||
151 | if (decoded_length < 0) { | ||
152 | rc = decoded_length; | ||
153 | goto out; | ||
154 | } | ||
155 | rc = buf->filldir(buf->dirent, decoded_name, decoded_length, offset, | ||
156 | ino, d_type); | ||
157 | kfree(decoded_name); | ||
158 | if (rc >= 0) | ||
159 | buf->entries_written++; | ||
160 | out: | ||
161 | return rc; | ||
162 | } | ||
163 | |||
164 | /** | ||
165 | * ecryptfs_readdir | ||
166 | * @file: The ecryptfs file struct | ||
167 | * @dirent: Directory entry | ||
168 | * @filldir: The filldir callback function | ||
169 | */ | ||
170 | static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) | ||
171 | { | ||
172 | int rc; | ||
173 | struct file *lower_file; | ||
174 | struct inode *inode; | ||
175 | struct ecryptfs_getdents_callback buf; | ||
176 | |||
177 | lower_file = ecryptfs_file_to_lower(file); | ||
178 | lower_file->f_pos = file->f_pos; | ||
179 | inode = file->f_dentry->d_inode; | ||
180 | memset(&buf, 0, sizeof(buf)); | ||
181 | buf.dirent = dirent; | ||
182 | buf.dentry = file->f_dentry; | ||
183 | buf.filldir = filldir; | ||
184 | retry: | ||
185 | buf.filldir_called = 0; | ||
186 | buf.entries_written = 0; | ||
187 | buf.err = 0; | ||
188 | rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); | ||
189 | if (buf.err) | ||
190 | rc = buf.err; | ||
191 | if (buf.filldir_called && !buf.entries_written) | ||
192 | goto retry; | ||
193 | file->f_pos = lower_file->f_pos; | ||
194 | if (rc >= 0) | ||
195 | ecryptfs_copy_attr_atime(inode, lower_file->f_dentry->d_inode); | ||
196 | return rc; | ||
197 | } | ||
198 | |||
199 | struct kmem_cache *ecryptfs_file_info_cache; | ||
200 | |||
201 | /** | ||
202 | * ecryptfs_open | ||
203 | * @inode: inode speciying file to open | ||
204 | * @file: Structure to return filled in | ||
205 | * | ||
206 | * Opens the file specified by inode. | ||
207 | * | ||
208 | * Returns zero on success; non-zero otherwise | ||
209 | */ | ||
210 | static int ecryptfs_open(struct inode *inode, struct file *file) | ||
211 | { | ||
212 | int rc = 0; | ||
213 | struct ecryptfs_crypt_stat *crypt_stat = NULL; | ||
214 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat; | ||
215 | struct dentry *ecryptfs_dentry = file->f_dentry; | ||
216 | /* Private value of ecryptfs_dentry allocated in | ||
217 | * ecryptfs_lookup() */ | ||
218 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); | ||
219 | struct inode *lower_inode = NULL; | ||
220 | struct file *lower_file = NULL; | ||
221 | struct vfsmount *lower_mnt; | ||
222 | struct ecryptfs_file_info *file_info; | ||
223 | int lower_flags; | ||
224 | |||
225 | /* Released in ecryptfs_release or end of function if failure */ | ||
226 | file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL); | ||
227 | ecryptfs_set_file_private(file, file_info); | ||
228 | if (!file_info) { | ||
229 | ecryptfs_printk(KERN_ERR, | ||
230 | "Error attempting to allocate memory\n"); | ||
231 | rc = -ENOMEM; | ||
232 | goto out; | ||
233 | } | ||
234 | memset(file_info, 0, sizeof(*file_info)); | ||
235 | lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); | ||
236 | crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; | ||
237 | mount_crypt_stat = &ecryptfs_superblock_to_private( | ||
238 | ecryptfs_dentry->d_sb)->mount_crypt_stat; | ||
239 | mutex_lock(&crypt_stat->cs_mutex); | ||
240 | if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) { | ||
241 | ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n"); | ||
242 | /* Policy code enabled in future release */ | ||
243 | ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED); | ||
244 | ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); | ||
245 | } | ||
246 | mutex_unlock(&crypt_stat->cs_mutex); | ||
247 | /* This mntget & dget is undone via fput when the file is released */ | ||
248 | dget(lower_dentry); | ||
249 | lower_flags = file->f_flags; | ||
250 | if ((lower_flags & O_ACCMODE) == O_WRONLY) | ||
251 | lower_flags = (lower_flags & O_ACCMODE) | O_RDWR; | ||
252 | if (file->f_flags & O_APPEND) | ||
253 | lower_flags &= ~O_APPEND; | ||
254 | lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); | ||
255 | mntget(lower_mnt); | ||
256 | /* Corresponding fput() in ecryptfs_release() */ | ||
257 | lower_file = dentry_open(lower_dentry, lower_mnt, lower_flags); | ||
258 | if (IS_ERR(lower_file)) { | ||
259 | rc = PTR_ERR(lower_file); | ||
260 | ecryptfs_printk(KERN_ERR, "Error opening lower file\n"); | ||
261 | goto out_puts; | ||
262 | } | ||
263 | ecryptfs_set_file_lower(file, lower_file); | ||
264 | /* Isn't this check the same as the one in lookup? */ | ||
265 | lower_inode = lower_dentry->d_inode; | ||
266 | if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { | ||
267 | ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); | ||
268 | ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); | ||
269 | rc = 0; | ||
270 | goto out; | ||
271 | } | ||
272 | mutex_lock(&crypt_stat->cs_mutex); | ||
273 | if (i_size_read(lower_inode) < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) { | ||
274 | if (!(mount_crypt_stat->flags | ||
275 | & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { | ||
276 | rc = -EIO; | ||
277 | printk(KERN_WARNING "Attempt to read file that is " | ||
278 | "not in a valid eCryptfs format, and plaintext " | ||
279 | "passthrough mode is not enabled; returning " | ||
280 | "-EIO\n"); | ||
281 | mutex_unlock(&crypt_stat->cs_mutex); | ||
282 | goto out_puts; | ||
283 | } | ||
284 | crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); | ||
285 | rc = 0; | ||
286 | mutex_unlock(&crypt_stat->cs_mutex); | ||
287 | goto out; | ||
288 | } else if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, | ||
289 | ECRYPTFS_POLICY_APPLIED) | ||
290 | || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, | ||
291 | ECRYPTFS_KEY_VALID)) { | ||
292 | rc = ecryptfs_read_headers(ecryptfs_dentry, lower_file); | ||
293 | if (rc) { | ||
294 | ecryptfs_printk(KERN_DEBUG, | ||
295 | "Valid headers not found\n"); | ||
296 | if (!(mount_crypt_stat->flags | ||
297 | & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { | ||
298 | rc = -EIO; | ||
299 | printk(KERN_WARNING "Attempt to read file that " | ||
300 | "is not in a valid eCryptfs format, " | ||
301 | "and plaintext passthrough mode is not " | ||
302 | "enabled; returning -EIO\n"); | ||
303 | mutex_unlock(&crypt_stat->cs_mutex); | ||
304 | goto out_puts; | ||
305 | } | ||
306 | ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, | ||
307 | ECRYPTFS_ENCRYPTED); | ||
308 | rc = 0; | ||
309 | mutex_unlock(&crypt_stat->cs_mutex); | ||
310 | goto out; | ||
311 | } | ||
312 | } | ||
313 | mutex_unlock(&crypt_stat->cs_mutex); | ||
314 | ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] " | ||
315 | "size: [0x%.16x]\n", inode, inode->i_ino, | ||
316 | i_size_read(inode)); | ||
317 | ecryptfs_set_file_lower(file, lower_file); | ||
318 | goto out; | ||
319 | out_puts: | ||
320 | mntput(lower_mnt); | ||
321 | dput(lower_dentry); | ||
322 | kmem_cache_free(ecryptfs_file_info_cache, | ||
323 | ecryptfs_file_to_private(file)); | ||
324 | out: | ||
325 | return rc; | ||
326 | } | ||
327 | |||
328 | static int ecryptfs_flush(struct file *file, fl_owner_t td) | ||
329 | { | ||
330 | int rc = 0; | ||
331 | struct file *lower_file = NULL; | ||
332 | |||
333 | lower_file = ecryptfs_file_to_lower(file); | ||
334 | if (lower_file->f_op && lower_file->f_op->flush) | ||
335 | rc = lower_file->f_op->flush(lower_file, td); | ||
336 | return rc; | ||
337 | } | ||
338 | |||
339 | static int ecryptfs_release(struct inode *inode, struct file *file) | ||
340 | { | ||
341 | struct file *lower_file = ecryptfs_file_to_lower(file); | ||
342 | struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file); | ||
343 | struct inode *lower_inode = ecryptfs_inode_to_lower(inode); | ||
344 | |||
345 | fput(lower_file); | ||
346 | inode->i_blocks = lower_inode->i_blocks; | ||
347 | kmem_cache_free(ecryptfs_file_info_cache, file_info); | ||
348 | return 0; | ||
349 | } | ||
350 | |||
351 | static int | ||
352 | ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) | ||
353 | { | ||
354 | struct file *lower_file = ecryptfs_file_to_lower(file); | ||
355 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
356 | struct inode *lower_inode = lower_dentry->d_inode; | ||
357 | int rc = -EINVAL; | ||
358 | |||
359 | if (lower_inode->i_fop->fsync) { | ||
360 | mutex_lock(&lower_inode->i_mutex); | ||
361 | rc = lower_inode->i_fop->fsync(lower_file, lower_dentry, | ||
362 | datasync); | ||
363 | mutex_unlock(&lower_inode->i_mutex); | ||
364 | } | ||
365 | return rc; | ||
366 | } | ||
367 | |||
368 | static int ecryptfs_fasync(int fd, struct file *file, int flag) | ||
369 | { | ||
370 | int rc = 0; | ||
371 | struct file *lower_file = NULL; | ||
372 | |||
373 | lower_file = ecryptfs_file_to_lower(file); | ||
374 | if (lower_file->f_op && lower_file->f_op->fasync) | ||
375 | rc = lower_file->f_op->fasync(fd, lower_file, flag); | ||
376 | return rc; | ||
377 | } | ||
378 | |||
379 | static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos, | ||
380 | size_t count, read_actor_t actor, void *target) | ||
381 | { | ||
382 | struct file *lower_file = NULL; | ||
383 | int rc = -EINVAL; | ||
384 | |||
385 | lower_file = ecryptfs_file_to_lower(file); | ||
386 | if (lower_file->f_op && lower_file->f_op->sendfile) | ||
387 | rc = lower_file->f_op->sendfile(lower_file, ppos, count, | ||
388 | actor, target); | ||
389 | |||
390 | return rc; | ||
391 | } | ||
392 | |||
393 | static int ecryptfs_ioctl(struct inode *inode, struct file *file, | ||
394 | unsigned int cmd, unsigned long arg); | ||
395 | |||
396 | const struct file_operations ecryptfs_dir_fops = { | ||
397 | .readdir = ecryptfs_readdir, | ||
398 | .ioctl = ecryptfs_ioctl, | ||
399 | .mmap = generic_file_mmap, | ||
400 | .open = ecryptfs_open, | ||
401 | .flush = ecryptfs_flush, | ||
402 | .release = ecryptfs_release, | ||
403 | .fsync = ecryptfs_fsync, | ||
404 | .fasync = ecryptfs_fasync, | ||
405 | .sendfile = ecryptfs_sendfile, | ||
406 | }; | ||
407 | |||
408 | const struct file_operations ecryptfs_main_fops = { | ||
409 | .llseek = ecryptfs_llseek, | ||
410 | .read = do_sync_read, | ||
411 | .aio_read = ecryptfs_read_update_atime, | ||
412 | .write = do_sync_write, | ||
413 | .aio_write = generic_file_aio_write, | ||
414 | .readdir = ecryptfs_readdir, | ||
415 | .ioctl = ecryptfs_ioctl, | ||
416 | .mmap = generic_file_mmap, | ||
417 | .open = ecryptfs_open, | ||
418 | .flush = ecryptfs_flush, | ||
419 | .release = ecryptfs_release, | ||
420 | .fsync = ecryptfs_fsync, | ||
421 | .fasync = ecryptfs_fasync, | ||
422 | .sendfile = ecryptfs_sendfile, | ||
423 | }; | ||
424 | |||
425 | static int | ||
426 | ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | ||
427 | unsigned long arg) | ||
428 | { | ||
429 | int rc = 0; | ||
430 | struct file *lower_file = NULL; | ||
431 | |||
432 | if (ecryptfs_file_to_private(file)) | ||
433 | lower_file = ecryptfs_file_to_lower(file); | ||
434 | if (lower_file && lower_file->f_op && lower_file->f_op->ioctl) | ||
435 | rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode), | ||
436 | lower_file, cmd, arg); | ||
437 | else | ||
438 | rc = -ENOTTY; | ||
439 | return rc; | ||
440 | } | ||
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c new file mode 100644 index 000000000000..efdd2b7b62d7 --- /dev/null +++ b/fs/ecryptfs/inode.c | |||
@@ -0,0 +1,1079 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * | ||
4 | * Copyright (C) 1997-2004 Erez Zadok | ||
5 | * Copyright (C) 2001-2004 Stony Brook University | ||
6 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
7 | * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> | ||
8 | * Michael C. Thompson <mcthomps@us.ibm.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License as | ||
12 | * published by the Free Software Foundation; either version 2 of the | ||
13 | * License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
23 | * 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/file.h> | ||
27 | #include <linux/vmalloc.h> | ||
28 | #include <linux/pagemap.h> | ||
29 | #include <linux/dcache.h> | ||
30 | #include <linux/namei.h> | ||
31 | #include <linux/mount.h> | ||
32 | #include <linux/crypto.h> | ||
33 | #include "ecryptfs_kernel.h" | ||
34 | |||
35 | static struct dentry *lock_parent(struct dentry *dentry) | ||
36 | { | ||
37 | struct dentry *dir; | ||
38 | |||
39 | dir = dget(dentry->d_parent); | ||
40 | mutex_lock(&(dir->d_inode->i_mutex)); | ||
41 | return dir; | ||
42 | } | ||
43 | |||
44 | static void unlock_parent(struct dentry *dentry) | ||
45 | { | ||
46 | mutex_unlock(&(dentry->d_parent->d_inode->i_mutex)); | ||
47 | dput(dentry->d_parent); | ||
48 | } | ||
49 | |||
50 | static void unlock_dir(struct dentry *dir) | ||
51 | { | ||
52 | mutex_unlock(&dir->d_inode->i_mutex); | ||
53 | dput(dir); | ||
54 | } | ||
55 | |||
56 | void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src) | ||
57 | { | ||
58 | i_size_write(dst, i_size_read((struct inode *)src)); | ||
59 | dst->i_blocks = src->i_blocks; | ||
60 | } | ||
61 | |||
62 | void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src) | ||
63 | { | ||
64 | dest->i_atime = src->i_atime; | ||
65 | } | ||
66 | |||
67 | static void ecryptfs_copy_attr_times(struct inode *dest, | ||
68 | const struct inode *src) | ||
69 | { | ||
70 | dest->i_atime = src->i_atime; | ||
71 | dest->i_mtime = src->i_mtime; | ||
72 | dest->i_ctime = src->i_ctime; | ||
73 | } | ||
74 | |||
/* Copy the three timestamps as well as i_size and i_blocks from @src to
 * @dest */
static void ecryptfs_copy_attr_timesizes(struct inode *dest,
					 const struct inode *src)
{
	ecryptfs_copy_attr_times(dest, src);
	ecryptfs_copy_inode_size(dest, src);
}
83 | |||
84 | void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src) | ||
85 | { | ||
86 | dest->i_mode = src->i_mode; | ||
87 | dest->i_nlink = src->i_nlink; | ||
88 | dest->i_uid = src->i_uid; | ||
89 | dest->i_gid = src->i_gid; | ||
90 | dest->i_rdev = src->i_rdev; | ||
91 | dest->i_atime = src->i_atime; | ||
92 | dest->i_mtime = src->i_mtime; | ||
93 | dest->i_ctime = src->i_ctime; | ||
94 | dest->i_blkbits = src->i_blkbits; | ||
95 | dest->i_flags = src->i_flags; | ||
96 | } | ||
97 | |||
98 | /** | ||
99 | * ecryptfs_create_underlying_file | ||
100 | * @lower_dir_inode: inode of the parent in the lower fs of the new file | ||
101 | * @lower_dentry: New file's dentry in the lower fs | ||
102 | * @ecryptfs_dentry: New file's dentry in ecryptfs | ||
103 | * @mode: The mode of the new file | ||
104 | * @nd: nameidata of ecryptfs' parent's dentry & vfsmount | ||
105 | * | ||
106 | * Creates the file in the lower file system. | ||
107 | * | ||
108 | * Returns zero on success; non-zero on error condition | ||
109 | */ | ||
110 | static int | ||
111 | ecryptfs_create_underlying_file(struct inode *lower_dir_inode, | ||
112 | struct dentry *dentry, int mode, | ||
113 | struct nameidata *nd) | ||
114 | { | ||
115 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
116 | struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); | ||
117 | struct dentry *dentry_save; | ||
118 | struct vfsmount *vfsmount_save; | ||
119 | int rc; | ||
120 | |||
121 | dentry_save = nd->dentry; | ||
122 | vfsmount_save = nd->mnt; | ||
123 | nd->dentry = lower_dentry; | ||
124 | nd->mnt = lower_mnt; | ||
125 | rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); | ||
126 | nd->dentry = dentry_save; | ||
127 | nd->mnt = vfsmount_save; | ||
128 | return rc; | ||
129 | } | ||
130 | |||
131 | /** | ||
132 | * ecryptfs_do_create | ||
133 | * @directory_inode: inode of the new file's dentry's parent in ecryptfs | ||
134 | * @ecryptfs_dentry: New file's dentry in ecryptfs | ||
135 | * @mode: The mode of the new file | ||
136 | * @nd: nameidata of ecryptfs' parent's dentry & vfsmount | ||
137 | * | ||
138 | * Creates the underlying file and the eCryptfs inode which will link to | ||
139 | * it. It will also update the eCryptfs directory inode to mimic the | ||
140 | * stat of the lower directory inode. | ||
141 | * | ||
142 | * Returns zero on success; non-zero on error condition | ||
143 | */ | ||
144 | static int | ||
145 | ecryptfs_do_create(struct inode *directory_inode, | ||
146 | struct dentry *ecryptfs_dentry, int mode, | ||
147 | struct nameidata *nd) | ||
148 | { | ||
149 | int rc; | ||
150 | struct dentry *lower_dentry; | ||
151 | struct dentry *lower_dir_dentry; | ||
152 | |||
153 | lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); | ||
154 | lower_dir_dentry = lock_parent(lower_dentry); | ||
155 | if (unlikely(IS_ERR(lower_dir_dentry))) { | ||
156 | ecryptfs_printk(KERN_ERR, "Error locking directory of " | ||
157 | "dentry\n"); | ||
158 | rc = PTR_ERR(lower_dir_dentry); | ||
159 | goto out; | ||
160 | } | ||
161 | rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, | ||
162 | ecryptfs_dentry, mode, nd); | ||
163 | if (unlikely(rc)) { | ||
164 | ecryptfs_printk(KERN_ERR, | ||
165 | "Failure to create underlying file\n"); | ||
166 | goto out_lock; | ||
167 | } | ||
168 | rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, | ||
169 | directory_inode->i_sb, 0); | ||
170 | if (rc) { | ||
171 | ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n"); | ||
172 | goto out_lock; | ||
173 | } | ||
174 | ecryptfs_copy_attr_timesizes(directory_inode, | ||
175 | lower_dir_dentry->d_inode); | ||
176 | out_lock: | ||
177 | unlock_dir(lower_dir_dentry); | ||
178 | out: | ||
179 | return rc; | ||
180 | } | ||
181 | |||
182 | /** | ||
183 | * grow_file | ||
184 | * @ecryptfs_dentry: the ecryptfs dentry | ||
185 | * @lower_file: The lower file | ||
186 | * @inode: The ecryptfs inode | ||
187 | * @lower_inode: The lower inode | ||
188 | * | ||
189 | * This is the code which will grow the file to its correct size. | ||
190 | */ | ||
191 | static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file, | ||
192 | struct inode *inode, struct inode *lower_inode) | ||
193 | { | ||
194 | int rc = 0; | ||
195 | struct file fake_file; | ||
196 | struct ecryptfs_file_info tmp_file_info; | ||
197 | |||
198 | memset(&fake_file, 0, sizeof(fake_file)); | ||
199 | fake_file.f_dentry = ecryptfs_dentry; | ||
200 | memset(&tmp_file_info, 0, sizeof(tmp_file_info)); | ||
201 | ecryptfs_set_file_private(&fake_file, &tmp_file_info); | ||
202 | ecryptfs_set_file_lower(&fake_file, lower_file); | ||
203 | rc = ecryptfs_fill_zeros(&fake_file, 1); | ||
204 | if (rc) { | ||
205 | ECRYPTFS_SET_FLAG( | ||
206 | ecryptfs_inode_to_private(inode)->crypt_stat.flags, | ||
207 | ECRYPTFS_SECURITY_WARNING); | ||
208 | ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros " | ||
209 | "in file; rc = [%d]\n", rc); | ||
210 | goto out; | ||
211 | } | ||
212 | i_size_write(inode, 0); | ||
213 | ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode); | ||
214 | ECRYPTFS_SET_FLAG(ecryptfs_inode_to_private(inode)->crypt_stat.flags, | ||
215 | ECRYPTFS_NEW_FILE); | ||
216 | out: | ||
217 | return rc; | ||
218 | } | ||
219 | |||
220 | /** | ||
221 | * ecryptfs_initialize_file | ||
222 | * | ||
223 | * Cause the file to be changed from a basic empty file to an ecryptfs | ||
224 | * file with a header and first data page. | ||
225 | * | ||
226 | * Returns zero on success | ||
227 | */ | ||
228 | static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) | ||
229 | { | ||
230 | int rc = 0; | ||
231 | int lower_flags; | ||
232 | struct ecryptfs_crypt_stat *crypt_stat; | ||
233 | struct dentry *lower_dentry; | ||
234 | struct dentry *tlower_dentry = NULL; | ||
235 | struct file *lower_file; | ||
236 | struct inode *inode, *lower_inode; | ||
237 | struct vfsmount *lower_mnt; | ||
238 | |||
239 | lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); | ||
240 | ecryptfs_printk(KERN_DEBUG, "lower_dentry->d_name.name = [%s]\n", | ||
241 | lower_dentry->d_name.name); | ||
242 | inode = ecryptfs_dentry->d_inode; | ||
243 | crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; | ||
244 | tlower_dentry = dget(lower_dentry); | ||
245 | if (!tlower_dentry) { | ||
246 | rc = -ENOMEM; | ||
247 | ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry\n"); | ||
248 | goto out; | ||
249 | } | ||
250 | lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR; | ||
251 | #if BITS_PER_LONG != 32 | ||
252 | lower_flags |= O_LARGEFILE; | ||
253 | #endif | ||
254 | lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); | ||
255 | mntget(lower_mnt); | ||
256 | /* Corresponding fput() at end of this function */ | ||
257 | lower_file = dentry_open(tlower_dentry, lower_mnt, lower_flags); | ||
258 | if (IS_ERR(lower_file)) { | ||
259 | rc = PTR_ERR(lower_file); | ||
260 | ecryptfs_printk(KERN_ERR, | ||
261 | "Error opening dentry; rc = [%i]\n", rc); | ||
262 | goto out; | ||
263 | } | ||
264 | /* fput(lower_file) should handle the puts if we do this */ | ||
265 | lower_file->f_dentry = tlower_dentry; | ||
266 | lower_file->f_vfsmnt = lower_mnt; | ||
267 | lower_inode = tlower_dentry->d_inode; | ||
268 | if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { | ||
269 | ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); | ||
270 | ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); | ||
271 | goto out_fput; | ||
272 | } | ||
273 | ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE); | ||
274 | ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n"); | ||
275 | rc = ecryptfs_new_file_context(ecryptfs_dentry); | ||
276 | if (rc) { | ||
277 | ecryptfs_printk(KERN_DEBUG, "Error creating new file " | ||
278 | "context\n"); | ||
279 | goto out_fput; | ||
280 | } | ||
281 | rc = ecryptfs_write_headers(ecryptfs_dentry, lower_file); | ||
282 | if (rc) { | ||
283 | ecryptfs_printk(KERN_DEBUG, "Error writing headers\n"); | ||
284 | goto out_fput; | ||
285 | } | ||
286 | rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode); | ||
287 | out_fput: | ||
288 | fput(lower_file); | ||
289 | out: | ||
290 | return rc; | ||
291 | } | ||
292 | |||
293 | /** | ||
294 | * ecryptfs_create | ||
295 | * @dir: The inode of the directory in which to create the file. | ||
296 | * @dentry: The eCryptfs dentry | ||
297 | * @mode: The mode of the new file. | ||
298 | * @nd: nameidata | ||
299 | * | ||
300 | * Creates a new file. | ||
301 | * | ||
302 | * Returns zero on success; non-zero on error condition | ||
303 | */ | ||
304 | static int | ||
305 | ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, | ||
306 | int mode, struct nameidata *nd) | ||
307 | { | ||
308 | int rc; | ||
309 | |||
310 | rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd); | ||
311 | if (unlikely(rc)) { | ||
312 | ecryptfs_printk(KERN_WARNING, "Failed to create file in" | ||
313 | "lower filesystem\n"); | ||
314 | goto out; | ||
315 | } | ||
316 | /* At this point, a file exists on "disk"; we need to make sure | ||
317 | * that this on disk file is prepared to be an ecryptfs file */ | ||
318 | rc = ecryptfs_initialize_file(ecryptfs_dentry); | ||
319 | out: | ||
320 | return rc; | ||
321 | } | ||
322 | |||
323 | /** | ||
324 | * ecryptfs_lookup | ||
325 | * @dir: inode | ||
326 | * @dentry: The dentry | ||
327 | * @nd: nameidata, may be NULL | ||
328 | * | ||
329 | * Find a file on disk. If the file does not exist, then we'll add it to the | ||
330 | * dentry cache and continue on to read it from the disk. | ||
331 | */ | ||
332 | static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, | ||
333 | struct nameidata *nd) | ||
334 | { | ||
335 | int rc = 0; | ||
336 | struct dentry *lower_dir_dentry; | ||
337 | struct dentry *lower_dentry; | ||
338 | struct vfsmount *lower_mnt; | ||
339 | struct dentry *tlower_dentry = NULL; | ||
340 | char *encoded_name; | ||
341 | unsigned int encoded_namelen; | ||
342 | struct ecryptfs_crypt_stat *crypt_stat = NULL; | ||
343 | char *page_virt = NULL; | ||
344 | struct inode *lower_inode; | ||
345 | u64 file_size; | ||
346 | |||
347 | lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); | ||
348 | dentry->d_op = &ecryptfs_dops; | ||
349 | if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, ".")) | ||
350 | || (dentry->d_name.len == 2 && !strcmp(dentry->d_name.name, ".."))) | ||
351 | goto out_drop; | ||
352 | encoded_namelen = ecryptfs_encode_filename(crypt_stat, | ||
353 | dentry->d_name.name, | ||
354 | dentry->d_name.len, | ||
355 | &encoded_name); | ||
356 | if (encoded_namelen < 0) { | ||
357 | rc = encoded_namelen; | ||
358 | goto out_drop; | ||
359 | } | ||
360 | ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen " | ||
361 | "= [%d]\n", encoded_name, encoded_namelen); | ||
362 | lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry, | ||
363 | encoded_namelen - 1); | ||
364 | kfree(encoded_name); | ||
365 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); | ||
366 | if (IS_ERR(lower_dentry)) { | ||
367 | ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n"); | ||
368 | rc = PTR_ERR(lower_dentry); | ||
369 | goto out_drop; | ||
370 | } | ||
371 | ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->" | ||
372 | "d_name.name = [%s]\n", lower_dentry, | ||
373 | lower_dentry->d_name.name); | ||
374 | lower_inode = lower_dentry->d_inode; | ||
375 | ecryptfs_copy_attr_atime(dir, lower_dir_dentry->d_inode); | ||
376 | BUG_ON(!atomic_read(&lower_dentry->d_count)); | ||
377 | ecryptfs_set_dentry_private(dentry, | ||
378 | kmem_cache_alloc(ecryptfs_dentry_info_cache, | ||
379 | SLAB_KERNEL)); | ||
380 | if (!ecryptfs_dentry_to_private(dentry)) { | ||
381 | rc = -ENOMEM; | ||
382 | ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting " | ||
383 | "to allocate ecryptfs_dentry_info struct\n"); | ||
384 | goto out_dput; | ||
385 | } | ||
386 | ecryptfs_set_dentry_lower(dentry, lower_dentry); | ||
387 | ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); | ||
388 | if (!lower_dentry->d_inode) { | ||
389 | /* We want to add because we couldn't find in lower */ | ||
390 | d_add(dentry, NULL); | ||
391 | goto out; | ||
392 | } | ||
393 | rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1); | ||
394 | if (rc) { | ||
395 | ecryptfs_printk(KERN_ERR, "Error interposing\n"); | ||
396 | goto out_dput; | ||
397 | } | ||
398 | if (S_ISDIR(lower_inode->i_mode)) { | ||
399 | ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n"); | ||
400 | goto out; | ||
401 | } | ||
402 | if (S_ISLNK(lower_inode->i_mode)) { | ||
403 | ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n"); | ||
404 | goto out; | ||
405 | } | ||
406 | if (!nd) { | ||
407 | ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave" | ||
408 | "as we *think* we are about to unlink\n"); | ||
409 | goto out; | ||
410 | } | ||
411 | tlower_dentry = dget(lower_dentry); | ||
412 | if (!tlower_dentry || IS_ERR(tlower_dentry)) { | ||
413 | rc = -ENOMEM; | ||
414 | ecryptfs_printk(KERN_ERR, "Cannot dget lower_dentry\n"); | ||
415 | goto out_dput; | ||
416 | } | ||
417 | /* Released in this function */ | ||
418 | page_virt = | ||
419 | (char *)kmem_cache_alloc(ecryptfs_header_cache_2, | ||
420 | SLAB_USER); | ||
421 | if (!page_virt) { | ||
422 | rc = -ENOMEM; | ||
423 | ecryptfs_printk(KERN_ERR, | ||
424 | "Cannot ecryptfs_kmalloc a page\n"); | ||
425 | goto out_dput; | ||
426 | } | ||
427 | memset(page_virt, 0, PAGE_CACHE_SIZE); | ||
428 | rc = ecryptfs_read_header_region(page_virt, tlower_dentry, nd->mnt); | ||
429 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; | ||
430 | if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) | ||
431 | ecryptfs_set_default_sizes(crypt_stat); | ||
432 | if (rc) { | ||
433 | rc = 0; | ||
434 | ecryptfs_printk(KERN_WARNING, "Error reading header region;" | ||
435 | " assuming unencrypted\n"); | ||
436 | } else { | ||
437 | if (!contains_ecryptfs_marker(page_virt | ||
438 | + ECRYPTFS_FILE_SIZE_BYTES)) { | ||
439 | kmem_cache_free(ecryptfs_header_cache_2, page_virt); | ||
440 | goto out; | ||
441 | } | ||
442 | memcpy(&file_size, page_virt, sizeof(file_size)); | ||
443 | file_size = be64_to_cpu(file_size); | ||
444 | i_size_write(dentry->d_inode, (loff_t)file_size); | ||
445 | } | ||
446 | kmem_cache_free(ecryptfs_header_cache_2, page_virt); | ||
447 | goto out; | ||
448 | |||
449 | out_dput: | ||
450 | dput(lower_dentry); | ||
451 | if (tlower_dentry) | ||
452 | dput(tlower_dentry); | ||
453 | out_drop: | ||
454 | d_drop(dentry); | ||
455 | out: | ||
456 | return ERR_PTR(rc); | ||
457 | } | ||
458 | |||
459 | static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, | ||
460 | struct dentry *new_dentry) | ||
461 | { | ||
462 | struct dentry *lower_old_dentry; | ||
463 | struct dentry *lower_new_dentry; | ||
464 | struct dentry *lower_dir_dentry; | ||
465 | u64 file_size_save; | ||
466 | int rc; | ||
467 | |||
468 | file_size_save = i_size_read(old_dentry->d_inode); | ||
469 | lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); | ||
470 | lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); | ||
471 | dget(lower_old_dentry); | ||
472 | dget(lower_new_dentry); | ||
473 | lower_dir_dentry = lock_parent(lower_new_dentry); | ||
474 | rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, | ||
475 | lower_new_dentry); | ||
476 | if (rc || !lower_new_dentry->d_inode) | ||
477 | goto out_lock; | ||
478 | rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); | ||
479 | if (rc) | ||
480 | goto out_lock; | ||
481 | ecryptfs_copy_attr_timesizes(dir, lower_new_dentry->d_inode); | ||
482 | old_dentry->d_inode->i_nlink = | ||
483 | ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; | ||
484 | i_size_write(new_dentry->d_inode, file_size_save); | ||
485 | out_lock: | ||
486 | unlock_dir(lower_dir_dentry); | ||
487 | dput(lower_new_dentry); | ||
488 | dput(lower_old_dentry); | ||
489 | if (!new_dentry->d_inode) | ||
490 | d_drop(new_dentry); | ||
491 | return rc; | ||
492 | } | ||
493 | |||
494 | static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) | ||
495 | { | ||
496 | int rc = 0; | ||
497 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
498 | struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); | ||
499 | |||
500 | lock_parent(lower_dentry); | ||
501 | rc = vfs_unlink(lower_dir_inode, lower_dentry); | ||
502 | if (rc) { | ||
503 | ecryptfs_printk(KERN_ERR, "Error in vfs_unlink\n"); | ||
504 | goto out_unlock; | ||
505 | } | ||
506 | ecryptfs_copy_attr_times(dir, lower_dir_inode); | ||
507 | dentry->d_inode->i_nlink = | ||
508 | ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink; | ||
509 | dentry->d_inode->i_ctime = dir->i_ctime; | ||
510 | out_unlock: | ||
511 | unlock_parent(lower_dentry); | ||
512 | return rc; | ||
513 | } | ||
514 | |||
515 | static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, | ||
516 | const char *symname) | ||
517 | { | ||
518 | int rc; | ||
519 | struct dentry *lower_dentry; | ||
520 | struct dentry *lower_dir_dentry; | ||
521 | umode_t mode; | ||
522 | char *encoded_symname; | ||
523 | unsigned int encoded_symlen; | ||
524 | struct ecryptfs_crypt_stat *crypt_stat = NULL; | ||
525 | |||
526 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
527 | dget(lower_dentry); | ||
528 | lower_dir_dentry = lock_parent(lower_dentry); | ||
529 | mode = S_IALLUGO; | ||
530 | encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, | ||
531 | strlen(symname), | ||
532 | &encoded_symname); | ||
533 | if (encoded_symlen < 0) { | ||
534 | rc = encoded_symlen; | ||
535 | goto out_lock; | ||
536 | } | ||
537 | rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, | ||
538 | encoded_symname, mode); | ||
539 | kfree(encoded_symname); | ||
540 | if (rc || !lower_dentry->d_inode) | ||
541 | goto out_lock; | ||
542 | rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); | ||
543 | if (rc) | ||
544 | goto out_lock; | ||
545 | ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode); | ||
546 | out_lock: | ||
547 | unlock_dir(lower_dir_dentry); | ||
548 | dput(lower_dentry); | ||
549 | if (!dentry->d_inode) | ||
550 | d_drop(dentry); | ||
551 | return rc; | ||
552 | } | ||
553 | |||
554 | static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
555 | { | ||
556 | int rc; | ||
557 | struct dentry *lower_dentry; | ||
558 | struct dentry *lower_dir_dentry; | ||
559 | |||
560 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
561 | lower_dir_dentry = lock_parent(lower_dentry); | ||
562 | rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode); | ||
563 | if (rc || !lower_dentry->d_inode) | ||
564 | goto out; | ||
565 | rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); | ||
566 | if (rc) | ||
567 | goto out; | ||
568 | ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode); | ||
569 | dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; | ||
570 | out: | ||
571 | unlock_dir(lower_dir_dentry); | ||
572 | if (!dentry->d_inode) | ||
573 | d_drop(dentry); | ||
574 | return rc; | ||
575 | } | ||
576 | |||
577 | static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) | ||
578 | { | ||
579 | int rc = 0; | ||
580 | struct dentry *tdentry = NULL; | ||
581 | struct dentry *lower_dentry; | ||
582 | struct dentry *tlower_dentry = NULL; | ||
583 | struct dentry *lower_dir_dentry; | ||
584 | |||
585 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
586 | if (!(tdentry = dget(dentry))) { | ||
587 | rc = -EINVAL; | ||
588 | ecryptfs_printk(KERN_ERR, "Error dget'ing dentry [%p]\n", | ||
589 | dentry); | ||
590 | goto out; | ||
591 | } | ||
592 | lower_dir_dentry = lock_parent(lower_dentry); | ||
593 | if (!(tlower_dentry = dget(lower_dentry))) { | ||
594 | rc = -EINVAL; | ||
595 | ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry " | ||
596 | "[%p]\n", lower_dentry); | ||
597 | goto out; | ||
598 | } | ||
599 | rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); | ||
600 | if (!rc) { | ||
601 | d_delete(tlower_dentry); | ||
602 | tlower_dentry = NULL; | ||
603 | } | ||
604 | ecryptfs_copy_attr_times(dir, lower_dir_dentry->d_inode); | ||
605 | dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; | ||
606 | unlock_dir(lower_dir_dentry); | ||
607 | if (!rc) | ||
608 | d_drop(dentry); | ||
609 | out: | ||
610 | if (tdentry) | ||
611 | dput(tdentry); | ||
612 | if (tlower_dentry) | ||
613 | dput(tlower_dentry); | ||
614 | return rc; | ||
615 | } | ||
616 | |||
617 | static int | ||
618 | ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | ||
619 | { | ||
620 | int rc; | ||
621 | struct dentry *lower_dentry; | ||
622 | struct dentry *lower_dir_dentry; | ||
623 | |||
624 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
625 | lower_dir_dentry = lock_parent(lower_dentry); | ||
626 | rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev); | ||
627 | if (rc || !lower_dentry->d_inode) | ||
628 | goto out; | ||
629 | rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); | ||
630 | if (rc) | ||
631 | goto out; | ||
632 | ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode); | ||
633 | out: | ||
634 | unlock_dir(lower_dir_dentry); | ||
635 | if (!dentry->d_inode) | ||
636 | d_drop(dentry); | ||
637 | return rc; | ||
638 | } | ||
639 | |||
640 | static int | ||
641 | ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
642 | struct inode *new_dir, struct dentry *new_dentry) | ||
643 | { | ||
644 | int rc; | ||
645 | struct dentry *lower_old_dentry; | ||
646 | struct dentry *lower_new_dentry; | ||
647 | struct dentry *lower_old_dir_dentry; | ||
648 | struct dentry *lower_new_dir_dentry; | ||
649 | |||
650 | lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); | ||
651 | lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); | ||
652 | dget(lower_old_dentry); | ||
653 | dget(lower_new_dentry); | ||
654 | lower_old_dir_dentry = dget_parent(lower_old_dentry); | ||
655 | lower_new_dir_dentry = dget_parent(lower_new_dentry); | ||
656 | lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); | ||
657 | rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, | ||
658 | lower_new_dir_dentry->d_inode, lower_new_dentry); | ||
659 | if (rc) | ||
660 | goto out_lock; | ||
661 | ecryptfs_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); | ||
662 | if (new_dir != old_dir) | ||
663 | ecryptfs_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); | ||
664 | out_lock: | ||
665 | unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); | ||
666 | dput(lower_new_dentry); | ||
667 | dput(lower_old_dentry); | ||
668 | return rc; | ||
669 | } | ||
670 | |||
671 | static int | ||
672 | ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz) | ||
673 | { | ||
674 | int rc; | ||
675 | struct dentry *lower_dentry; | ||
676 | char *decoded_name; | ||
677 | char *lower_buf; | ||
678 | mm_segment_t old_fs; | ||
679 | struct ecryptfs_crypt_stat *crypt_stat; | ||
680 | |||
681 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
682 | if (!lower_dentry->d_inode->i_op || | ||
683 | !lower_dentry->d_inode->i_op->readlink) { | ||
684 | rc = -EINVAL; | ||
685 | goto out; | ||
686 | } | ||
687 | /* Released in this function */ | ||
688 | lower_buf = kmalloc(bufsiz, GFP_KERNEL); | ||
689 | if (lower_buf == NULL) { | ||
690 | ecryptfs_printk(KERN_ERR, "Out of memory\n"); | ||
691 | rc = -ENOMEM; | ||
692 | goto out; | ||
693 | } | ||
694 | old_fs = get_fs(); | ||
695 | set_fs(get_ds()); | ||
696 | ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " | ||
697 | "lower_dentry->d_name.name = [%s]\n", | ||
698 | lower_dentry->d_name.name); | ||
699 | rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, | ||
700 | (char __user *)lower_buf, | ||
701 | bufsiz); | ||
702 | set_fs(old_fs); | ||
703 | if (rc >= 0) { | ||
704 | crypt_stat = NULL; | ||
705 | rc = ecryptfs_decode_filename(crypt_stat, lower_buf, rc, | ||
706 | &decoded_name); | ||
707 | if (rc == -ENOMEM) | ||
708 | goto out_free_lower_buf; | ||
709 | if (rc > 0) { | ||
710 | ecryptfs_printk(KERN_DEBUG, "Copying [%d] bytes " | ||
711 | "to userspace: [%*s]\n", rc, | ||
712 | decoded_name); | ||
713 | if (copy_to_user(buf, decoded_name, rc)) | ||
714 | rc = -EFAULT; | ||
715 | } | ||
716 | kfree(decoded_name); | ||
717 | ecryptfs_copy_attr_atime(dentry->d_inode, | ||
718 | lower_dentry->d_inode); | ||
719 | } | ||
720 | out_free_lower_buf: | ||
721 | kfree(lower_buf); | ||
722 | out: | ||
723 | return rc; | ||
724 | } | ||
725 | |||
726 | static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
727 | { | ||
728 | char *buf; | ||
729 | int len = PAGE_SIZE, rc; | ||
730 | mm_segment_t old_fs; | ||
731 | |||
732 | /* Released in ecryptfs_put_link(); only release here on error */ | ||
733 | buf = kmalloc(len, GFP_KERNEL); | ||
734 | if (!buf) { | ||
735 | rc = -ENOMEM; | ||
736 | goto out; | ||
737 | } | ||
738 | old_fs = get_fs(); | ||
739 | set_fs(get_ds()); | ||
740 | ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " | ||
741 | "dentry->d_name.name = [%s]\n", dentry->d_name.name); | ||
742 | rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); | ||
743 | buf[rc] = '\0'; | ||
744 | set_fs(old_fs); | ||
745 | if (rc < 0) | ||
746 | goto out_free; | ||
747 | rc = 0; | ||
748 | nd_set_link(nd, buf); | ||
749 | goto out; | ||
750 | out_free: | ||
751 | kfree(buf); | ||
752 | out: | ||
753 | return ERR_PTR(rc); | ||
754 | } | ||
755 | |||
/*
 * Release the link string currently attached to @nd; it is the buffer
 * that ecryptfs_follow_link() allocated.
 */
static void
ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
{
	char *link_buf = nd_get_link(nd);

	kfree(link_buf);
}
762 | |||
763 | /** | ||
764 | * upper_size_to_lower_size | ||
765 | * @crypt_stat: Crypt_stat associated with file | ||
766 | * @upper_size: Size of the upper file | ||
767 | * | ||
768 | * Calculate the requried size of the lower file based on the | ||
769 | * specified size of the upper file. This calculation is based on the | ||
770 | * number of headers in the underlying file and the extent size. | ||
771 | * | ||
772 | * Returns Calculated size of the lower file. | ||
773 | */ | ||
774 | static loff_t | ||
775 | upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat, | ||
776 | loff_t upper_size) | ||
777 | { | ||
778 | loff_t lower_size; | ||
779 | |||
780 | lower_size = ( crypt_stat->header_extent_size | ||
781 | * crypt_stat->num_header_extents_at_front ); | ||
782 | if (upper_size != 0) { | ||
783 | loff_t num_extents; | ||
784 | |||
785 | num_extents = upper_size >> crypt_stat->extent_shift; | ||
786 | if (upper_size & ~crypt_stat->extent_mask) | ||
787 | num_extents++; | ||
788 | lower_size += (num_extents * crypt_stat->extent_size); | ||
789 | } | ||
790 | return lower_size; | ||
791 | } | ||
792 | |||
793 | /** | ||
794 | * ecryptfs_truncate | ||
795 | * @dentry: The ecryptfs layer dentry | ||
796 | * @new_length: The length to expand the file to | ||
797 | * | ||
798 | * Function to handle truncations modifying the size of the file. Note | ||
799 | * that the file sizes are interpolated. When expanding, we are simply | ||
800 | * writing strings of 0's out. When truncating, we need to modify the | ||
801 | * underlying file size according to the page index interpolations. | ||
802 | * | ||
803 | * Returns zero on success; non-zero otherwise | ||
804 | */ | ||
805 | int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) | ||
806 | { | ||
807 | int rc = 0; | ||
808 | struct inode *inode = dentry->d_inode; | ||
809 | struct dentry *lower_dentry; | ||
810 | struct vfsmount *lower_mnt; | ||
811 | struct file fake_ecryptfs_file, *lower_file = NULL; | ||
812 | struct ecryptfs_crypt_stat *crypt_stat; | ||
813 | loff_t i_size = i_size_read(inode); | ||
814 | loff_t lower_size_before_truncate; | ||
815 | loff_t lower_size_after_truncate; | ||
816 | |||
817 | if (unlikely((new_length == i_size))) | ||
818 | goto out; | ||
819 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; | ||
820 | /* Set up a fake ecryptfs file, this is used to interface with | ||
821 | * the file in the underlying filesystem so that the | ||
822 | * truncation has an effect there as well. */ | ||
823 | memset(&fake_ecryptfs_file, 0, sizeof(fake_ecryptfs_file)); | ||
824 | fake_ecryptfs_file.f_dentry = dentry; | ||
825 | /* Released at out_free: label */ | ||
826 | ecryptfs_set_file_private(&fake_ecryptfs_file, | ||
827 | kmem_cache_alloc(ecryptfs_file_info_cache, | ||
828 | SLAB_KERNEL)); | ||
829 | if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) { | ||
830 | rc = -ENOMEM; | ||
831 | goto out; | ||
832 | } | ||
833 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
834 | /* This dget & mntget is released through fput at out_fput: */ | ||
835 | dget(lower_dentry); | ||
836 | lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); | ||
837 | mntget(lower_mnt); | ||
838 | lower_file = dentry_open(lower_dentry, lower_mnt, O_RDWR); | ||
839 | if (unlikely(IS_ERR(lower_file))) { | ||
840 | rc = PTR_ERR(lower_file); | ||
841 | goto out_free; | ||
842 | } | ||
843 | ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file); | ||
844 | /* Switch on growing or shrinking file */ | ||
845 | if (new_length > i_size) { | ||
846 | rc = ecryptfs_fill_zeros(&fake_ecryptfs_file, new_length); | ||
847 | if (rc) { | ||
848 | ecryptfs_printk(KERN_ERR, | ||
849 | "Problem with fill_zeros\n"); | ||
850 | goto out_fput; | ||
851 | } | ||
852 | i_size_write(inode, new_length); | ||
853 | rc = ecryptfs_write_inode_size_to_header(lower_file, | ||
854 | lower_dentry->d_inode, | ||
855 | inode); | ||
856 | if (rc) { | ||
857 | ecryptfs_printk(KERN_ERR, | ||
858 | "Problem with ecryptfs_write" | ||
859 | "_inode_size\n"); | ||
860 | goto out_fput; | ||
861 | } | ||
862 | } else { /* new_length < i_size_read(inode) */ | ||
863 | vmtruncate(inode, new_length); | ||
864 | ecryptfs_write_inode_size_to_header(lower_file, | ||
865 | lower_dentry->d_inode, | ||
866 | inode); | ||
867 | /* We are reducing the size of the ecryptfs file, and need to | ||
868 | * know if we need to reduce the size of the lower file. */ | ||
869 | lower_size_before_truncate = | ||
870 | upper_size_to_lower_size(crypt_stat, i_size); | ||
871 | lower_size_after_truncate = | ||
872 | upper_size_to_lower_size(crypt_stat, new_length); | ||
873 | if (lower_size_after_truncate < lower_size_before_truncate) | ||
874 | vmtruncate(lower_dentry->d_inode, | ||
875 | lower_size_after_truncate); | ||
876 | } | ||
877 | /* Update the access times */ | ||
878 | lower_dentry->d_inode->i_mtime = lower_dentry->d_inode->i_ctime | ||
879 | = CURRENT_TIME; | ||
880 | mark_inode_dirty_sync(inode); | ||
881 | out_fput: | ||
882 | fput(lower_file); | ||
883 | out_free: | ||
884 | if (ecryptfs_file_to_private(&fake_ecryptfs_file)) | ||
885 | kmem_cache_free(ecryptfs_file_info_cache, | ||
886 | ecryptfs_file_to_private(&fake_ecryptfs_file)); | ||
887 | out: | ||
888 | return rc; | ||
889 | } | ||
890 | |||
891 | static int | ||
892 | ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
893 | { | ||
894 | int rc; | ||
895 | |||
896 | if (nd) { | ||
897 | struct vfsmount *vfsmnt_save = nd->mnt; | ||
898 | struct dentry *dentry_save = nd->dentry; | ||
899 | |||
900 | nd->mnt = ecryptfs_dentry_to_lower_mnt(nd->dentry); | ||
901 | nd->dentry = ecryptfs_dentry_to_lower(nd->dentry); | ||
902 | rc = permission(ecryptfs_inode_to_lower(inode), mask, nd); | ||
903 | nd->mnt = vfsmnt_save; | ||
904 | nd->dentry = dentry_save; | ||
905 | } else | ||
906 | rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL); | ||
907 | return rc; | ||
908 | } | ||
909 | |||
910 | /** | ||
911 | * ecryptfs_setattr | ||
912 | * @dentry: dentry handle to the inode to modify | ||
913 | * @ia: Structure with flags of what to change and values | ||
914 | * | ||
915 | * Updates the metadata of an inode. If the update is to the size | ||
916 | * i.e. truncation, then ecryptfs_truncate will handle the size modification | ||
917 | * of both the ecryptfs inode and the lower inode. | ||
918 | * | ||
919 | * All other metadata changes will be passed right to the lower filesystem, | ||
920 | * and we will just update our inode to look like the lower. | ||
921 | */ | ||
922 | static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) | ||
923 | { | ||
924 | int rc = 0; | ||
925 | struct dentry *lower_dentry; | ||
926 | struct inode *inode; | ||
927 | struct inode *lower_inode; | ||
928 | struct ecryptfs_crypt_stat *crypt_stat; | ||
929 | |||
930 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; | ||
931 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
932 | inode = dentry->d_inode; | ||
933 | lower_inode = ecryptfs_inode_to_lower(inode); | ||
934 | if (ia->ia_valid & ATTR_SIZE) { | ||
935 | ecryptfs_printk(KERN_DEBUG, | ||
936 | "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n", | ||
937 | ia->ia_valid, ATTR_SIZE); | ||
938 | rc = ecryptfs_truncate(dentry, ia->ia_size); | ||
939 | /* ecryptfs_truncate handles resizing of the lower file */ | ||
940 | ia->ia_valid &= ~ATTR_SIZE; | ||
941 | ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n", | ||
942 | ia->ia_valid); | ||
943 | if (rc < 0) | ||
944 | goto out; | ||
945 | } | ||
946 | rc = notify_change(lower_dentry, ia); | ||
947 | out: | ||
948 | ecryptfs_copy_attr_all(inode, lower_inode); | ||
949 | return rc; | ||
950 | } | ||
951 | |||
952 | static int | ||
953 | ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, | ||
954 | size_t size, int flags) | ||
955 | { | ||
956 | int rc = 0; | ||
957 | struct dentry *lower_dentry; | ||
958 | |||
959 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
960 | if (!lower_dentry->d_inode->i_op->setxattr) { | ||
961 | rc = -ENOSYS; | ||
962 | goto out; | ||
963 | } | ||
964 | mutex_lock(&lower_dentry->d_inode->i_mutex); | ||
965 | rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value, | ||
966 | size, flags); | ||
967 | mutex_unlock(&lower_dentry->d_inode->i_mutex); | ||
968 | out: | ||
969 | return rc; | ||
970 | } | ||
971 | |||
972 | static ssize_t | ||
973 | ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, | ||
974 | size_t size) | ||
975 | { | ||
976 | int rc = 0; | ||
977 | struct dentry *lower_dentry; | ||
978 | |||
979 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
980 | if (!lower_dentry->d_inode->i_op->getxattr) { | ||
981 | rc = -ENOSYS; | ||
982 | goto out; | ||
983 | } | ||
984 | mutex_lock(&lower_dentry->d_inode->i_mutex); | ||
985 | rc = lower_dentry->d_inode->i_op->getxattr(lower_dentry, name, value, | ||
986 | size); | ||
987 | mutex_unlock(&lower_dentry->d_inode->i_mutex); | ||
988 | out: | ||
989 | return rc; | ||
990 | } | ||
991 | |||
992 | static ssize_t | ||
993 | ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size) | ||
994 | { | ||
995 | int rc = 0; | ||
996 | struct dentry *lower_dentry; | ||
997 | |||
998 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
999 | if (!lower_dentry->d_inode->i_op->listxattr) { | ||
1000 | rc = -ENOSYS; | ||
1001 | goto out; | ||
1002 | } | ||
1003 | mutex_lock(&lower_dentry->d_inode->i_mutex); | ||
1004 | rc = lower_dentry->d_inode->i_op->listxattr(lower_dentry, list, size); | ||
1005 | mutex_unlock(&lower_dentry->d_inode->i_mutex); | ||
1006 | out: | ||
1007 | return rc; | ||
1008 | } | ||
1009 | |||
1010 | static int ecryptfs_removexattr(struct dentry *dentry, const char *name) | ||
1011 | { | ||
1012 | int rc = 0; | ||
1013 | struct dentry *lower_dentry; | ||
1014 | |||
1015 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
1016 | if (!lower_dentry->d_inode->i_op->removexattr) { | ||
1017 | rc = -ENOSYS; | ||
1018 | goto out; | ||
1019 | } | ||
1020 | mutex_lock(&lower_dentry->d_inode->i_mutex); | ||
1021 | rc = lower_dentry->d_inode->i_op->removexattr(lower_dentry, name); | ||
1022 | mutex_unlock(&lower_dentry->d_inode->i_mutex); | ||
1023 | out: | ||
1024 | return rc; | ||
1025 | } | ||
1026 | |||
/*
 * Returns 1 when @inode's lower inode is @candidate_lower_inode,
 * 0 otherwise.
 */
int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode)
{
	struct inode *candidate = (struct inode *)candidate_lower_inode;

	return ecryptfs_inode_to_lower(inode) == candidate;
}
1035 | |||
/*
 * Initialise @inode from @lower_inode via ecryptfs_init_inode().
 * Always returns 0.
 */
int ecryptfs_inode_set(struct inode *inode, void *lower_inode)
{
	struct inode *lower = (struct inode *)lower_inode;

	ecryptfs_init_inode(inode, lower);
	return 0;
}
1041 | |||
1042 | struct inode_operations ecryptfs_symlink_iops = { | ||
1043 | .readlink = ecryptfs_readlink, | ||
1044 | .follow_link = ecryptfs_follow_link, | ||
1045 | .put_link = ecryptfs_put_link, | ||
1046 | .permission = ecryptfs_permission, | ||
1047 | .setattr = ecryptfs_setattr, | ||
1048 | .setxattr = ecryptfs_setxattr, | ||
1049 | .getxattr = ecryptfs_getxattr, | ||
1050 | .listxattr = ecryptfs_listxattr, | ||
1051 | .removexattr = ecryptfs_removexattr | ||
1052 | }; | ||
1053 | |||
1054 | struct inode_operations ecryptfs_dir_iops = { | ||
1055 | .create = ecryptfs_create, | ||
1056 | .lookup = ecryptfs_lookup, | ||
1057 | .link = ecryptfs_link, | ||
1058 | .unlink = ecryptfs_unlink, | ||
1059 | .symlink = ecryptfs_symlink, | ||
1060 | .mkdir = ecryptfs_mkdir, | ||
1061 | .rmdir = ecryptfs_rmdir, | ||
1062 | .mknod = ecryptfs_mknod, | ||
1063 | .rename = ecryptfs_rename, | ||
1064 | .permission = ecryptfs_permission, | ||
1065 | .setattr = ecryptfs_setattr, | ||
1066 | .setxattr = ecryptfs_setxattr, | ||
1067 | .getxattr = ecryptfs_getxattr, | ||
1068 | .listxattr = ecryptfs_listxattr, | ||
1069 | .removexattr = ecryptfs_removexattr | ||
1070 | }; | ||
1071 | |||
1072 | struct inode_operations ecryptfs_main_iops = { | ||
1073 | .permission = ecryptfs_permission, | ||
1074 | .setattr = ecryptfs_setattr, | ||
1075 | .setxattr = ecryptfs_setxattr, | ||
1076 | .getxattr = ecryptfs_getxattr, | ||
1077 | .listxattr = ecryptfs_listxattr, | ||
1078 | .removexattr = ecryptfs_removexattr | ||
1079 | }; | ||
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c new file mode 100644 index 000000000000..ba454785a0c5 --- /dev/null +++ b/fs/ecryptfs/keystore.c | |||
@@ -0,0 +1,1061 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * In-kernel key management code. Includes functions to parse and | ||
4 | * write authentication token-related packets with the underlying | ||
5 | * file. | ||
6 | * | ||
7 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
8 | * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> | ||
9 | * Michael C. Thompson <mcthomps@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public License as | ||
13 | * published by the Free Software Foundation; either version 2 of the | ||
14 | * License, or (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, but | ||
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | * General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
24 | * 02111-1307, USA. | ||
25 | */ | ||
26 | |||
27 | #include <linux/string.h> | ||
28 | #include <linux/sched.h> | ||
29 | #include <linux/syscalls.h> | ||
30 | #include <linux/pagemap.h> | ||
31 | #include <linux/key.h> | ||
32 | #include <linux/random.h> | ||
33 | #include <linux/crypto.h> | ||
34 | #include <linux/scatterlist.h> | ||
35 | #include "ecryptfs_kernel.h" | ||
36 | |||
37 | /** | ||
38 | * request_key returned an error instead of a valid key address; | ||
39 | * determine the type of error, make appropriate log entries, and | ||
40 | * return an error code. | ||
41 | */ | ||
42 | int process_request_key_err(long err_code) | ||
43 | { | ||
44 | int rc = 0; | ||
45 | |||
46 | switch (err_code) { | ||
47 | case ENOKEY: | ||
48 | ecryptfs_printk(KERN_WARNING, "No key\n"); | ||
49 | rc = -ENOENT; | ||
50 | break; | ||
51 | case EKEYEXPIRED: | ||
52 | ecryptfs_printk(KERN_WARNING, "Key expired\n"); | ||
53 | rc = -ETIME; | ||
54 | break; | ||
55 | case EKEYREVOKED: | ||
56 | ecryptfs_printk(KERN_WARNING, "Key revoked\n"); | ||
57 | rc = -EINVAL; | ||
58 | break; | ||
59 | default: | ||
60 | ecryptfs_printk(KERN_WARNING, "Unknown error code: " | ||
61 | "[0x%.16x]\n", err_code); | ||
62 | rc = -EINVAL; | ||
63 | } | ||
64 | return rc; | ||
65 | } | ||
66 | |||
67 | static void wipe_auth_tok_list(struct list_head *auth_tok_list_head) | ||
68 | { | ||
69 | struct list_head *walker; | ||
70 | struct ecryptfs_auth_tok_list_item *auth_tok_list_item; | ||
71 | |||
72 | walker = auth_tok_list_head->next; | ||
73 | while (walker != auth_tok_list_head) { | ||
74 | auth_tok_list_item = | ||
75 | list_entry(walker, struct ecryptfs_auth_tok_list_item, | ||
76 | list); | ||
77 | walker = auth_tok_list_item->list.next; | ||
78 | memset(auth_tok_list_item, 0, | ||
79 | sizeof(struct ecryptfs_auth_tok_list_item)); | ||
80 | kmem_cache_free(ecryptfs_auth_tok_list_item_cache, | ||
81 | auth_tok_list_item); | ||
82 | } | ||
83 | } | ||
84 | |||
85 | struct kmem_cache *ecryptfs_auth_tok_list_item_cache; | ||
86 | |||
87 | /** | ||
88 | * parse_packet_length | ||
89 | * @data: Pointer to memory containing length at offset | ||
90 | * @size: This function writes the decoded size to this memory | ||
91 | * address; zero on error | ||
92 | * @length_size: The number of bytes occupied by the encoded length | ||
93 | * | ||
94 | * Returns Zero on success | ||
95 | */ | ||
96 | static int parse_packet_length(unsigned char *data, size_t *size, | ||
97 | size_t *length_size) | ||
98 | { | ||
99 | int rc = 0; | ||
100 | |||
101 | (*length_size) = 0; | ||
102 | (*size) = 0; | ||
103 | if (data[0] < 192) { | ||
104 | /* One-byte length */ | ||
105 | (*size) = data[0]; | ||
106 | (*length_size) = 1; | ||
107 | } else if (data[0] < 224) { | ||
108 | /* Two-byte length */ | ||
109 | (*size) = ((data[0] - 192) * 256); | ||
110 | (*size) += (data[1] + 192); | ||
111 | (*length_size) = 2; | ||
112 | } else if (data[0] == 255) { | ||
113 | /* Five-byte length; we're not supposed to see this */ | ||
114 | ecryptfs_printk(KERN_ERR, "Five-byte packet length not " | ||
115 | "supported\n"); | ||
116 | rc = -EINVAL; | ||
117 | goto out; | ||
118 | } else { | ||
119 | ecryptfs_printk(KERN_ERR, "Error parsing packet length\n"); | ||
120 | rc = -EINVAL; | ||
121 | goto out; | ||
122 | } | ||
123 | out: | ||
124 | return rc; | ||
125 | } | ||
126 | |||
127 | /** | ||
128 | * write_packet_length | ||
129 | * @dest: The byte array target into which to write the | ||
130 | * length. Must have at least 5 bytes allocated. | ||
131 | * @size: The length to write. | ||
132 | * @packet_size_length: The number of bytes used to encode the | ||
133 | * packet length is written to this address. | ||
134 | * | ||
135 | * Returns zero on success; non-zero on error. | ||
136 | */ | ||
137 | static int write_packet_length(char *dest, size_t size, | ||
138 | size_t *packet_size_length) | ||
139 | { | ||
140 | int rc = 0; | ||
141 | |||
142 | if (size < 192) { | ||
143 | dest[0] = size; | ||
144 | (*packet_size_length) = 1; | ||
145 | } else if (size < 65536) { | ||
146 | dest[0] = (((size - 192) / 256) + 192); | ||
147 | dest[1] = ((size - 192) % 256); | ||
148 | (*packet_size_length) = 2; | ||
149 | } else { | ||
150 | rc = -EINVAL; | ||
151 | ecryptfs_printk(KERN_WARNING, | ||
152 | "Unsupported packet size: [%d]\n", size); | ||
153 | } | ||
154 | return rc; | ||
155 | } | ||
156 | |||
157 | /** | ||
158 | * parse_tag_3_packet | ||
159 | * @crypt_stat: The cryptographic context to modify based on packet | ||
160 | * contents. | ||
161 | * @data: The raw bytes of the packet. | ||
162 | * @auth_tok_list: eCryptfs parses packets into authentication tokens; | ||
163 | * a new authentication token will be placed at the end | ||
164 | * of this list for this packet. | ||
165 | * @new_auth_tok: Pointer to a pointer to memory that this function | ||
166 | * allocates; sets the memory address of the pointer to | ||
167 | * NULL on error. This object is added to the | ||
168 | * auth_tok_list. | ||
169 | * @packet_size: This function writes the size of the parsed packet | ||
170 | * into this memory location; zero on error. | ||
171 | * @max_packet_size: maximum number of bytes to parse | ||
172 | * | ||
173 | * Returns zero on success; non-zero on error. | ||
174 | */ | ||
175 | static int | ||
176 | parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, | ||
177 | unsigned char *data, struct list_head *auth_tok_list, | ||
178 | struct ecryptfs_auth_tok **new_auth_tok, | ||
179 | size_t *packet_size, size_t max_packet_size) | ||
180 | { | ||
181 | int rc = 0; | ||
182 | size_t body_size; | ||
183 | struct ecryptfs_auth_tok_list_item *auth_tok_list_item; | ||
184 | size_t length_size; | ||
185 | |||
186 | (*packet_size) = 0; | ||
187 | (*new_auth_tok) = NULL; | ||
188 | |||
189 | /* we check that: | ||
190 | * one byte for the Tag 3 ID flag | ||
191 | * two bytes for the body size | ||
192 | * do not exceed the maximum_packet_size | ||
193 | */ | ||
194 | if (unlikely((*packet_size) + 3 > max_packet_size)) { | ||
195 | ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); | ||
196 | rc = -EINVAL; | ||
197 | goto out; | ||
198 | } | ||
199 | |||
200 | /* check for Tag 3 identifyer - one byte */ | ||
201 | if (data[(*packet_size)++] != ECRYPTFS_TAG_3_PACKET_TYPE) { | ||
202 | ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n", | ||
203 | ECRYPTFS_TAG_3_PACKET_TYPE); | ||
204 | rc = -EINVAL; | ||
205 | goto out; | ||
206 | } | ||
207 | /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or | ||
208 | * at end of function upon failure */ | ||
209 | auth_tok_list_item = | ||
210 | kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL); | ||
211 | if (!auth_tok_list_item) { | ||
212 | ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n"); | ||
213 | rc = -ENOMEM; | ||
214 | goto out; | ||
215 | } | ||
216 | memset(auth_tok_list_item, 0, | ||
217 | sizeof(struct ecryptfs_auth_tok_list_item)); | ||
218 | (*new_auth_tok) = &auth_tok_list_item->auth_tok; | ||
219 | |||
220 | /* check for body size - one to two bytes */ | ||
221 | rc = parse_packet_length(&data[(*packet_size)], &body_size, | ||
222 | &length_size); | ||
223 | if (rc) { | ||
224 | ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " | ||
225 | "rc = [%d]\n", rc); | ||
226 | goto out_free; | ||
227 | } | ||
228 | if (unlikely(body_size < (0x05 + ECRYPTFS_SALT_SIZE))) { | ||
229 | ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n", | ||
230 | body_size); | ||
231 | rc = -EINVAL; | ||
232 | goto out_free; | ||
233 | } | ||
234 | (*packet_size) += length_size; | ||
235 | |||
236 | /* now we know the length of the remainting Tag 3 packet size: | ||
237 | * 5 fix bytes for: version string, cipher, S2K ID, hash algo, | ||
238 | * number of hash iterations | ||
239 | * ECRYPTFS_SALT_SIZE bytes for salt | ||
240 | * body_size bytes minus the stuff above is the encrypted key size | ||
241 | */ | ||
242 | if (unlikely((*packet_size) + body_size > max_packet_size)) { | ||
243 | ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); | ||
244 | rc = -EINVAL; | ||
245 | goto out_free; | ||
246 | } | ||
247 | |||
248 | /* There are 5 characters of additional information in the | ||
249 | * packet */ | ||
250 | (*new_auth_tok)->session_key.encrypted_key_size = | ||
251 | body_size - (0x05 + ECRYPTFS_SALT_SIZE); | ||
252 | ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n", | ||
253 | (*new_auth_tok)->session_key.encrypted_key_size); | ||
254 | |||
255 | /* Version 4 (from RFC2440) - one byte */ | ||
256 | if (unlikely(data[(*packet_size)++] != 0x04)) { | ||
257 | ecryptfs_printk(KERN_DEBUG, "Unknown version number " | ||
258 | "[%d]\n", data[(*packet_size) - 1]); | ||
259 | rc = -EINVAL; | ||
260 | goto out_free; | ||
261 | } | ||
262 | |||
263 | /* cipher - one byte */ | ||
264 | ecryptfs_cipher_code_to_string(crypt_stat->cipher, | ||
265 | (u16)data[(*packet_size)]); | ||
266 | /* A little extra work to differentiate among the AES key | ||
267 | * sizes; see RFC2440 */ | ||
268 | switch(data[(*packet_size)++]) { | ||
269 | case RFC2440_CIPHER_AES_192: | ||
270 | crypt_stat->key_size = 24; | ||
271 | break; | ||
272 | default: | ||
273 | crypt_stat->key_size = | ||
274 | (*new_auth_tok)->session_key.encrypted_key_size; | ||
275 | } | ||
276 | ecryptfs_init_crypt_ctx(crypt_stat); | ||
277 | /* S2K identifier 3 (from RFC2440) */ | ||
278 | if (unlikely(data[(*packet_size)++] != 0x03)) { | ||
279 | ecryptfs_printk(KERN_ERR, "Only S2K ID 3 is currently " | ||
280 | "supported\n"); | ||
281 | rc = -ENOSYS; | ||
282 | goto out_free; | ||
283 | } | ||
284 | |||
285 | /* TODO: finish the hash mapping */ | ||
286 | /* hash algorithm - one byte */ | ||
287 | switch (data[(*packet_size)++]) { | ||
288 | case 0x01: /* See RFC2440 for these numbers and their mappings */ | ||
289 | /* Choose MD5 */ | ||
290 | /* salt - ECRYPTFS_SALT_SIZE bytes */ | ||
291 | memcpy((*new_auth_tok)->token.password.salt, | ||
292 | &data[(*packet_size)], ECRYPTFS_SALT_SIZE); | ||
293 | (*packet_size) += ECRYPTFS_SALT_SIZE; | ||
294 | |||
295 | /* This conversion was taken straight from RFC2440 */ | ||
296 | /* number of hash iterations - one byte */ | ||
297 | (*new_auth_tok)->token.password.hash_iterations = | ||
298 | ((u32) 16 + (data[(*packet_size)] & 15)) | ||
299 | << ((data[(*packet_size)] >> 4) + 6); | ||
300 | (*packet_size)++; | ||
301 | |||
302 | /* encrypted session key - | ||
303 | * (body_size-5-ECRYPTFS_SALT_SIZE) bytes */ | ||
304 | memcpy((*new_auth_tok)->session_key.encrypted_key, | ||
305 | &data[(*packet_size)], | ||
306 | (*new_auth_tok)->session_key.encrypted_key_size); | ||
307 | (*packet_size) += | ||
308 | (*new_auth_tok)->session_key.encrypted_key_size; | ||
309 | (*new_auth_tok)->session_key.flags &= | ||
310 | ~ECRYPTFS_CONTAINS_DECRYPTED_KEY; | ||
311 | (*new_auth_tok)->session_key.flags |= | ||
312 | ECRYPTFS_CONTAINS_ENCRYPTED_KEY; | ||
313 | (*new_auth_tok)->token.password.hash_algo = 0x01; | ||
314 | break; | ||
315 | default: | ||
316 | ecryptfs_printk(KERN_ERR, "Unsupported hash algorithm: " | ||
317 | "[%d]\n", data[(*packet_size) - 1]); | ||
318 | rc = -ENOSYS; | ||
319 | goto out_free; | ||
320 | } | ||
321 | (*new_auth_tok)->token_type = ECRYPTFS_PASSWORD; | ||
322 | /* TODO: Parametarize; we might actually want userspace to | ||
323 | * decrypt the session key. */ | ||
324 | ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags, | ||
325 | ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT); | ||
326 | ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags, | ||
327 | ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT); | ||
328 | list_add(&auth_tok_list_item->list, auth_tok_list); | ||
329 | goto out; | ||
330 | out_free: | ||
331 | (*new_auth_tok) = NULL; | ||
332 | memset(auth_tok_list_item, 0, | ||
333 | sizeof(struct ecryptfs_auth_tok_list_item)); | ||
334 | kmem_cache_free(ecryptfs_auth_tok_list_item_cache, | ||
335 | auth_tok_list_item); | ||
336 | out: | ||
337 | if (rc) | ||
338 | (*packet_size) = 0; | ||
339 | return rc; | ||
340 | } | ||
341 | |||
342 | /** | ||
343 | * parse_tag_11_packet | ||
344 | * @data: The raw bytes of the packet | ||
345 | * @contents: This function writes the data contents of the literal | ||
346 | * packet into this memory location | ||
347 | * @max_contents_bytes: The maximum number of bytes that this function | ||
348 | * is allowed to write into contents | ||
349 | * @tag_11_contents_size: This function writes the size of the parsed | ||
350 | * contents into this memory location; zero on | ||
351 | * error | ||
352 | * @packet_size: This function writes the size of the parsed packet | ||
353 | * into this memory location; zero on error | ||
354 | * @max_packet_size: maximum number of bytes to parse | ||
355 | * | ||
356 | * Returns zero on success; non-zero on error. | ||
357 | */ | ||
358 | static int | ||
359 | parse_tag_11_packet(unsigned char *data, unsigned char *contents, | ||
360 | size_t max_contents_bytes, size_t *tag_11_contents_size, | ||
361 | size_t *packet_size, size_t max_packet_size) | ||
362 | { | ||
363 | int rc = 0; | ||
364 | size_t body_size; | ||
365 | size_t length_size; | ||
366 | |||
367 | (*packet_size) = 0; | ||
368 | (*tag_11_contents_size) = 0; | ||
369 | |||
370 | /* check that: | ||
371 | * one byte for the Tag 11 ID flag | ||
372 | * two bytes for the Tag 11 length | ||
373 | * do not exceed the maximum_packet_size | ||
374 | */ | ||
375 | if (unlikely((*packet_size) + 3 > max_packet_size)) { | ||
376 | ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); | ||
377 | rc = -EINVAL; | ||
378 | goto out; | ||
379 | } | ||
380 | |||
381 | /* check for Tag 11 identifyer - one byte */ | ||
382 | if (data[(*packet_size)++] != ECRYPTFS_TAG_11_PACKET_TYPE) { | ||
383 | ecryptfs_printk(KERN_WARNING, | ||
384 | "Invalid tag 11 packet format\n"); | ||
385 | rc = -EINVAL; | ||
386 | goto out; | ||
387 | } | ||
388 | |||
389 | /* get Tag 11 content length - one or two bytes */ | ||
390 | rc = parse_packet_length(&data[(*packet_size)], &body_size, | ||
391 | &length_size); | ||
392 | if (rc) { | ||
393 | ecryptfs_printk(KERN_WARNING, | ||
394 | "Invalid tag 11 packet format\n"); | ||
395 | goto out; | ||
396 | } | ||
397 | (*packet_size) += length_size; | ||
398 | |||
399 | if (body_size < 13) { | ||
400 | ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n", | ||
401 | body_size); | ||
402 | rc = -EINVAL; | ||
403 | goto out; | ||
404 | } | ||
405 | /* We have 13 bytes of surrounding packet values */ | ||
406 | (*tag_11_contents_size) = (body_size - 13); | ||
407 | |||
408 | /* now we know the length of the remainting Tag 11 packet size: | ||
409 | * 14 fix bytes for: special flag one, special flag two, | ||
410 | * 12 skipped bytes | ||
411 | * body_size bytes minus the stuff above is the Tag 11 content | ||
412 | */ | ||
413 | /* FIXME why is the body size one byte smaller than the actual | ||
414 | * size of the body? | ||
415 | * this seems to be an error here as well as in | ||
416 | * write_tag_11_packet() */ | ||
417 | if (unlikely((*packet_size) + body_size + 1 > max_packet_size)) { | ||
418 | ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); | ||
419 | rc = -EINVAL; | ||
420 | goto out; | ||
421 | } | ||
422 | |||
423 | /* special flag one - one byte */ | ||
424 | if (data[(*packet_size)++] != 0x62) { | ||
425 | ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n"); | ||
426 | rc = -EINVAL; | ||
427 | goto out; | ||
428 | } | ||
429 | |||
430 | /* special flag two - one byte */ | ||
431 | if (data[(*packet_size)++] != 0x08) { | ||
432 | ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n"); | ||
433 | rc = -EINVAL; | ||
434 | goto out; | ||
435 | } | ||
436 | |||
437 | /* skip the next 12 bytes */ | ||
438 | (*packet_size) += 12; /* We don't care about the filename or | ||
439 | * the timestamp */ | ||
440 | |||
441 | /* get the Tag 11 contents - tag_11_contents_size bytes */ | ||
442 | memcpy(contents, &data[(*packet_size)], (*tag_11_contents_size)); | ||
443 | (*packet_size) += (*tag_11_contents_size); | ||
444 | |||
445 | out: | ||
446 | if (rc) { | ||
447 | (*packet_size) = 0; | ||
448 | (*tag_11_contents_size) = 0; | ||
449 | } | ||
450 | return rc; | ||
451 | } | ||
452 | |||
453 | /** | ||
454 | * decrypt_session_key - Decrypt the session key with the given auth_tok. | ||
455 | * | ||
456 | * Returns Zero on success; non-zero error otherwise. | ||
457 | */ | ||
458 | static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok, | ||
459 | struct ecryptfs_crypt_stat *crypt_stat) | ||
460 | { | ||
461 | int rc = 0; | ||
462 | struct ecryptfs_password *password_s_ptr; | ||
463 | struct crypto_tfm *tfm = NULL; | ||
464 | struct scatterlist src_sg[2], dst_sg[2]; | ||
465 | struct mutex *tfm_mutex = NULL; | ||
466 | /* TODO: Use virt_to_scatterlist for these */ | ||
467 | char *encrypted_session_key; | ||
468 | char *session_key; | ||
469 | |||
470 | password_s_ptr = &auth_tok->token.password; | ||
471 | if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags, | ||
472 | ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)) | ||
473 | ecryptfs_printk(KERN_DEBUG, "Session key encryption key " | ||
474 | "set; skipping key generation\n"); | ||
475 | ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])" | ||
476 | ":\n", | ||
477 | password_s_ptr->session_key_encryption_key_bytes); | ||
478 | if (ecryptfs_verbosity > 0) | ||
479 | ecryptfs_dump_hex(password_s_ptr->session_key_encryption_key, | ||
480 | password_s_ptr-> | ||
481 | session_key_encryption_key_bytes); | ||
482 | if (!strcmp(crypt_stat->cipher, | ||
483 | crypt_stat->mount_crypt_stat->global_default_cipher_name) | ||
484 | && crypt_stat->mount_crypt_stat->global_key_tfm) { | ||
485 | tfm = crypt_stat->mount_crypt_stat->global_key_tfm; | ||
486 | tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex; | ||
487 | } else { | ||
488 | tfm = crypto_alloc_tfm(crypt_stat->cipher, | ||
489 | CRYPTO_TFM_REQ_WEAK_KEY); | ||
490 | if (!tfm) { | ||
491 | printk(KERN_ERR "Error allocating crypto context\n"); | ||
492 | rc = -ENOMEM; | ||
493 | goto out; | ||
494 | } | ||
495 | } | ||
496 | if (password_s_ptr->session_key_encryption_key_bytes | ||
497 | < crypto_tfm_alg_min_keysize(tfm)) { | ||
498 | printk(KERN_WARNING "Session key encryption key is [%d] bytes; " | ||
499 | "minimum keysize for selected cipher is [%d] bytes.\n", | ||
500 | password_s_ptr->session_key_encryption_key_bytes, | ||
501 | crypto_tfm_alg_min_keysize(tfm)); | ||
502 | rc = -EINVAL; | ||
503 | goto out; | ||
504 | } | ||
505 | if (tfm_mutex) | ||
506 | mutex_lock(tfm_mutex); | ||
507 | crypto_cipher_setkey(tfm, password_s_ptr->session_key_encryption_key, | ||
508 | crypt_stat->key_size); | ||
509 | /* TODO: virt_to_scatterlist */ | ||
510 | encrypted_session_key = (char *)__get_free_page(GFP_KERNEL); | ||
511 | if (!encrypted_session_key) { | ||
512 | ecryptfs_printk(KERN_ERR, "Out of memory\n"); | ||
513 | rc = -ENOMEM; | ||
514 | goto out_free_tfm; | ||
515 | } | ||
516 | session_key = (char *)__get_free_page(GFP_KERNEL); | ||
517 | if (!session_key) { | ||
518 | kfree(encrypted_session_key); | ||
519 | ecryptfs_printk(KERN_ERR, "Out of memory\n"); | ||
520 | rc = -ENOMEM; | ||
521 | goto out_free_tfm; | ||
522 | } | ||
523 | memcpy(encrypted_session_key, auth_tok->session_key.encrypted_key, | ||
524 | auth_tok->session_key.encrypted_key_size); | ||
525 | src_sg[0].page = virt_to_page(encrypted_session_key); | ||
526 | src_sg[0].offset = 0; | ||
527 | BUG_ON(auth_tok->session_key.encrypted_key_size > PAGE_CACHE_SIZE); | ||
528 | src_sg[0].length = auth_tok->session_key.encrypted_key_size; | ||
529 | dst_sg[0].page = virt_to_page(session_key); | ||
530 | dst_sg[0].offset = 0; | ||
531 | auth_tok->session_key.decrypted_key_size = | ||
532 | auth_tok->session_key.encrypted_key_size; | ||
533 | dst_sg[0].length = auth_tok->session_key.encrypted_key_size; | ||
534 | /* TODO: Handle error condition */ | ||
535 | crypto_cipher_decrypt(tfm, dst_sg, src_sg, | ||
536 | auth_tok->session_key.encrypted_key_size); | ||
537 | auth_tok->session_key.decrypted_key_size = | ||
538 | auth_tok->session_key.encrypted_key_size; | ||
539 | memcpy(auth_tok->session_key.decrypted_key, session_key, | ||
540 | auth_tok->session_key.decrypted_key_size); | ||
541 | auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; | ||
542 | memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key, | ||
543 | auth_tok->session_key.decrypted_key_size); | ||
544 | ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); | ||
545 | ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n"); | ||
546 | if (ecryptfs_verbosity > 0) | ||
547 | ecryptfs_dump_hex(crypt_stat->key, | ||
548 | crypt_stat->key_size); | ||
549 | memset(encrypted_session_key, 0, PAGE_CACHE_SIZE); | ||
550 | free_page((unsigned long)encrypted_session_key); | ||
551 | memset(session_key, 0, PAGE_CACHE_SIZE); | ||
552 | free_page((unsigned long)session_key); | ||
553 | out_free_tfm: | ||
554 | if (tfm_mutex) | ||
555 | mutex_unlock(tfm_mutex); | ||
556 | else | ||
557 | crypto_free_tfm(tfm); | ||
558 | out: | ||
559 | return rc; | ||
560 | } | ||
561 | |||
562 | /** | ||
563 | * ecryptfs_parse_packet_set | ||
564 | * @dest: The header page in memory | ||
565 | * @version: Version of file format, to guide parsing behavior | ||
566 | * | ||
567 | * Get crypt_stat to have the file's session key if the requisite key | ||
568 | * is available to decrypt the session key. | ||
569 | * | ||
570 | * Returns Zero if a valid authentication token was retrieved and | ||
571 | * processed; negative value for file not encrypted or for error | ||
572 | * conditions. | ||
573 | */ | ||
574 | int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, | ||
575 | unsigned char *src, | ||
576 | struct dentry *ecryptfs_dentry) | ||
577 | { | ||
578 | size_t i = 0; | ||
579 | int rc = 0; | ||
580 | size_t found_auth_tok = 0; | ||
581 | size_t next_packet_is_auth_tok_packet; | ||
582 | char sig[ECRYPTFS_SIG_SIZE_HEX]; | ||
583 | struct list_head auth_tok_list; | ||
584 | struct list_head *walker; | ||
585 | struct ecryptfs_auth_tok *chosen_auth_tok = NULL; | ||
586 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = | ||
587 | &ecryptfs_superblock_to_private( | ||
588 | ecryptfs_dentry->d_sb)->mount_crypt_stat; | ||
589 | struct ecryptfs_auth_tok *candidate_auth_tok = NULL; | ||
590 | size_t packet_size; | ||
591 | struct ecryptfs_auth_tok *new_auth_tok; | ||
592 | unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE]; | ||
593 | size_t tag_11_contents_size; | ||
594 | size_t tag_11_packet_size; | ||
595 | |||
596 | INIT_LIST_HEAD(&auth_tok_list); | ||
597 | /* Parse the header to find as many packets as we can, these will be | ||
598 | * added the our &auth_tok_list */ | ||
599 | next_packet_is_auth_tok_packet = 1; | ||
600 | while (next_packet_is_auth_tok_packet) { | ||
601 | size_t max_packet_size = ((PAGE_CACHE_SIZE - 8) - i); | ||
602 | |||
603 | switch (src[i]) { | ||
604 | case ECRYPTFS_TAG_3_PACKET_TYPE: | ||
605 | rc = parse_tag_3_packet(crypt_stat, | ||
606 | (unsigned char *)&src[i], | ||
607 | &auth_tok_list, &new_auth_tok, | ||
608 | &packet_size, max_packet_size); | ||
609 | if (rc) { | ||
610 | ecryptfs_printk(KERN_ERR, "Error parsing " | ||
611 | "tag 3 packet\n"); | ||
612 | rc = -EIO; | ||
613 | goto out_wipe_list; | ||
614 | } | ||
615 | i += packet_size; | ||
616 | rc = parse_tag_11_packet((unsigned char *)&src[i], | ||
617 | sig_tmp_space, | ||
618 | ECRYPTFS_SIG_SIZE, | ||
619 | &tag_11_contents_size, | ||
620 | &tag_11_packet_size, | ||
621 | max_packet_size); | ||
622 | if (rc) { | ||
623 | ecryptfs_printk(KERN_ERR, "No valid " | ||
624 | "(ecryptfs-specific) literal " | ||
625 | "packet containing " | ||
626 | "authentication token " | ||
627 | "signature found after " | ||
628 | "tag 3 packet\n"); | ||
629 | rc = -EIO; | ||
630 | goto out_wipe_list; | ||
631 | } | ||
632 | i += tag_11_packet_size; | ||
633 | if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) { | ||
634 | ecryptfs_printk(KERN_ERR, "Expected " | ||
635 | "signature of size [%d]; " | ||
636 | "read size [%d]\n", | ||
637 | ECRYPTFS_SIG_SIZE, | ||
638 | tag_11_contents_size); | ||
639 | rc = -EIO; | ||
640 | goto out_wipe_list; | ||
641 | } | ||
642 | ecryptfs_to_hex(new_auth_tok->token.password.signature, | ||
643 | sig_tmp_space, tag_11_contents_size); | ||
644 | new_auth_tok->token.password.signature[ | ||
645 | ECRYPTFS_PASSWORD_SIG_SIZE] = '\0'; | ||
646 | ECRYPTFS_SET_FLAG(crypt_stat->flags, | ||
647 | ECRYPTFS_ENCRYPTED); | ||
648 | break; | ||
649 | case ECRYPTFS_TAG_11_PACKET_TYPE: | ||
650 | ecryptfs_printk(KERN_WARNING, "Invalid packet set " | ||
651 | "(Tag 11 not allowed by itself)\n"); | ||
652 | rc = -EIO; | ||
653 | goto out_wipe_list; | ||
654 | break; | ||
655 | default: | ||
656 | ecryptfs_printk(KERN_DEBUG, "No packet at offset " | ||
657 | "[%d] of the file header; hex value of " | ||
658 | "character is [0x%.2x]\n", i, src[i]); | ||
659 | next_packet_is_auth_tok_packet = 0; | ||
660 | } | ||
661 | } | ||
662 | if (list_empty(&auth_tok_list)) { | ||
663 | rc = -EINVAL; /* Do not support non-encrypted files in | ||
664 | * the 0.1 release */ | ||
665 | goto out; | ||
666 | } | ||
667 | /* If we have a global auth tok, then we should try to use | ||
668 | * it */ | ||
669 | if (mount_crypt_stat->global_auth_tok) { | ||
670 | memcpy(sig, mount_crypt_stat->global_auth_tok_sig, | ||
671 | ECRYPTFS_SIG_SIZE_HEX); | ||
672 | chosen_auth_tok = mount_crypt_stat->global_auth_tok; | ||
673 | } else | ||
674 | BUG(); /* We should always have a global auth tok in | ||
675 | * the 0.1 release */ | ||
676 | /* Scan list to see if our chosen_auth_tok works */ | ||
677 | list_for_each(walker, &auth_tok_list) { | ||
678 | struct ecryptfs_auth_tok_list_item *auth_tok_list_item; | ||
679 | auth_tok_list_item = | ||
680 | list_entry(walker, struct ecryptfs_auth_tok_list_item, | ||
681 | list); | ||
682 | candidate_auth_tok = &auth_tok_list_item->auth_tok; | ||
683 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
684 | ecryptfs_printk(KERN_DEBUG, | ||
685 | "Considering cadidate auth tok:\n"); | ||
686 | ecryptfs_dump_auth_tok(candidate_auth_tok); | ||
687 | } | ||
688 | /* TODO: Replace ECRYPTFS_SIG_SIZE_HEX w/ dynamic value */ | ||
689 | if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD | ||
690 | && !strncmp(candidate_auth_tok->token.password.signature, | ||
691 | sig, ECRYPTFS_SIG_SIZE_HEX)) { | ||
692 | found_auth_tok = 1; | ||
693 | goto leave_list; | ||
694 | /* TODO: Transfer the common salt into the | ||
695 | * crypt_stat salt */ | ||
696 | } | ||
697 | } | ||
698 | leave_list: | ||
699 | if (!found_auth_tok) { | ||
700 | ecryptfs_printk(KERN_ERR, "Could not find authentication " | ||
701 | "token on temporary list for sig [%.*s]\n", | ||
702 | ECRYPTFS_SIG_SIZE_HEX, sig); | ||
703 | rc = -EIO; | ||
704 | goto out_wipe_list; | ||
705 | } else { | ||
706 | memcpy(&(candidate_auth_tok->token.password), | ||
707 | &(chosen_auth_tok->token.password), | ||
708 | sizeof(struct ecryptfs_password)); | ||
709 | rc = decrypt_session_key(candidate_auth_tok, crypt_stat); | ||
710 | if (rc) { | ||
711 | ecryptfs_printk(KERN_ERR, "Error decrypting the " | ||
712 | "session key\n"); | ||
713 | goto out_wipe_list; | ||
714 | } | ||
715 | rc = ecryptfs_compute_root_iv(crypt_stat); | ||
716 | if (rc) { | ||
717 | ecryptfs_printk(KERN_ERR, "Error computing " | ||
718 | "the root IV\n"); | ||
719 | goto out_wipe_list; | ||
720 | } | ||
721 | } | ||
722 | rc = ecryptfs_init_crypt_ctx(crypt_stat); | ||
723 | if (rc) { | ||
724 | ecryptfs_printk(KERN_ERR, "Error initializing crypto " | ||
725 | "context for cipher [%s]; rc = [%d]\n", | ||
726 | crypt_stat->cipher, rc); | ||
727 | } | ||
728 | out_wipe_list: | ||
729 | wipe_auth_tok_list(&auth_tok_list); | ||
730 | out: | ||
731 | return rc; | ||
732 | } | ||
733 | |||
734 | /** | ||
735 | * write_tag_11_packet | ||
736 | * @dest: Target into which Tag 11 packet is to be written | ||
737 | * @max: Maximum packet length | ||
738 | * @contents: Byte array of contents to copy in | ||
739 | * @contents_length: Number of bytes in contents | ||
740 | * @packet_length: Length of the Tag 11 packet written; zero on error | ||
741 | * | ||
742 | * Returns zero on success; non-zero on error. | ||
743 | */ | ||
744 | static int | ||
745 | write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length, | ||
746 | size_t *packet_length) | ||
747 | { | ||
748 | int rc = 0; | ||
749 | size_t packet_size_length; | ||
750 | |||
751 | (*packet_length) = 0; | ||
752 | if ((13 + contents_length) > max) { | ||
753 | rc = -EINVAL; | ||
754 | ecryptfs_printk(KERN_ERR, "Packet length larger than " | ||
755 | "maximum allowable\n"); | ||
756 | goto out; | ||
757 | } | ||
758 | /* General packet header */ | ||
759 | /* Packet tag */ | ||
760 | dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; | ||
761 | /* Packet length */ | ||
762 | rc = write_packet_length(&dest[(*packet_length)], | ||
763 | (13 + contents_length), &packet_size_length); | ||
764 | if (rc) { | ||
765 | ecryptfs_printk(KERN_ERR, "Error generating tag 11 packet " | ||
766 | "header; cannot generate packet length\n"); | ||
767 | goto out; | ||
768 | } | ||
769 | (*packet_length) += packet_size_length; | ||
770 | /* Tag 11 specific */ | ||
771 | /* One-octet field that describes how the data is formatted */ | ||
772 | dest[(*packet_length)++] = 0x62; /* binary data */ | ||
773 | /* One-octet filename length followed by filename */ | ||
774 | dest[(*packet_length)++] = 8; | ||
775 | memcpy(&dest[(*packet_length)], "_CONSOLE", 8); | ||
776 | (*packet_length) += 8; | ||
777 | /* Four-octet number indicating modification date */ | ||
778 | memset(&dest[(*packet_length)], 0x00, 4); | ||
779 | (*packet_length) += 4; | ||
780 | /* Remainder is literal data */ | ||
781 | memcpy(&dest[(*packet_length)], contents, contents_length); | ||
782 | (*packet_length) += contents_length; | ||
783 | out: | ||
784 | if (rc) | ||
785 | (*packet_length) = 0; | ||
786 | return rc; | ||
787 | } | ||
788 | |||
789 | /** | ||
790 | * write_tag_3_packet | ||
791 | * @dest: Buffer into which to write the packet | ||
792 | * @max: Maximum number of bytes that can be written | ||
793 | * @auth_tok: Authentication token | ||
794 | * @crypt_stat: The cryptographic context | ||
795 | * @key_rec: encrypted key | ||
796 | * @packet_size: This function will write the number of bytes that end | ||
797 | * up constituting the packet; set to zero on error | ||
798 | * | ||
799 | * Returns zero on success; non-zero on error. | ||
800 | */ | ||
801 | static int | ||
802 | write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, | ||
803 | struct ecryptfs_crypt_stat *crypt_stat, | ||
804 | struct ecryptfs_key_record *key_rec, size_t *packet_size) | ||
805 | { | ||
806 | int rc = 0; | ||
807 | |||
808 | size_t i; | ||
809 | size_t signature_is_valid = 0; | ||
810 | size_t encrypted_session_key_valid = 0; | ||
811 | char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; | ||
812 | struct scatterlist dest_sg[2]; | ||
813 | struct scatterlist src_sg[2]; | ||
814 | struct crypto_tfm *tfm = NULL; | ||
815 | struct mutex *tfm_mutex = NULL; | ||
816 | size_t key_rec_size; | ||
817 | size_t packet_size_length; | ||
818 | size_t cipher_code; | ||
819 | |||
820 | (*packet_size) = 0; | ||
821 | /* Check for a valid signature on the auth_tok */ | ||
822 | for (i = 0; i < ECRYPTFS_SIG_SIZE_HEX; i++) | ||
823 | signature_is_valid |= auth_tok->token.password.signature[i]; | ||
824 | if (!signature_is_valid) | ||
825 | BUG(); | ||
826 | ecryptfs_from_hex((*key_rec).sig, auth_tok->token.password.signature, | ||
827 | ECRYPTFS_SIG_SIZE); | ||
828 | encrypted_session_key_valid = 0; | ||
829 | for (i = 0; i < crypt_stat->key_size; i++) | ||
830 | encrypted_session_key_valid |= | ||
831 | auth_tok->session_key.encrypted_key[i]; | ||
832 | if (encrypted_session_key_valid) { | ||
833 | memcpy((*key_rec).enc_key, | ||
834 | auth_tok->session_key.encrypted_key, | ||
835 | auth_tok->session_key.encrypted_key_size); | ||
836 | goto encrypted_session_key_set; | ||
837 | } | ||
838 | if (auth_tok->session_key.encrypted_key_size == 0) | ||
839 | auth_tok->session_key.encrypted_key_size = | ||
840 | crypt_stat->key_size; | ||
841 | if (crypt_stat->key_size == 24 | ||
842 | && strcmp("aes", crypt_stat->cipher) == 0) { | ||
843 | memset((crypt_stat->key + 24), 0, 8); | ||
844 | auth_tok->session_key.encrypted_key_size = 32; | ||
845 | } | ||
846 | (*key_rec).enc_key_size = | ||
847 | auth_tok->session_key.encrypted_key_size; | ||
848 | if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags, | ||
849 | ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)) { | ||
850 | ecryptfs_printk(KERN_DEBUG, "Using previously generated " | ||
851 | "session key encryption key of size [%d]\n", | ||
852 | auth_tok->token.password. | ||
853 | session_key_encryption_key_bytes); | ||
854 | memcpy(session_key_encryption_key, | ||
855 | auth_tok->token.password.session_key_encryption_key, | ||
856 | crypt_stat->key_size); | ||
857 | ecryptfs_printk(KERN_DEBUG, | ||
858 | "Cached session key " "encryption key: \n"); | ||
859 | if (ecryptfs_verbosity > 0) | ||
860 | ecryptfs_dump_hex(session_key_encryption_key, 16); | ||
861 | } | ||
862 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
863 | ecryptfs_printk(KERN_DEBUG, "Session key encryption key:\n"); | ||
864 | ecryptfs_dump_hex(session_key_encryption_key, 16); | ||
865 | } | ||
866 | rc = virt_to_scatterlist(crypt_stat->key, | ||
867 | (*key_rec).enc_key_size, src_sg, 2); | ||
868 | if (!rc) { | ||
869 | ecryptfs_printk(KERN_ERR, "Error generating scatterlist " | ||
870 | "for crypt_stat session key\n"); | ||
871 | rc = -ENOMEM; | ||
872 | goto out; | ||
873 | } | ||
874 | rc = virt_to_scatterlist((*key_rec).enc_key, | ||
875 | (*key_rec).enc_key_size, dest_sg, 2); | ||
876 | if (!rc) { | ||
877 | ecryptfs_printk(KERN_ERR, "Error generating scatterlist " | ||
878 | "for crypt_stat encrypted session key\n"); | ||
879 | rc = -ENOMEM; | ||
880 | goto out; | ||
881 | } | ||
882 | if (!strcmp(crypt_stat->cipher, | ||
883 | crypt_stat->mount_crypt_stat->global_default_cipher_name) | ||
884 | && crypt_stat->mount_crypt_stat->global_key_tfm) { | ||
885 | tfm = crypt_stat->mount_crypt_stat->global_key_tfm; | ||
886 | tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex; | ||
887 | } else | ||
888 | tfm = crypto_alloc_tfm(crypt_stat->cipher, 0); | ||
889 | if (!tfm) { | ||
890 | ecryptfs_printk(KERN_ERR, "Could not initialize crypto " | ||
891 | "context for cipher [%s]\n", | ||
892 | crypt_stat->cipher); | ||
893 | rc = -EINVAL; | ||
894 | goto out; | ||
895 | } | ||
896 | if (tfm_mutex) | ||
897 | mutex_lock(tfm_mutex); | ||
898 | rc = crypto_cipher_setkey(tfm, session_key_encryption_key, | ||
899 | crypt_stat->key_size); | ||
900 | if (rc < 0) { | ||
901 | if (tfm_mutex) | ||
902 | mutex_unlock(tfm_mutex); | ||
903 | ecryptfs_printk(KERN_ERR, "Error setting key for crypto " | ||
904 | "context\n"); | ||
905 | goto out; | ||
906 | } | ||
907 | rc = 0; | ||
908 | ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n", | ||
909 | crypt_stat->key_size); | ||
910 | crypto_cipher_encrypt(tfm, dest_sg, src_sg, | ||
911 | (*key_rec).enc_key_size); | ||
912 | if (tfm_mutex) | ||
913 | mutex_unlock(tfm_mutex); | ||
914 | ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n"); | ||
915 | if (ecryptfs_verbosity > 0) | ||
916 | ecryptfs_dump_hex((*key_rec).enc_key, | ||
917 | (*key_rec).enc_key_size); | ||
918 | encrypted_session_key_set: | ||
919 | /* Now we have a valid key_rec. Append it to the | ||
920 | * key_rec set. */ | ||
921 | key_rec_size = (sizeof(struct ecryptfs_key_record) | ||
922 | - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES | ||
923 | + ((*key_rec).enc_key_size)); | ||
924 | /* TODO: Include a packet size limit as a parameter to this | ||
925 | * function once we have multi-packet headers (for versions | ||
926 | * later than 0.1 */ | ||
927 | if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) { | ||
928 | ecryptfs_printk(KERN_ERR, "Keyset too large\n"); | ||
929 | rc = -EINVAL; | ||
930 | goto out; | ||
931 | } | ||
932 | /* TODO: Packet size limit */ | ||
933 | /* We have 5 bytes of surrounding packet data */ | ||
934 | if ((0x05 + ECRYPTFS_SALT_SIZE | ||
935 | + (*key_rec).enc_key_size) >= max) { | ||
936 | ecryptfs_printk(KERN_ERR, "Authentication token is too " | ||
937 | "large\n"); | ||
938 | rc = -EINVAL; | ||
939 | goto out; | ||
940 | } | ||
941 | /* This format is inspired by OpenPGP; see RFC 2440 | ||
942 | * packet tag 3 */ | ||
943 | dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; | ||
944 | /* ver+cipher+s2k+hash+salt+iter+enc_key */ | ||
945 | rc = write_packet_length(&dest[(*packet_size)], | ||
946 | (0x05 + ECRYPTFS_SALT_SIZE | ||
947 | + (*key_rec).enc_key_size), | ||
948 | &packet_size_length); | ||
949 | if (rc) { | ||
950 | ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet " | ||
951 | "header; cannot generate packet length\n"); | ||
952 | goto out; | ||
953 | } | ||
954 | (*packet_size) += packet_size_length; | ||
955 | dest[(*packet_size)++] = 0x04; /* version 4 */ | ||
956 | cipher_code = ecryptfs_code_for_cipher_string(crypt_stat); | ||
957 | if (cipher_code == 0) { | ||
958 | ecryptfs_printk(KERN_WARNING, "Unable to generate code for " | ||
959 | "cipher [%s]\n", crypt_stat->cipher); | ||
960 | rc = -EINVAL; | ||
961 | goto out; | ||
962 | } | ||
963 | dest[(*packet_size)++] = cipher_code; | ||
964 | dest[(*packet_size)++] = 0x03; /* S2K */ | ||
965 | dest[(*packet_size)++] = 0x01; /* MD5 (TODO: parameterize) */ | ||
966 | memcpy(&dest[(*packet_size)], auth_tok->token.password.salt, | ||
967 | ECRYPTFS_SALT_SIZE); | ||
968 | (*packet_size) += ECRYPTFS_SALT_SIZE; /* salt */ | ||
969 | dest[(*packet_size)++] = 0x60; /* hash iterations (65536) */ | ||
970 | memcpy(&dest[(*packet_size)], (*key_rec).enc_key, | ||
971 | (*key_rec).enc_key_size); | ||
972 | (*packet_size) += (*key_rec).enc_key_size; | ||
973 | out: | ||
974 | if (tfm && !tfm_mutex) | ||
975 | crypto_free_tfm(tfm); | ||
976 | if (rc) | ||
977 | (*packet_size) = 0; | ||
978 | return rc; | ||
979 | } | ||
980 | |||
981 | /** | ||
982 | * ecryptfs_generate_key_packet_set | ||
983 | * @dest: Virtual address from which to write the key record set | ||
984 | * @crypt_stat: The cryptographic context from which the | ||
985 | * authentication tokens will be retrieved | ||
986 | * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat | ||
987 | * for the global parameters | ||
988 | * @len: The amount written | ||
989 | * @max: The maximum amount of data allowed to be written | ||
990 | * | ||
991 | * Generates a key packet set and writes it to the virtual address | ||
992 | * passed in. | ||
993 | * | ||
994 | * Returns zero on success; non-zero on error. | ||
995 | */ | ||
996 | int | ||
997 | ecryptfs_generate_key_packet_set(char *dest_base, | ||
998 | struct ecryptfs_crypt_stat *crypt_stat, | ||
999 | struct dentry *ecryptfs_dentry, size_t *len, | ||
1000 | size_t max) | ||
1001 | { | ||
1002 | int rc = 0; | ||
1003 | struct ecryptfs_auth_tok *auth_tok; | ||
1004 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = | ||
1005 | &ecryptfs_superblock_to_private( | ||
1006 | ecryptfs_dentry->d_sb)->mount_crypt_stat; | ||
1007 | size_t written; | ||
1008 | struct ecryptfs_key_record key_rec; | ||
1009 | |||
1010 | (*len) = 0; | ||
1011 | if (mount_crypt_stat->global_auth_tok) { | ||
1012 | auth_tok = mount_crypt_stat->global_auth_tok; | ||
1013 | if (auth_tok->token_type == ECRYPTFS_PASSWORD) { | ||
1014 | rc = write_tag_3_packet((dest_base + (*len)), | ||
1015 | max, auth_tok, | ||
1016 | crypt_stat, &key_rec, | ||
1017 | &written); | ||
1018 | if (rc) { | ||
1019 | ecryptfs_printk(KERN_WARNING, "Error " | ||
1020 | "writing tag 3 packet\n"); | ||
1021 | goto out; | ||
1022 | } | ||
1023 | (*len) += written; | ||
1024 | /* Write auth tok signature packet */ | ||
1025 | rc = write_tag_11_packet( | ||
1026 | (dest_base + (*len)), | ||
1027 | (max - (*len)), | ||
1028 | key_rec.sig, ECRYPTFS_SIG_SIZE, &written); | ||
1029 | if (rc) { | ||
1030 | ecryptfs_printk(KERN_ERR, "Error writing " | ||
1031 | "auth tok signature packet\n"); | ||
1032 | goto out; | ||
1033 | } | ||
1034 | (*len) += written; | ||
1035 | } else { | ||
1036 | ecryptfs_printk(KERN_WARNING, "Unsupported " | ||
1037 | "authentication token type\n"); | ||
1038 | rc = -EINVAL; | ||
1039 | goto out; | ||
1040 | } | ||
1041 | if (rc) { | ||
1042 | ecryptfs_printk(KERN_WARNING, "Error writing " | ||
1043 | "authentication token packet with sig " | ||
1044 | "= [%s]\n", | ||
1045 | mount_crypt_stat->global_auth_tok_sig); | ||
1046 | rc = -EIO; | ||
1047 | goto out; | ||
1048 | } | ||
1049 | } else | ||
1050 | BUG(); | ||
1051 | if (likely((max - (*len)) > 0)) { | ||
1052 | dest_base[(*len)] = 0x00; | ||
1053 | } else { | ||
1054 | ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n"); | ||
1055 | rc = -EIO; | ||
1056 | } | ||
1057 | out: | ||
1058 | if (rc) | ||
1059 | (*len) = 0; | ||
1060 | return rc; | ||
1061 | } | ||
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c new file mode 100644 index 000000000000..7a11b8ae6644 --- /dev/null +++ b/fs/ecryptfs/main.c | |||
@@ -0,0 +1,831 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * | ||
4 | * Copyright (C) 1997-2003 Erez Zadok | ||
5 | * Copyright (C) 2001-2003 Stony Brook University | ||
6 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
7 | * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> | ||
8 | * Michael C. Thompson <mcthomps@us.ibm.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License as | ||
12 | * published by the Free Software Foundation; either version 2 of the | ||
13 | * License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
23 | * 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/dcache.h> | ||
27 | #include <linux/file.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/namei.h> | ||
30 | #include <linux/skbuff.h> | ||
31 | #include <linux/crypto.h> | ||
32 | #include <linux/netlink.h> | ||
33 | #include <linux/mount.h> | ||
34 | #include <linux/dcache.h> | ||
35 | #include <linux/pagemap.h> | ||
36 | #include <linux/key.h> | ||
37 | #include <linux/parser.h> | ||
38 | #include "ecryptfs_kernel.h" | ||
39 | |||
40 | /** | ||
41 | * Module parameter that defines the ecryptfs_verbosity level. | ||
42 | */ | ||
43 | int ecryptfs_verbosity = 0; | ||
44 | |||
45 | module_param(ecryptfs_verbosity, int, 0); | ||
46 | MODULE_PARM_DESC(ecryptfs_verbosity, | ||
47 | "Initial verbosity level (0 or 1; defaults to " | ||
48 | "0, which is Quiet)"); | ||
49 | |||
50 | void __ecryptfs_printk(const char *fmt, ...) | ||
51 | { | ||
52 | va_list args; | ||
53 | va_start(args, fmt); | ||
54 | if (fmt[1] == '7') { /* KERN_DEBUG */ | ||
55 | if (ecryptfs_verbosity >= 1) | ||
56 | vprintk(fmt, args); | ||
57 | } else | ||
58 | vprintk(fmt, args); | ||
59 | va_end(args); | ||
60 | } | ||
61 | |||
62 | /** | ||
63 | * ecryptfs_interpose | ||
64 | * @lower_dentry: Existing dentry in the lower filesystem | ||
65 | * @dentry: ecryptfs' dentry | ||
66 | * @sb: ecryptfs's super_block | ||
67 | * @flag: If set to true, then d_add is called, else d_instantiate is called | ||
68 | * | ||
69 | * Interposes upper and lower dentries. | ||
70 | * | ||
71 | * Returns zero on success; non-zero otherwise | ||
72 | */ | ||
73 | int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, | ||
74 | struct super_block *sb, int flag) | ||
75 | { | ||
76 | struct inode *lower_inode; | ||
77 | struct inode *inode; | ||
78 | int rc = 0; | ||
79 | |||
80 | lower_inode = lower_dentry->d_inode; | ||
81 | if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { | ||
82 | rc = -EXDEV; | ||
83 | goto out; | ||
84 | } | ||
85 | if (!igrab(lower_inode)) { | ||
86 | rc = -ESTALE; | ||
87 | goto out; | ||
88 | } | ||
89 | inode = iget5_locked(sb, (unsigned long)lower_inode, | ||
90 | ecryptfs_inode_test, ecryptfs_inode_set, | ||
91 | lower_inode); | ||
92 | if (!inode) { | ||
93 | rc = -EACCES; | ||
94 | iput(lower_inode); | ||
95 | goto out; | ||
96 | } | ||
97 | if (inode->i_state & I_NEW) | ||
98 | unlock_new_inode(inode); | ||
99 | else | ||
100 | iput(lower_inode); | ||
101 | if (S_ISLNK(lower_inode->i_mode)) | ||
102 | inode->i_op = &ecryptfs_symlink_iops; | ||
103 | else if (S_ISDIR(lower_inode->i_mode)) | ||
104 | inode->i_op = &ecryptfs_dir_iops; | ||
105 | if (S_ISDIR(lower_inode->i_mode)) | ||
106 | inode->i_fop = &ecryptfs_dir_fops; | ||
107 | /* TODO: Is there a better way to identify if the inode is | ||
108 | * special? */ | ||
109 | if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || | ||
110 | S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) | ||
111 | init_special_inode(inode, lower_inode->i_mode, | ||
112 | lower_inode->i_rdev); | ||
113 | dentry->d_op = &ecryptfs_dops; | ||
114 | if (flag) | ||
115 | d_add(dentry, inode); | ||
116 | else | ||
117 | d_instantiate(dentry, inode); | ||
118 | ecryptfs_copy_attr_all(inode, lower_inode); | ||
119 | /* This size will be overwritten for real files w/ headers and | ||
120 | * other metadata */ | ||
121 | ecryptfs_copy_inode_size(inode, lower_inode); | ||
122 | out: | ||
123 | return rc; | ||
124 | } | ||
125 | |||
126 | enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_debug, | ||
127 | ecryptfs_opt_ecryptfs_debug, ecryptfs_opt_cipher, | ||
128 | ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes, | ||
129 | ecryptfs_opt_passthrough, ecryptfs_opt_err }; | ||
130 | |||
131 | static match_table_t tokens = { | ||
132 | {ecryptfs_opt_sig, "sig=%s"}, | ||
133 | {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, | ||
134 | {ecryptfs_opt_debug, "debug=%u"}, | ||
135 | {ecryptfs_opt_ecryptfs_debug, "ecryptfs_debug=%u"}, | ||
136 | {ecryptfs_opt_cipher, "cipher=%s"}, | ||
137 | {ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"}, | ||
138 | {ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"}, | ||
139 | {ecryptfs_opt_passthrough, "ecryptfs_passthrough"}, | ||
140 | {ecryptfs_opt_err, NULL} | ||
141 | }; | ||
142 | |||
143 | /** | ||
144 | * ecryptfs_verify_version | ||
145 | * @version: The version number to confirm | ||
146 | * | ||
147 | * Returns zero on good version; non-zero otherwise | ||
148 | */ | ||
149 | static int ecryptfs_verify_version(u16 version) | ||
150 | { | ||
151 | int rc = 0; | ||
152 | unsigned char major; | ||
153 | unsigned char minor; | ||
154 | |||
155 | major = ((version >> 8) & 0xFF); | ||
156 | minor = (version & 0xFF); | ||
157 | if (major != ECRYPTFS_VERSION_MAJOR) { | ||
158 | ecryptfs_printk(KERN_ERR, "Major version number mismatch. " | ||
159 | "Expected [%d]; got [%d]\n", | ||
160 | ECRYPTFS_VERSION_MAJOR, major); | ||
161 | rc = -EINVAL; | ||
162 | goto out; | ||
163 | } | ||
164 | if (minor != ECRYPTFS_VERSION_MINOR) { | ||
165 | ecryptfs_printk(KERN_ERR, "Minor version number mismatch. " | ||
166 | "Expected [%d]; got [%d]\n", | ||
167 | ECRYPTFS_VERSION_MINOR, minor); | ||
168 | rc = -EINVAL; | ||
169 | goto out; | ||
170 | } | ||
171 | out: | ||
172 | return rc; | ||
173 | } | ||
174 | |||
175 | /** | ||
176 | * ecryptfs_parse_options | ||
177 | * @sb: The ecryptfs super block | ||
178 | * @options: The options pased to the kernel | ||
179 | * | ||
180 | * Parse mount options: | ||
181 | * debug=N - ecryptfs_verbosity level for debug output | ||
182 | * sig=XXX - description(signature) of the key to use | ||
183 | * | ||
184 | * Returns the dentry object of the lower-level (lower/interposed) | ||
185 | * directory; We want to mount our stackable file system on top of | ||
186 | * that lower directory. | ||
187 | * | ||
188 | * The signature of the key to use must be the description of a key | ||
189 | * already in the keyring. Mounting will fail if the key can not be | ||
190 | * found. | ||
191 | * | ||
192 | * Returns zero on success; non-zero on error | ||
193 | */ | ||
194 | static int ecryptfs_parse_options(struct super_block *sb, char *options) | ||
195 | { | ||
196 | char *p; | ||
197 | int rc = 0; | ||
198 | int sig_set = 0; | ||
199 | int cipher_name_set = 0; | ||
200 | int cipher_key_bytes; | ||
201 | int cipher_key_bytes_set = 0; | ||
202 | struct key *auth_tok_key = NULL; | ||
203 | struct ecryptfs_auth_tok *auth_tok = NULL; | ||
204 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = | ||
205 | &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; | ||
206 | substring_t args[MAX_OPT_ARGS]; | ||
207 | int token; | ||
208 | char *sig_src; | ||
209 | char *sig_dst; | ||
210 | char *debug_src; | ||
211 | char *cipher_name_dst; | ||
212 | char *cipher_name_src; | ||
213 | char *cipher_key_bytes_src; | ||
214 | struct crypto_tfm *tmp_tfm; | ||
215 | int cipher_name_len; | ||
216 | |||
217 | if (!options) { | ||
218 | rc = -EINVAL; | ||
219 | goto out; | ||
220 | } | ||
221 | while ((p = strsep(&options, ",")) != NULL) { | ||
222 | if (!*p) | ||
223 | continue; | ||
224 | token = match_token(p, tokens, args); | ||
225 | switch (token) { | ||
226 | case ecryptfs_opt_sig: | ||
227 | case ecryptfs_opt_ecryptfs_sig: | ||
228 | sig_src = args[0].from; | ||
229 | sig_dst = | ||
230 | mount_crypt_stat->global_auth_tok_sig; | ||
231 | memcpy(sig_dst, sig_src, ECRYPTFS_SIG_SIZE_HEX); | ||
232 | sig_dst[ECRYPTFS_SIG_SIZE_HEX] = '\0'; | ||
233 | ecryptfs_printk(KERN_DEBUG, | ||
234 | "The mount_crypt_stat " | ||
235 | "global_auth_tok_sig set to: " | ||
236 | "[%s]\n", sig_dst); | ||
237 | sig_set = 1; | ||
238 | break; | ||
239 | case ecryptfs_opt_debug: | ||
240 | case ecryptfs_opt_ecryptfs_debug: | ||
241 | debug_src = args[0].from; | ||
242 | ecryptfs_verbosity = | ||
243 | (int)simple_strtol(debug_src, &debug_src, | ||
244 | 0); | ||
245 | ecryptfs_printk(KERN_DEBUG, | ||
246 | "Verbosity set to [%d]" "\n", | ||
247 | ecryptfs_verbosity); | ||
248 | break; | ||
249 | case ecryptfs_opt_cipher: | ||
250 | case ecryptfs_opt_ecryptfs_cipher: | ||
251 | cipher_name_src = args[0].from; | ||
252 | cipher_name_dst = | ||
253 | mount_crypt_stat-> | ||
254 | global_default_cipher_name; | ||
255 | strncpy(cipher_name_dst, cipher_name_src, | ||
256 | ECRYPTFS_MAX_CIPHER_NAME_SIZE); | ||
257 | ecryptfs_printk(KERN_DEBUG, | ||
258 | "The mount_crypt_stat " | ||
259 | "global_default_cipher_name set to: " | ||
260 | "[%s]\n", cipher_name_dst); | ||
261 | cipher_name_set = 1; | ||
262 | break; | ||
263 | case ecryptfs_opt_ecryptfs_key_bytes: | ||
264 | cipher_key_bytes_src = args[0].from; | ||
265 | cipher_key_bytes = | ||
266 | (int)simple_strtol(cipher_key_bytes_src, | ||
267 | &cipher_key_bytes_src, 0); | ||
268 | mount_crypt_stat->global_default_cipher_key_size = | ||
269 | cipher_key_bytes; | ||
270 | ecryptfs_printk(KERN_DEBUG, | ||
271 | "The mount_crypt_stat " | ||
272 | "global_default_cipher_key_size " | ||
273 | "set to: [%d]\n", mount_crypt_stat-> | ||
274 | global_default_cipher_key_size); | ||
275 | cipher_key_bytes_set = 1; | ||
276 | break; | ||
277 | case ecryptfs_opt_passthrough: | ||
278 | mount_crypt_stat->flags |= | ||
279 | ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED; | ||
280 | break; | ||
281 | case ecryptfs_opt_err: | ||
282 | default: | ||
283 | ecryptfs_printk(KERN_WARNING, | ||
284 | "eCryptfs: unrecognized option '%s'\n", | ||
285 | p); | ||
286 | } | ||
287 | } | ||
288 | /* Do not support lack of mount-wide signature in 0.1 | ||
289 | * release */ | ||
290 | if (!sig_set) { | ||
291 | rc = -EINVAL; | ||
292 | ecryptfs_printk(KERN_ERR, "You must supply a valid " | ||
293 | "passphrase auth tok signature as a mount " | ||
294 | "parameter; see the eCryptfs README\n"); | ||
295 | goto out; | ||
296 | } | ||
297 | if (!cipher_name_set) { | ||
298 | cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); | ||
299 | if (unlikely(cipher_name_len | ||
300 | >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) { | ||
301 | rc = -EINVAL; | ||
302 | BUG(); | ||
303 | goto out; | ||
304 | } | ||
305 | memcpy(mount_crypt_stat->global_default_cipher_name, | ||
306 | ECRYPTFS_DEFAULT_CIPHER, cipher_name_len); | ||
307 | mount_crypt_stat->global_default_cipher_name[cipher_name_len] | ||
308 | = '\0'; | ||
309 | } | ||
310 | if (!cipher_key_bytes_set) { | ||
311 | mount_crypt_stat->global_default_cipher_key_size = | ||
312 | ECRYPTFS_DEFAULT_KEY_BYTES; | ||
313 | ecryptfs_printk(KERN_DEBUG, "Cipher key size was not " | ||
314 | "specified. Defaulting to [%d]\n", | ||
315 | mount_crypt_stat-> | ||
316 | global_default_cipher_key_size); | ||
317 | } | ||
318 | rc = ecryptfs_process_cipher( | ||
319 | &tmp_tfm, | ||
320 | &mount_crypt_stat->global_key_tfm, | ||
321 | mount_crypt_stat->global_default_cipher_name, | ||
322 | mount_crypt_stat->global_default_cipher_key_size); | ||
323 | if (tmp_tfm) | ||
324 | crypto_free_tfm(tmp_tfm); | ||
325 | if (rc) { | ||
326 | printk(KERN_ERR "Error attempting to initialize cipher [%s] " | ||
327 | "with key size [%Zd] bytes; rc = [%d]\n", | ||
328 | mount_crypt_stat->global_default_cipher_name, | ||
329 | mount_crypt_stat->global_default_cipher_key_size, rc); | ||
330 | rc = -EINVAL; | ||
331 | goto out; | ||
332 | } | ||
333 | mutex_init(&mount_crypt_stat->global_key_tfm_mutex); | ||
334 | ecryptfs_printk(KERN_DEBUG, "Requesting the key with description: " | ||
335 | "[%s]\n", mount_crypt_stat->global_auth_tok_sig); | ||
336 | /* The reference to this key is held until umount is done The | ||
337 | * call to key_put is done in ecryptfs_put_super() */ | ||
338 | auth_tok_key = request_key(&key_type_user, | ||
339 | mount_crypt_stat->global_auth_tok_sig, | ||
340 | NULL); | ||
341 | if (!auth_tok_key || IS_ERR(auth_tok_key)) { | ||
342 | ecryptfs_printk(KERN_ERR, "Could not find key with " | ||
343 | "description: [%s]\n", | ||
344 | mount_crypt_stat->global_auth_tok_sig); | ||
345 | process_request_key_err(PTR_ERR(auth_tok_key)); | ||
346 | rc = -EINVAL; | ||
347 | goto out; | ||
348 | } | ||
349 | auth_tok = ecryptfs_get_key_payload_data(auth_tok_key); | ||
350 | if (ecryptfs_verify_version(auth_tok->version)) { | ||
351 | ecryptfs_printk(KERN_ERR, "Data structure version mismatch. " | ||
352 | "Userspace tools must match eCryptfs kernel " | ||
353 | "module with major version [%d] and minor " | ||
354 | "version [%d]\n", ECRYPTFS_VERSION_MAJOR, | ||
355 | ECRYPTFS_VERSION_MINOR); | ||
356 | rc = -EINVAL; | ||
357 | goto out; | ||
358 | } | ||
359 | if (auth_tok->token_type != ECRYPTFS_PASSWORD) { | ||
360 | ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure " | ||
361 | "returned from key\n"); | ||
362 | rc = -EINVAL; | ||
363 | goto out; | ||
364 | } | ||
365 | mount_crypt_stat->global_auth_tok_key = auth_tok_key; | ||
366 | mount_crypt_stat->global_auth_tok = auth_tok; | ||
367 | out: | ||
368 | return rc; | ||
369 | } | ||
370 | |||
371 | struct kmem_cache *ecryptfs_sb_info_cache; | ||
372 | |||
373 | /** | ||
374 | * ecryptfs_fill_super | ||
375 | * @sb: The ecryptfs super block | ||
376 | * @raw_data: The options passed to mount | ||
377 | * @silent: Not used but required by function prototype | ||
378 | * | ||
379 | * Sets up what we can of the sb, rest is done in ecryptfs_read_super | ||
380 | * | ||
381 | * Returns zero on success; non-zero otherwise | ||
382 | */ | ||
383 | static int | ||
384 | ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) | ||
385 | { | ||
386 | int rc = 0; | ||
387 | |||
388 | /* Released in ecryptfs_put_super() */ | ||
389 | ecryptfs_set_superblock_private(sb, | ||
390 | kmem_cache_alloc(ecryptfs_sb_info_cache, | ||
391 | SLAB_KERNEL)); | ||
392 | if (!ecryptfs_superblock_to_private(sb)) { | ||
393 | ecryptfs_printk(KERN_WARNING, "Out of memory\n"); | ||
394 | rc = -ENOMEM; | ||
395 | goto out; | ||
396 | } | ||
397 | memset(ecryptfs_superblock_to_private(sb), 0, | ||
398 | sizeof(struct ecryptfs_sb_info)); | ||
399 | sb->s_op = &ecryptfs_sops; | ||
400 | /* Released through deactivate_super(sb) from get_sb_nodev */ | ||
401 | sb->s_root = d_alloc(NULL, &(const struct qstr) { | ||
402 | .hash = 0,.name = "/",.len = 1}); | ||
403 | if (!sb->s_root) { | ||
404 | ecryptfs_printk(KERN_ERR, "d_alloc failed\n"); | ||
405 | rc = -ENOMEM; | ||
406 | goto out; | ||
407 | } | ||
408 | sb->s_root->d_op = &ecryptfs_dops; | ||
409 | sb->s_root->d_sb = sb; | ||
410 | sb->s_root->d_parent = sb->s_root; | ||
411 | /* Released in d_release when dput(sb->s_root) is called */ | ||
412 | /* through deactivate_super(sb) from get_sb_nodev() */ | ||
413 | ecryptfs_set_dentry_private(sb->s_root, | ||
414 | kmem_cache_alloc(ecryptfs_dentry_info_cache, | ||
415 | SLAB_KERNEL)); | ||
416 | if (!ecryptfs_dentry_to_private(sb->s_root)) { | ||
417 | ecryptfs_printk(KERN_ERR, | ||
418 | "dentry_info_cache alloc failed\n"); | ||
419 | rc = -ENOMEM; | ||
420 | goto out; | ||
421 | } | ||
422 | memset(ecryptfs_dentry_to_private(sb->s_root), 0, | ||
423 | sizeof(struct ecryptfs_dentry_info)); | ||
424 | rc = 0; | ||
425 | out: | ||
426 | /* Should be able to rely on deactivate_super called from | ||
427 | * get_sb_nodev */ | ||
428 | return rc; | ||
429 | } | ||
430 | |||
431 | /** | ||
432 | * ecryptfs_read_super | ||
433 | * @sb: The ecryptfs super block | ||
434 | * @dev_name: The path to mount over | ||
435 | * | ||
436 | * Read the super block of the lower filesystem, and use | ||
437 | * ecryptfs_interpose to create our initial inode and super block | ||
438 | * struct. | ||
439 | */ | ||
440 | static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) | ||
441 | { | ||
442 | int rc; | ||
443 | struct nameidata nd; | ||
444 | struct dentry *lower_root; | ||
445 | struct vfsmount *lower_mnt; | ||
446 | |||
447 | memset(&nd, 0, sizeof(struct nameidata)); | ||
448 | rc = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); | ||
449 | if (rc) { | ||
450 | ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); | ||
451 | goto out_free; | ||
452 | } | ||
453 | lower_root = nd.dentry; | ||
454 | if (!lower_root->d_inode) { | ||
455 | ecryptfs_printk(KERN_WARNING, | ||
456 | "No directory to interpose on\n"); | ||
457 | rc = -ENOENT; | ||
458 | goto out_free; | ||
459 | } | ||
460 | lower_mnt = nd.mnt; | ||
461 | ecryptfs_set_superblock_lower(sb, lower_root->d_sb); | ||
462 | sb->s_maxbytes = lower_root->d_sb->s_maxbytes; | ||
463 | ecryptfs_set_dentry_lower(sb->s_root, lower_root); | ||
464 | ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt); | ||
465 | if ((rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0))) | ||
466 | goto out_free; | ||
467 | rc = 0; | ||
468 | goto out; | ||
469 | out_free: | ||
470 | path_release(&nd); | ||
471 | out: | ||
472 | return rc; | ||
473 | } | ||
474 | |||
475 | /** | ||
476 | * ecryptfs_get_sb | ||
477 | * @fs_type | ||
478 | * @flags | ||
479 | * @dev_name: The path to mount over | ||
480 | * @raw_data: The options passed into the kernel | ||
481 | * | ||
482 | * The whole ecryptfs_get_sb process is broken into 4 functions: | ||
483 | * ecryptfs_parse_options(): handle options passed to ecryptfs, if any | ||
484 | * ecryptfs_fill_super(): used by get_sb_nodev, fills out the super_block | ||
485 | * with as much information as it can before needing | ||
486 | * the lower filesystem. | ||
487 | * ecryptfs_read_super(): this accesses the lower filesystem and uses | ||
488 | * ecryptfs_interpolate to perform most of the linking | ||
489 | * ecryptfs_interpolate(): links the lower filesystem into ecryptfs | ||
490 | */ | ||
491 | static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, | ||
492 | const char *dev_name, void *raw_data, | ||
493 | struct vfsmount *mnt) | ||
494 | { | ||
495 | int rc; | ||
496 | struct super_block *sb; | ||
497 | |||
498 | rc = get_sb_nodev(fs_type, flags, raw_data, ecryptfs_fill_super, mnt); | ||
499 | if (rc < 0) { | ||
500 | printk(KERN_ERR "Getting sb failed; rc = [%d]\n", rc); | ||
501 | goto out; | ||
502 | } | ||
503 | sb = mnt->mnt_sb; | ||
504 | rc = ecryptfs_parse_options(sb, raw_data); | ||
505 | if (rc) { | ||
506 | printk(KERN_ERR "Error parsing options; rc = [%d]\n", rc); | ||
507 | goto out_abort; | ||
508 | } | ||
509 | rc = ecryptfs_read_super(sb, dev_name); | ||
510 | if (rc) { | ||
511 | printk(KERN_ERR "Reading sb failed; rc = [%d]\n", rc); | ||
512 | goto out_abort; | ||
513 | } | ||
514 | goto out; | ||
515 | out_abort: | ||
516 | dput(sb->s_root); | ||
517 | up_write(&sb->s_umount); | ||
518 | deactivate_super(sb); | ||
519 | out: | ||
520 | return rc; | ||
521 | } | ||
522 | |||
/**
 * ecryptfs_kill_block_super
 * @sb: The ecryptfs super block
 *
 * Brings the super block down by delegating to generic_shutdown_super().
 * The private data is freed in ecryptfs_put_super(), not here.
 */
static void ecryptfs_kill_block_super(struct super_block *sb)
{
	generic_shutdown_super(sb);
}
534 | |||
535 | static struct file_system_type ecryptfs_fs_type = { | ||
536 | .owner = THIS_MODULE, | ||
537 | .name = "ecryptfs", | ||
538 | .get_sb = ecryptfs_get_sb, | ||
539 | .kill_sb = ecryptfs_kill_block_super, | ||
540 | .fs_flags = 0 | ||
541 | }; | ||
542 | |||
543 | /** | ||
544 | * inode_info_init_once | ||
545 | * | ||
546 | * Initializes the ecryptfs_inode_info_cache when it is created | ||
547 | */ | ||
548 | static void | ||
549 | inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags) | ||
550 | { | ||
551 | struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; | ||
552 | |||
553 | if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == | ||
554 | SLAB_CTOR_CONSTRUCTOR) | ||
555 | inode_init_once(&ei->vfs_inode); | ||
556 | } | ||
557 | |||
558 | static struct ecryptfs_cache_info { | ||
559 | kmem_cache_t **cache; | ||
560 | const char *name; | ||
561 | size_t size; | ||
562 | void (*ctor)(void*, struct kmem_cache *, unsigned long); | ||
563 | } ecryptfs_cache_infos[] = { | ||
564 | { | ||
565 | .cache = &ecryptfs_auth_tok_list_item_cache, | ||
566 | .name = "ecryptfs_auth_tok_list_item", | ||
567 | .size = sizeof(struct ecryptfs_auth_tok_list_item), | ||
568 | }, | ||
569 | { | ||
570 | .cache = &ecryptfs_file_info_cache, | ||
571 | .name = "ecryptfs_file_cache", | ||
572 | .size = sizeof(struct ecryptfs_file_info), | ||
573 | }, | ||
574 | { | ||
575 | .cache = &ecryptfs_dentry_info_cache, | ||
576 | .name = "ecryptfs_dentry_info_cache", | ||
577 | .size = sizeof(struct ecryptfs_dentry_info), | ||
578 | }, | ||
579 | { | ||
580 | .cache = &ecryptfs_inode_info_cache, | ||
581 | .name = "ecryptfs_inode_cache", | ||
582 | .size = sizeof(struct ecryptfs_inode_info), | ||
583 | .ctor = inode_info_init_once, | ||
584 | }, | ||
585 | { | ||
586 | .cache = &ecryptfs_sb_info_cache, | ||
587 | .name = "ecryptfs_sb_cache", | ||
588 | .size = sizeof(struct ecryptfs_sb_info), | ||
589 | }, | ||
590 | { | ||
591 | .cache = &ecryptfs_header_cache_0, | ||
592 | .name = "ecryptfs_headers_0", | ||
593 | .size = PAGE_CACHE_SIZE, | ||
594 | }, | ||
595 | { | ||
596 | .cache = &ecryptfs_header_cache_1, | ||
597 | .name = "ecryptfs_headers_1", | ||
598 | .size = PAGE_CACHE_SIZE, | ||
599 | }, | ||
600 | { | ||
601 | .cache = &ecryptfs_header_cache_2, | ||
602 | .name = "ecryptfs_headers_2", | ||
603 | .size = PAGE_CACHE_SIZE, | ||
604 | }, | ||
605 | { | ||
606 | .cache = &ecryptfs_lower_page_cache, | ||
607 | .name = "ecryptfs_lower_page_cache", | ||
608 | .size = PAGE_CACHE_SIZE, | ||
609 | }, | ||
610 | }; | ||
611 | |||
612 | static void ecryptfs_free_kmem_caches(void) | ||
613 | { | ||
614 | int i; | ||
615 | |||
616 | for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { | ||
617 | struct ecryptfs_cache_info *info; | ||
618 | |||
619 | info = &ecryptfs_cache_infos[i]; | ||
620 | if (*(info->cache)) | ||
621 | kmem_cache_destroy(*(info->cache)); | ||
622 | } | ||
623 | } | ||
624 | |||
625 | /** | ||
626 | * ecryptfs_init_kmem_caches | ||
627 | * | ||
628 | * Returns zero on success; non-zero otherwise | ||
629 | */ | ||
630 | static int ecryptfs_init_kmem_caches(void) | ||
631 | { | ||
632 | int i; | ||
633 | |||
634 | for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { | ||
635 | struct ecryptfs_cache_info *info; | ||
636 | |||
637 | info = &ecryptfs_cache_infos[i]; | ||
638 | *(info->cache) = kmem_cache_create(info->name, info->size, | ||
639 | 0, SLAB_HWCACHE_ALIGN, info->ctor, NULL); | ||
640 | if (!*(info->cache)) { | ||
641 | ecryptfs_free_kmem_caches(); | ||
642 | ecryptfs_printk(KERN_WARNING, "%s: " | ||
643 | "kmem_cache_create failed\n", | ||
644 | info->name); | ||
645 | return -ENOMEM; | ||
646 | } | ||
647 | } | ||
648 | return 0; | ||
649 | } | ||
650 | |||
651 | struct ecryptfs_obj { | ||
652 | char *name; | ||
653 | struct list_head slot_list; | ||
654 | struct kobject kobj; | ||
655 | }; | ||
656 | |||
657 | struct ecryptfs_attribute { | ||
658 | struct attribute attr; | ||
659 | ssize_t(*show) (struct ecryptfs_obj *, char *); | ||
660 | ssize_t(*store) (struct ecryptfs_obj *, const char *, size_t); | ||
661 | }; | ||
662 | |||
663 | static ssize_t | ||
664 | ecryptfs_attr_store(struct kobject *kobj, | ||
665 | struct attribute *attr, const char *buf, size_t len) | ||
666 | { | ||
667 | struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj, | ||
668 | kobj); | ||
669 | struct ecryptfs_attribute *attribute = | ||
670 | container_of(attr, struct ecryptfs_attribute, attr); | ||
671 | |||
672 | return (attribute->store ? attribute->store(obj, buf, len) : 0); | ||
673 | } | ||
674 | |||
675 | static ssize_t | ||
676 | ecryptfs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) | ||
677 | { | ||
678 | struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj, | ||
679 | kobj); | ||
680 | struct ecryptfs_attribute *attribute = | ||
681 | container_of(attr, struct ecryptfs_attribute, attr); | ||
682 | |||
683 | return (attribute->show ? attribute->show(obj, buf) : 0); | ||
684 | } | ||
685 | |||
686 | static struct sysfs_ops ecryptfs_sysfs_ops = { | ||
687 | .show = ecryptfs_attr_show, | ||
688 | .store = ecryptfs_attr_store | ||
689 | }; | ||
690 | |||
691 | static struct kobj_type ecryptfs_ktype = { | ||
692 | .sysfs_ops = &ecryptfs_sysfs_ops | ||
693 | }; | ||
694 | |||
695 | static decl_subsys(ecryptfs, &ecryptfs_ktype, NULL); | ||
696 | |||
697 | static ssize_t version_show(struct ecryptfs_obj *obj, char *buff) | ||
698 | { | ||
699 | return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK); | ||
700 | } | ||
701 | |||
702 | static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version); | ||
703 | |||
704 | struct ecryptfs_version_str_map_elem { | ||
705 | u32 flag; | ||
706 | char *str; | ||
707 | } ecryptfs_version_str_map[] = { | ||
708 | {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"}, | ||
709 | {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"}, | ||
710 | {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"}, | ||
711 | {ECRYPTFS_VERSIONING_POLICY, "policy"} | ||
712 | }; | ||
713 | |||
714 | static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff) | ||
715 | { | ||
716 | int i; | ||
717 | int remaining = PAGE_SIZE; | ||
718 | int total_written = 0; | ||
719 | |||
720 | buff[0] = '\0'; | ||
721 | for (i = 0; i < ARRAY_SIZE(ecryptfs_version_str_map); i++) { | ||
722 | int entry_size; | ||
723 | |||
724 | if (!(ECRYPTFS_VERSIONING_MASK | ||
725 | & ecryptfs_version_str_map[i].flag)) | ||
726 | continue; | ||
727 | entry_size = strlen(ecryptfs_version_str_map[i].str); | ||
728 | if ((entry_size + 2) > remaining) | ||
729 | goto out; | ||
730 | memcpy(buff, ecryptfs_version_str_map[i].str, entry_size); | ||
731 | buff[entry_size++] = '\n'; | ||
732 | buff[entry_size] = '\0'; | ||
733 | buff += entry_size; | ||
734 | total_written += entry_size; | ||
735 | remaining -= entry_size; | ||
736 | } | ||
737 | out: | ||
738 | return total_written; | ||
739 | } | ||
740 | |||
741 | static struct ecryptfs_attribute sysfs_attr_version_str = __ATTR_RO(version_str); | ||
742 | |||
743 | static int do_sysfs_registration(void) | ||
744 | { | ||
745 | int rc; | ||
746 | |||
747 | if ((rc = subsystem_register(&ecryptfs_subsys))) { | ||
748 | printk(KERN_ERR | ||
749 | "Unable to register ecryptfs sysfs subsystem\n"); | ||
750 | goto out; | ||
751 | } | ||
752 | rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj, | ||
753 | &sysfs_attr_version.attr); | ||
754 | if (rc) { | ||
755 | printk(KERN_ERR | ||
756 | "Unable to create ecryptfs version attribute\n"); | ||
757 | subsystem_unregister(&ecryptfs_subsys); | ||
758 | goto out; | ||
759 | } | ||
760 | rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj, | ||
761 | &sysfs_attr_version_str.attr); | ||
762 | if (rc) { | ||
763 | printk(KERN_ERR | ||
764 | "Unable to create ecryptfs version_str attribute\n"); | ||
765 | sysfs_remove_file(&ecryptfs_subsys.kset.kobj, | ||
766 | &sysfs_attr_version.attr); | ||
767 | subsystem_unregister(&ecryptfs_subsys); | ||
768 | goto out; | ||
769 | } | ||
770 | out: | ||
771 | return rc; | ||
772 | } | ||
773 | |||
774 | static int __init ecryptfs_init(void) | ||
775 | { | ||
776 | int rc; | ||
777 | |||
778 | if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_CACHE_SIZE) { | ||
779 | rc = -EINVAL; | ||
780 | ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is " | ||
781 | "larger than the host's page size, and so " | ||
782 | "eCryptfs cannot run on this system. The " | ||
783 | "default eCryptfs extent size is [%d] bytes; " | ||
784 | "the page size is [%d] bytes.\n", | ||
785 | ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE); | ||
786 | goto out; | ||
787 | } | ||
788 | rc = ecryptfs_init_kmem_caches(); | ||
789 | if (rc) { | ||
790 | printk(KERN_ERR | ||
791 | "Failed to allocate one or more kmem_cache objects\n"); | ||
792 | goto out; | ||
793 | } | ||
794 | rc = register_filesystem(&ecryptfs_fs_type); | ||
795 | if (rc) { | ||
796 | printk(KERN_ERR "Failed to register filesystem\n"); | ||
797 | ecryptfs_free_kmem_caches(); | ||
798 | goto out; | ||
799 | } | ||
800 | kset_set_kset_s(&ecryptfs_subsys, fs_subsys); | ||
801 | sysfs_attr_version.attr.owner = THIS_MODULE; | ||
802 | sysfs_attr_version_str.attr.owner = THIS_MODULE; | ||
803 | rc = do_sysfs_registration(); | ||
804 | if (rc) { | ||
805 | printk(KERN_ERR "sysfs registration failed\n"); | ||
806 | unregister_filesystem(&ecryptfs_fs_type); | ||
807 | ecryptfs_free_kmem_caches(); | ||
808 | goto out; | ||
809 | } | ||
810 | out: | ||
811 | return rc; | ||
812 | } | ||
813 | |||
814 | static void __exit ecryptfs_exit(void) | ||
815 | { | ||
816 | sysfs_remove_file(&ecryptfs_subsys.kset.kobj, | ||
817 | &sysfs_attr_version.attr); | ||
818 | sysfs_remove_file(&ecryptfs_subsys.kset.kobj, | ||
819 | &sysfs_attr_version_str.attr); | ||
820 | subsystem_unregister(&ecryptfs_subsys); | ||
821 | unregister_filesystem(&ecryptfs_fs_type); | ||
822 | ecryptfs_free_kmem_caches(); | ||
823 | } | ||
824 | |||
825 | MODULE_AUTHOR("Michael A. Halcrow <mhalcrow@us.ibm.com>"); | ||
826 | MODULE_DESCRIPTION("eCryptfs"); | ||
827 | |||
828 | MODULE_LICENSE("GPL"); | ||
829 | |||
830 | module_init(ecryptfs_init) | ||
831 | module_exit(ecryptfs_exit) | ||
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c new file mode 100644 index 000000000000..924dd90a4cf5 --- /dev/null +++ b/fs/ecryptfs/mmap.c | |||
@@ -0,0 +1,788 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * This is where eCryptfs coordinates the symmetric encryption and | ||
4 | * decryption of the file data as it passes between the lower | ||
5 | * encrypted file and the upper decrypted file. | ||
6 | * | ||
7 | * Copyright (C) 1997-2003 Erez Zadok | ||
8 | * Copyright (C) 2001-2003 Stony Brook University | ||
9 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
10 | * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License as | ||
14 | * published by the Free Software Foundation; either version 2 of the | ||
15 | * License, or (at your option) any later version. | ||
16 | * | ||
17 | * This program is distributed in the hope that it will be useful, but | ||
18 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
20 | * General Public License for more details. | ||
21 | * | ||
22 | * You should have received a copy of the GNU General Public License | ||
23 | * along with this program; if not, write to the Free Software | ||
24 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
25 | * 02111-1307, USA. | ||
26 | */ | ||
27 | |||
28 | #include <linux/pagemap.h> | ||
29 | #include <linux/writeback.h> | ||
30 | #include <linux/page-flags.h> | ||
31 | #include <linux/mount.h> | ||
32 | #include <linux/file.h> | ||
33 | #include <linux/crypto.h> | ||
34 | #include <linux/scatterlist.h> | ||
35 | #include "ecryptfs_kernel.h" | ||
36 | |||
/*
 * kmem cache handle defined here but not referenced by any function in
 * this file.  NOTE(review): presumably set up and used elsewhere in
 * eCryptfs -- confirm.
 */
struct kmem_cache *ecryptfs_lower_page_cache;
38 | |||
39 | /** | ||
40 | * ecryptfs_get1page | ||
41 | * | ||
42 | * Get one page from cache or lower f/s, return error otherwise. | ||
43 | * | ||
44 | * Returns unlocked and up-to-date page (if ok), with increased | ||
45 | * refcnt. | ||
46 | */ | ||
47 | static struct page *ecryptfs_get1page(struct file *file, int index) | ||
48 | { | ||
49 | struct page *page; | ||
50 | struct dentry *dentry; | ||
51 | struct inode *inode; | ||
52 | struct address_space *mapping; | ||
53 | |||
54 | dentry = file->f_dentry; | ||
55 | inode = dentry->d_inode; | ||
56 | mapping = inode->i_mapping; | ||
57 | page = read_cache_page(mapping, index, | ||
58 | (filler_t *)mapping->a_ops->readpage, | ||
59 | (void *)file); | ||
60 | if (IS_ERR(page)) | ||
61 | goto out; | ||
62 | wait_on_page_locked(page); | ||
63 | out: | ||
64 | return page; | ||
65 | } | ||
66 | |||
67 | static | ||
68 | int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros); | ||
69 | |||
70 | /** | ||
71 | * ecryptfs_fill_zeros | ||
72 | * @file: The ecryptfs file | ||
73 | * @new_length: The new length of the data in the underlying file; | ||
74 | * everything between the prior end of the file and the | ||
75 | * new end of the file will be filled with zero's. | ||
76 | * new_length must be greater than current length | ||
77 | * | ||
78 | * Function for handling lseek-ing past the end of the file. | ||
79 | * | ||
80 | * This function does not support shrinking, only growing a file. | ||
81 | * | ||
82 | * Returns zero on success; non-zero otherwise. | ||
83 | */ | ||
84 | int ecryptfs_fill_zeros(struct file *file, loff_t new_length) | ||
85 | { | ||
86 | int rc = 0; | ||
87 | struct dentry *dentry = file->f_dentry; | ||
88 | struct inode *inode = dentry->d_inode; | ||
89 | pgoff_t old_end_page_index = 0; | ||
90 | pgoff_t index = old_end_page_index; | ||
91 | int old_end_pos_in_page = -1; | ||
92 | pgoff_t new_end_page_index; | ||
93 | int new_end_pos_in_page; | ||
94 | loff_t cur_length = i_size_read(inode); | ||
95 | |||
96 | if (cur_length != 0) { | ||
97 | index = old_end_page_index = | ||
98 | ((cur_length - 1) >> PAGE_CACHE_SHIFT); | ||
99 | old_end_pos_in_page = ((cur_length - 1) & ~PAGE_CACHE_MASK); | ||
100 | } | ||
101 | new_end_page_index = ((new_length - 1) >> PAGE_CACHE_SHIFT); | ||
102 | new_end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK); | ||
103 | ecryptfs_printk(KERN_DEBUG, "old_end_page_index = [0x%.16x]; " | ||
104 | "old_end_pos_in_page = [%d]; " | ||
105 | "new_end_page_index = [0x%.16x]; " | ||
106 | "new_end_pos_in_page = [%d]\n", | ||
107 | old_end_page_index, old_end_pos_in_page, | ||
108 | new_end_page_index, new_end_pos_in_page); | ||
109 | if (old_end_page_index == new_end_page_index) { | ||
110 | /* Start and end are in the same page; we just need to | ||
111 | * set a portion of the existing page to zero's */ | ||
112 | rc = write_zeros(file, index, (old_end_pos_in_page + 1), | ||
113 | (new_end_pos_in_page - old_end_pos_in_page)); | ||
114 | if (rc) | ||
115 | ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " | ||
116 | "index=[0x%.16x], " | ||
117 | "old_end_pos_in_page=[d], " | ||
118 | "(PAGE_CACHE_SIZE - new_end_pos_in_page" | ||
119 | "=[%d]" | ||
120 | ")=[d]) returned [%d]\n", file, index, | ||
121 | old_end_pos_in_page, | ||
122 | new_end_pos_in_page, | ||
123 | (PAGE_CACHE_SIZE - new_end_pos_in_page), | ||
124 | rc); | ||
125 | goto out; | ||
126 | } | ||
127 | /* Fill the remainder of the previous last page with zeros */ | ||
128 | rc = write_zeros(file, index, (old_end_pos_in_page + 1), | ||
129 | ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page)); | ||
130 | if (rc) { | ||
131 | ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " | ||
132 | "index=[0x%.16x], old_end_pos_in_page=[d], " | ||
133 | "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) " | ||
134 | "returned [%d]\n", file, index, | ||
135 | old_end_pos_in_page, | ||
136 | (PAGE_CACHE_SIZE - old_end_pos_in_page), rc); | ||
137 | goto out; | ||
138 | } | ||
139 | index++; | ||
140 | while (index < new_end_page_index) { | ||
141 | /* Fill all intermediate pages with zeros */ | ||
142 | rc = write_zeros(file, index, 0, PAGE_CACHE_SIZE); | ||
143 | if (rc) { | ||
144 | ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " | ||
145 | "index=[0x%.16x], " | ||
146 | "old_end_pos_in_page=[d], " | ||
147 | "(PAGE_CACHE_SIZE - new_end_pos_in_page" | ||
148 | "=[%d]" | ||
149 | ")=[d]) returned [%d]\n", file, index, | ||
150 | old_end_pos_in_page, | ||
151 | new_end_pos_in_page, | ||
152 | (PAGE_CACHE_SIZE - new_end_pos_in_page), | ||
153 | rc); | ||
154 | goto out; | ||
155 | } | ||
156 | index++; | ||
157 | } | ||
158 | /* Fill the portion at the beginning of the last new page with | ||
159 | * zero's */ | ||
160 | rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1)); | ||
161 | if (rc) { | ||
162 | ecryptfs_printk(KERN_ERR, "write_zeros(file=" | ||
163 | "[%p], index=[0x%.16x], 0, " | ||
164 | "new_end_pos_in_page=[%d]" | ||
165 | "returned [%d]\n", file, index, | ||
166 | new_end_pos_in_page, rc); | ||
167 | goto out; | ||
168 | } | ||
169 | out: | ||
170 | return rc; | ||
171 | } | ||
172 | |||
173 | /** | ||
174 | * ecryptfs_writepage | ||
175 | * @page: Page that is locked before this call is made | ||
176 | * | ||
177 | * Returns zero on success; non-zero otherwise | ||
178 | */ | ||
179 | static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) | ||
180 | { | ||
181 | struct ecryptfs_page_crypt_context ctx; | ||
182 | int rc; | ||
183 | |||
184 | ctx.page = page; | ||
185 | ctx.mode = ECRYPTFS_WRITEPAGE_MODE; | ||
186 | ctx.param.wbc = wbc; | ||
187 | rc = ecryptfs_encrypt_page(&ctx); | ||
188 | if (rc) { | ||
189 | ecryptfs_printk(KERN_WARNING, "Error encrypting " | ||
190 | "page (upper index [0x%.16x])\n", page->index); | ||
191 | ClearPageUptodate(page); | ||
192 | goto out; | ||
193 | } | ||
194 | SetPageUptodate(page); | ||
195 | unlock_page(page); | ||
196 | out: | ||
197 | return rc; | ||
198 | } | ||
199 | |||
200 | /** | ||
201 | * Reads the data from the lower file file at index lower_page_index | ||
202 | * and copies that data into page. | ||
203 | * | ||
204 | * @param page Page to fill | ||
205 | * @param lower_page_index Index of the page in the lower file to get | ||
206 | */ | ||
207 | int ecryptfs_do_readpage(struct file *file, struct page *page, | ||
208 | pgoff_t lower_page_index) | ||
209 | { | ||
210 | int rc; | ||
211 | struct dentry *dentry; | ||
212 | struct file *lower_file; | ||
213 | struct dentry *lower_dentry; | ||
214 | struct inode *inode; | ||
215 | struct inode *lower_inode; | ||
216 | char *page_data; | ||
217 | struct page *lower_page = NULL; | ||
218 | char *lower_page_data; | ||
219 | const struct address_space_operations *lower_a_ops; | ||
220 | |||
221 | dentry = file->f_dentry; | ||
222 | lower_file = ecryptfs_file_to_lower(file); | ||
223 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
224 | inode = dentry->d_inode; | ||
225 | lower_inode = ecryptfs_inode_to_lower(inode); | ||
226 | lower_a_ops = lower_inode->i_mapping->a_ops; | ||
227 | lower_page = read_cache_page(lower_inode->i_mapping, lower_page_index, | ||
228 | (filler_t *)lower_a_ops->readpage, | ||
229 | (void *)lower_file); | ||
230 | if (IS_ERR(lower_page)) { | ||
231 | rc = PTR_ERR(lower_page); | ||
232 | lower_page = NULL; | ||
233 | ecryptfs_printk(KERN_ERR, "Error reading from page cache\n"); | ||
234 | goto out; | ||
235 | } | ||
236 | wait_on_page_locked(lower_page); | ||
237 | page_data = (char *)kmap(page); | ||
238 | if (!page_data) { | ||
239 | rc = -ENOMEM; | ||
240 | ecryptfs_printk(KERN_ERR, "Error mapping page\n"); | ||
241 | goto out; | ||
242 | } | ||
243 | lower_page_data = (char *)kmap(lower_page); | ||
244 | if (!lower_page_data) { | ||
245 | rc = -ENOMEM; | ||
246 | ecryptfs_printk(KERN_ERR, "Error mapping page\n"); | ||
247 | kunmap(page); | ||
248 | goto out; | ||
249 | } | ||
250 | memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE); | ||
251 | kunmap(lower_page); | ||
252 | kunmap(page); | ||
253 | rc = 0; | ||
254 | out: | ||
255 | if (likely(lower_page)) | ||
256 | page_cache_release(lower_page); | ||
257 | if (rc == 0) | ||
258 | SetPageUptodate(page); | ||
259 | else | ||
260 | ClearPageUptodate(page); | ||
261 | return rc; | ||
262 | } | ||
263 | |||
264 | /** | ||
265 | * ecryptfs_readpage | ||
266 | * @file: This is an ecryptfs file | ||
267 | * @page: ecryptfs associated page to stick the read data into | ||
268 | * | ||
269 | * Read in a page, decrypting if necessary. | ||
270 | * | ||
271 | * Returns zero on success; non-zero on error. | ||
272 | */ | ||
273 | static int ecryptfs_readpage(struct file *file, struct page *page) | ||
274 | { | ||
275 | int rc = 0; | ||
276 | struct ecryptfs_crypt_stat *crypt_stat; | ||
277 | |||
278 | BUG_ON(!(file && file->f_dentry && file->f_dentry->d_inode)); | ||
279 | crypt_stat = | ||
280 | &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat; | ||
281 | if (!crypt_stat | ||
282 | || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED) | ||
283 | || ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) { | ||
284 | ecryptfs_printk(KERN_DEBUG, | ||
285 | "Passing through unencrypted page\n"); | ||
286 | rc = ecryptfs_do_readpage(file, page, page->index); | ||
287 | if (rc) { | ||
288 | ecryptfs_printk(KERN_ERR, "Error reading page; rc = " | ||
289 | "[%d]\n", rc); | ||
290 | goto out; | ||
291 | } | ||
292 | } else { | ||
293 | rc = ecryptfs_decrypt_page(file, page); | ||
294 | if (rc) { | ||
295 | |||
296 | ecryptfs_printk(KERN_ERR, "Error decrypting page; " | ||
297 | "rc = [%d]\n", rc); | ||
298 | goto out; | ||
299 | } | ||
300 | } | ||
301 | SetPageUptodate(page); | ||
302 | out: | ||
303 | if (rc) | ||
304 | ClearPageUptodate(page); | ||
305 | ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", | ||
306 | page->index); | ||
307 | unlock_page(page); | ||
308 | return rc; | ||
309 | } | ||
310 | |||
311 | static int fill_zeros_to_end_of_page(struct page *page, unsigned int to) | ||
312 | { | ||
313 | struct inode *inode = page->mapping->host; | ||
314 | int end_byte_in_page; | ||
315 | int rc = 0; | ||
316 | char *page_virt; | ||
317 | |||
318 | if ((i_size_read(inode) / PAGE_CACHE_SIZE) == page->index) { | ||
319 | end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; | ||
320 | if (to > end_byte_in_page) | ||
321 | end_byte_in_page = to; | ||
322 | page_virt = kmap(page); | ||
323 | if (!page_virt) { | ||
324 | rc = -ENOMEM; | ||
325 | ecryptfs_printk(KERN_WARNING, | ||
326 | "Could not map page\n"); | ||
327 | goto out; | ||
328 | } | ||
329 | memset((page_virt + end_byte_in_page), 0, | ||
330 | (PAGE_CACHE_SIZE - end_byte_in_page)); | ||
331 | kunmap(page); | ||
332 | } | ||
333 | out: | ||
334 | return rc; | ||
335 | } | ||
336 | |||
337 | static int ecryptfs_prepare_write(struct file *file, struct page *page, | ||
338 | unsigned from, unsigned to) | ||
339 | { | ||
340 | int rc = 0; | ||
341 | |||
342 | kmap(page); | ||
343 | if (from == 0 && to == PAGE_CACHE_SIZE) | ||
344 | goto out; /* If we are writing a full page, it will be | ||
345 | up to date. */ | ||
346 | if (!PageUptodate(page)) | ||
347 | rc = ecryptfs_do_readpage(file, page, page->index); | ||
348 | out: | ||
349 | return rc; | ||
350 | } | ||
351 | |||
352 | int ecryptfs_grab_and_map_lower_page(struct page **lower_page, | ||
353 | char **lower_virt, | ||
354 | struct inode *lower_inode, | ||
355 | unsigned long lower_page_index) | ||
356 | { | ||
357 | int rc = 0; | ||
358 | |||
359 | (*lower_page) = grab_cache_page(lower_inode->i_mapping, | ||
360 | lower_page_index); | ||
361 | if (!(*lower_page)) { | ||
362 | ecryptfs_printk(KERN_ERR, "grab_cache_page for " | ||
363 | "lower_page_index = [0x%.16x] failed\n", | ||
364 | lower_page_index); | ||
365 | rc = -EINVAL; | ||
366 | goto out; | ||
367 | } | ||
368 | if (lower_virt) | ||
369 | (*lower_virt) = kmap((*lower_page)); | ||
370 | else | ||
371 | kmap((*lower_page)); | ||
372 | out: | ||
373 | return rc; | ||
374 | } | ||
375 | |||
376 | int ecryptfs_writepage_and_release_lower_page(struct page *lower_page, | ||
377 | struct inode *lower_inode, | ||
378 | struct writeback_control *wbc) | ||
379 | { | ||
380 | int rc = 0; | ||
381 | |||
382 | rc = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc); | ||
383 | if (rc) { | ||
384 | ecryptfs_printk(KERN_ERR, "Error calling lower writepage(); " | ||
385 | "rc = [%d]\n", rc); | ||
386 | goto out; | ||
387 | } | ||
388 | lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; | ||
389 | page_cache_release(lower_page); | ||
390 | out: | ||
391 | return rc; | ||
392 | } | ||
393 | |||
394 | static void ecryptfs_unmap_and_release_lower_page(struct page *lower_page) | ||
395 | { | ||
396 | kunmap(lower_page); | ||
397 | ecryptfs_printk(KERN_DEBUG, "Unlocking lower page with index = " | ||
398 | "[0x%.16x]\n", lower_page->index); | ||
399 | unlock_page(lower_page); | ||
400 | page_cache_release(lower_page); | ||
401 | } | ||
402 | |||
403 | /** | ||
404 | * ecryptfs_write_inode_size_to_header | ||
405 | * | ||
406 | * Writes the lower file size to the first 8 bytes of the header. | ||
407 | * | ||
408 | * Returns zero on success; non-zero on error. | ||
409 | */ | ||
410 | int | ||
411 | ecryptfs_write_inode_size_to_header(struct file *lower_file, | ||
412 | struct inode *lower_inode, | ||
413 | struct inode *inode) | ||
414 | { | ||
415 | int rc = 0; | ||
416 | struct page *header_page; | ||
417 | char *header_virt; | ||
418 | const struct address_space_operations *lower_a_ops; | ||
419 | u64 file_size; | ||
420 | |||
421 | rc = ecryptfs_grab_and_map_lower_page(&header_page, &header_virt, | ||
422 | lower_inode, 0); | ||
423 | if (rc) { | ||
424 | ecryptfs_printk(KERN_ERR, "grab_cache_page for header page " | ||
425 | "failed\n"); | ||
426 | goto out; | ||
427 | } | ||
428 | lower_a_ops = lower_inode->i_mapping->a_ops; | ||
429 | rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8); | ||
430 | file_size = (u64)i_size_read(inode); | ||
431 | ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size); | ||
432 | file_size = cpu_to_be64(file_size); | ||
433 | memcpy(header_virt, &file_size, sizeof(u64)); | ||
434 | rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8); | ||
435 | if (rc < 0) | ||
436 | ecryptfs_printk(KERN_ERR, "Error commiting header page " | ||
437 | "write\n"); | ||
438 | ecryptfs_unmap_and_release_lower_page(header_page); | ||
439 | lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; | ||
440 | mark_inode_dirty_sync(inode); | ||
441 | out: | ||
442 | return rc; | ||
443 | } | ||
444 | |||
445 | int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, | ||
446 | struct file *lower_file, | ||
447 | unsigned long lower_page_index, int byte_offset, | ||
448 | int region_bytes) | ||
449 | { | ||
450 | int rc = 0; | ||
451 | |||
452 | rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, lower_inode, | ||
453 | lower_page_index); | ||
454 | if (rc) { | ||
455 | ecryptfs_printk(KERN_ERR, "Error attempting to grab and map " | ||
456 | "lower page with index [0x%.16x]\n", | ||
457 | lower_page_index); | ||
458 | goto out; | ||
459 | } | ||
460 | rc = lower_inode->i_mapping->a_ops->prepare_write(lower_file, | ||
461 | (*lower_page), | ||
462 | byte_offset, | ||
463 | region_bytes); | ||
464 | if (rc) { | ||
465 | ecryptfs_printk(KERN_ERR, "prepare_write for " | ||
466 | "lower_page_index = [0x%.16x] failed; rc = " | ||
467 | "[%d]\n", lower_page_index, rc); | ||
468 | } | ||
469 | out: | ||
470 | if (rc && (*lower_page)) { | ||
471 | ecryptfs_unmap_and_release_lower_page(*lower_page); | ||
472 | (*lower_page) = NULL; | ||
473 | } | ||
474 | return rc; | ||
475 | } | ||
476 | |||
477 | /** | ||
478 | * ecryptfs_commit_lower_page | ||
479 | * | ||
480 | * Returns zero on success; non-zero on error | ||
481 | */ | ||
482 | int | ||
483 | ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode, | ||
484 | struct file *lower_file, int byte_offset, | ||
485 | int region_size) | ||
486 | { | ||
487 | int rc = 0; | ||
488 | |||
489 | rc = lower_inode->i_mapping->a_ops->commit_write( | ||
490 | lower_file, lower_page, byte_offset, region_size); | ||
491 | if (rc < 0) { | ||
492 | ecryptfs_printk(KERN_ERR, | ||
493 | "Error committing write; rc = [%d]\n", rc); | ||
494 | } else | ||
495 | rc = 0; | ||
496 | ecryptfs_unmap_and_release_lower_page(lower_page); | ||
497 | return rc; | ||
498 | } | ||
499 | |||
500 | /** | ||
501 | * ecryptfs_copy_page_to_lower | ||
502 | * | ||
503 | * Used for plaintext pass-through; no page index interpolation | ||
504 | * required. | ||
505 | */ | ||
506 | int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode, | ||
507 | struct file *lower_file) | ||
508 | { | ||
509 | int rc = 0; | ||
510 | struct page *lower_page; | ||
511 | |||
512 | rc = ecryptfs_get_lower_page(&lower_page, lower_inode, lower_file, | ||
513 | page->index, 0, PAGE_CACHE_SIZE); | ||
514 | if (rc) { | ||
515 | ecryptfs_printk(KERN_ERR, "Error attempting to get page " | ||
516 | "at index [0x%.16x]\n", page->index); | ||
517 | goto out; | ||
518 | } | ||
519 | /* TODO: aops */ | ||
520 | memcpy((char *)page_address(lower_page), page_address(page), | ||
521 | PAGE_CACHE_SIZE); | ||
522 | rc = ecryptfs_commit_lower_page(lower_page, lower_inode, lower_file, | ||
523 | 0, PAGE_CACHE_SIZE); | ||
524 | if (rc) | ||
525 | ecryptfs_printk(KERN_ERR, "Error attempting to commit page " | ||
526 | "at index [0x%.16x]\n", page->index); | ||
527 | out: | ||
528 | return rc; | ||
529 | } | ||
530 | |||
531 | static int | ||
532 | process_new_file(struct ecryptfs_crypt_stat *crypt_stat, | ||
533 | struct file *file, struct inode *inode) | ||
534 | { | ||
535 | struct page *header_page; | ||
536 | const struct address_space_operations *lower_a_ops; | ||
537 | struct inode *lower_inode; | ||
538 | struct file *lower_file; | ||
539 | char *header_virt; | ||
540 | int rc = 0; | ||
541 | int current_header_page = 0; | ||
542 | int header_pages; | ||
543 | int more_header_data_to_be_written = 1; | ||
544 | |||
545 | lower_inode = ecryptfs_inode_to_lower(inode); | ||
546 | lower_file = ecryptfs_file_to_lower(file); | ||
547 | lower_a_ops = lower_inode->i_mapping->a_ops; | ||
548 | header_pages = ((crypt_stat->header_extent_size | ||
549 | * crypt_stat->num_header_extents_at_front) | ||
550 | / PAGE_CACHE_SIZE); | ||
551 | BUG_ON(header_pages < 1); | ||
552 | while (current_header_page < header_pages) { | ||
553 | rc = ecryptfs_grab_and_map_lower_page(&header_page, | ||
554 | &header_virt, | ||
555 | lower_inode, | ||
556 | current_header_page); | ||
557 | if (rc) { | ||
558 | ecryptfs_printk(KERN_ERR, "grab_cache_page for " | ||
559 | "header page [%d] failed; rc = [%d]\n", | ||
560 | current_header_page, rc); | ||
561 | goto out; | ||
562 | } | ||
563 | rc = lower_a_ops->prepare_write(lower_file, header_page, 0, | ||
564 | PAGE_CACHE_SIZE); | ||
565 | if (rc) { | ||
566 | ecryptfs_printk(KERN_ERR, "Error preparing to write " | ||
567 | "header page out; rc = [%d]\n", rc); | ||
568 | goto out; | ||
569 | } | ||
570 | memset(header_virt, 0, PAGE_CACHE_SIZE); | ||
571 | if (more_header_data_to_be_written) { | ||
572 | rc = ecryptfs_write_headers_virt(header_virt, | ||
573 | crypt_stat, | ||
574 | file->f_dentry); | ||
575 | if (rc) { | ||
576 | ecryptfs_printk(KERN_WARNING, "Error " | ||
577 | "generating header; rc = " | ||
578 | "[%d]\n", rc); | ||
579 | rc = -EIO; | ||
580 | memset(header_virt, 0, PAGE_CACHE_SIZE); | ||
581 | ecryptfs_unmap_and_release_lower_page( | ||
582 | header_page); | ||
583 | goto out; | ||
584 | } | ||
585 | if (current_header_page == 0) | ||
586 | memset(header_virt, 0, 8); | ||
587 | more_header_data_to_be_written = 0; | ||
588 | } | ||
589 | rc = lower_a_ops->commit_write(lower_file, header_page, 0, | ||
590 | PAGE_CACHE_SIZE); | ||
591 | ecryptfs_unmap_and_release_lower_page(header_page); | ||
592 | if (rc < 0) { | ||
593 | ecryptfs_printk(KERN_ERR, | ||
594 | "Error commiting header page write; " | ||
595 | "rc = [%d]\n", rc); | ||
596 | break; | ||
597 | } | ||
598 | current_header_page++; | ||
599 | } | ||
600 | if (rc >= 0) { | ||
601 | rc = 0; | ||
602 | ecryptfs_printk(KERN_DEBUG, "lower_inode->i_blocks = " | ||
603 | "[0x%.16x]\n", lower_inode->i_blocks); | ||
604 | i_size_write(inode, 0); | ||
605 | lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; | ||
606 | mark_inode_dirty_sync(inode); | ||
607 | } | ||
608 | ecryptfs_printk(KERN_DEBUG, "Clearing ECRYPTFS_NEW_FILE flag in " | ||
609 | "crypt_stat at memory location [%p]\n", crypt_stat); | ||
610 | ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE); | ||
611 | out: | ||
612 | return rc; | ||
613 | } | ||
614 | |||
615 | /** | ||
616 | * ecryptfs_commit_write | ||
617 | * @file: The eCryptfs file object | ||
618 | * @page: The eCryptfs page | ||
619 | * @from: Ignored (we rotate the page IV on each write) | ||
620 | * @to: Ignored | ||
621 | * | ||
622 | * This is where we encrypt the data and pass the encrypted data to | ||
623 | * the lower filesystem. In OpenPGP-compatible mode, we operate on | ||
624 | * entire underlying packets. | ||
625 | */ | ||
626 | static int ecryptfs_commit_write(struct file *file, struct page *page, | ||
627 | unsigned from, unsigned to) | ||
628 | { | ||
629 | struct ecryptfs_page_crypt_context ctx; | ||
630 | loff_t pos; | ||
631 | struct inode *inode; | ||
632 | struct inode *lower_inode; | ||
633 | struct file *lower_file; | ||
634 | struct ecryptfs_crypt_stat *crypt_stat; | ||
635 | int rc; | ||
636 | |||
637 | inode = page->mapping->host; | ||
638 | lower_inode = ecryptfs_inode_to_lower(inode); | ||
639 | lower_file = ecryptfs_file_to_lower(file); | ||
640 | mutex_lock(&lower_inode->i_mutex); | ||
641 | crypt_stat = | ||
642 | &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat; | ||
643 | if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) { | ||
644 | ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in " | ||
645 | "crypt_stat at memory location [%p]\n", crypt_stat); | ||
646 | rc = process_new_file(crypt_stat, file, inode); | ||
647 | if (rc) { | ||
648 | ecryptfs_printk(KERN_ERR, "Error processing new " | ||
649 | "file; rc = [%d]\n", rc); | ||
650 | goto out; | ||
651 | } | ||
652 | } else | ||
653 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); | ||
654 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" | ||
655 | "(page w/ index = [0x%.16x], to = [%d])\n", page->index, | ||
656 | to); | ||
657 | rc = fill_zeros_to_end_of_page(page, to); | ||
658 | if (rc) { | ||
659 | ecryptfs_printk(KERN_WARNING, "Error attempting to fill " | ||
660 | "zeros in page with index = [0x%.16x]\n", | ||
661 | page->index); | ||
662 | goto out; | ||
663 | } | ||
664 | ctx.page = page; | ||
665 | ctx.mode = ECRYPTFS_PREPARE_COMMIT_MODE; | ||
666 | ctx.param.lower_file = lower_file; | ||
667 | rc = ecryptfs_encrypt_page(&ctx); | ||
668 | if (rc) { | ||
669 | ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " | ||
670 | "index [0x%.16x])\n", page->index); | ||
671 | goto out; | ||
672 | } | ||
673 | rc = 0; | ||
674 | inode->i_blocks = lower_inode->i_blocks; | ||
675 | pos = (page->index << PAGE_CACHE_SHIFT) + to; | ||
676 | if (pos > i_size_read(inode)) { | ||
677 | i_size_write(inode, pos); | ||
678 | ecryptfs_printk(KERN_DEBUG, "Expanded file size to " | ||
679 | "[0x%.16x]\n", i_size_read(inode)); | ||
680 | } | ||
681 | ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode); | ||
682 | lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; | ||
683 | mark_inode_dirty_sync(inode); | ||
684 | out: | ||
685 | kunmap(page); /* mapped in prior call (prepare_write) */ | ||
686 | if (rc < 0) | ||
687 | ClearPageUptodate(page); | ||
688 | else | ||
689 | SetPageUptodate(page); | ||
690 | mutex_unlock(&lower_inode->i_mutex); | ||
691 | return rc; | ||
692 | } | ||
693 | |||
694 | /** | ||
695 | * write_zeros | ||
696 | * @file: The ecryptfs file | ||
697 | * @index: The index in which we are writing | ||
698 | * @start: The position after the last block of data | ||
699 | * @num_zeros: The number of zeros to write | ||
700 | * | ||
701 | * Write a specified number of zero's to a page. | ||
702 | * | ||
703 | * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE | ||
704 | */ | ||
705 | static | ||
706 | int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) | ||
707 | { | ||
708 | int rc = 0; | ||
709 | struct page *tmp_page; | ||
710 | |||
711 | tmp_page = ecryptfs_get1page(file, index); | ||
712 | if (IS_ERR(tmp_page)) { | ||
713 | ecryptfs_printk(KERN_ERR, "Error getting page at index " | ||
714 | "[0x%.16x]\n", index); | ||
715 | rc = PTR_ERR(tmp_page); | ||
716 | goto out; | ||
717 | } | ||
718 | kmap(tmp_page); | ||
719 | rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros); | ||
720 | if (rc) { | ||
721 | ecryptfs_printk(KERN_ERR, "Error preparing to write zero's " | ||
722 | "to remainder of page at index [0x%.16x]\n", | ||
723 | index); | ||
724 | kunmap(tmp_page); | ||
725 | page_cache_release(tmp_page); | ||
726 | goto out; | ||
727 | } | ||
728 | memset(((char *)page_address(tmp_page) + start), 0, num_zeros); | ||
729 | rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros); | ||
730 | if (rc < 0) { | ||
731 | ecryptfs_printk(KERN_ERR, "Error attempting to write zero's " | ||
732 | "to remainder of page at index [0x%.16x]\n", | ||
733 | index); | ||
734 | kunmap(tmp_page); | ||
735 | page_cache_release(tmp_page); | ||
736 | goto out; | ||
737 | } | ||
738 | rc = 0; | ||
739 | kunmap(tmp_page); | ||
740 | page_cache_release(tmp_page); | ||
741 | out: | ||
742 | return rc; | ||
743 | } | ||
744 | |||
745 | static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) | ||
746 | { | ||
747 | int rc = 0; | ||
748 | struct inode *inode; | ||
749 | struct inode *lower_inode; | ||
750 | |||
751 | inode = (struct inode *)mapping->host; | ||
752 | lower_inode = ecryptfs_inode_to_lower(inode); | ||
753 | if (lower_inode->i_mapping->a_ops->bmap) | ||
754 | rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping, | ||
755 | block); | ||
756 | return rc; | ||
757 | } | ||
758 | |||
759 | static void ecryptfs_sync_page(struct page *page) | ||
760 | { | ||
761 | struct inode *inode; | ||
762 | struct inode *lower_inode; | ||
763 | struct page *lower_page; | ||
764 | |||
765 | inode = page->mapping->host; | ||
766 | lower_inode = ecryptfs_inode_to_lower(inode); | ||
767 | /* NOTE: Recently swapped with grab_cache_page(), since | ||
768 | * sync_page() just makes sure that pending I/O gets done. */ | ||
769 | lower_page = find_lock_page(lower_inode->i_mapping, page->index); | ||
770 | if (!lower_page) { | ||
771 | ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n"); | ||
772 | return; | ||
773 | } | ||
774 | lower_page->mapping->a_ops->sync_page(lower_page); | ||
775 | ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", | ||
776 | lower_page->index); | ||
777 | unlock_page(lower_page); | ||
778 | page_cache_release(lower_page); | ||
779 | } | ||
780 | |||
781 | struct address_space_operations ecryptfs_aops = { | ||
782 | .writepage = ecryptfs_writepage, | ||
783 | .readpage = ecryptfs_readpage, | ||
784 | .prepare_write = ecryptfs_prepare_write, | ||
785 | .commit_write = ecryptfs_commit_write, | ||
786 | .bmap = ecryptfs_bmap, | ||
787 | .sync_page = ecryptfs_sync_page, | ||
788 | }; | ||
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c new file mode 100644 index 000000000000..c337c0410fb1 --- /dev/null +++ b/fs/ecryptfs/super.c | |||
@@ -0,0 +1,198 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * | ||
4 | * Copyright (C) 1997-2003 Erez Zadok | ||
5 | * Copyright (C) 2001-2003 Stony Brook University | ||
6 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
7 | * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> | ||
8 | * Michael C. Thompson <mcthomps@us.ibm.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License as | ||
12 | * published by the Free Software Foundation; either version 2 of the | ||
13 | * License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
23 | * 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | #include <linux/mount.h> | ||
28 | #include <linux/key.h> | ||
29 | #include <linux/seq_file.h> | ||
30 | #include <linux/crypto.h> | ||
31 | #include "ecryptfs_kernel.h" | ||
32 | |||
33 | struct kmem_cache *ecryptfs_inode_info_cache; | ||
34 | |||
35 | /** | ||
36 | * ecryptfs_alloc_inode - allocate an ecryptfs inode | ||
37 | * @sb: Pointer to the ecryptfs super block | ||
38 | * | ||
39 | * Called to bring an inode into existence. | ||
40 | * | ||
41 | * Only handle allocation, setting up structures should be done in | ||
42 | * ecryptfs_read_inode. This is because the kernel, between now and | ||
43 | * then, will 0 out the private data pointer. | ||
44 | * | ||
45 | * Returns a pointer to a newly allocated inode, NULL otherwise | ||
46 | */ | ||
47 | static struct inode *ecryptfs_alloc_inode(struct super_block *sb) | ||
48 | { | ||
49 | struct ecryptfs_inode_info *ecryptfs_inode; | ||
50 | struct inode *inode = NULL; | ||
51 | |||
52 | ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache, | ||
53 | SLAB_KERNEL); | ||
54 | if (unlikely(!ecryptfs_inode)) | ||
55 | goto out; | ||
56 | ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat); | ||
57 | inode = &ecryptfs_inode->vfs_inode; | ||
58 | out: | ||
59 | return inode; | ||
60 | } | ||
61 | |||
62 | /** | ||
63 | * ecryptfs_destroy_inode | ||
64 | * @inode: The ecryptfs inode | ||
65 | * | ||
66 | * This is used during the final destruction of the inode. | ||
67 | * All allocation of memory related to the inode, including allocated | ||
68 | * memory in the crypt_stat struct, will be released here. | ||
69 | * There should be no chance that this deallocation will be missed. | ||
70 | */ | ||
71 | static void ecryptfs_destroy_inode(struct inode *inode) | ||
72 | { | ||
73 | struct ecryptfs_inode_info *inode_info; | ||
74 | |||
75 | inode_info = ecryptfs_inode_to_private(inode); | ||
76 | ecryptfs_destruct_crypt_stat(&inode_info->crypt_stat); | ||
77 | kmem_cache_free(ecryptfs_inode_info_cache, inode_info); | ||
78 | } | ||
79 | |||
80 | /** | ||
81 | * ecryptfs_init_inode | ||
82 | * @inode: The ecryptfs inode | ||
83 | * | ||
84 | * Set up the ecryptfs inode. | ||
85 | */ | ||
86 | void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode) | ||
87 | { | ||
88 | ecryptfs_set_inode_lower(inode, lower_inode); | ||
89 | inode->i_ino = lower_inode->i_ino; | ||
90 | inode->i_version++; | ||
91 | inode->i_op = &ecryptfs_main_iops; | ||
92 | inode->i_fop = &ecryptfs_main_fops; | ||
93 | inode->i_mapping->a_ops = &ecryptfs_aops; | ||
94 | } | ||
95 | |||
96 | /** | ||
97 | * ecryptfs_put_super | ||
98 | * @sb: Pointer to the ecryptfs super block | ||
99 | * | ||
100 | * Final actions when unmounting a file system. | ||
101 | * This will handle deallocation and release of our private data. | ||
102 | */ | ||
103 | static void ecryptfs_put_super(struct super_block *sb) | ||
104 | { | ||
105 | struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); | ||
106 | |||
107 | ecryptfs_destruct_mount_crypt_stat(&sb_info->mount_crypt_stat); | ||
108 | kmem_cache_free(ecryptfs_sb_info_cache, sb_info); | ||
109 | ecryptfs_set_superblock_private(sb, NULL); | ||
110 | } | ||
111 | |||
/**
 * ecryptfs_statfs - report filesystem statistics
 * @dentry: An ecryptfs dentry on the filesystem being queried
 * @buf: The struct kstatfs to fill in with stats
 *
 * The request is passed straight through to the lower filesystem via
 * the lower dentry; eCryptfs takes no action of its own.
 *
 * Returns the result of vfs_statfs() on the lower dentry.
 */
static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);

	return vfs_statfs(lower_dentry, buf);
}
124 | |||
/**
 * ecryptfs_clear_inode - release what an ecryptfs inode holds
 * @inode: The ecryptfs inode
 *
 * Called by iput() when the inode reference count reached zero
 * and the inode is not hashed anywhere. Used to clear anything
 * that needs to be, before the inode is completely destroyed and put
 * on the inode free list. We use this to drop our reference to the
 * lower inode.
 */
static void ecryptfs_clear_inode(struct inode *inode)
{
	struct inode *lower_inode = ecryptfs_inode_to_lower(inode);

	iput(lower_inode);
}
139 | |||
140 | /** | ||
141 | * ecryptfs_umount_begin | ||
142 | * | ||
143 | * Called in do_umount(). | ||
144 | */ | ||
145 | static void ecryptfs_umount_begin(struct vfsmount *vfsmnt, int flags) | ||
146 | { | ||
147 | struct vfsmount *lower_mnt = | ||
148 | ecryptfs_dentry_to_lower_mnt(vfsmnt->mnt_sb->s_root); | ||
149 | struct super_block *lower_sb; | ||
150 | |||
151 | mntput(lower_mnt); | ||
152 | lower_sb = lower_mnt->mnt_sb; | ||
153 | if (lower_sb->s_op->umount_begin) | ||
154 | lower_sb->s_op->umount_begin(lower_mnt, flags); | ||
155 | } | ||
156 | |||
157 | /** | ||
158 | * ecryptfs_show_options | ||
159 | * | ||
160 | * Prints the directory we are currently mounted over. | ||
161 | * Returns zero on success; non-zero otherwise | ||
162 | */ | ||
163 | static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
164 | { | ||
165 | struct super_block *sb = mnt->mnt_sb; | ||
166 | struct dentry *lower_root_dentry = ecryptfs_dentry_to_lower(sb->s_root); | ||
167 | struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(sb->s_root); | ||
168 | char *tmp_page; | ||
169 | char *path; | ||
170 | int rc = 0; | ||
171 | |||
172 | tmp_page = (char *)__get_free_page(GFP_KERNEL); | ||
173 | if (!tmp_page) { | ||
174 | rc = -ENOMEM; | ||
175 | goto out; | ||
176 | } | ||
177 | path = d_path(lower_root_dentry, lower_mnt, tmp_page, PAGE_SIZE); | ||
178 | if (IS_ERR(path)) { | ||
179 | rc = PTR_ERR(path); | ||
180 | goto out; | ||
181 | } | ||
182 | seq_printf(m, ",dir=%s", path); | ||
183 | free_page((unsigned long)tmp_page); | ||
184 | out: | ||
185 | return rc; | ||
186 | } | ||
187 | |||
188 | struct super_operations ecryptfs_sops = { | ||
189 | .alloc_inode = ecryptfs_alloc_inode, | ||
190 | .destroy_inode = ecryptfs_destroy_inode, | ||
191 | .drop_inode = generic_delete_inode, | ||
192 | .put_super = ecryptfs_put_super, | ||
193 | .statfs = ecryptfs_statfs, | ||
194 | .remount_fs = NULL, | ||
195 | .clear_inode = ecryptfs_clear_inode, | ||
196 | .umount_begin = ecryptfs_umount_begin, | ||
197 | .show_options = ecryptfs_show_options | ||
198 | }; | ||
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8d544334bcd2..557d5b614fae 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -720,9 +720,10 @@ static int ep_getfd(int *efd, struct inode **einode, struct file **efile, | |||
720 | 720 | ||
721 | /* Allocates an inode from the eventpoll file system */ | 721 | /* Allocates an inode from the eventpoll file system */ |
722 | inode = ep_eventpoll_inode(); | 722 | inode = ep_eventpoll_inode(); |
723 | error = PTR_ERR(inode); | 723 | if (IS_ERR(inode)) { |
724 | if (IS_ERR(inode)) | 724 | error = PTR_ERR(inode); |
725 | goto eexit_2; | 725 | goto eexit_2; |
726 | } | ||
726 | 727 | ||
727 | /* Allocates a free descriptor to plug the file onto */ | 728 | /* Allocates a free descriptor to plug the file onto */ |
728 | error = get_unused_fd(); | 729 | error = get_unused_fd(); |
@@ -1318,7 +1318,7 @@ static void format_corename(char *corename, const char *pattern, long signr) | |||
1318 | case 'h': | 1318 | case 'h': |
1319 | down_read(&uts_sem); | 1319 | down_read(&uts_sem); |
1320 | rc = snprintf(out_ptr, out_end - out_ptr, | 1320 | rc = snprintf(out_ptr, out_end - out_ptr, |
1321 | "%s", system_utsname.nodename); | 1321 | "%s", utsname()->nodename); |
1322 | up_read(&uts_sem); | 1322 | up_read(&uts_sem); |
1323 | if (rc > out_end - out_ptr) | 1323 | if (rc > out_end - out_ptr) |
1324 | goto out; | 1324 | goto out; |
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 4c39009350f3..93e77c3d2490 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
@@ -315,7 +315,7 @@ struct getdents_callback { | |||
315 | * the name matching the specified inode number. | 315 | * the name matching the specified inode number. |
316 | */ | 316 | */ |
317 | static int filldir_one(void * __buf, const char * name, int len, | 317 | static int filldir_one(void * __buf, const char * name, int len, |
318 | loff_t pos, ino_t ino, unsigned int d_type) | 318 | loff_t pos, u64 ino, unsigned int d_type) |
319 | { | 319 | { |
320 | struct getdents_callback *buf = __buf; | 320 | struct getdents_callback *buf = __buf; |
321 | int result = 0; | 321 | int result = 0; |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 3e50a4166283..69c439f44387 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -648,7 +648,7 @@ static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
648 | } | 648 | } |
649 | 649 | ||
650 | static int fat_ioctl_filldir(void *__buf, const char *name, int name_len, | 650 | static int fat_ioctl_filldir(void *__buf, const char *name, int name_len, |
651 | loff_t offset, ino_t ino, unsigned int d_type) | 651 | loff_t offset, u64 ino, unsigned int d_type) |
652 | { | 652 | { |
653 | struct fat_ioctl_filldir_callback *buf = __buf; | 653 | struct fat_ioctl_filldir_callback *buf = __buf; |
654 | struct dirent __user *d1 = buf->dirent; | 654 | struct dirent __user *d1 = buf->dirent; |
diff --git a/fs/fcntl.c b/fs/fcntl.c index d35cbc6bc112..e4f26165f12a 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -250,19 +250,22 @@ static int setfl(int fd, struct file * filp, unsigned long arg) | |||
250 | return error; | 250 | return error; |
251 | } | 251 | } |
252 | 252 | ||
253 | static void f_modown(struct file *filp, unsigned long pid, | 253 | static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, |
254 | uid_t uid, uid_t euid, int force) | 254 | uid_t uid, uid_t euid, int force) |
255 | { | 255 | { |
256 | write_lock_irq(&filp->f_owner.lock); | 256 | write_lock_irq(&filp->f_owner.lock); |
257 | if (force || !filp->f_owner.pid) { | 257 | if (force || !filp->f_owner.pid) { |
258 | filp->f_owner.pid = pid; | 258 | put_pid(filp->f_owner.pid); |
259 | filp->f_owner.pid = get_pid(pid); | ||
260 | filp->f_owner.pid_type = type; | ||
259 | filp->f_owner.uid = uid; | 261 | filp->f_owner.uid = uid; |
260 | filp->f_owner.euid = euid; | 262 | filp->f_owner.euid = euid; |
261 | } | 263 | } |
262 | write_unlock_irq(&filp->f_owner.lock); | 264 | write_unlock_irq(&filp->f_owner.lock); |
263 | } | 265 | } |
264 | 266 | ||
265 | int f_setown(struct file *filp, unsigned long arg, int force) | 267 | int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, |
268 | int force) | ||
266 | { | 269 | { |
267 | int err; | 270 | int err; |
268 | 271 | ||
@@ -270,15 +273,44 @@ int f_setown(struct file *filp, unsigned long arg, int force) | |||
270 | if (err) | 273 | if (err) |
271 | return err; | 274 | return err; |
272 | 275 | ||
273 | f_modown(filp, arg, current->uid, current->euid, force); | 276 | f_modown(filp, pid, type, current->uid, current->euid, force); |
274 | return 0; | 277 | return 0; |
275 | } | 278 | } |
279 | EXPORT_SYMBOL(__f_setown); | ||
276 | 280 | ||
281 | int f_setown(struct file *filp, unsigned long arg, int force) | ||
282 | { | ||
283 | enum pid_type type; | ||
284 | struct pid *pid; | ||
285 | int who = arg; | ||
286 | int result; | ||
287 | type = PIDTYPE_PID; | ||
288 | if (who < 0) { | ||
289 | type = PIDTYPE_PGID; | ||
290 | who = -who; | ||
291 | } | ||
292 | rcu_read_lock(); | ||
293 | pid = find_pid(who); | ||
294 | result = __f_setown(filp, pid, type, force); | ||
295 | rcu_read_unlock(); | ||
296 | return result; | ||
297 | } | ||
277 | EXPORT_SYMBOL(f_setown); | 298 | EXPORT_SYMBOL(f_setown); |
278 | 299 | ||
279 | void f_delown(struct file *filp) | 300 | void f_delown(struct file *filp) |
280 | { | 301 | { |
281 | f_modown(filp, 0, 0, 0, 1); | 302 | f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1); |
303 | } | ||
304 | |||
305 | pid_t f_getown(struct file *filp) | ||
306 | { | ||
307 | pid_t pid; | ||
308 | read_lock(&filp->f_owner.lock); | ||
309 | pid = pid_nr(filp->f_owner.pid); | ||
310 | if (filp->f_owner.pid_type == PIDTYPE_PGID) | ||
311 | pid = -pid; | ||
312 | read_unlock(&filp->f_owner.lock); | ||
313 | return pid; | ||
282 | } | 314 | } |
283 | 315 | ||
284 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | 316 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, |
@@ -319,7 +351,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
319 | * current syscall conventions, the only way | 351 | * current syscall conventions, the only way |
320 | * to fix this will be in libc. | 352 | * to fix this will be in libc. |
321 | */ | 353 | */ |
322 | err = filp->f_owner.pid; | 354 | err = f_getown(filp); |
323 | force_successful_syscall_return(); | 355 | force_successful_syscall_return(); |
324 | break; | 356 | break; |
325 | case F_SETOWN: | 357 | case F_SETOWN: |
@@ -470,24 +502,19 @@ static void send_sigio_to_task(struct task_struct *p, | |||
470 | void send_sigio(struct fown_struct *fown, int fd, int band) | 502 | void send_sigio(struct fown_struct *fown, int fd, int band) |
471 | { | 503 | { |
472 | struct task_struct *p; | 504 | struct task_struct *p; |
473 | int pid; | 505 | enum pid_type type; |
506 | struct pid *pid; | ||
474 | 507 | ||
475 | read_lock(&fown->lock); | 508 | read_lock(&fown->lock); |
509 | type = fown->pid_type; | ||
476 | pid = fown->pid; | 510 | pid = fown->pid; |
477 | if (!pid) | 511 | if (!pid) |
478 | goto out_unlock_fown; | 512 | goto out_unlock_fown; |
479 | 513 | ||
480 | read_lock(&tasklist_lock); | 514 | read_lock(&tasklist_lock); |
481 | if (pid > 0) { | 515 | do_each_pid_task(pid, type, p) { |
482 | p = find_task_by_pid(pid); | 516 | send_sigio_to_task(p, fown, fd, band); |
483 | if (p) { | 517 | } while_each_pid_task(pid, type, p); |
484 | send_sigio_to_task(p, fown, fd, band); | ||
485 | } | ||
486 | } else { | ||
487 | do_each_task_pid(-pid, PIDTYPE_PGID, p) { | ||
488 | send_sigio_to_task(p, fown, fd, band); | ||
489 | } while_each_task_pid(-pid, PIDTYPE_PGID, p); | ||
490 | } | ||
491 | read_unlock(&tasklist_lock); | 518 | read_unlock(&tasklist_lock); |
492 | out_unlock_fown: | 519 | out_unlock_fown: |
493 | read_unlock(&fown->lock); | 520 | read_unlock(&fown->lock); |
@@ -503,9 +530,12 @@ static void send_sigurg_to_task(struct task_struct *p, | |||
503 | int send_sigurg(struct fown_struct *fown) | 530 | int send_sigurg(struct fown_struct *fown) |
504 | { | 531 | { |
505 | struct task_struct *p; | 532 | struct task_struct *p; |
506 | int pid, ret = 0; | 533 | enum pid_type type; |
534 | struct pid *pid; | ||
535 | int ret = 0; | ||
507 | 536 | ||
508 | read_lock(&fown->lock); | 537 | read_lock(&fown->lock); |
538 | type = fown->pid_type; | ||
509 | pid = fown->pid; | 539 | pid = fown->pid; |
510 | if (!pid) | 540 | if (!pid) |
511 | goto out_unlock_fown; | 541 | goto out_unlock_fown; |
@@ -513,16 +543,9 @@ int send_sigurg(struct fown_struct *fown) | |||
513 | ret = 1; | 543 | ret = 1; |
514 | 544 | ||
515 | read_lock(&tasklist_lock); | 545 | read_lock(&tasklist_lock); |
516 | if (pid > 0) { | 546 | do_each_pid_task(pid, type, p) { |
517 | p = find_task_by_pid(pid); | 547 | send_sigurg_to_task(p, fown); |
518 | if (p) { | 548 | } while_each_pid_task(pid, type, p); |
519 | send_sigurg_to_task(p, fown); | ||
520 | } | ||
521 | } else { | ||
522 | do_each_task_pid(-pid, PIDTYPE_PGID, p) { | ||
523 | send_sigurg_to_task(p, fown); | ||
524 | } while_each_task_pid(-pid, PIDTYPE_PGID, p); | ||
525 | } | ||
526 | read_unlock(&tasklist_lock); | 549 | read_unlock(&tasklist_lock); |
527 | out_unlock_fown: | 550 | out_unlock_fown: |
528 | read_unlock(&fown->lock); | 551 | read_unlock(&fown->lock); |
diff --git a/fs/file_table.c b/fs/file_table.c index bc35a40417d7..24f25a057d9c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -174,6 +174,7 @@ void fastcall __fput(struct file *file) | |||
174 | fops_put(file->f_op); | 174 | fops_put(file->f_op); |
175 | if (file->f_mode & FMODE_WRITE) | 175 | if (file->f_mode & FMODE_WRITE) |
176 | put_write_access(inode); | 176 | put_write_access(inode); |
177 | put_pid(file->f_owner.pid); | ||
177 | file_kill(file); | 178 | file_kill(file); |
178 | file->f_dentry = NULL; | 179 | file->f_dentry = NULL; |
179 | file->f_vfsmnt = NULL; | 180 | file->f_vfsmnt = NULL; |
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index ae783066fc3a..1528a6fd0299 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/hfs/part_tbl.c | 2 | * linux/fs/hfsplus/part_tbl.c |
3 | * | 3 | * |
4 | * Copyright (C) 1996-1997 Paul H. Hargrove | 4 | * Copyright (C) 1996-1997 Paul H. Hargrove |
5 | * This file may be distributed under the terms of the GNU General Public License. | 5 | * This file may be distributed under the terms of the GNU General Public License. |
diff --git a/fs/inode.c b/fs/inode.c index ada7643104e1..bf6bec4e54ff 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -657,7 +657,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he | |||
657 | return inode; | 657 | return inode; |
658 | } | 658 | } |
659 | 659 | ||
660 | static inline unsigned long hash(struct super_block *sb, unsigned long hashval) | 660 | static unsigned long hash(struct super_block *sb, unsigned long hashval) |
661 | { | 661 | { |
662 | unsigned long tmp; | 662 | unsigned long tmp; |
663 | 663 | ||
@@ -1003,7 +1003,7 @@ void generic_delete_inode(struct inode *inode) | |||
1003 | 1003 | ||
1004 | list_del_init(&inode->i_list); | 1004 | list_del_init(&inode->i_list); |
1005 | list_del_init(&inode->i_sb_list); | 1005 | list_del_init(&inode->i_sb_list); |
1006 | inode->i_state|=I_FREEING; | 1006 | inode->i_state |= I_FREEING; |
1007 | inodes_stat.nr_inodes--; | 1007 | inodes_stat.nr_inodes--; |
1008 | spin_unlock(&inode_lock); | 1008 | spin_unlock(&inode_lock); |
1009 | 1009 | ||
@@ -1210,13 +1210,15 @@ void file_update_time(struct file *file) | |||
1210 | return; | 1210 | return; |
1211 | 1211 | ||
1212 | now = current_fs_time(inode->i_sb); | 1212 | now = current_fs_time(inode->i_sb); |
1213 | if (!timespec_equal(&inode->i_mtime, &now)) | 1213 | if (!timespec_equal(&inode->i_mtime, &now)) { |
1214 | inode->i_mtime = now; | ||
1214 | sync_it = 1; | 1215 | sync_it = 1; |
1215 | inode->i_mtime = now; | 1216 | } |
1216 | 1217 | ||
1217 | if (!timespec_equal(&inode->i_ctime, &now)) | 1218 | if (!timespec_equal(&inode->i_ctime, &now)) { |
1219 | inode->i_ctime = now; | ||
1218 | sync_it = 1; | 1220 | sync_it = 1; |
1219 | inode->i_ctime = now; | 1221 | } |
1220 | 1222 | ||
1221 | if (sync_it) | 1223 | if (sync_it) |
1222 | mark_inode_dirty_sync(inode); | 1224 | mark_inode_dirty_sync(inode); |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 32a8caf0c41e..10be51290a27 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/commit.c | 2 | * linux/fs/jbd/commit.c |
3 | * | 3 | * |
4 | * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 | 4 | * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 |
5 | * | 5 | * |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 7af6099c911c..c518dd8fe60a 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/journal.c | 2 | * linux/fs/jbd/journal.c |
3 | * | 3 | * |
4 | * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 | 4 | * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 |
5 | * | 5 | * |
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index e2281300979c..4d84bdc88299 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c | |||
@@ -5,16 +5,16 @@ | |||
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation; either version 2 of the License, or | 8 | * the Free Software Foundation; either version 2 of the License, or |
9 | * (at your option) any later version. | 9 | * (at your option) any later version. |
10 | * | 10 | * |
11 | * This program is distributed in the hope that it will be useful, | 11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
14 | * the GNU General Public License for more details. | 14 | * the GNU General Public License for more details. |
15 | * | 15 | * |
16 | * You should have received a copy of the GNU General Public License | 16 | * You should have received a copy of the GNU General Public License |
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 | */ | 19 | */ |
20 | 20 | ||
@@ -183,7 +183,7 @@ cleanup: | |||
183 | posix_acl_release(acl); | 183 | posix_acl_release(acl); |
184 | } else | 184 | } else |
185 | inode->i_mode &= ~current->fs->umask; | 185 | inode->i_mode &= ~current->fs->umask; |
186 | 186 | ||
187 | JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | | 187 | JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | |
188 | inode->i_mode; | 188 | inode->i_mode; |
189 | 189 | ||
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h index ab7cd0567c95..79494c4f2b10 100644 --- a/fs/jfs/endian24.h +++ b/fs/jfs/endian24.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2001 | 2 | * Copyright (C) International Business Machines Corp., 2001 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 976e90dc2d1b..34181b8f5a0a 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
@@ -108,7 +108,7 @@ const struct file_operations jfs_file_operations = { | |||
108 | .aio_read = generic_file_aio_read, | 108 | .aio_read = generic_file_aio_read, |
109 | .aio_write = generic_file_aio_write, | 109 | .aio_write = generic_file_aio_write, |
110 | .mmap = generic_file_mmap, | 110 | .mmap = generic_file_mmap, |
111 | .sendfile = generic_file_sendfile, | 111 | .sendfile = generic_file_sendfile, |
112 | .fsync = jfs_fsync, | 112 | .fsync = jfs_fsync, |
113 | .release = jfs_release, | 113 | .release = jfs_release, |
114 | .ioctl = jfs_ioctl, | 114 | .ioctl = jfs_ioctl, |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index a8cc169235d9..f5719117edfe 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
@@ -33,7 +33,7 @@ | |||
33 | 33 | ||
34 | void jfs_read_inode(struct inode *inode) | 34 | void jfs_read_inode(struct inode *inode) |
35 | { | 35 | { |
36 | if (diRead(inode)) { | 36 | if (diRead(inode)) { |
37 | make_bad_inode(inode); | 37 | make_bad_inode(inode); |
38 | return; | 38 | return; |
39 | } | 39 | } |
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index a76293767c73..455fa4292045 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h | |||
@@ -1,18 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2002 | 2 | * Copyright (C) International Business Machines Corp., 2002 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_ACL | 18 | #ifndef _H_JFS_ACL |
diff --git a/fs/jfs/jfs_btree.h b/fs/jfs/jfs_btree.h index 7f3e9ac454ff..79c61805bd33 100644 --- a/fs/jfs/jfs_btree.h +++ b/fs/jfs/jfs_btree.h | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_BTREE | 18 | #ifndef _H_JFS_BTREE |
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 81f0e514c490..9c5d59632aac 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 9f2572aea561..40b20111383c 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h | |||
@@ -1,18 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2000-2001 | 2 | * Copyright (C) International Business Machines Corp., 2000-2001 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_DINODE | 18 | #ifndef _H_JFS_DINODE |
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index f05ebb629182..23546c8fd48b 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
@@ -30,28 +30,28 @@ | |||
30 | * | 30 | * |
31 | * the working state of the block allocation map is accessed in | 31 | * the working state of the block allocation map is accessed in |
32 | * two directions: | 32 | * two directions: |
33 | * | 33 | * |
34 | * 1) allocation and free requests that start at the dmap | 34 | * 1) allocation and free requests that start at the dmap |
35 | * level and move up through the dmap control pages (i.e. | 35 | * level and move up through the dmap control pages (i.e. |
36 | * the vast majority of requests). | 36 | * the vast majority of requests). |
37 | * | 37 | * |
38 | * 2) allocation requests that start at dmap control page | 38 | * 2) allocation requests that start at dmap control page |
39 | * level and work down towards the dmaps. | 39 | * level and work down towards the dmaps. |
40 | * | ||
41 | * the serialization scheme used here is as follows. | ||
42 | * | 40 | * |
43 | * requests which start at the bottom are serialized against each | 41 | * the serialization scheme used here is as follows. |
44 | * other through buffers and each requests holds onto its buffers | 42 | * |
45 | * as it works it way up from a single dmap to the required level | 43 | * requests which start at the bottom are serialized against each |
44 | * other through buffers and each requests holds onto its buffers | ||
45 | * as it works it way up from a single dmap to the required level | ||
46 | * of dmap control page. | 46 | * of dmap control page. |
47 | * requests that start at the top are serialized against each other | 47 | * requests that start at the top are serialized against each other |
48 | * and requests that start from the bottom by the multiple read/single | 48 | * and requests that start from the bottom by the multiple read/single |
49 | * write inode lock of the bmap inode. requests starting at the top | 49 | * write inode lock of the bmap inode. requests starting at the top |
50 | * take this lock in write mode while requests starting at the bottom | 50 | * take this lock in write mode while requests starting at the bottom |
51 | * take the lock in read mode. a single top-down request may proceed | 51 | * take the lock in read mode. a single top-down request may proceed |
52 | * exclusively while multiple bottoms-up requests may proceed | 52 | * exclusively while multiple bottoms-up requests may proceed |
53 | * simultaneously (under the protection of busy buffers). | 53 | * simultaneously (under the protection of busy buffers). |
54 | * | 54 | * |
55 | * in addition to information found in dmaps and dmap control pages, | 55 | * in addition to information found in dmaps and dmap control pages, |
56 | * the working state of the block allocation map also includes read/ | 56 | * the working state of the block allocation map also includes read/ |
57 | * write information maintained in the bmap descriptor (i.e. total | 57 | * write information maintained in the bmap descriptor (i.e. total |
@@ -59,7 +59,7 @@ | |||
59 | * a single exclusive lock (BMAP_LOCK) is used to guard this information | 59 | * a single exclusive lock (BMAP_LOCK) is used to guard this information |
60 | * in the face of multiple bottoms-up requests. | 60 | * in the face of multiple bottoms-up requests. |
61 | * (lock ordering: IREAD_LOCK, BMAP_LOCK); | 61 | * (lock ordering: IREAD_LOCK, BMAP_LOCK); |
62 | * | 62 | * |
63 | * accesses to the persistent state of the block allocation map (limited | 63 | * accesses to the persistent state of the block allocation map (limited |
64 | * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. | 64 | * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. |
65 | */ | 65 | */ |
@@ -120,7 +120,7 @@ static int dbGetL2AGSize(s64 nblocks); | |||
120 | /* | 120 | /* |
121 | * buddy table | 121 | * buddy table |
122 | * | 122 | * |
123 | * table used for determining buddy sizes within characters of | 123 | * table used for determining buddy sizes within characters of |
124 | * dmap bitmap words. the characters themselves serve as indexes | 124 | * dmap bitmap words. the characters themselves serve as indexes |
125 | * into the table, with the table elements yielding the maximum | 125 | * into the table, with the table elements yielding the maximum |
126 | * binary buddy of free bits within the character. | 126 | * binary buddy of free bits within the character. |
@@ -146,7 +146,7 @@ static const s8 budtab[256] = { | |||
146 | 146 | ||
147 | 147 | ||
148 | /* | 148 | /* |
149 | * NAME: dbMount() | 149 | * NAME: dbMount() |
150 | * | 150 | * |
151 | * FUNCTION: initialize the block allocation map. | 151 | * FUNCTION: initialize the block allocation map. |
152 | * | 152 | * |
@@ -223,12 +223,12 @@ int dbMount(struct inode *ipbmap) | |||
223 | 223 | ||
224 | 224 | ||
225 | /* | 225 | /* |
226 | * NAME: dbUnmount() | 226 | * NAME: dbUnmount() |
227 | * | 227 | * |
228 | * FUNCTION: terminate the block allocation map in preparation for | 228 | * FUNCTION: terminate the block allocation map in preparation for |
229 | * file system unmount. | 229 | * file system unmount. |
230 | * | 230 | * |
231 | * the in-core bmap descriptor is written to disk and | 231 | * the in-core bmap descriptor is written to disk and |
232 | * the memory for this descriptor is freed. | 232 | * the memory for this descriptor is freed. |
233 | * | 233 | * |
234 | * PARAMETERS: | 234 | * PARAMETERS: |
@@ -311,7 +311,7 @@ int dbSync(struct inode *ipbmap) | |||
311 | 311 | ||
312 | 312 | ||
313 | /* | 313 | /* |
314 | * NAME: dbFree() | 314 | * NAME: dbFree() |
315 | * | 315 | * |
316 | * FUNCTION: free the specified block range from the working block | 316 | * FUNCTION: free the specified block range from the working block |
317 | * allocation map. | 317 | * allocation map. |
@@ -397,7 +397,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | |||
397 | * | 397 | * |
398 | * FUNCTION: update the allocation state (free or allocate) of the | 398 | * FUNCTION: update the allocation state (free or allocate) of the |
399 | * specified block range in the persistent block allocation map. | 399 | * specified block range in the persistent block allocation map. |
400 | * | 400 | * |
401 | * the blocks will be updated in the persistent map one | 401 | * the blocks will be updated in the persistent map one |
402 | * dmap at a time. | 402 | * dmap at a time. |
403 | * | 403 | * |
@@ -475,7 +475,7 @@ dbUpdatePMap(struct inode *ipbmap, | |||
475 | /* update the bits of the dmap words. the first and last | 475 | /* update the bits of the dmap words. the first and last |
476 | * words may only have a subset of their bits updated. if | 476 | * words may only have a subset of their bits updated. if |
477 | * this is the case, we'll work against that word (i.e. | 477 | * this is the case, we'll work against that word (i.e. |
478 | * partial first and/or last) only in a single pass. a | 478 | * partial first and/or last) only in a single pass. a |
479 | * single pass will also be used to update all words that | 479 | * single pass will also be used to update all words that |
480 | * are to have all their bits updated. | 480 | * are to have all their bits updated. |
481 | */ | 481 | */ |
@@ -662,11 +662,11 @@ unlock: | |||
662 | * the block allocation policy uses hints and a multi-step | 662 | * the block allocation policy uses hints and a multi-step |
663 | * approach. | 663 | * approach. |
664 | * | 664 | * |
665 | * for allocation requests smaller than the number of blocks | 665 | * for allocation requests smaller than the number of blocks |
666 | * per dmap, we first try to allocate the new blocks | 666 | * per dmap, we first try to allocate the new blocks |
667 | * immediately following the hint. if these blocks are not | 667 | * immediately following the hint. if these blocks are not |
668 | * available, we try to allocate blocks near the hint. if | 668 | * available, we try to allocate blocks near the hint. if |
669 | * no blocks near the hint are available, we next try to | 669 | * no blocks near the hint are available, we next try to |
670 | * allocate within the same dmap as contains the hint. | 670 | * allocate within the same dmap as contains the hint. |
671 | * | 671 | * |
672 | * if no blocks are available in the dmap or the allocation | 672 | * if no blocks are available in the dmap or the allocation |
@@ -713,7 +713,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | |||
713 | #endif /* _STILL_TO_PORT */ | 713 | #endif /* _STILL_TO_PORT */ |
714 | 714 | ||
715 | /* get the log2 number of blocks to be allocated. | 715 | /* get the log2 number of blocks to be allocated. |
716 | * if the number of blocks is not a log2 multiple, | 716 | * if the number of blocks is not a log2 multiple, |
717 | * it will be rounded up to the next log2 multiple. | 717 | * it will be rounded up to the next log2 multiple. |
718 | */ | 718 | */ |
719 | l2nb = BLKSTOL2(nblocks); | 719 | l2nb = BLKSTOL2(nblocks); |
@@ -906,7 +906,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) | |||
906 | * validate extent request: | 906 | * validate extent request: |
907 | * | 907 | * |
908 | * note: defragfs policy: | 908 | * note: defragfs policy: |
909 | * max 64 blocks will be moved. | 909 | * max 64 blocks will be moved. |
910 | * allocation request size must be satisfied from a single dmap. | 910 | * allocation request size must be satisfied from a single dmap. |
911 | */ | 911 | */ |
912 | if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { | 912 | if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { |
@@ -1333,7 +1333,7 @@ dbAllocNear(struct bmap * bmp, | |||
1333 | * or two sub-trees, depending on the allocation group size. | 1333 | * or two sub-trees, depending on the allocation group size. |
1334 | * we search the top nodes of these subtrees left to right for | 1334 | * we search the top nodes of these subtrees left to right for |
1335 | * sufficient free space. if sufficient free space is found, | 1335 | * sufficient free space. if sufficient free space is found, |
1336 | * the subtree is searched to find the leftmost leaf that | 1336 | * the subtree is searched to find the leftmost leaf that |
1337 | * has free space. once we have made it to the leaf, we | 1337 | * has free space. once we have made it to the leaf, we |
1338 | * move the search to the next lower level dmap control page | 1338 | * move the search to the next lower level dmap control page |
1339 | * corresponding to this leaf. we continue down the dmap control | 1339 | * corresponding to this leaf. we continue down the dmap control |
@@ -1398,7 +1398,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
1398 | * that fully describes the allocation group since the allocation | 1398 | * that fully describes the allocation group since the allocation |
1399 | * group is already fully described by a dmap. in this case, we | 1399 | * group is already fully described by a dmap. in this case, we |
1400 | * just call dbAllocCtl() to search the dmap tree and allocate the | 1400 | * just call dbAllocCtl() to search the dmap tree and allocate the |
1401 | * required space if available. | 1401 | * required space if available. |
1402 | * | 1402 | * |
1403 | * if the allocation group is completely free, dbAllocCtl() is | 1403 | * if the allocation group is completely free, dbAllocCtl() is |
1404 | * also called to allocate the required space. this is done for | 1404 | * also called to allocate the required space. this is done for |
@@ -1450,7 +1450,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
1450 | (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; | 1450 | (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; |
1451 | ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); | 1451 | ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); |
1452 | 1452 | ||
1453 | /* dmap control page trees fan-out by 4 and a single allocation | 1453 | /* dmap control page trees fan-out by 4 and a single allocation |
1454 | * group may be described by 1 or 2 subtrees within the ag level | 1454 | * group may be described by 1 or 2 subtrees within the ag level |
1455 | * dmap control page, depending upon the ag size. examine the ag's | 1455 | * dmap control page, depending upon the ag size. examine the ag's |
1456 | * subtrees for sufficient free space, starting with the leftmost | 1456 | * subtrees for sufficient free space, starting with the leftmost |
@@ -1633,7 +1633,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) | |||
1633 | 1633 | ||
1634 | /* starting at the specified dmap control page level and block | 1634 | /* starting at the specified dmap control page level and block |
1635 | * number, search down the dmap control levels for the starting | 1635 | * number, search down the dmap control levels for the starting |
1636 | * block number of a dmap page that contains or starts off | 1636 | * block number of a dmap page that contains or starts off |
1637 | * sufficient free blocks. | 1637 | * sufficient free blocks. |
1638 | */ | 1638 | */ |
1639 | for (lev = level, b = *blkno; lev >= 0; lev--) { | 1639 | for (lev = level, b = *blkno; lev >= 0; lev--) { |
@@ -1677,7 +1677,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) | |||
1677 | } | 1677 | } |
1678 | 1678 | ||
1679 | /* adjust the block number to reflect the location within | 1679 | /* adjust the block number to reflect the location within |
1680 | * the dmap control page (i.e. the leaf) at which free | 1680 | * the dmap control page (i.e. the leaf) at which free |
1681 | * space was found. | 1681 | * space was found. |
1682 | */ | 1682 | */ |
1683 | b += (((s64) leafidx) << budmin); | 1683 | b += (((s64) leafidx) << budmin); |
@@ -1700,12 +1700,12 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) | |||
1700 | * NAME: dbAllocCtl() | 1700 | * NAME: dbAllocCtl() |
1701 | * | 1701 | * |
1702 | * FUNCTION: attempt to allocate a specified number of contiguous | 1702 | * FUNCTION: attempt to allocate a specified number of contiguous |
1703 | * blocks starting within a specific dmap. | 1703 | * blocks starting within a specific dmap. |
1704 | * | 1704 | * |
1705 | * this routine is called by higher level routines that search | 1705 | * this routine is called by higher level routines that search |
1706 | * the dmap control pages above the actual dmaps for contiguous | 1706 | * the dmap control pages above the actual dmaps for contiguous |
1707 | * free space. the result of successful searches by these | 1707 | * free space. the result of successful searches by these |
1708 | * routines are the starting block numbers within dmaps, with | 1708 | * routines are the starting block numbers within dmaps, with |
1709 | * the dmaps themselves containing the desired contiguous free | 1709 | * the dmaps themselves containing the desired contiguous free |
1710 | * space or starting a contiguous free space of desired size | 1710 | * space or starting a contiguous free space of desired size |
1711 | * that is made up of the blocks of one or more dmaps. these | 1711 | * that is made up of the blocks of one or more dmaps. these |
@@ -1872,14 +1872,14 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | |||
1872 | * | 1872 | * |
1873 | * FUNCTION: attempt to allocate a specified number of contiguous blocks | 1873 | * FUNCTION: attempt to allocate a specified number of contiguous blocks |
1874 | * from a specified dmap. | 1874 | * from a specified dmap. |
1875 | * | 1875 | * |
1876 | * this routine checks if the contiguous blocks are available. | 1876 | * this routine checks if the contiguous blocks are available. |
1877 | * if so, nblocks of blocks are allocated; otherwise, ENOSPC is | 1877 | * if so, nblocks of blocks are allocated; otherwise, ENOSPC is |
1878 | * returned. | 1878 | * returned. |
1879 | * | 1879 | * |
1880 | * PARAMETERS: | 1880 | * PARAMETERS: |
1881 | * mp - pointer to bmap descriptor | 1881 | * mp - pointer to bmap descriptor |
1882 | * dp - pointer to dmap to attempt to allocate blocks from. | 1882 | * dp - pointer to dmap to attempt to allocate blocks from. |
1883 | * l2nb - log2 number of contiguous block desired. | 1883 | * l2nb - log2 number of contiguous block desired. |
1884 | * nblocks - actual number of contiguous block desired. | 1884 | * nblocks - actual number of contiguous block desired. |
1885 | * results - on successful return, set to the starting block number | 1885 | * results - on successful return, set to the starting block number |
@@ -1890,7 +1890,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | |||
1890 | * -ENOSPC - insufficient disk resources | 1890 | * -ENOSPC - insufficient disk resources |
1891 | * -EIO - i/o error | 1891 | * -EIO - i/o error |
1892 | * | 1892 | * |
1893 | * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or | 1893 | * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or |
1894 | * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; | 1894 | * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; |
1895 | */ | 1895 | */ |
1896 | static int | 1896 | static int |
@@ -2032,7 +2032,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
2032 | 2032 | ||
2033 | /* root changed. bubble the change up to the dmap control pages. | 2033 | /* root changed. bubble the change up to the dmap control pages. |
2034 | * if the adjustment of the upper level control pages fails, | 2034 | * if the adjustment of the upper level control pages fails, |
2035 | * backout the deallocation. | 2035 | * backout the deallocation. |
2036 | */ | 2036 | */ |
2037 | if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { | 2037 | if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { |
2038 | word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; | 2038 | word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; |
@@ -2245,7 +2245,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
2245 | * words (i.e. partial first and/or last) on an individual basis | 2245 | * words (i.e. partial first and/or last) on an individual basis |
2246 | * (a single pass), freeing the bits of interest by hand and updating | 2246 | * (a single pass), freeing the bits of interest by hand and updating |
2247 | * the leaf corresponding to the dmap word. a single pass will be used | 2247 | * the leaf corresponding to the dmap word. a single pass will be used |
2248 | * for all dmap words fully contained within the specified range. | 2248 | * for all dmap words fully contained within the specified range. |
2249 | * within this pass, the bits of all fully contained dmap words will | 2249 | * within this pass, the bits of all fully contained dmap words will |
2250 | * be marked as free in a single shot and the leaves will be updated. a | 2250 | * be marked as free in a single shot and the leaves will be updated. a |
2251 | * single leaf may describe the free space of multiple dmap words, | 2251 | * single leaf may describe the free space of multiple dmap words, |
@@ -2267,7 +2267,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
2267 | */ | 2267 | */ |
2268 | if (nb < DBWORD) { | 2268 | if (nb < DBWORD) { |
2269 | /* free (zero) the appropriate bits within this | 2269 | /* free (zero) the appropriate bits within this |
2270 | * dmap word. | 2270 | * dmap word. |
2271 | */ | 2271 | */ |
2272 | dp->wmap[word] &= | 2272 | dp->wmap[word] &= |
2273 | cpu_to_le32(~(ONES << (DBWORD - nb) | 2273 | cpu_to_le32(~(ONES << (DBWORD - nb) |
@@ -2327,7 +2327,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
2327 | 2327 | ||
2328 | BMAP_LOCK(bmp); | 2328 | BMAP_LOCK(bmp); |
2329 | 2329 | ||
2330 | /* update the free count for the allocation group and | 2330 | /* update the free count for the allocation group and |
2331 | * map. | 2331 | * map. |
2332 | */ | 2332 | */ |
2333 | agno = blkno >> bmp->db_agl2size; | 2333 | agno = blkno >> bmp->db_agl2size; |
@@ -2378,7 +2378,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
2378 | * or deallocation resulted in the root change. this range | 2378 | * or deallocation resulted in the root change. this range |
2379 | * is represented by a single leaf of the current dmapctl | 2379 | * is represented by a single leaf of the current dmapctl |
2380 | * and the leaf will be updated with this value, possibly | 2380 | * and the leaf will be updated with this value, possibly |
2381 | * causing a binary buddy system within the leaves to be | 2381 | * causing a binary buddy system within the leaves to be |
2382 | * split or joined. the update may also cause the dmapctl's | 2382 | * split or joined. the update may also cause the dmapctl's |
2383 | * dmtree to be updated. | 2383 | * dmtree to be updated. |
2384 | * | 2384 | * |
@@ -2590,7 +2590,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) | |||
2590 | } | 2590 | } |
2591 | } | 2591 | } |
2592 | 2592 | ||
2593 | /* adjust the dmap tree to reflect the specified leaf's new | 2593 | /* adjust the dmap tree to reflect the specified leaf's new |
2594 | * value. | 2594 | * value. |
2595 | */ | 2595 | */ |
2596 | dbAdjTree(tp, leafno, newval); | 2596 | dbAdjTree(tp, leafno, newval); |
@@ -2638,7 +2638,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno) | |||
2638 | /* the back split is accomplished by iteratively finding the leaf | 2638 | /* the back split is accomplished by iteratively finding the leaf |
2639 | * that starts the buddy system that contains the specified leaf and | 2639 | * that starts the buddy system that contains the specified leaf and |
2640 | * splitting that system in two. this iteration continues until | 2640 | * splitting that system in two. this iteration continues until |
2641 | * the specified leaf becomes the start of a buddy system. | 2641 | * the specified leaf becomes the start of a buddy system. |
2642 | * | 2642 | * |
2643 | * determine maximum possible l2 size for the specified leaf. | 2643 | * determine maximum possible l2 size for the specified leaf. |
2644 | */ | 2644 | */ |
@@ -2853,7 +2853,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval) | |||
2853 | * NAME: dbFindLeaf() | 2853 | * NAME: dbFindLeaf() |
2854 | * | 2854 | * |
2855 | * FUNCTION: search a dmtree_t for sufficient free blocks, returning | 2855 | * FUNCTION: search a dmtree_t for sufficient free blocks, returning |
2856 | * the index of a leaf describing the free blocks if | 2856 | * the index of a leaf describing the free blocks if |
2857 | * sufficient free blocks are found. | 2857 | * sufficient free blocks are found. |
2858 | * | 2858 | * |
2859 | * the search starts at the top of the dmtree_t tree and | 2859 | * the search starts at the top of the dmtree_t tree and |
@@ -2869,7 +2869,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval) | |||
2869 | * | 2869 | * |
2870 | * RETURN VALUES: | 2870 | * RETURN VALUES: |
2871 | * 0 - success | 2871 | * 0 - success |
2872 | * -ENOSPC - insufficient free blocks. | 2872 | * -ENOSPC - insufficient free blocks. |
2873 | */ | 2873 | */ |
2874 | static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) | 2874 | static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) |
2875 | { | 2875 | { |
@@ -3090,7 +3090,7 @@ static int blkstol2(s64 nb) | |||
3090 | 3090 | ||
3091 | 3091 | ||
3092 | /* | 3092 | /* |
3093 | * NAME: dbAllocBottomUp() | 3093 | * NAME: dbAllocBottomUp() |
3094 | * | 3094 | * |
3095 | * FUNCTION: alloc the specified block range from the working block | 3095 | * FUNCTION: alloc the specified block range from the working block |
3096 | * allocation map. | 3096 | * allocation map. |
@@ -3241,7 +3241,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
3241 | BMAP_LOCK(bmp); | 3241 | BMAP_LOCK(bmp); |
3242 | 3242 | ||
3243 | /* if this allocation group is completely free, | 3243 | /* if this allocation group is completely free, |
3244 | * update the highest active allocation group number | 3244 | * update the highest active allocation group number |
3245 | * if this allocation group is the new max. | 3245 | * if this allocation group is the new max. |
3246 | */ | 3246 | */ |
3247 | agno = blkno >> bmp->db_agl2size; | 3247 | agno = blkno >> bmp->db_agl2size; |
@@ -3273,7 +3273,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
3273 | * NAME: dbExtendFS() | 3273 | * NAME: dbExtendFS() |
3274 | * | 3274 | * |
3275 | * FUNCTION: extend bmap from blkno for nblocks; | 3275 | * FUNCTION: extend bmap from blkno for nblocks; |
3276 | * dbExtendFS() updates bmap ready for dbAllocBottomUp(); | 3276 | * dbExtendFS() updates bmap ready for dbAllocBottomUp(); |
3277 | * | 3277 | * |
3278 | * L2 | 3278 | * L2 |
3279 | * | | 3279 | * | |
@@ -3284,7 +3284,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
3284 | * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; | 3284 | * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; |
3285 | * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm | 3285 | * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm |
3286 | * | 3286 | * |
3287 | * <---old---><----------------------------extend-----------------------> | 3287 | * <---old---><----------------------------extend-----------------------> |
3288 | */ | 3288 | */ |
3289 | int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | 3289 | int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) |
3290 | { | 3290 | { |
@@ -3330,7 +3330,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
3330 | bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; | 3330 | bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; |
3331 | 3331 | ||
3332 | /* | 3332 | /* |
3333 | * reconfigure db_agfree[] | 3333 | * reconfigure db_agfree[] |
3334 | * from old AG configuration to new AG configuration; | 3334 | * from old AG configuration to new AG configuration; |
3335 | * | 3335 | * |
3336 | * coalesce contiguous k (newAGSize/oldAGSize) AGs; | 3336 | * coalesce contiguous k (newAGSize/oldAGSize) AGs; |
@@ -3491,7 +3491,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
3491 | } /* for each dmap in a L0 */ | 3491 | } /* for each dmap in a L0 */ |
3492 | 3492 | ||
3493 | /* | 3493 | /* |
3494 | * build current L0 page from its leaves, and | 3494 | * build current L0 page from its leaves, and |
3495 | * initialize corresponding parent L1 leaf | 3495 | * initialize corresponding parent L1 leaf |
3496 | */ | 3496 | */ |
3497 | *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i); | 3497 | *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i); |
@@ -3515,7 +3515,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
3515 | } /* for each L0 in a L1 */ | 3515 | } /* for each L0 in a L1 */ |
3516 | 3516 | ||
3517 | /* | 3517 | /* |
3518 | * build current L1 page from its leaves, and | 3518 | * build current L1 page from its leaves, and |
3519 | * initialize corresponding parent L2 leaf | 3519 | * initialize corresponding parent L2 leaf |
3520 | */ | 3520 | */ |
3521 | *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j); | 3521 | *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j); |
@@ -3570,7 +3570,7 @@ void dbFinalizeBmap(struct inode *ipbmap) | |||
3570 | * finalize bmap control page | 3570 | * finalize bmap control page |
3571 | */ | 3571 | */ |
3572 | //finalize: | 3572 | //finalize: |
3573 | /* | 3573 | /* |
3574 | * compute db_agpref: preferred ag to allocate from | 3574 | * compute db_agpref: preferred ag to allocate from |
3575 | * (the leftmost ag with average free space in it); | 3575 | * (the leftmost ag with average free space in it); |
3576 | */ | 3576 | */ |
@@ -3614,9 +3614,9 @@ void dbFinalizeBmap(struct inode *ipbmap) | |||
3614 | 3614 | ||
3615 | /* | 3615 | /* |
3616 | * compute db_aglevel, db_agheight, db_agwidth, db_agstart: | 3616 | * compute db_aglevel, db_agheight, db_agwidth, db_agstart: |
3617 | * an ag is covered in aglevel dmapctl summary tree, | 3617 | * an ag is covered in aglevel dmapctl summary tree, |
3618 | * at agheight level height (from leaf) with agwidth number of nodes | 3618 | * at agheight level height (from leaf) with agwidth number of nodes |
3619 | * each, which starts at agstart index node of the summary tree node | 3619 | * each, which starts at agstart index node of the summary tree node |
3620 | * array; | 3620 | * array; |
3621 | */ | 3621 | */ |
3622 | bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); | 3622 | bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); |
@@ -3635,13 +3635,13 @@ void dbFinalizeBmap(struct inode *ipbmap) | |||
3635 | 3635 | ||
3636 | /* | 3636 | /* |
3637 | * NAME: dbInitDmap()/ujfs_idmap_page() | 3637 | * NAME: dbInitDmap()/ujfs_idmap_page() |
3638 | * | 3638 | * |
3639 | * FUNCTION: initialize working/persistent bitmap of the dmap page | 3639 | * FUNCTION: initialize working/persistent bitmap of the dmap page |
3640 | * for the specified number of blocks: | 3640 | * for the specified number of blocks: |
3641 | * | 3641 | * |
3642 | * at entry, the bitmaps had been initialized as free (ZEROS); | 3642 | * at entry, the bitmaps had been initialized as free (ZEROS); |
3643 | * The number of blocks will only account for the actually | 3643 | * The number of blocks will only account for the actually |
3644 | * existing blocks. Blocks which don't actually exist in | 3644 | * existing blocks. Blocks which don't actually exist in |
3645 | * the aggregate will be marked as allocated (ONES); | 3645 | * the aggregate will be marked as allocated (ONES); |
3646 | * | 3646 | * |
3647 | * PARAMETERS: | 3647 | * PARAMETERS: |
@@ -3677,7 +3677,7 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) | |||
3677 | 3677 | ||
3678 | /* | 3678 | /* |
3679 | * free the bits corresponding to the block range (ZEROS): | 3679 | * free the bits corresponding to the block range (ZEROS): |
3680 | * note: not all bits of the first and last words may be contained | 3680 | * note: not all bits of the first and last words may be contained |
3681 | * within the block range. | 3681 | * within the block range. |
3682 | */ | 3682 | */ |
3683 | for (r = nblocks; r > 0; r -= nb, blkno += nb) { | 3683 | for (r = nblocks; r > 0; r -= nb, blkno += nb) { |
@@ -3709,7 +3709,7 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) | |||
3709 | } | 3709 | } |
3710 | 3710 | ||
3711 | /* | 3711 | /* |
3712 | * mark bits following the range to be freed (non-existing | 3712 | * mark bits following the range to be freed (non-existing |
3713 | * blocks) as allocated (ONES) | 3713 | * blocks) as allocated (ONES) |
3714 | */ | 3714 | */ |
3715 | 3715 | ||
@@ -3741,11 +3741,11 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) | |||
3741 | 3741 | ||
3742 | /* | 3742 | /* |
3743 | * NAME: dbInitDmapTree()/ujfs_complete_dmap() | 3743 | * NAME: dbInitDmapTree()/ujfs_complete_dmap() |
3744 | * | 3744 | * |
3745 | * FUNCTION: initialize summary tree of the specified dmap: | 3745 | * FUNCTION: initialize summary tree of the specified dmap: |
3746 | * | 3746 | * |
3747 | * at entry, bitmap of the dmap has been initialized; | 3747 | * at entry, bitmap of the dmap has been initialized; |
3748 | * | 3748 | * |
3749 | * PARAMETERS: | 3749 | * PARAMETERS: |
3750 | * dp - dmap to complete | 3750 | * dp - dmap to complete |
3751 | * blkno - starting block number for this dmap | 3751 | * blkno - starting block number for this dmap |
@@ -3769,7 +3769,7 @@ static int dbInitDmapTree(struct dmap * dp) | |||
3769 | 3769 | ||
3770 | /* init each leaf from corresponding wmap word: | 3770 | /* init each leaf from corresponding wmap word: |
3771 | * note: leaf is set to NOFREE(-1) if all blocks of corresponding | 3771 | * note: leaf is set to NOFREE(-1) if all blocks of corresponding |
3772 | * bitmap word are allocated. | 3772 | * bitmap word are allocated. |
3773 | */ | 3773 | */ |
3774 | cp = tp->stree + le32_to_cpu(tp->leafidx); | 3774 | cp = tp->stree + le32_to_cpu(tp->leafidx); |
3775 | for (i = 0; i < LPERDMAP; i++) | 3775 | for (i = 0; i < LPERDMAP; i++) |
@@ -3782,10 +3782,10 @@ static int dbInitDmapTree(struct dmap * dp) | |||
3782 | 3782 | ||
3783 | /* | 3783 | /* |
3784 | * NAME: dbInitTree()/ujfs_adjtree() | 3784 | * NAME: dbInitTree()/ujfs_adjtree() |
3785 | * | 3785 | * |
3786 | * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl. | 3786 | * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl. |
3787 | * | 3787 | * |
3788 | * at entry, the leaves of the tree have been initialized | 3788 | * at entry, the leaves of the tree have been initialized |
3789 | * from corresponding bitmap word or root of summary tree | 3789 | * from corresponding bitmap word or root of summary tree |
3790 | * of the child control page; | 3790 | * of the child control page; |
3791 | * configure binary buddy system at the leaf level, then | 3791 | * configure binary buddy system at the leaf level, then |
@@ -3813,15 +3813,15 @@ static int dbInitTree(struct dmaptree * dtp) | |||
3813 | /* | 3813 | /* |
3814 | * configure the leaf level into binary buddy system | 3814 | * configure the leaf level into binary buddy system |
3815 | * | 3815 | * |
3816 | * Try to combine buddies starting with a buddy size of 1 | 3816 | * Try to combine buddies starting with a buddy size of 1 |
3817 | * (i.e. two leaves). At a buddy size of 1 two buddy leaves | 3817 | * (i.e. two leaves). At a buddy size of 1 two buddy leaves |
3818 | * can be combined if both buddies have a maximum free of l2min; | 3818 | * can be combined if both buddies have a maximum free of l2min; |
3819 | * the combination will result in the left-most buddy leaf having | 3819 | * the combination will result in the left-most buddy leaf having |
3820 | * a maximum free of l2min+1. | 3820 | * a maximum free of l2min+1. |
3821 | * After processing all buddies for a given size, process buddies | 3821 | * After processing all buddies for a given size, process buddies |
3822 | * at the next higher buddy size (i.e. current size * 2) and | 3822 | * at the next higher buddy size (i.e. current size * 2) and |
3823 | * the next maximum free (current free + 1). | 3823 | * the next maximum free (current free + 1). |
3824 | * This continues until the maximum possible buddy combination | 3824 | * This continues until the maximum possible buddy combination |
3825 | * yields maximum free. | 3825 | * yields maximum free. |
3826 | */ | 3826 | */ |
3827 | for (l2free = dtp->budmin, bsize = 1; l2free < l2max; | 3827 | for (l2free = dtp->budmin, bsize = 1; l2free < l2max; |
@@ -3845,10 +3845,10 @@ static int dbInitTree(struct dmaptree * dtp) | |||
3845 | * bubble summary information of leaves up the tree. | 3845 | * bubble summary information of leaves up the tree. |
3846 | * | 3846 | * |
3847 | * Starting at the leaf node level, the four nodes described by | 3847 | * Starting at the leaf node level, the four nodes described by |
3848 | * the higher level parent node are compared for a maximum free and | 3848 | * the higher level parent node are compared for a maximum free and |
3849 | * this maximum becomes the value of the parent node. | 3849 | * this maximum becomes the value of the parent node. |
3850 | * when all lower level nodes are processed in this fashion then | 3850 | * when all lower level nodes are processed in this fashion then |
3851 | * move up to the next level (parent becomes a lower level node) and | 3851 | * move up to the next level (parent becomes a lower level node) and |
3852 | * continue the process for that level. | 3852 | * continue the process for that level. |
3853 | */ | 3853 | */ |
3854 | for (child = le32_to_cpu(dtp->leafidx), | 3854 | for (child = le32_to_cpu(dtp->leafidx), |
@@ -3857,7 +3857,7 @@ static int dbInitTree(struct dmaptree * dtp) | |||
3857 | /* get index of 1st node of parent level */ | 3857 | /* get index of 1st node of parent level */ |
3858 | parent = (child - 1) >> 2; | 3858 | parent = (child - 1) >> 2; |
3859 | 3859 | ||
3860 | /* set the value of the parent node as the maximum | 3860 | /* set the value of the parent node as the maximum |
3861 | * of the four nodes of the current level. | 3861 | * of the four nodes of the current level. |
3862 | */ | 3862 | */ |
3863 | for (i = 0, cp = tp + child, cp1 = tp + parent; | 3863 | for (i = 0, cp = tp + child, cp1 = tp + parent; |
@@ -3885,8 +3885,8 @@ static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) | |||
3885 | dcp->budmin = L2BPERDMAP + L2LPERCTL * level; | 3885 | dcp->budmin = L2BPERDMAP + L2LPERCTL * level; |
3886 | 3886 | ||
3887 | /* | 3887 | /* |
3888 | * initialize the leaves of current level that were not covered | 3888 | * initialize the leaves of current level that were not covered |
3889 | * by the specified input block range (i.e. the leaves have no | 3889 | * by the specified input block range (i.e. the leaves have no |
3890 | * low level dmapctl or dmap). | 3890 | * low level dmapctl or dmap). |
3891 | */ | 3891 | */ |
3892 | cp = &dcp->stree[CTLLEAFIND + i]; | 3892 | cp = &dcp->stree[CTLLEAFIND + i]; |
@@ -3900,9 +3900,9 @@ static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) | |||
3900 | 3900 | ||
3901 | /* | 3901 | /* |
3902 | * NAME: dbGetL2AGSize()/ujfs_getagl2size() | 3902 | * NAME: dbGetL2AGSize()/ujfs_getagl2size() |
3903 | * | 3903 | * |
3904 | * FUNCTION: Determine log2(allocation group size) from aggregate size | 3904 | * FUNCTION: Determine log2(allocation group size) from aggregate size |
3905 | * | 3905 | * |
3906 | * PARAMETERS: | 3906 | * PARAMETERS: |
3907 | * nblocks - Number of blocks in aggregate | 3907 | * nblocks - Number of blocks in aggregate |
3908 | * | 3908 | * |
@@ -3935,8 +3935,8 @@ static int dbGetL2AGSize(s64 nblocks) | |||
3935 | 3935 | ||
3936 | /* | 3936 | /* |
3937 | * NAME: dbMapFileSizeToMapSize() | 3937 | * NAME: dbMapFileSizeToMapSize() |
3938 | * | 3938 | * |
3939 | * FUNCTION: compute number of blocks the block allocation map file | 3939 | * FUNCTION: compute number of blocks the block allocation map file |
3940 | * can cover from the map file size; | 3940 | * can cover from the map file size; |
3941 | * | 3941 | * |
3942 | * RETURNS: Number of blocks which can be covered by this block map file; | 3942 | * RETURNS: Number of blocks which can be covered by this block map file; |
@@ -3968,7 +3968,7 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap) | |||
3968 | npages = nblocks >> JFS_SBI(sb)->l2nbperpage; | 3968 | npages = nblocks >> JFS_SBI(sb)->l2nbperpage; |
3969 | level = BMAPPGTOLEV(npages); | 3969 | level = BMAPPGTOLEV(npages); |
3970 | 3970 | ||
3971 | /* At each level, accumulate the number of dmap pages covered by | 3971 | /* At each level, accumulate the number of dmap pages covered by |
3972 | * the number of full child levels below it; | 3972 | * the number of full child levels below it; |
3973 | * repeat for the last incomplete child level. | 3973 | * repeat for the last incomplete child level. |
3974 | */ | 3974 | */ |
@@ -3990,7 +3990,7 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap) | |||
3990 | npages--; | 3990 | npages--; |
3991 | } | 3991 | } |
3992 | 3992 | ||
3993 | /* convert the number of dmaps into the number of blocks | 3993 | /* convert the number of dmaps into the number of blocks |
3994 | * which can be covered by the dmaps; | 3994 | * which can be covered by the dmaps; |
3995 | */ | 3995 | */ |
3996 | nblocks = ndmaps << L2BPERDMAP; | 3996 | nblocks = ndmaps << L2BPERDMAP; |
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 8b14cc8e0228..45ea454c74bd 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h | |||
@@ -1,18 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2000-2002 | 2 | * Copyright (C) International Business Machines Corp., 2000-2002 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_DMAP | 18 | #ifndef _H_JFS_DMAP |
@@ -27,7 +27,7 @@ | |||
27 | #define L2LPERDMAP 8 /* l2 number of leaves per dmap tree */ | 27 | #define L2LPERDMAP 8 /* l2 number of leaves per dmap tree */ |
28 | #define DBWORD 32 /* # of blks covered by a map word */ | 28 | #define DBWORD 32 /* # of blks covered by a map word */ |
29 | #define L2DBWORD 5 /* l2 # of blks covered by a mword */ | 29 | #define L2DBWORD 5 /* l2 # of blks covered by a mword */ |
30 | #define BUDMIN L2DBWORD /* max free string in a map word */ | 30 | #define BUDMIN L2DBWORD /* max free string in a map word */ |
31 | #define BPERDMAP (LPERDMAP * DBWORD) /* num of blks per dmap */ | 31 | #define BPERDMAP (LPERDMAP * DBWORD) /* num of blks per dmap */ |
32 | #define L2BPERDMAP 13 /* l2 num of blks per dmap */ | 32 | #define L2BPERDMAP 13 /* l2 num of blks per dmap */ |
33 | #define CTLTREESIZE (1024+256+64+16+4+1) /* size of a dmapctl tree */ | 33 | #define CTLTREESIZE (1024+256+64+16+4+1) /* size of a dmapctl tree */ |
@@ -57,7 +57,7 @@ | |||
57 | 57 | ||
58 | #define MAXMAPSIZE MAXL2SIZE /* maximum aggregate map size */ | 58 | #define MAXMAPSIZE MAXL2SIZE /* maximum aggregate map size */ |
59 | 59 | ||
60 | /* | 60 | /* |
61 | * determine the maximum free string for four (lower level) nodes | 61 | * determine the maximum free string for four (lower level) nodes |
62 | * of the tree. | 62 | * of the tree. |
63 | */ | 63 | */ |
@@ -122,7 +122,7 @@ static __inline signed char TREEMAX(signed char *cp) | |||
122 | #define BLKTOCTL(b,s,l) \ | 122 | #define BLKTOCTL(b,s,l) \ |
123 | (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) | 123 | (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) |
124 | 124 | ||
125 | /* | 125 | /* |
126 | * convert aggregate map size to the zero origin dmapctl level of the | 126 | * convert aggregate map size to the zero origin dmapctl level of the |
127 | * top dmapctl. | 127 | * top dmapctl. |
128 | */ | 128 | */ |
@@ -192,13 +192,13 @@ typedef union dmtree { | |||
192 | 192 | ||
193 | /* macros for accessing fields within dmtree */ | 193 | /* macros for accessing fields within dmtree */ |
194 | #define dmt_nleafs t1.nleafs | 194 | #define dmt_nleafs t1.nleafs |
195 | #define dmt_l2nleafs t1.l2nleafs | 195 | #define dmt_l2nleafs t1.l2nleafs |
196 | #define dmt_leafidx t1.leafidx | 196 | #define dmt_leafidx t1.leafidx |
197 | #define dmt_height t1.height | 197 | #define dmt_height t1.height |
198 | #define dmt_budmin t1.budmin | 198 | #define dmt_budmin t1.budmin |
199 | #define dmt_stree t1.stree | 199 | #define dmt_stree t1.stree |
200 | 200 | ||
201 | /* | 201 | /* |
202 | * on-disk aggregate disk allocation map descriptor. | 202 | * on-disk aggregate disk allocation map descriptor. |
203 | */ | 203 | */ |
204 | struct dbmap_disk { | 204 | struct dbmap_disk { |
@@ -237,7 +237,7 @@ struct dbmap { | |||
237 | s64 dn_agsize; /* num of blks per alloc group */ | 237 | s64 dn_agsize; /* num of blks per alloc group */ |
238 | signed char dn_maxfreebud; /* max free buddy system */ | 238 | signed char dn_maxfreebud; /* max free buddy system */ |
239 | }; /* - 4096 - */ | 239 | }; /* - 4096 - */ |
240 | /* | 240 | /* |
241 | * in-memory aggregate disk allocation map descriptor. | 241 | * in-memory aggregate disk allocation map descriptor. |
242 | */ | 242 | */ |
243 | struct bmap { | 243 | struct bmap { |
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 6c3f08319846..ecb2216d881c 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
@@ -78,7 +78,7 @@ | |||
78 | * | 78 | * |
79 | * case-insensitive search: | 79 | * case-insensitive search: |
80 | * | 80 | * |
81 | * fold search key; | 81 | * fold search key; |
82 | * | 82 | * |
83 | * case-insensitive search of B-tree: | 83 | * case-insensitive search of B-tree: |
84 | * for internal entry, router key is already folded; | 84 | * for internal entry, router key is already folded; |
@@ -93,7 +93,7 @@ | |||
93 | * else | 93 | * else |
94 | * return no match; | 94 | * return no match; |
95 | * | 95 | * |
96 | * serialization: | 96 | * serialization: |
97 | * target directory inode lock is being held on entry/exit | 97 | * target directory inode lock is being held on entry/exit |
98 | * of all main directory service routines. | 98 | * of all main directory service routines. |
99 | * | 99 | * |
@@ -925,7 +925,7 @@ int dtInsert(tid_t tid, struct inode *ip, | |||
925 | * | 925 | * |
926 | * return: 0 - success; | 926 | * return: 0 - success; |
927 | * errno - failure; | 927 | * errno - failure; |
928 | * leaf page unpinned; | 928 | * leaf page unpinned; |
929 | */ | 929 | */ |
930 | static int dtSplitUp(tid_t tid, | 930 | static int dtSplitUp(tid_t tid, |
931 | struct inode *ip, struct dtsplit * split, struct btstack * btstack) | 931 | struct inode *ip, struct dtsplit * split, struct btstack * btstack) |
@@ -3767,7 +3767,7 @@ static int ciCompare(struct component_name * key, /* search key */ | |||
3767 | * across page boundary | 3767 | * across page boundary |
3768 | * | 3768 | * |
3769 | * return: non-zero on error | 3769 | * return: non-zero on error |
3770 | * | 3770 | * |
3771 | */ | 3771 | */ |
3772 | static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, | 3772 | static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, |
3773 | int ri, struct component_name * key, int flag) | 3773 | int ri, struct component_name * key, int flag) |
@@ -3780,13 +3780,13 @@ static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, | |||
3780 | lkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), | 3780 | lkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), |
3781 | GFP_KERNEL); | 3781 | GFP_KERNEL); |
3782 | if (lkey.name == NULL) | 3782 | if (lkey.name == NULL) |
3783 | return -ENOSPC; | 3783 | return -ENOMEM; |
3784 | 3784 | ||
3785 | rkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), | 3785 | rkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), |
3786 | GFP_KERNEL); | 3786 | GFP_KERNEL); |
3787 | if (rkey.name == NULL) { | 3787 | if (rkey.name == NULL) { |
3788 | kfree(lkey.name); | 3788 | kfree(lkey.name); |
3789 | return -ENOSPC; | 3789 | return -ENOMEM; |
3790 | } | 3790 | } |
3791 | 3791 | ||
3792 | /* get left and right key */ | 3792 | /* get left and right key */ |
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index 13e4fdf07724..af8513f78648 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h | |||
@@ -1,18 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2000-2002 | 2 | * Copyright (C) International Business Machines Corp., 2000-2002 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_DTREE | 18 | #ifndef _H_JFS_DTREE |
@@ -80,7 +80,7 @@ struct idtentry { | |||
80 | /* | 80 | /* |
81 | * leaf node entry head/only segment | 81 | * leaf node entry head/only segment |
82 | * | 82 | * |
83 | * For legacy filesystems, name contains 13 wchars -- no index field | 83 | * For legacy filesystems, name contains 13 wchars -- no index field |
84 | */ | 84 | */ |
85 | struct ldtentry { | 85 | struct ldtentry { |
86 | __le32 inumber; /* 4: 4-byte aligned */ | 86 | __le32 inumber; /* 4: 4-byte aligned */ |
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 933b7457bfbd..a35bdca6a805 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
@@ -125,7 +125,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) | |||
125 | } | 125 | } |
126 | 126 | ||
127 | /* allocate the disk blocks for the extent. initially, extBalloc() | 127 | /* allocate the disk blocks for the extent. initially, extBalloc() |
128 | * will try to allocate disk blocks for the requested size (xlen). | 128 | * will try to allocate disk blocks for the requested size (xlen). |
129 | * if this fails (xlen contiguous free blocks not available), it'll | 129 | * if this fails (xlen contiguous free blocks not available), it'll |
130 | * try to allocate a smaller number of blocks (producing a smaller | 130 | * try to allocate a smaller number of blocks (producing a smaller |
131 | * extent), with this smaller number of blocks consisting of the | 131 | * extent), with this smaller number of blocks consisting of the |
@@ -150,7 +150,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) | |||
150 | /* determine the value of the extent flag */ | 150 | /* determine the value of the extent flag */ |
151 | xflag = abnr ? XAD_NOTRECORDED : 0; | 151 | xflag = abnr ? XAD_NOTRECORDED : 0; |
152 | 152 | ||
153 | /* if we can extend the hint extent to cover the current request, | 153 | /* if we can extend the hint extent to cover the current request, |
154 | * extend it. otherwise, insert a new extent to | 154 | * extend it. otherwise, insert a new extent to |
155 | * cover the current request. | 155 | * cover the current request. |
156 | */ | 156 | */ |
@@ -159,7 +159,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) | |||
159 | else | 159 | else |
160 | rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0); | 160 | rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0); |
161 | 161 | ||
162 | /* if the extend or insert failed, | 162 | /* if the extend or insert failed, |
163 | * free the newly allocated blocks and return the error. | 163 | * free the newly allocated blocks and return the error. |
164 | */ | 164 | */ |
165 | if (rc) { | 165 | if (rc) { |
@@ -235,7 +235,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) | |||
235 | xoff = offsetXAD(xp); | 235 | xoff = offsetXAD(xp); |
236 | 236 | ||
237 | /* if the extend page is abnr and if the request is for | 237 | /* if the extend page is abnr and if the request is for |
238 | * the extent to be allocated and recorded, | 238 | * the extent to be allocated and recorded, |
239 | * make the page allocated and recorded. | 239 | * make the page allocated and recorded. |
240 | */ | 240 | */ |
241 | if ((xp->flag & XAD_NOTRECORDED) && !abnr) { | 241 | if ((xp->flag & XAD_NOTRECORDED) && !abnr) { |
@@ -397,7 +397,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) | |||
397 | if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) | 397 | if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) |
398 | return (rc); | 398 | return (rc); |
399 | 399 | ||
400 | /* check if no extent exists for the previous page. | 400 | /* check if no extent exists for the previous page. |
401 | * this is possible for sparse files. | 401 | * this is possible for sparse files. |
402 | */ | 402 | */ |
403 | if (xadl.nxad == 0) { | 403 | if (xadl.nxad == 0) { |
@@ -410,7 +410,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) | |||
410 | */ | 410 | */ |
411 | xp->flag &= XAD_NOTRECORDED; | 411 | xp->flag &= XAD_NOTRECORDED; |
412 | 412 | ||
413 | if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { | 413 | if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { |
414 | jfs_error(ip->i_sb, "extHint: corrupt xtree"); | 414 | jfs_error(ip->i_sb, "extHint: corrupt xtree"); |
415 | return -EIO; | 415 | return -EIO; |
416 | } | 416 | } |
@@ -492,7 +492,7 @@ int extFill(struct inode *ip, xad_t * xp) | |||
492 | * FUNCTION: allocate disk blocks to form an extent. | 492 | * FUNCTION: allocate disk blocks to form an extent. |
493 | * | 493 | * |
494 | * initially, we will try to allocate disk blocks for the | 494 | * initially, we will try to allocate disk blocks for the |
495 | * requested size (nblocks). if this fails (nblocks | 495 | * requested size (nblocks). if this fails (nblocks |
496 | * contiguous free blocks not available), we'll try to allocate | 496 | * contiguous free blocks not available), we'll try to allocate |
497 | * a smaller number of blocks (producing a smaller extent), with | 497 | * a smaller number of blocks (producing a smaller extent), with |
498 | * this smaller number of blocks consisting of the requested | 498 | * this smaller number of blocks consisting of the requested |
@@ -500,7 +500,7 @@ int extFill(struct inode *ip, xad_t * xp) | |||
500 | * number (i.e. 16 -> 8). we'll continue to round down and | 500 | * number (i.e. 16 -> 8). we'll continue to round down and |
501 | * retry the allocation until the number of blocks to allocate | 501 | * retry the allocation until the number of blocks to allocate |
502 | * is smaller than the number of blocks per page. | 502 | * is smaller than the number of blocks per page. |
503 | * | 503 | * |
504 | * PARAMETERS: | 504 | * PARAMETERS: |
505 | * ip - the inode of the file. | 505 | * ip - the inode of the file. |
506 | * hint - disk block number to be used as an allocation hint. | 506 | * hint - disk block number to be used as an allocation hint. |
@@ -509,7 +509,7 @@ int extFill(struct inode *ip, xad_t * xp) | |||
509 | * exit, this value is set to the number of blocks actually | 509 | * exit, this value is set to the number of blocks actually |
510 | * allocated. | 510 | * allocated. |
511 | * blkno - pointer to a block address that is filled in on successful | 511 | * blkno - pointer to a block address that is filled in on successful |
512 | * return with the starting block number of the newly | 512 | * return with the starting block number of the newly |
513 | * allocated block range. | 513 | * allocated block range. |
514 | * | 514 | * |
515 | * RETURN VALUES: | 515 | * RETURN VALUES: |
@@ -530,7 +530,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) | |||
530 | /* get the number of blocks to initially attempt to allocate. | 530 | /* get the number of blocks to initially attempt to allocate. |
531 | * we'll first try the number of blocks requested unless this | 531 | * we'll first try the number of blocks requested unless this |
532 | * number is greater than the maximum number of contiguous free | 532 | * number is greater than the maximum number of contiguous free |
533 | * blocks in the map. in that case, we'll start off with the | 533 | * blocks in the map. in that case, we'll start off with the |
534 | * maximum free. | 534 | * maximum free. |
535 | */ | 535 | */ |
536 | max = (s64) 1 << bmp->db_maxfreebud; | 536 | max = (s64) 1 << bmp->db_maxfreebud; |
@@ -582,19 +582,19 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) | |||
582 | * | 582 | * |
583 | * FUNCTION: attempt to extend an extent's allocation. | 583 | * FUNCTION: attempt to extend an extent's allocation. |
584 | * | 584 | * |
585 | * initially, we will try to extend the extent's allocation | 585 | * Initially, we will try to extend the extent's allocation |
586 | * in place. if this fails, we'll try to move the extent | 586 | * in place. If this fails, we'll try to move the extent |
587 | * to a new set of blocks. if moving the extent, we initially | 587 | * to a new set of blocks. If moving the extent, we initially |
588 | * will try to allocate disk blocks for the requested size | 588 | * will try to allocate disk blocks for the requested size |
589 | * (nnew). if this fails (new contiguous free blocks not | 589 | * (newnblks). if this fails (new contiguous free blocks not |
590 | * available), we'll try to allocate a smaller number of | 590 | * available), we'll try to allocate a smaller number of |
591 | * blocks (producing a smaller extent), with this smaller | 591 | * blocks (producing a smaller extent), with this smaller |
592 | * number of blocks consisting of the requested number of | 592 | * number of blocks consisting of the requested number of |
593 | * blocks rounded down to the next smaller power of 2 | 593 | * blocks rounded down to the next smaller power of 2 |
594 | * number (i.e. 16 -> 8). we'll continue to round down and | 594 | * number (i.e. 16 -> 8). We'll continue to round down and |
595 | * retry the allocation until the number of blocks to allocate | 595 | * retry the allocation until the number of blocks to allocate |
596 | * is smaller than the number of blocks per page. | 596 | * is smaller than the number of blocks per page. |
597 | * | 597 | * |
598 | * PARAMETERS: | 598 | * PARAMETERS: |
599 | * ip - the inode of the file. | 599 | * ip - the inode of the file. |
600 | * blkno - starting block number of the extents current allocation. | 600 | * blkno - starting block number of the extents current allocation. |
@@ -625,7 +625,7 @@ extBrealloc(struct inode *ip, | |||
625 | return (rc); | 625 | return (rc); |
626 | } | 626 | } |
627 | 627 | ||
628 | /* in place extension not possible. | 628 | /* in place extension not possible. |
629 | * try to move the extent to a new set of blocks. | 629 | * try to move the extent to a new set of blocks. |
630 | */ | 630 | */ |
631 | return (extBalloc(ip, blkno, newnblks, newblkno)); | 631 | return (extBalloc(ip, blkno, newnblks, newblkno)); |
diff --git a/fs/jfs/jfs_extent.h b/fs/jfs/jfs_extent.h index 3a7f3f22e989..b567e12c52d3 100644 --- a/fs/jfs/jfs_extent.h +++ b/fs/jfs/jfs_extent.h | |||
@@ -1,18 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2000-2001 | 2 | * Copyright (C) International Business Machines Corp., 2000-2001 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_EXTENT | 18 | #ifndef _H_JFS_EXTENT |
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index 72a5588faeca..9901928668cf 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_FILSYS | 18 | #ifndef _H_JFS_FILSYS |
@@ -21,9 +21,9 @@ | |||
21 | /* | 21 | /* |
22 | * jfs_filsys.h | 22 | * jfs_filsys.h |
23 | * | 23 | * |
24 | * file system (implementation-dependent) constants | 24 | * file system (implementation-dependent) constants |
25 | * | 25 | * |
26 | * refer to <limits.h> for system wide implementation-dependent constants | 26 | * refer to <limits.h> for system wide implementation-dependent constants |
27 | */ | 27 | */ |
28 | 28 | ||
29 | /* | 29 | /* |
@@ -49,7 +49,7 @@ | |||
49 | 49 | ||
50 | #define JFS_DFS 0x20000000 /* DCE DFS LFS support */ | 50 | #define JFS_DFS 0x20000000 /* DCE DFS LFS support */ |
51 | 51 | ||
52 | #define JFS_LINUX 0x10000000 /* Linux support */ | 52 | #define JFS_LINUX 0x10000000 /* Linux support */ |
53 | /* case-sensitive name/directory support */ | 53 | /* case-sensitive name/directory support */ |
54 | 54 | ||
55 | /* directory option */ | 55 | /* directory option */ |
@@ -59,7 +59,7 @@ | |||
59 | #define JFS_COMMIT 0x00000f00 /* commit option mask */ | 59 | #define JFS_COMMIT 0x00000f00 /* commit option mask */ |
60 | #define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ | 60 | #define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ |
61 | #define JFS_LAZYCOMMIT 0x00000200 /* lazy commit */ | 61 | #define JFS_LAZYCOMMIT 0x00000200 /* lazy commit */ |
62 | #define JFS_TMPFS 0x00000400 /* temporary file system - | 62 | #define JFS_TMPFS 0x00000400 /* temporary file system - |
63 | * do not log/commit: | 63 | * do not log/commit: |
64 | */ | 64 | */ |
65 | 65 | ||
@@ -196,7 +196,7 @@ | |||
196 | * followed by 1st extent of map | 196 | * followed by 1st extent of map |
197 | */ | 197 | */ |
198 | #define AITBL_OFF (AIMAP_OFF + (SIZE_OF_MAP_PAGE << 1)) | 198 | #define AITBL_OFF (AIMAP_OFF + (SIZE_OF_MAP_PAGE << 1)) |
199 | /* | 199 | /* |
200 | * 1st extent of aggregate inode table | 200 | * 1st extent of aggregate inode table |
201 | */ | 201 | */ |
202 | #define SUPER2_OFF (AITBL_OFF + INODE_EXTENT_SIZE) | 202 | #define SUPER2_OFF (AITBL_OFF + INODE_EXTENT_SIZE) |
@@ -270,13 +270,13 @@ | |||
270 | */ | 270 | */ |
271 | #define FM_CLEAN 0x00000000 /* file system is unmounted and clean */ | 271 | #define FM_CLEAN 0x00000000 /* file system is unmounted and clean */ |
272 | #define FM_MOUNT 0x00000001 /* file system is mounted cleanly */ | 272 | #define FM_MOUNT 0x00000001 /* file system is mounted cleanly */ |
273 | #define FM_DIRTY 0x00000002 /* file system was not unmounted and clean | 273 | #define FM_DIRTY 0x00000002 /* file system was not unmounted and clean |
274 | * when mounted or | 274 | * when mounted or |
275 | * commit failure occurred while being mounted: | 275 | * commit failure occurred while being mounted: |
276 | * fsck() must be run to repair | 276 | * fsck() must be run to repair |
277 | */ | 277 | */ |
278 | #define FM_LOGREDO 0x00000004 /* log based recovery (logredo()) failed: | 278 | #define FM_LOGREDO 0x00000004 /* log based recovery (logredo()) failed: |
279 | * fsck() must be run to repair | 279 | * fsck() must be run to repair |
280 | */ | 280 | */ |
281 | #define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */ | 281 | #define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */ |
282 | 282 | ||
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index a45ee2489580..489a3d63002d 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
@@ -98,7 +98,7 @@ static void copy_to_dinode(struct dinode *, struct inode *); | |||
98 | * FUNCTION: initialize the incore inode map control structures for | 98 | * FUNCTION: initialize the incore inode map control structures for |
99 | * a fileset or aggregate init time. | 99 | * a fileset or aggregate init time. |
100 | * | 100 | * |
101 | * the inode map's control structure (dinomap) is | 101 | * the inode map's control structure (dinomap) is |
102 | * brought in from disk and placed in virtual memory. | 102 | * brought in from disk and placed in virtual memory. |
103 | * | 103 | * |
104 | * PARAMETERS: | 104 | * PARAMETERS: |
@@ -107,7 +107,7 @@ static void copy_to_dinode(struct dinode *, struct inode *); | |||
107 | * RETURN VALUES: | 107 | * RETURN VALUES: |
108 | * 0 - success | 108 | * 0 - success |
109 | * -ENOMEM - insufficient free virtual memory. | 109 | * -ENOMEM - insufficient free virtual memory. |
110 | * -EIO - i/o error. | 110 | * -EIO - i/o error. |
111 | */ | 111 | */ |
112 | int diMount(struct inode *ipimap) | 112 | int diMount(struct inode *ipimap) |
113 | { | 113 | { |
@@ -191,7 +191,7 @@ int diMount(struct inode *ipimap) | |||
191 | * RETURN VALUES: | 191 | * RETURN VALUES: |
192 | * 0 - success | 192 | * 0 - success |
193 | * -ENOMEM - insufficient free virtual memory. | 193 | * -ENOMEM - insufficient free virtual memory. |
194 | * -EIO - i/o error. | 194 | * -EIO - i/o error. |
195 | */ | 195 | */ |
196 | int diUnmount(struct inode *ipimap, int mounterror) | 196 | int diUnmount(struct inode *ipimap, int mounterror) |
197 | { | 197 | { |
@@ -281,7 +281,7 @@ int diSync(struct inode *ipimap) | |||
281 | * on entry, the specified incore inode should itself | 281 | * on entry, the specified incore inode should itself |
282 | * specify the disk inode number corresponding to the | 282 | * specify the disk inode number corresponding to the |
283 | * incore inode (i.e. i_number should be initialized). | 283 | * incore inode (i.e. i_number should be initialized). |
284 | * | 284 | * |
285 | * this routine handles incore inode initialization for | 285 | * this routine handles incore inode initialization for |
286 | * both "special" and "regular" inodes. special inodes | 286 | * both "special" and "regular" inodes. special inodes |
287 | * are those required early in the mount process and | 287 | * are those required early in the mount process and |
@@ -289,7 +289,7 @@ int diSync(struct inode *ipimap) | |||
289 | * is not yet initialized. these "special" inodes are | 289 | * is not yet initialized. these "special" inodes are |
290 | * identified by a NULL inode map inode pointer and are | 290 | * identified by a NULL inode map inode pointer and are |
291 | * actually initialized by a call to diReadSpecial(). | 291 | * actually initialized by a call to diReadSpecial(). |
292 | * | 292 | * |
293 | * for regular inodes, the iag describing the disk inode | 293 | * for regular inodes, the iag describing the disk inode |
294 | * is read from disk to determine the inode extent address | 294 | * is read from disk to determine the inode extent address |
295 | * for the disk inode. with the inode extent address in | 295 | * for the disk inode. with the inode extent address in |
@@ -302,9 +302,9 @@ int diSync(struct inode *ipimap) | |||
302 | * | 302 | * |
303 | * RETURN VALUES: | 303 | * RETURN VALUES: |
304 | * 0 - success | 304 | * 0 - success |
305 | * -EIO - i/o error. | 305 | * -EIO - i/o error. |
306 | * -ENOMEM - insufficient memory | 306 | * -ENOMEM - insufficient memory |
307 | * | 307 | * |
308 | */ | 308 | */ |
309 | int diRead(struct inode *ip) | 309 | int diRead(struct inode *ip) |
310 | { | 310 | { |
@@ -586,14 +586,14 @@ void diFreeSpecial(struct inode *ip) | |||
586 | * page of the extent that contains the disk inode is | 586 | * page of the extent that contains the disk inode is |
587 | * read and the disk inode portion of the incore inode | 587 | * read and the disk inode portion of the incore inode |
588 | * is copied to the disk inode. | 588 | * is copied to the disk inode. |
589 | * | 589 | * |
590 | * PARAMETERS: | 590 | * PARAMETERS: |
591 | * tid - transaction id | 591 | * tid - transaction id |
592 | * ip - pointer to incore inode to be written to the inode extent. | 592 | * ip - pointer to incore inode to be written to the inode extent. |
593 | * | 593 | * |
594 | * RETURN VALUES: | 594 | * RETURN VALUES: |
595 | * 0 - success | 595 | * 0 - success |
596 | * -EIO - i/o error. | 596 | * -EIO - i/o error. |
597 | */ | 597 | */ |
598 | int diWrite(tid_t tid, struct inode *ip) | 598 | int diWrite(tid_t tid, struct inode *ip) |
599 | { | 599 | { |
@@ -676,11 +676,11 @@ int diWrite(tid_t tid, struct inode *ip) | |||
676 | * copy btree root from in-memory inode to on-disk inode | 676 | * copy btree root from in-memory inode to on-disk inode |
677 | * | 677 | * |
678 | * (tlock is taken from inline B+-tree root in in-memory | 678 | * (tlock is taken from inline B+-tree root in in-memory |
679 | * inode when the B+-tree root is updated, which is pointed | 679 | * inode when the B+-tree root is updated, which is pointed |
680 | * by jfs_ip->blid as well as being on tx tlock list) | 680 | * by jfs_ip->blid as well as being on tx tlock list) |
681 | * | 681 | * |
682 | * further processing of btree root is based on the copy | 682 | * further processing of btree root is based on the copy |
683 | * in in-memory inode, where txLog() will log from, and, | 683 | * in in-memory inode, where txLog() will log from, and, |
684 | * for xtree root, txUpdateMap() will update map and reset | 684 | * for xtree root, txUpdateMap() will update map and reset |
685 | * XAD_NEW bit; | 685 | * XAD_NEW bit; |
686 | */ | 686 | */ |
@@ -824,7 +824,7 @@ int diWrite(tid_t tid, struct inode *ip) | |||
824 | memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd)); | 824 | memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd)); |
825 | #endif /* _JFS_FASTDASD */ | 825 | #endif /* _JFS_FASTDASD */ |
826 | 826 | ||
827 | /* release the buffer holding the updated on-disk inode. | 827 | /* release the buffer holding the updated on-disk inode. |
828 | * the buffer will be later written by commit processing. | 828 | * the buffer will be later written by commit processing. |
829 | */ | 829 | */ |
830 | write_metapage(mp); | 830 | write_metapage(mp); |
@@ -842,7 +842,7 @@ int diWrite(tid_t tid, struct inode *ip) | |||
842 | * if the inode to be freed represents the first (only) | 842 | * if the inode to be freed represents the first (only) |
843 | * free inode within the iag, the iag will be placed on | 843 | * free inode within the iag, the iag will be placed on |
844 | * the ag free inode list. | 844 | * the ag free inode list. |
845 | * | 845 | * |
846 | * freeing the inode will cause the inode extent to be | 846 | * freeing the inode will cause the inode extent to be |
847 | * freed if the inode is the only allocated inode within | 847 | * freed if the inode is the only allocated inode within |
848 | * the extent. in this case all the disk resource backing | 848 | * the extent. in this case all the disk resource backing |
@@ -865,11 +865,11 @@ int diWrite(tid_t tid, struct inode *ip) | |||
865 | * any updates and are held until all updates are complete. | 865 | * any updates and are held until all updates are complete. |
866 | * | 866 | * |
867 | * PARAMETERS: | 867 | * PARAMETERS: |
868 | * ip - inode to be freed. | 868 | * ip - inode to be freed. |
869 | * | 869 | * |
870 | * RETURN VALUES: | 870 | * RETURN VALUES: |
871 | * 0 - success | 871 | * 0 - success |
872 | * -EIO - i/o error. | 872 | * -EIO - i/o error. |
873 | */ | 873 | */ |
874 | int diFree(struct inode *ip) | 874 | int diFree(struct inode *ip) |
875 | { | 875 | { |
@@ -898,7 +898,7 @@ int diFree(struct inode *ip) | |||
898 | */ | 898 | */ |
899 | iagno = INOTOIAG(inum); | 899 | iagno = INOTOIAG(inum); |
900 | 900 | ||
901 | /* make sure that the iag is contained within | 901 | /* make sure that the iag is contained within |
902 | * the map. | 902 | * the map. |
903 | */ | 903 | */ |
904 | if (iagno >= imap->im_nextiag) { | 904 | if (iagno >= imap->im_nextiag) { |
@@ -1013,7 +1013,7 @@ int diFree(struct inode *ip) | |||
1013 | 1013 | ||
1014 | /* update the free inode summary map for the extent if | 1014 | /* update the free inode summary map for the extent if |
1015 | * freeing the inode means the extent will now have free | 1015 | * freeing the inode means the extent will now have free |
1016 | * inodes (i.e., the inode being freed is the first free | 1016 | * inodes (i.e., the inode being freed is the first free |
1017 | * inode of extent), | 1017 | * inode of extent), |
1018 | */ | 1018 | */ |
1019 | if (iagp->wmap[extno] == cpu_to_le32(ONES)) { | 1019 | if (iagp->wmap[extno] == cpu_to_le32(ONES)) { |
@@ -1204,9 +1204,9 @@ int diFree(struct inode *ip) | |||
1204 | iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); | 1204 | iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); |
1205 | } | 1205 | } |
1206 | 1206 | ||
1207 | /* update the inode extent address and working map | 1207 | /* update the inode extent address and working map |
1208 | * to reflect the free extent. | 1208 | * to reflect the free extent. |
1209 | * the permanent map should have been updated already | 1209 | * the permanent map should have been updated already |
1210 | * for the inode being freed. | 1210 | * for the inode being freed. |
1211 | */ | 1211 | */ |
1212 | if (iagp->pmap[extno] != 0) { | 1212 | if (iagp->pmap[extno] != 0) { |
@@ -1218,7 +1218,7 @@ int diFree(struct inode *ip) | |||
1218 | 1218 | ||
1219 | /* update the free extent and free inode summary maps | 1219 | /* update the free extent and free inode summary maps |
1220 | * to reflect the freed extent. | 1220 | * to reflect the freed extent. |
1221 | * the inode summary map is marked to indicate no inodes | 1221 | * the inode summary map is marked to indicate no inodes |
1222 | * available for the freed extent. | 1222 | * available for the freed extent. |
1223 | */ | 1223 | */ |
1224 | sword = extno >> L2EXTSPERSUM; | 1224 | sword = extno >> L2EXTSPERSUM; |
@@ -1255,17 +1255,17 @@ int diFree(struct inode *ip) | |||
1255 | * start transaction to update block allocation map | 1255 | * start transaction to update block allocation map |
1256 | * for the inode extent freed; | 1256 | * for the inode extent freed; |
1257 | * | 1257 | * |
1258 | * N.B. AG_LOCK is released and iag will be released below, and | 1258 | * N.B. AG_LOCK is released and iag will be released below, and |
1259 | * other thread may allocate inode from/reusing the ixad freed | 1259 | * other thread may allocate inode from/reusing the ixad freed |
1260 | * BUT with new/different backing inode extent from the extent | 1260 | * BUT with new/different backing inode extent from the extent |
1261 | * to be freed by the transaction; | 1261 | * to be freed by the transaction; |
1262 | */ | 1262 | */ |
1263 | tid = txBegin(ipimap->i_sb, COMMIT_FORCE); | 1263 | tid = txBegin(ipimap->i_sb, COMMIT_FORCE); |
1264 | mutex_lock(&JFS_IP(ipimap)->commit_mutex); | 1264 | mutex_lock(&JFS_IP(ipimap)->commit_mutex); |
1265 | 1265 | ||
1266 | /* acquire tlock of the iag page of the freed ixad | 1266 | /* acquire tlock of the iag page of the freed ixad |
1267 | * to force the page NOHOMEOK (even though no data is | 1267 | * to force the page NOHOMEOK (even though no data is |
1268 | * logged from the iag page) until NOREDOPAGE|FREEXTENT log | 1268 | * logged from the iag page) until NOREDOPAGE|FREEXTENT log |
1269 | * for the free of the extent is committed; | 1269 | * for the free of the extent is committed; |
1270 | * write FREEXTENT|NOREDOPAGE log record | 1270 | * write FREEXTENT|NOREDOPAGE log record |
1271 | * N.B. linelock is overlaid as freed extent descriptor; | 1271 | * N.B. linelock is overlaid as freed extent descriptor; |
@@ -1284,8 +1284,8 @@ int diFree(struct inode *ip) | |||
1284 | * logredo needs the IAG number and IAG extent index in order | 1284 | * logredo needs the IAG number and IAG extent index in order |
1285 | * to ensure that the IMap is consistent. The least disruptive | 1285 | * to ensure that the IMap is consistent. The least disruptive |
1286 | * way to pass these values through to the transaction manager | 1286 | * way to pass these values through to the transaction manager |
1287 | * is in the iplist array. | 1287 | * is in the iplist array. |
1288 | * | 1288 | * |
1289 | * It's not pretty, but it works. | 1289 | * It's not pretty, but it works. |
1290 | */ | 1290 | */ |
1291 | iplist[1] = (struct inode *) (size_t)iagno; | 1291 | iplist[1] = (struct inode *) (size_t)iagno; |
@@ -1340,18 +1340,18 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) | |||
1340 | /* | 1340 | /* |
1341 | * NAME: diAlloc(pip,dir,ip) | 1341 | * NAME: diAlloc(pip,dir,ip) |
1342 | * | 1342 | * |
1343 | * FUNCTION: allocate a disk inode from the inode working map | 1343 | * FUNCTION: allocate a disk inode from the inode working map |
1344 | * for a fileset or aggregate. | 1344 | * for a fileset or aggregate. |
1345 | * | 1345 | * |
1346 | * PARAMETERS: | 1346 | * PARAMETERS: |
1347 | * pip - pointer to incore inode for the parent inode. | 1347 | * pip - pointer to incore inode for the parent inode. |
1348 | * dir - 'true' if the new disk inode is for a directory. | 1348 | * dir - 'true' if the new disk inode is for a directory. |
1349 | * ip - pointer to a new inode | 1349 | * ip - pointer to a new inode |
1350 | * | 1350 | * |
1351 | * RETURN VALUES: | 1351 | * RETURN VALUES: |
1352 | * 0 - success. | 1352 | * 0 - success. |
1353 | * -ENOSPC - insufficient disk resources. | 1353 | * -ENOSPC - insufficient disk resources. |
1354 | * -EIO - i/o error. | 1354 | * -EIO - i/o error. |
1355 | */ | 1355 | */ |
1356 | int diAlloc(struct inode *pip, bool dir, struct inode *ip) | 1356 | int diAlloc(struct inode *pip, bool dir, struct inode *ip) |
1357 | { | 1357 | { |
@@ -1372,7 +1372,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
1372 | JFS_IP(ip)->ipimap = ipimap; | 1372 | JFS_IP(ip)->ipimap = ipimap; |
1373 | JFS_IP(ip)->fileset = FILESYSTEM_I; | 1373 | JFS_IP(ip)->fileset = FILESYSTEM_I; |
1374 | 1374 | ||
1375 | /* for a directory, the allocation policy is to start | 1375 | /* for a directory, the allocation policy is to start |
1376 | * at the ag level using the preferred ag. | 1376 | * at the ag level using the preferred ag. |
1377 | */ | 1377 | */ |
1378 | if (dir) { | 1378 | if (dir) { |
@@ -1435,7 +1435,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
1435 | /* | 1435 | /* |
1436 | * try to allocate from the IAG | 1436 | * try to allocate from the IAG |
1437 | */ | 1437 | */ |
1438 | /* check if the inode may be allocated from the iag | 1438 | /* check if the inode may be allocated from the iag |
1439 | * (i.e. the inode has free inodes or new extent can be added). | 1439 | * (i.e. the inode has free inodes or new extent can be added). |
1440 | */ | 1440 | */ |
1441 | if (iagp->nfreeinos || addext) { | 1441 | if (iagp->nfreeinos || addext) { |
@@ -1490,7 +1490,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
1490 | * hint or, if appropriate (i.e. addext is true), allocate | 1490 | * hint or, if appropriate (i.e. addext is true), allocate |
1491 | * an extent of free inodes at or following the extent | 1491 | * an extent of free inodes at or following the extent |
1492 | * containing the hint. | 1492 | * containing the hint. |
1493 | * | 1493 | * |
1494 | * the free inode and free extent summary maps are used | 1494 | * the free inode and free extent summary maps are used |
1495 | * here, so determine the starting summary map position | 1495 | * here, so determine the starting summary map position |
1496 | * and the number of words we'll have to examine. again, | 1496 | * and the number of words we'll have to examine. again, |
@@ -1641,7 +1641,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
1641 | * inodes should be added for the allocation group, with | 1641 | * inodes should be added for the allocation group, with |
1642 | * the current request satisfied from this extent. if this | 1642 | * the current request satisfied from this extent. if this |
1643 | * is the case, an attempt will be made to do just that. if | 1643 | * is the case, an attempt will be made to do just that. if |
1644 | * this attempt fails or it has been determined that a new | 1644 | * this attempt fails or it has been determined that a new |
1645 | * extent should not be added, an attempt is made to satisfy | 1645 | * extent should not be added, an attempt is made to satisfy |
1646 | * the request by allocating an existing (backed) free inode | 1646 | * the request by allocating an existing (backed) free inode |
1647 | * from the allocation group. | 1647 | * from the allocation group. |
@@ -1649,24 +1649,24 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
1649 | * PRE CONDITION: Already have the AG lock for this AG. | 1649 | * PRE CONDITION: Already have the AG lock for this AG. |
1650 | * | 1650 | * |
1651 | * PARAMETERS: | 1651 | * PARAMETERS: |
1652 | * imap - pointer to inode map control structure. | 1652 | * imap - pointer to inode map control structure. |
1653 | * agno - allocation group to allocate from. | 1653 | * agno - allocation group to allocate from. |
1654 | * dir - 'true' if the new disk inode is for a directory. | 1654 | * dir - 'true' if the new disk inode is for a directory. |
1655 | * ip - pointer to the new inode to be filled in on successful return | 1655 | * ip - pointer to the new inode to be filled in on successful return |
1656 | * with the disk inode number allocated, its extent address | 1656 | * with the disk inode number allocated, its extent address |
1657 | * and the start of the ag. | 1657 | * and the start of the ag. |
1658 | * | 1658 | * |
1659 | * RETURN VALUES: | 1659 | * RETURN VALUES: |
1660 | * 0 - success. | 1660 | * 0 - success. |
1661 | * -ENOSPC - insufficient disk resources. | 1661 | * -ENOSPC - insufficient disk resources. |
1662 | * -EIO - i/o error. | 1662 | * -EIO - i/o error. |
1663 | */ | 1663 | */ |
1664 | static int | 1664 | static int |
1665 | diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) | 1665 | diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) |
1666 | { | 1666 | { |
1667 | int rc, addext, numfree, numinos; | 1667 | int rc, addext, numfree, numinos; |
1668 | 1668 | ||
1669 | /* get the number of free and the number of backed disk | 1669 | /* get the number of free and the number of backed disk |
1670 | * inodes currently within the ag. | 1670 | * inodes currently within the ag. |
1671 | */ | 1671 | */ |
1672 | numfree = imap->im_agctl[agno].numfree; | 1672 | numfree = imap->im_agctl[agno].numfree; |
@@ -1719,17 +1719,17 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) | |||
1719 | * specified primary group. | 1719 | * specified primary group. |
1720 | * | 1720 | * |
1721 | * PARAMETERS: | 1721 | * PARAMETERS: |
1722 | * imap - pointer to inode map control structure. | 1722 | * imap - pointer to inode map control structure. |
1723 | * agno - primary allocation group (to avoid). | 1723 | * agno - primary allocation group (to avoid). |
1724 | * dir - 'true' if the new disk inode is for a directory. | 1724 | * dir - 'true' if the new disk inode is for a directory. |
1725 | * ip - pointer to a new inode to be filled in on successful return | 1725 | * ip - pointer to a new inode to be filled in on successful return |
1726 | * with the disk inode number allocated, its extent address | 1726 | * with the disk inode number allocated, its extent address |
1727 | * and the start of the ag. | 1727 | * and the start of the ag. |
1728 | * | 1728 | * |
1729 | * RETURN VALUES: | 1729 | * RETURN VALUES: |
1730 | * 0 - success. | 1730 | * 0 - success. |
1731 | * -ENOSPC - insufficient disk resources. | 1731 | * -ENOSPC - insufficient disk resources. |
1732 | * -EIO - i/o error. | 1732 | * -EIO - i/o error. |
1733 | */ | 1733 | */ |
1734 | static int | 1734 | static int |
1735 | diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) | 1735 | diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) |
@@ -1738,7 +1738,7 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) | |||
1738 | int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; | 1738 | int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; |
1739 | 1739 | ||
1740 | 1740 | ||
1741 | /* try to allocate from the ags following agno up to | 1741 | /* try to allocate from the ags following agno up to |
1742 | * the maximum ag number. | 1742 | * the maximum ag number. |
1743 | */ | 1743 | */ |
1744 | for (ag = agno + 1; ag <= maxag; ag++) { | 1744 | for (ag = agno + 1; ag <= maxag; ag++) { |
@@ -1780,21 +1780,21 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) | |||
1780 | * | 1780 | * |
1781 | * allocation occurs from the first iag on the list using | 1781 | * allocation occurs from the first iag on the list using |
1782 | * the iag's free inode summary map to find the leftmost | 1782 | * the iag's free inode summary map to find the leftmost |
1783 | * free inode in the iag. | 1783 | * free inode in the iag. |
1784 | * | 1784 | * |
1785 | * PRE CONDITION: Already have AG lock for this AG. | 1785 | * PRE CONDITION: Already have AG lock for this AG. |
1786 | * | 1786 | * |
1787 | * PARAMETERS: | 1787 | * PARAMETERS: |
1788 | * imap - pointer to inode map control structure. | 1788 | * imap - pointer to inode map control structure. |
1789 | * agno - allocation group. | 1789 | * agno - allocation group. |
1790 | * ip - pointer to new inode to be filled in on successful return | 1790 | * ip - pointer to new inode to be filled in on successful return |
1791 | * with the disk inode number allocated, its extent address | 1791 | * with the disk inode number allocated, its extent address |
1792 | * and the start of the ag. | 1792 | * and the start of the ag. |
1793 | * | 1793 | * |
1794 | * RETURN VALUES: | 1794 | * RETURN VALUES: |
1795 | * 0 - success. | 1795 | * 0 - success. |
1796 | * -ENOSPC - insufficient disk resources. | 1796 | * -ENOSPC - insufficient disk resources. |
1797 | * -EIO - i/o error. | 1797 | * -EIO - i/o error. |
1798 | */ | 1798 | */ |
1799 | static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | 1799 | static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) |
1800 | { | 1800 | { |
@@ -1867,7 +1867,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
1867 | return -EIO; | 1867 | return -EIO; |
1868 | } | 1868 | } |
1869 | 1869 | ||
1870 | /* compute the inode number within the iag. | 1870 | /* compute the inode number within the iag. |
1871 | */ | 1871 | */ |
1872 | ino = (extno << L2INOSPEREXT) + rem; | 1872 | ino = (extno << L2INOSPEREXT) + rem; |
1873 | 1873 | ||
@@ -1892,17 +1892,17 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
1892 | /* | 1892 | /* |
1893 | * NAME: diAllocExt(imap,agno,ip) | 1893 | * NAME: diAllocExt(imap,agno,ip) |
1894 | * | 1894 | * |
1895 | * FUNCTION: add a new extent of free inodes to an iag, allocating | 1895 | * FUNCTION: add a new extent of free inodes to an iag, allocating |
1896 | * an inode from this extent to satisfy the current allocation | 1896 | * an inode from this extent to satisfy the current allocation |
1897 | * request. | 1897 | * request. |
1898 | * | 1898 | * |
1899 | * this routine first tries to find an existing iag with free | 1899 | * this routine first tries to find an existing iag with free |
1900 | * extents through the ag free extent list. if list is not | 1900 | * extents through the ag free extent list. if list is not |
1901 | * empty, the head of the list will be selected as the home | 1901 | * empty, the head of the list will be selected as the home |
1902 | * of the new extent of free inodes. otherwise (the list is | 1902 | * of the new extent of free inodes. otherwise (the list is |
1903 | * empty), a new iag will be allocated for the ag to contain | 1903 | * empty), a new iag will be allocated for the ag to contain |
1904 | * the extent. | 1904 | * the extent. |
1905 | * | 1905 | * |
1906 | * once an iag has been selected, the free extent summary map | 1906 | * once an iag has been selected, the free extent summary map |
1907 | * is used to locate a free extent within the iag and diNewExt() | 1907 | * is used to locate a free extent within the iag and diNewExt() |
1908 | * is called to initialize the extent, with initialization | 1908 | * is called to initialize the extent, with initialization |
@@ -1910,16 +1910,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
1910 | * for the purpose of satisfying this request. | 1910 | * for the purpose of satisfying this request. |
1911 | * | 1911 | * |
1912 | * PARAMETERS: | 1912 | * PARAMETERS: |
1913 | * imap - pointer to inode map control structure. | 1913 | * imap - pointer to inode map control structure. |
1914 | * agno - allocation group number. | 1914 | * agno - allocation group number. |
1915 | * ip - pointer to new inode to be filled in on successful return | 1915 | * ip - pointer to new inode to be filled in on successful return |
1916 | * with the disk inode number allocated, its extent address | 1916 | * with the disk inode number allocated, its extent address |
1917 | * and the start of the ag. | 1917 | * and the start of the ag. |
1918 | * | 1918 | * |
1919 | * RETURN VALUES: | 1919 | * RETURN VALUES: |
1920 | * 0 - success. | 1920 | * 0 - success. |
1921 | * -ENOSPC - insufficient disk resources. | 1921 | * -ENOSPC - insufficient disk resources. |
1922 | * -EIO - i/o error. | 1922 | * -EIO - i/o error. |
1923 | */ | 1923 | */ |
1924 | static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | 1924 | static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) |
1925 | { | 1925 | { |
@@ -2012,7 +2012,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
2012 | /* | 2012 | /* |
2013 | * NAME: diAllocBit(imap,iagp,ino) | 2013 | * NAME: diAllocBit(imap,iagp,ino) |
2014 | * | 2014 | * |
2015 | * FUNCTION: allocate a backed inode from an iag. | 2015 | * FUNCTION: allocate a backed inode from an iag. |
2016 | * | 2016 | * |
2017 | * this routine performs the mechanics of allocating a | 2017 | * this routine performs the mechanics of allocating a |
2018 | * specified inode from a backed extent. | 2018 | * specified inode from a backed extent. |
@@ -2025,19 +2025,19 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
2025 | * in the face of updates to multiple buffers. under this | 2025 | * in the face of updates to multiple buffers. under this |
2026 | * approach, all required buffers are obtained before making | 2026 | * approach, all required buffers are obtained before making |
2027 | * any updates and are held until all updates are complete. | 2027 | * any updates and are held until all updates are complete. |
2028 | * | 2028 | * |
2029 | * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on | 2029 | * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on |
2030 | * this AG. Must have read lock on imap inode. | 2030 | * this AG. Must have read lock on imap inode. |
2031 | * | 2031 | * |
2032 | * PARAMETERS: | 2032 | * PARAMETERS: |
2033 | * imap - pointer to inode map control structure. | 2033 | * imap - pointer to inode map control structure. |
2034 | * iagp - pointer to iag. | 2034 | * iagp - pointer to iag. |
2035 | * ino - inode number to be allocated within the iag. | 2035 | * ino - inode number to be allocated within the iag. |
2036 | * | 2036 | * |
2037 | * RETURN VALUES: | 2037 | * RETURN VALUES: |
2038 | * 0 - success. | 2038 | * 0 - success. |
2039 | * -ENOSPC - insufficient disk resources. | 2039 | * -ENOSPC - insufficient disk resources. |
2040 | * -EIO - i/o error. | 2040 | * -EIO - i/o error. |
2041 | */ | 2041 | */ |
2042 | static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) | 2042 | static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) |
2043 | { | 2043 | { |
@@ -2172,19 +2172,19 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) | |||
2172 | * buffers. under this approach, all required buffers are | 2172 | * buffers. under this approach, all required buffers are |
2173 | * obtained before making any updates and are held until all | 2173 | * obtained before making any updates and are held until all |
2174 | * updates are complete. | 2174 | * updates are complete. |
2175 | * | 2175 | * |
2176 | * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on | 2176 | * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on |
2177 | * this AG. Must have read lock on imap inode. | 2177 | * this AG. Must have read lock on imap inode. |
2178 | * | 2178 | * |
2179 | * PARAMETERS: | 2179 | * PARAMETERS: |
2180 | * imap - pointer to inode map control structure. | 2180 | * imap - pointer to inode map control structure. |
2181 | * iagp - pointer to iag. | 2181 | * iagp - pointer to iag. |
2182 | * extno - extent number. | 2182 | * extno - extent number. |
2183 | * | 2183 | * |
2184 | * RETURN VALUES: | 2184 | * RETURN VALUES: |
2185 | * 0 - success. | 2185 | * 0 - success. |
2186 | * -ENOSPC - insufficient disk resources. | 2186 | * -ENOSPC - insufficient disk resources. |
2187 | * -EIO - i/o error. | 2187 | * -EIO - i/o error. |
2188 | */ | 2188 | */ |
2189 | static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | 2189 | static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) |
2190 | { | 2190 | { |
@@ -2432,34 +2432,34 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | |||
2432 | /* | 2432 | /* |
2433 | * NAME: diNewIAG(imap,iagnop,agno) | 2433 | * NAME: diNewIAG(imap,iagnop,agno) |
2434 | * | 2434 | * |
2435 | * FUNCTION: allocate a new iag for an allocation group. | 2435 | * FUNCTION: allocate a new iag for an allocation group. |
2436 | * | 2436 | * |
2437 | * first tries to allocate the iag from the inode map | 2437 | * first tries to allocate the iag from the inode map |
2438 | * iagfree list: | 2438 | * iagfree list: |
2439 | * if the list has free iags, the head of the list is removed | 2439 | * if the list has free iags, the head of the list is removed |
2440 | * and returned to satisfy the request. | 2440 | * and returned to satisfy the request. |
2441 | * if the inode map's iag free list is empty, the inode map | 2441 | * if the inode map's iag free list is empty, the inode map |
2442 | * is extended to hold a new iag. this new iag is initialized | 2442 | * is extended to hold a new iag. this new iag is initialized |
2443 | * and returned to satisfy the request. | 2443 | * and returned to satisfy the request. |
2444 | * | 2444 | * |
2445 | * PARAMETERS: | 2445 | * PARAMETERS: |
2446 | * imap - pointer to inode map control structure. | 2446 | * imap - pointer to inode map control structure. |
2447 | * iagnop - pointer to an iag number set with the number of the | 2447 | * iagnop - pointer to an iag number set with the number of the |
2448 | * newly allocated iag upon successful return. | 2448 | * newly allocated iag upon successful return. |
2449 | * agno - allocation group number. | 2449 | * agno - allocation group number. |
2450 | * mpp - Buffer pointer to be filled in with new IAG's buffer | 2450 | * mpp - Buffer pointer to be filled in with new IAG's buffer |
2451 | * | 2451 | * |
2452 | * RETURN VALUES: | 2452 | * RETURN VALUES: |
2453 | * 0 - success. | 2453 | * 0 - success. |
2454 | * -ENOSPC - insufficient disk resources. | 2454 | * -ENOSPC - insufficient disk resources. |
2455 | * -EIO - i/o error. | 2455 | * -EIO - i/o error. |
2456 | * | 2456 | * |
2457 | * serialization: | 2457 | * serialization: |
2458 | * AG lock held on entry/exit; | 2458 | * AG lock held on entry/exit; |
2459 | * write lock on the map is held inside; | 2459 | * write lock on the map is held inside; |
2460 | * read lock on the map is held on successful completion; | 2460 | * read lock on the map is held on successful completion; |
2461 | * | 2461 | * |
2462 | * note: new iag transaction: | 2462 | * note: new iag transaction: |
2463 | * . synchronously write iag; | 2463 | * . synchronously write iag; |
2464 | * . write log of xtree and inode of imap; | 2464 | * . write log of xtree and inode of imap; |
2465 | * . commit; | 2465 | * . commit; |
@@ -2494,7 +2494,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
2494 | /* acquire the free iag lock */ | 2494 | /* acquire the free iag lock */ |
2495 | IAGFREE_LOCK(imap); | 2495 | IAGFREE_LOCK(imap); |
2496 | 2496 | ||
2497 | /* if there are any iags on the inode map free iag list, | 2497 | /* if there are any iags on the inode map free iag list, |
2498 | * allocate the iag from the head of the list. | 2498 | * allocate the iag from the head of the list. |
2499 | */ | 2499 | */ |
2500 | if (imap->im_freeiag >= 0) { | 2500 | if (imap->im_freeiag >= 0) { |
@@ -2618,8 +2618,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
2618 | flush_metapage(mp); | 2618 | flush_metapage(mp); |
2619 | 2619 | ||
2620 | /* | 2620 | /* |
2621 | * txCommit(COMMIT_FORCE) will synchronously write address | 2621 | * txCommit(COMMIT_FORCE) will synchronously write address |
2622 | * index pages and inode after commit in careful update order | 2622 | * index pages and inode after commit in careful update order |
2623 | * of address index pages (right to left, bottom up); | 2623 | * of address index pages (right to left, bottom up); |
2624 | */ | 2624 | */ |
2625 | iplist[0] = ipimap; | 2625 | iplist[0] = ipimap; |
@@ -2678,11 +2678,11 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
2678 | * | 2678 | * |
2679 | * FUNCTION: get the buffer for the specified iag within a fileset | 2679 | * FUNCTION: get the buffer for the specified iag within a fileset |
2680 | * or aggregate inode map. | 2680 | * or aggregate inode map. |
2681 | * | 2681 | * |
2682 | * PARAMETERS: | 2682 | * PARAMETERS: |
2683 | * imap - pointer to inode map control structure. | 2683 | * imap - pointer to inode map control structure. |
2684 | * iagno - iag number. | 2684 | * iagno - iag number. |
2685 | * mpp - pointer to buffer pointer to be filled in on successful | 2685 | * mpp - pointer to buffer pointer to be filled in on successful |
2686 | * exit. | 2686 | * exit. |
2687 | * | 2687 | * |
2688 | * SERIALIZATION: | 2688 | * SERIALIZATION: |
@@ -2692,7 +2692,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
2692 | * | 2692 | * |
2693 | * RETURN VALUES: | 2693 | * RETURN VALUES: |
2694 | * 0 - success. | 2694 | * 0 - success. |
2695 | * -EIO - i/o error. | 2695 | * -EIO - i/o error. |
2696 | */ | 2696 | */ |
2697 | static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) | 2697 | static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) |
2698 | { | 2698 | { |
@@ -2718,8 +2718,8 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) | |||
2718 | * the specified bit position. | 2718 | * the specified bit position. |
2719 | * | 2719 | * |
2720 | * PARAMETERS: | 2720 | * PARAMETERS: |
2721 | * word - word to be examined. | 2721 | * word - word to be examined. |
2722 | * start - starting bit position. | 2722 | * start - starting bit position. |
2723 | * | 2723 | * |
2724 | * RETURN VALUES: | 2724 | * RETURN VALUES: |
2725 | * bit position of first free bit in the word or 32 if | 2725 | * bit position of first free bit in the word or 32 if |
@@ -2740,10 +2740,10 @@ static int diFindFree(u32 word, int start) | |||
2740 | 2740 | ||
2741 | /* | 2741 | /* |
2742 | * NAME: diUpdatePMap() | 2742 | * NAME: diUpdatePMap() |
2743 | * | 2743 | * |
2744 | * FUNCTION: Update the persistent map in an IAG for the allocation or | 2744 | * FUNCTION: Update the persistent map in an IAG for the allocation or |
2745 | * freeing of the specified inode. | 2745 | * freeing of the specified inode. |
2746 | * | 2746 | * |
2747 | * PRE CONDITIONS: Working map has already been updated for allocate. | 2747 | * PRE CONDITIONS: Working map has already been updated for allocate. |
2748 | * | 2748 | * |
2749 | * PARAMETERS: | 2749 | * PARAMETERS: |
@@ -2752,7 +2752,7 @@ static int diFindFree(u32 word, int start) | |||
2752 | * is_free - If 'true' indicates inode should be marked freed, otherwise | 2752 | * is_free - If 'true' indicates inode should be marked freed, otherwise |
2753 | * indicates inode should be marked allocated. | 2753 | * indicates inode should be marked allocated. |
2754 | * | 2754 | * |
2755 | * RETURN VALUES: | 2755 | * RETURN VALUES: |
2756 | * 0 for success | 2756 | * 0 for success |
2757 | */ | 2757 | */ |
2758 | int | 2758 | int |
@@ -2793,7 +2793,7 @@ diUpdatePMap(struct inode *ipimap, | |||
2793 | extno = ino >> L2INOSPEREXT; | 2793 | extno = ino >> L2INOSPEREXT; |
2794 | bitno = ino & (INOSPEREXT - 1); | 2794 | bitno = ino & (INOSPEREXT - 1); |
2795 | mask = HIGHORDER >> bitno; | 2795 | mask = HIGHORDER >> bitno; |
2796 | /* | 2796 | /* |
2797 | * mark the inode free in persistent map: | 2797 | * mark the inode free in persistent map: |
2798 | */ | 2798 | */ |
2799 | if (is_free) { | 2799 | if (is_free) { |
@@ -2803,7 +2803,7 @@ diUpdatePMap(struct inode *ipimap, | |||
2803 | * of last reference release; | 2803 | * of last reference release; |
2804 | */ | 2804 | */ |
2805 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { | 2805 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { |
2806 | jfs_error(ipimap->i_sb, | 2806 | jfs_error(ipimap->i_sb, |
2807 | "diUpdatePMap: inode %ld not marked as " | 2807 | "diUpdatePMap: inode %ld not marked as " |
2808 | "allocated in wmap!", inum); | 2808 | "allocated in wmap!", inum); |
2809 | } | 2809 | } |
@@ -2877,8 +2877,8 @@ diUpdatePMap(struct inode *ipimap, | |||
2877 | * diExtendFS() | 2877 | * diExtendFS() |
2878 | * | 2878 | * |
2879 | * function: update imap for extendfs(); | 2879 | * function: update imap for extendfs(); |
2880 | * | 2880 | * |
2881 | * note: AG size has been increased s.t. each k old contiguous AGs are | 2881 | * note: AG size has been increased s.t. each k old contiguous AGs are |
2882 | * coalesced into a new AG; | 2882 | * coalesced into a new AG; |
2883 | */ | 2883 | */ |
2884 | int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | 2884 | int diExtendFS(struct inode *ipimap, struct inode *ipbmap) |
@@ -2897,7 +2897,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | |||
2897 | atomic_read(&imap->im_numfree)); | 2897 | atomic_read(&imap->im_numfree)); |
2898 | 2898 | ||
2899 | /* | 2899 | /* |
2900 | * reconstruct imap | 2900 | * reconstruct imap |
2901 | * | 2901 | * |
2902 | * coalesce contiguous k (newAGSize/oldAGSize) AGs; | 2902 | * coalesce contiguous k (newAGSize/oldAGSize) AGs; |
2903 | * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; | 2903 | * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; |
@@ -2931,7 +2931,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | |||
2931 | } | 2931 | } |
2932 | 2932 | ||
2933 | /* leave free iag in the free iag list */ | 2933 | /* leave free iag in the free iag list */ |
2934 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { | 2934 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { |
2935 | release_metapage(bp); | 2935 | release_metapage(bp); |
2936 | continue; | 2936 | continue; |
2937 | } | 2937 | } |
diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h index e3b7db47db6b..4f9c346ed498 100644 --- a/fs/jfs/jfs_imap.h +++ b/fs/jfs/jfs_imap.h | |||
@@ -1,18 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2000-2002 | 2 | * Copyright (C) International Business Machines Corp., 2000-2002 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_IMAP | 18 | #ifndef _H_JFS_IMAP |
@@ -45,13 +45,13 @@ | |||
45 | /* get the starting block number of the 4K page of an inode extent | 45 | /* get the starting block number of the 4K page of an inode extent |
46 | * that contains ino. | 46 | * that contains ino. |
47 | */ | 47 | */ |
48 | #define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \ | 48 | #define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \ |
49 | ((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg))) | 49 | ((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg))) |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * inode allocation map: | 52 | * inode allocation map: |
53 | * | 53 | * |
54 | * inode allocation map consists of | 54 | * inode allocation map consists of |
55 | * . the inode map control page and | 55 | * . the inode map control page and |
56 | * . inode allocation group pages (per 4096 inodes) | 56 | * . inode allocation group pages (per 4096 inodes) |
57 | * which are addressed by standard JFS xtree. | 57 | * which are addressed by standard JFS xtree. |
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 54d73716ca8c..94005584445a 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h | |||
@@ -4,18 +4,18 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | #ifndef _H_JFS_INCORE | 19 | #ifndef _H_JFS_INCORE |
20 | #define _H_JFS_INCORE | 20 | #define _H_JFS_INCORE |
21 | 21 | ||
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index bffaca9ae3a2..4c67ed97682b 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
@@ -61,7 +61,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) | |||
61 | inode = new_inode(sb); | 61 | inode = new_inode(sb); |
62 | if (!inode) { | 62 | if (!inode) { |
63 | jfs_warn("ialloc: new_inode returned NULL!"); | 63 | jfs_warn("ialloc: new_inode returned NULL!"); |
64 | return inode; | 64 | return ERR_PTR(-ENOMEM); |
65 | } | 65 | } |
66 | 66 | ||
67 | jfs_inode = JFS_IP(inode); | 67 | jfs_inode = JFS_IP(inode); |
@@ -69,9 +69,10 @@ struct inode *ialloc(struct inode *parent, umode_t mode) | |||
69 | rc = diAlloc(parent, S_ISDIR(mode), inode); | 69 | rc = diAlloc(parent, S_ISDIR(mode), inode); |
70 | if (rc) { | 70 | if (rc) { |
71 | jfs_warn("ialloc: diAlloc returned %d!", rc); | 71 | jfs_warn("ialloc: diAlloc returned %d!", rc); |
72 | make_bad_inode(inode); | 72 | if (rc == -EIO) |
73 | make_bad_inode(inode); | ||
73 | iput(inode); | 74 | iput(inode); |
74 | return NULL; | 75 | return ERR_PTR(rc); |
75 | } | 76 | } |
76 | 77 | ||
77 | inode->i_uid = current->fsuid; | 78 | inode->i_uid = current->fsuid; |
@@ -97,7 +98,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) | |||
97 | inode->i_flags |= S_NOQUOTA; | 98 | inode->i_flags |= S_NOQUOTA; |
98 | inode->i_nlink = 0; | 99 | inode->i_nlink = 0; |
99 | iput(inode); | 100 | iput(inode); |
100 | return NULL; | 101 | return ERR_PTR(-EDQUOT); |
101 | } | 102 | } |
102 | 103 | ||
103 | inode->i_mode = mode; | 104 | inode->i_mode = mode; |
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 1fc48df670c8..0d06ccfaff0e 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_INODE | 18 | #ifndef _H_JFS_INODE |
diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h index 70ac9f7d1e00..7d78e83d7c40 100644 --- a/fs/jfs/jfs_lock.h +++ b/fs/jfs/jfs_lock.h | |||
@@ -1,19 +1,19 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2000-2001 | 2 | * Copyright (C) International Business Machines Corp., 2000-2001 |
3 | * Portions Copyright (c) Christoph Hellwig, 2001-2002 | 3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | #ifndef _H_JFS_LOCK | 19 | #ifndef _H_JFS_LOCK |
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 3315f0b1fbc0..b89c9aba0466 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
@@ -337,7 +337,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
337 | * PARAMETER: cd - commit descriptor | 337 | * PARAMETER: cd - commit descriptor |
338 | * | 338 | * |
339 | * RETURN: end-of-log address | 339 | * RETURN: end-of-log address |
340 | * | 340 | * |
341 | * serialization: LOG_LOCK() held on entry/exit | 341 | * serialization: LOG_LOCK() held on entry/exit |
342 | */ | 342 | */ |
343 | static int | 343 | static int |
@@ -554,7 +554,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
554 | * PARAMETER: log | 554 | * PARAMETER: log |
555 | * | 555 | * |
556 | * RETURN: 0 | 556 | * RETURN: 0 |
557 | * | 557 | * |
558 | * serialization: LOG_LOCK() held on entry/exit | 558 | * serialization: LOG_LOCK() held on entry/exit |
559 | */ | 559 | */ |
560 | static int lmNextPage(struct jfs_log * log) | 560 | static int lmNextPage(struct jfs_log * log) |
@@ -656,7 +656,7 @@ static int lmNextPage(struct jfs_log * log) | |||
656 | * page number - redrive pageout of the page at the head of | 656 | * page number - redrive pageout of the page at the head of |
657 | * pageout queue until full page has been written. | 657 | * pageout queue until full page has been written. |
658 | * | 658 | * |
659 | * RETURN: | 659 | * RETURN: |
660 | * | 660 | * |
661 | * NOTE: | 661 | * NOTE: |
662 | * LOGGC_LOCK serializes log group commit queue, and | 662 | * LOGGC_LOCK serializes log group commit queue, and |
@@ -920,10 +920,10 @@ static void lmPostGC(struct lbuf * bp) | |||
920 | * this code is called again. | 920 | * this code is called again. |
921 | * | 921 | * |
922 | * PARAMETERS: log - log structure | 922 | * PARAMETERS: log - log structure |
923 | * hard_sync - 1 to force all metadata to be written | 923 | * hard_sync - 1 to force all metadata to be written |
924 | * | 924 | * |
925 | * RETURN: 0 | 925 | * RETURN: 0 |
926 | * | 926 | * |
927 | * serialization: LOG_LOCK() held on entry/exit | 927 | * serialization: LOG_LOCK() held on entry/exit |
928 | */ | 928 | */ |
929 | static int lmLogSync(struct jfs_log * log, int hard_sync) | 929 | static int lmLogSync(struct jfs_log * log, int hard_sync) |
@@ -1052,7 +1052,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) | |||
1052 | * FUNCTION: write log SYNCPT record for specified log | 1052 | * FUNCTION: write log SYNCPT record for specified log |
1053 | * | 1053 | * |
1054 | * PARAMETERS: log - log structure | 1054 | * PARAMETERS: log - log structure |
1055 | * hard_sync - set to 1 to force metadata to be written | 1055 | * hard_sync - set to 1 to force metadata to be written |
1056 | */ | 1056 | */ |
1057 | void jfs_syncpt(struct jfs_log *log, int hard_sync) | 1057 | void jfs_syncpt(struct jfs_log *log, int hard_sync) |
1058 | { LOG_LOCK(log); | 1058 | { LOG_LOCK(log); |
@@ -1067,7 +1067,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync) | |||
1067 | * insert filesystem in the active list of the log. | 1067 | * insert filesystem in the active list of the log. |
1068 | * | 1068 | * |
1069 | * PARAMETER: ipmnt - file system mount inode | 1069 | * PARAMETER: ipmnt - file system mount inode |
1070 | * iplog - log inode (out) | 1070 | * iplog - log inode (out) |
1071 | * | 1071 | * |
1072 | * RETURN: | 1072 | * RETURN: |
1073 | * | 1073 | * |
@@ -1082,7 +1082,7 @@ int lmLogOpen(struct super_block *sb) | |||
1082 | 1082 | ||
1083 | if (sbi->flag & JFS_NOINTEGRITY) | 1083 | if (sbi->flag & JFS_NOINTEGRITY) |
1084 | return open_dummy_log(sb); | 1084 | return open_dummy_log(sb); |
1085 | 1085 | ||
1086 | if (sbi->mntflag & JFS_INLINELOG) | 1086 | if (sbi->mntflag & JFS_INLINELOG) |
1087 | return open_inline_log(sb); | 1087 | return open_inline_log(sb); |
1088 | 1088 | ||
@@ -1131,7 +1131,7 @@ int lmLogOpen(struct super_block *sb) | |||
1131 | 1131 | ||
1132 | log->bdev = bdev; | 1132 | log->bdev = bdev; |
1133 | memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); | 1133 | memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); |
1134 | 1134 | ||
1135 | /* | 1135 | /* |
1136 | * initialize log: | 1136 | * initialize log: |
1137 | */ | 1137 | */ |
@@ -1253,13 +1253,13 @@ static int open_dummy_log(struct super_block *sb) | |||
1253 | * initialize the log from log superblock. | 1253 | * initialize the log from log superblock. |
1254 | * set the log state in the superblock to LOGMOUNT and | 1254 | * set the log state in the superblock to LOGMOUNT and |
1255 | * write SYNCPT log record. | 1255 | * write SYNCPT log record. |
1256 | * | 1256 | * |
1257 | * PARAMETER: log - log structure | 1257 | * PARAMETER: log - log structure |
1258 | * | 1258 | * |
1259 | * RETURN: 0 - if ok | 1259 | * RETURN: 0 - if ok |
1260 | * -EINVAL - bad log magic number or superblock dirty | 1260 | * -EINVAL - bad log magic number or superblock dirty |
1261 | * error returned from logwait() | 1261 | * error returned from logwait() |
1262 | * | 1262 | * |
1263 | * serialization: single first open thread | 1263 | * serialization: single first open thread |
1264 | */ | 1264 | */ |
1265 | int lmLogInit(struct jfs_log * log) | 1265 | int lmLogInit(struct jfs_log * log) |
@@ -1297,7 +1297,7 @@ int lmLogInit(struct jfs_log * log) | |||
1297 | 1297 | ||
1298 | if (!test_bit(log_INLINELOG, &log->flag)) | 1298 | if (!test_bit(log_INLINELOG, &log->flag)) |
1299 | log->l2bsize = L2LOGPSIZE; | 1299 | log->l2bsize = L2LOGPSIZE; |
1300 | 1300 | ||
1301 | /* check for disabled journaling to disk */ | 1301 | /* check for disabled journaling to disk */ |
1302 | if (log->no_integrity) { | 1302 | if (log->no_integrity) { |
1303 | /* | 1303 | /* |
@@ -1651,7 +1651,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) | |||
1651 | * PARAMETER: log - log inode | 1651 | * PARAMETER: log - log inode |
1652 | * | 1652 | * |
1653 | * RETURN: 0 - success | 1653 | * RETURN: 0 - success |
1654 | * | 1654 | * |
1655 | * serialization: single last close thread | 1655 | * serialization: single last close thread |
1656 | */ | 1656 | */ |
1657 | int lmLogShutdown(struct jfs_log * log) | 1657 | int lmLogShutdown(struct jfs_log * log) |
@@ -1677,7 +1677,7 @@ int lmLogShutdown(struct jfs_log * log) | |||
1677 | lrd.type = cpu_to_le16(LOG_SYNCPT); | 1677 | lrd.type = cpu_to_le16(LOG_SYNCPT); |
1678 | lrd.length = 0; | 1678 | lrd.length = 0; |
1679 | lrd.log.syncpt.sync = 0; | 1679 | lrd.log.syncpt.sync = 0; |
1680 | 1680 | ||
1681 | lsn = lmWriteRecord(log, NULL, &lrd, NULL); | 1681 | lsn = lmWriteRecord(log, NULL, &lrd, NULL); |
1682 | bp = log->bp; | 1682 | bp = log->bp; |
1683 | lp = (struct logpage *) bp->l_ldata; | 1683 | lp = (struct logpage *) bp->l_ldata; |
@@ -1703,7 +1703,7 @@ int lmLogShutdown(struct jfs_log * log) | |||
1703 | jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d", | 1703 | jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d", |
1704 | lsn, log->page, log->eor); | 1704 | lsn, log->page, log->eor); |
1705 | 1705 | ||
1706 | out: | 1706 | out: |
1707 | /* | 1707 | /* |
1708 | * shutdown per log i/o | 1708 | * shutdown per log i/o |
1709 | */ | 1709 | */ |
@@ -1769,7 +1769,7 @@ static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, | |||
1769 | lbmFree(bpsuper); | 1769 | lbmFree(bpsuper); |
1770 | return -EIO; | 1770 | return -EIO; |
1771 | } | 1771 | } |
1772 | 1772 | ||
1773 | } | 1773 | } |
1774 | 1774 | ||
1775 | /* | 1775 | /* |
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index 8c6909b80014..a53fb17ea219 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | #ifndef _H_JFS_LOGMGR | 19 | #ifndef _H_JFS_LOGMGR |
@@ -35,19 +35,19 @@ | |||
35 | /* | 35 | /* |
36 | * log logical volume | 36 | * log logical volume |
37 | * | 37 | * |
38 | * a log is used to make the commit operation on journalled | 38 | * a log is used to make the commit operation on journalled |
39 | * files within the same logical volume group atomic. | 39 | * files within the same logical volume group atomic. |
40 | * a log is implemented with a logical volume. | 40 | * a log is implemented with a logical volume. |
41 | * there is one log per logical volume group. | 41 | * there is one log per logical volume group. |
42 | * | 42 | * |
43 | * block 0 of the log logical volume is not used (ipl etc). | 43 | * block 0 of the log logical volume is not used (ipl etc). |
44 | * block 1 contains a log "superblock" and is used by logFormat(), | 44 | * block 1 contains a log "superblock" and is used by logFormat(), |
45 | * lmLogInit(), lmLogShutdown(), and logRedo() to record status | 45 | * lmLogInit(), lmLogShutdown(), and logRedo() to record status |
46 | * of the log but is not otherwise used during normal processing. | 46 | * of the log but is not otherwise used during normal processing. |
47 | * blocks 2 - (N-1) are used to contain log records. | 47 | * blocks 2 - (N-1) are used to contain log records. |
48 | * | 48 | * |
49 | * when a volume group is varied-on-line, logRedo() must have | 49 | * when a volume group is varied-on-line, logRedo() must have |
50 | * been executed before the file systems (logical volumes) in | 50 | * been executed before the file systems (logical volumes) in |
51 | * the volume group can be mounted. | 51 | * the volume group can be mounted. |
52 | */ | 52 | */ |
53 | /* | 53 | /* |
@@ -97,26 +97,26 @@ struct logsuper { | |||
97 | * log logical page | 97 | * log logical page |
98 | * | 98 | * |
99 | * (this comment should be rewritten !) | 99 | * (this comment should be rewritten !) |
100 | * the header and trailer structures (h,t) will normally have | 100 | * the header and trailer structures (h,t) will normally have |
101 | * the same page and eor value. | 101 | * the same page and eor value. |
102 | * An exception to this occurs when a complete page write is not | 102 | * An exception to this occurs when a complete page write is not |
103 | * accomplished on a power failure. Since the hardware may "split write" | 103 | * accomplished on a power failure. Since the hardware may "split write" |
104 | * sectors in the page, any out of order sequence may occur during powerfail | 104 | * sectors in the page, any out of order sequence may occur during powerfail |
105 | * and needs to be recognized during log replay. The xor value is | 105 | * and needs to be recognized during log replay. The xor value is |
106 | * an "exclusive or" of all log words in the page up to eor. This | 106 | * an "exclusive or" of all log words in the page up to eor. This |
107 | * 32 bit eor is stored with the top 16 bits in the header and the | 107 | * 32 bit eor is stored with the top 16 bits in the header and the |
108 | * bottom 16 bits in the trailer. logredo can easily recognize pages | 108 | * bottom 16 bits in the trailer. logredo can easily recognize pages |
109 | * that were not completed by reconstructing this eor and checking | 109 | * that were not completed by reconstructing this eor and checking |
110 | * the log page. | 110 | * the log page. |
111 | * | 111 | * |
112 | * Previous versions of the operating system did not allow split | 112 | * Previous versions of the operating system did not allow split |
113 | * writes and detected partially written records in logredo by | 113 | * writes and detected partially written records in logredo by |
114 | * ordering the updates to the header, trailer, and the move of data | 114 | * ordering the updates to the header, trailer, and the move of data |
115 | * into the logdata area. The order: (1) data is moved (2) header | 115 | * into the logdata area. The order: (1) data is moved (2) header |
116 | * is updated (3) trailer is updated. In logredo, when the header | 116 | * is updated (3) trailer is updated. In logredo, when the header |
117 | * differed from the trailer, the header and trailer were reconciled | 117 | * differed from the trailer, the header and trailer were reconciled |
118 | * as follows: if h.page != t.page they were set to the smaller of | 118 | * as follows: if h.page != t.page they were set to the smaller of |
119 | * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) | 119 | * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) |
120 | * h.eor != t.eor they were set to the smaller of their two values. | 120 | * h.eor != t.eor they were set to the smaller of their two values. |
121 | */ | 121 | */ |
122 | struct logpage { | 122 | struct logpage { |
@@ -147,20 +147,20 @@ struct logpage { | |||
147 | * in a page, pages are written to temporary paging space if | 147 | * in a page, pages are written to temporary paging space if |
148 | * they must be written to disk before commit, and i/o is | 148 | * they must be written to disk before commit, and i/o is |
149 | * scheduled for modified pages to their home location after | 149 | * scheduled for modified pages to their home location after |
150 | * the log records containing the after values and the commit | 150 | * the log records containing the after values and the commit |
151 | * record is written to the log on disk, undo discards the copy | 151 | * record is written to the log on disk, undo discards the copy |
152 | * in main-memory.) | 152 | * in main-memory.) |
153 | * | 153 | * |
154 | * a log record consists of a data area of variable length followed by | 154 | * a log record consists of a data area of variable length followed by |
155 | * a descriptor of fixed size LOGRDSIZE bytes. | 155 | * a descriptor of fixed size LOGRDSIZE bytes. |
156 | * the data area is rounded up to an integral number of 4-bytes and | 156 | * the data area is rounded up to an integral number of 4-bytes and |
157 | * must be no longer than LOGPSIZE. | 157 | * must be no longer than LOGPSIZE. |
158 | * the descriptor is of size of multiple of 4-bytes and aligned on a | 158 | * the descriptor is of size of multiple of 4-bytes and aligned on a |
159 | * 4-byte boundary. | 159 | * 4-byte boundary. |
160 | * records are packed one after the other in the data area of log pages. | 160 | * records are packed one after the other in the data area of log pages. |
161 | * (sometimes a DUMMY record is inserted so that at least one record ends | 161 | * (sometimes a DUMMY record is inserted so that at least one record ends |
162 | * on every page or the longest record is placed on at most two pages). | 162 | * on every page or the longest record is placed on at most two pages). |
163 | * the field eor in page header/trailer points to the byte following | 163 | * the field eor in page header/trailer points to the byte following |
164 | * the last record on a page. | 164 | * the last record on a page. |
165 | */ | 165 | */ |
166 | 166 | ||
@@ -270,11 +270,11 @@ struct lrd { | |||
270 | /* | 270 | /* |
271 | * NOREDOINOEXT: the inode extent is freed | 271 | * NOREDOINOEXT: the inode extent is freed |
272 | * | 272 | * |
273 | * do not apply after-image records which precede this | 273 | * do not apply after-image records which precede this |
274 | * record in the log with any of the 4 page block | 274 | * record in the log with any of the 4 page block |
275 | * numbers in this inode extent. | 275 | * numbers in this inode extent. |
276 | * | 276 | * |
277 | * NOTE: The fileset and pxd fields MUST remain in | 277 | * NOTE: The fileset and pxd fields MUST remain in |
278 | * the same fields in the REDOPAGE record format. | 278 | * the same fields in the REDOPAGE record format. |
279 | * | 279 | * |
280 | */ | 280 | */ |
@@ -319,12 +319,10 @@ struct lrd { | |||
319 | * do not apply records which precede this record in the log | 319 | * do not apply records which precede this record in the log |
320 | * with the same inode number. | 320 | * with the same inode number. |
321 | * | 321 | * |
322 | * NOREDILE must be the first to be written at commit | 322 | * NOREDOFILE must be the first to be written at commit |
323 | * (last to be read in logredo()) - it prevents | 323 | * (last to be read in logredo()) - it prevents |
324 | * replay of preceding updates of all preceding generations | 324 | * replay of preceding updates of all preceding generations |
325 | * of the inumber esp. the on-disk inode itself, | 325 | * of the inumber esp. the on-disk inode itself. |
326 | * but does NOT prevent | ||
327 | * replay of the | ||
328 | */ | 326 | */ |
329 | struct { | 327 | struct { |
330 | __le32 fileset; /* 4: fileset number */ | 328 | __le32 fileset; /* 4: fileset number */ |
@@ -332,7 +330,7 @@ struct lrd { | |||
332 | } noredofile; | 330 | } noredofile; |
333 | 331 | ||
334 | /* | 332 | /* |
335 | * ? NEWPAGE: | 333 | * ? NEWPAGE: |
336 | * | 334 | * |
337 | * metadata type dependent | 335 | * metadata type dependent |
338 | */ | 336 | */ |
@@ -464,7 +462,7 @@ struct lbuf { | |||
464 | s64 l_blkno; /* 8: log page block number */ | 462 | s64 l_blkno; /* 8: log page block number */ |
465 | caddr_t l_ldata; /* 4: data page */ | 463 | caddr_t l_ldata; /* 4: data page */ |
466 | struct page *l_page; /* The page itself */ | 464 | struct page *l_page; /* The page itself */ |
467 | uint l_offset; /* Offset of l_ldata within the page */ | 465 | uint l_offset; /* Offset of l_ldata within the page */ |
468 | 466 | ||
469 | wait_queue_head_t l_ioevent; /* 4: i/o done event */ | 467 | wait_queue_head_t l_ioevent; /* 4: i/o done event */ |
470 | }; | 468 | }; |
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index f5afc129d6b1..0cccd1c39d75 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
@@ -461,7 +461,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) | |||
461 | goto add_failed; | 461 | goto add_failed; |
462 | if (!bio->bi_size) | 462 | if (!bio->bi_size) |
463 | goto dump_bio; | 463 | goto dump_bio; |
464 | 464 | ||
465 | submit_bio(WRITE, bio); | 465 | submit_bio(WRITE, bio); |
466 | } | 466 | } |
467 | if (redirty) | 467 | if (redirty) |
@@ -648,7 +648,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, | |||
648 | jfs_err("logical_size = %d, size = %d", | 648 | jfs_err("logical_size = %d, size = %d", |
649 | mp->logical_size, size); | 649 | mp->logical_size, size); |
650 | dump_stack(); | 650 | dump_stack(); |
651 | goto unlock; | 651 | goto unlock; |
652 | } | 652 | } |
653 | mp->count++; | 653 | mp->count++; |
654 | lock_metapage(mp); | 654 | lock_metapage(mp); |
@@ -658,7 +658,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, | |||
658 | "__get_metapage: using a " | 658 | "__get_metapage: using a " |
659 | "discarded metapage"); | 659 | "discarded metapage"); |
660 | discard_metapage(mp); | 660 | discard_metapage(mp); |
661 | goto unlock; | 661 | goto unlock; |
662 | } | 662 | } |
663 | clear_bit(META_discard, &mp->flag); | 663 | clear_bit(META_discard, &mp->flag); |
664 | } | 664 | } |
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index 01a5a455e012..d94f8d9e87d7 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | #ifndef _H_JFS_METAPAGE | 19 | #ifndef _H_JFS_METAPAGE |
@@ -33,7 +33,7 @@ struct metapage { | |||
33 | unsigned long flag; /* See Below */ | 33 | unsigned long flag; /* See Below */ |
34 | unsigned long count; /* Reference count */ | 34 | unsigned long count; /* Reference count */ |
35 | void *data; /* Data pointer */ | 35 | void *data; /* Data pointer */ |
36 | sector_t index; /* block address of page */ | 36 | sector_t index; /* block address of page */ |
37 | wait_queue_head_t wait; | 37 | wait_queue_head_t wait; |
38 | 38 | ||
39 | /* implementation */ | 39 | /* implementation */ |
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 032d111bc330..4dd479834897 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
@@ -21,18 +21,18 @@ | |||
21 | * | 21 | * |
22 | * note: file system in transition to aggregate/fileset: | 22 | * note: file system in transition to aggregate/fileset: |
23 | * | 23 | * |
24 | * file system mount is interpreted as the mount of aggregate, | 24 | * file system mount is interpreted as the mount of aggregate, |
25 | * if not already mounted, and mount of the single/only fileset in | 25 | * if not already mounted, and mount of the single/only fileset in |
26 | * the aggregate; | 26 | * the aggregate; |
27 | * | 27 | * |
28 | * a file system/aggregate is represented by an internal inode | 28 | * a file system/aggregate is represented by an internal inode |
29 | * (aka mount inode) initialized with aggregate superblock; | 29 | * (aka mount inode) initialized with aggregate superblock; |
30 | * each vfs represents a fileset, and points to its "fileset inode | 30 | * each vfs represents a fileset, and points to its "fileset inode |
31 | * allocation map inode" (aka fileset inode): | 31 | * allocation map inode" (aka fileset inode): |
32 | * (an aggregate itself is structured recursively as a fileset: | 32 | * (an aggregate itself is structured recursively as a fileset: |
33 | * an internal vfs is constructed and points to its "fileset inode | 33 | * an internal vfs is constructed and points to its "fileset inode |
34 | * allocation map inode" (aka aggregate inode) where each inode | 34 | * allocation map inode" (aka aggregate inode) where each inode |
35 | * represents a fileset inode) so that inode number is mapped to | 35 | * represents a fileset inode) so that inode number is mapped to |
36 | * on-disk inode in uniform way at both aggregate and fileset level; | 36 | * on-disk inode in uniform way at both aggregate and fileset level; |
37 | * | 37 | * |
38 | * each vnode/inode of a fileset is linked to its vfs (to facilitate | 38 | * each vnode/inode of a fileset is linked to its vfs (to facilitate |
@@ -41,7 +41,7 @@ | |||
41 | * per aggregate information, e.g., block size, etc.) as well as | 41 | * per aggregate information, e.g., block size, etc.) as well as |
42 | * its file set inode. | 42 | * its file set inode. |
43 | * | 43 | * |
44 | * aggregate | 44 | * aggregate |
45 | * ipmnt | 45 | * ipmnt |
46 | * mntvfs -> fileset ipimap+ -> aggregate ipbmap -> aggregate ipaimap; | 46 | * mntvfs -> fileset ipimap+ -> aggregate ipbmap -> aggregate ipaimap; |
47 | * fileset vfs -> vp(1) <-> ... <-> vp(n) <->vproot; | 47 | * fileset vfs -> vp(1) <-> ... <-> vp(n) <->vproot; |
@@ -88,7 +88,7 @@ int jfs_mount(struct super_block *sb) | |||
88 | struct inode *ipbmap = NULL; | 88 | struct inode *ipbmap = NULL; |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * read/validate superblock | 91 | * read/validate superblock |
92 | * (initialize mount inode from the superblock) | 92 | * (initialize mount inode from the superblock) |
93 | */ | 93 | */ |
94 | if ((rc = chkSuper(sb))) { | 94 | if ((rc = chkSuper(sb))) { |
@@ -238,7 +238,7 @@ int jfs_mount(struct super_block *sb) | |||
238 | */ | 238 | */ |
239 | int jfs_mount_rw(struct super_block *sb, int remount) | 239 | int jfs_mount_rw(struct super_block *sb, int remount) |
240 | { | 240 | { |
241 | struct jfs_sb_info *sbi = JFS_SBI(sb); | 241 | struct jfs_sb_info *sbi = JFS_SBI(sb); |
242 | int rc; | 242 | int rc; |
243 | 243 | ||
244 | /* | 244 | /* |
@@ -291,7 +291,7 @@ int jfs_mount_rw(struct super_block *sb, int remount) | |||
291 | /* | 291 | /* |
292 | * chkSuper() | 292 | * chkSuper() |
293 | * | 293 | * |
294 | * validate the superblock of the file system to be mounted and | 294 | * validate the superblock of the file system to be mounted and |
295 | * get the file system parameters. | 295 | * get the file system parameters. |
296 | * | 296 | * |
297 | * returns | 297 | * returns |
@@ -426,7 +426,7 @@ int updateSuper(struct super_block *sb, uint state) | |||
426 | jfs_err("updateSuper: bad state"); | 426 | jfs_err("updateSuper: bad state"); |
427 | } else if (sbi->state == FM_DIRTY) | 427 | } else if (sbi->state == FM_DIRTY) |
428 | return 0; | 428 | return 0; |
429 | 429 | ||
430 | if ((rc = readSuper(sb, &bh))) | 430 | if ((rc = readSuper(sb, &bh))) |
431 | return rc; | 431 | return rc; |
432 | 432 | ||
@@ -486,9 +486,9 @@ int readSuper(struct super_block *sb, struct buffer_head **bpp) | |||
486 | * for this file system past this point in log. | 486 | * for this file system past this point in log. |
487 | * it is harmless if mount fails. | 487 | * it is harmless if mount fails. |
488 | * | 488 | * |
489 | * note: MOUNT record is at aggregate level, not at fileset level, | 489 | * note: MOUNT record is at aggregate level, not at fileset level, |
490 | * since log records of previous mounts of a fileset | 490 | * since log records of previous mounts of a fileset |
491 | * (e.g., AFTER record of extent allocation) have to be processed | 491 | * (e.g., AFTER record of extent allocation) have to be processed |
492 | * to update block allocation map at aggregate level. | 492 | * to update block allocation map at aggregate level. |
493 | */ | 493 | */ |
494 | static int logMOUNT(struct super_block *sb) | 494 | static int logMOUNT(struct super_block *sb) |
diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index 682cf1a68a18..884fc21ab8ee 100644 --- a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_SUPERBLOCK | 18 | #ifndef _H_JFS_SUPERBLOCK |
@@ -21,14 +21,14 @@ | |||
21 | /* | 21 | /* |
22 | * make the magic number something a human could read | 22 | * make the magic number something a human could read |
23 | */ | 23 | */ |
24 | #define JFS_MAGIC "JFS1" /* Magic word */ | 24 | #define JFS_MAGIC "JFS1" /* Magic word */ |
25 | 25 | ||
26 | #define JFS_VERSION 2 /* Version number: Version 2 */ | 26 | #define JFS_VERSION 2 /* Version number: Version 2 */ |
27 | 27 | ||
28 | #define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */ | 28 | #define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */ |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * aggregate superblock | 31 | * aggregate superblock |
32 | * | 32 | * |
33 | * The name superblock is too close to super_block, so the name has been | 33 | * The name superblock is too close to super_block, so the name has been |
34 | * changed to jfs_superblock. The utilities are still using the old name. | 34 | * changed to jfs_superblock. The utilities are still using the old name. |
@@ -40,7 +40,7 @@ struct jfs_superblock { | |||
40 | __le64 s_size; /* 8: aggregate size in hardware/LVM blocks; | 40 | __le64 s_size; /* 8: aggregate size in hardware/LVM blocks; |
41 | * VFS: number of blocks | 41 | * VFS: number of blocks |
42 | */ | 42 | */ |
43 | __le32 s_bsize; /* 4: aggregate block size in bytes; | 43 | __le32 s_bsize; /* 4: aggregate block size in bytes; |
44 | * VFS: fragment size | 44 | * VFS: fragment size |
45 | */ | 45 | */ |
46 | __le16 s_l2bsize; /* 2: log2 of s_bsize */ | 46 | __le16 s_l2bsize; /* 2: log2 of s_bsize */ |
@@ -54,7 +54,7 @@ struct jfs_superblock { | |||
54 | __le32 s_flag; /* 4: aggregate attributes: | 54 | __le32 s_flag; /* 4: aggregate attributes: |
55 | * see jfs_filsys.h | 55 | * see jfs_filsys.h |
56 | */ | 56 | */ |
57 | __le32 s_state; /* 4: mount/unmount/recovery state: | 57 | __le32 s_state; /* 4: mount/unmount/recovery state: |
58 | * see jfs_filsys.h | 58 | * see jfs_filsys.h |
59 | */ | 59 | */ |
60 | __le32 s_compress; /* 4: > 0 if data compression */ | 60 | __le32 s_compress; /* 4: > 0 if data compression */ |
@@ -75,11 +75,11 @@ struct jfs_superblock { | |||
75 | struct timestruc_t s_time; /* 8: time last updated */ | 75 | struct timestruc_t s_time; /* 8: time last updated */ |
76 | 76 | ||
77 | __le32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for | 77 | __le32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for |
78 | * the fsck service log. | 78 | * the fsck service log. |
79 | * N.B. These blocks are divided among the | 79 | * N.B. These blocks are divided among the |
80 | * versions kept. This is not a per | 80 | * versions kept. This is not a per |
81 | * version size. | 81 | * version size. |
82 | * N.B. These blocks are included in the | 82 | * N.B. These blocks are included in the |
83 | * length field of s_fsckpxd. | 83 | * length field of s_fsckpxd. |
84 | */ | 84 | */ |
85 | s8 s_fscklog; /* 1: which fsck service log is most recent | 85 | s8 s_fscklog; /* 1: which fsck service log is most recent |
@@ -87,7 +87,7 @@ struct jfs_superblock { | |||
87 | * 1 => the first one | 87 | * 1 => the first one |
88 | * 2 => the 2nd one | 88 | * 2 => the 2nd one |
89 | */ | 89 | */ |
90 | char s_fpack[11]; /* 11: file system volume name | 90 | char s_fpack[11]; /* 11: file system volume name |
91 | * N.B. This must be 11 bytes to | 91 | * N.B. This must be 11 bytes to |
92 | * conform with the OS/2 BootSector | 92 | * conform with the OS/2 BootSector |
93 | * requirements | 93 | * requirements |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index ebfa6c061d78..81f6f04af192 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
@@ -2026,8 +2026,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
2026 | * truncate entry XAD[twm == next - 1]: | 2026 | * truncate entry XAD[twm == next - 1]: |
2027 | */ | 2027 | */ |
2028 | if (twm == next - 1) { | 2028 | if (twm == next - 1) { |
2029 | struct pxd_lock *pxdlock; | ||
2030 | |||
2031 | /* format a maplock for txUpdateMap() to update bmap | 2029 | /* format a maplock for txUpdateMap() to update bmap |
2032 | * to free truncated delta extent of the truncated | 2030 | * to free truncated delta extent of the truncated |
2033 | * entry XAD[next - 1]; | 2031 | * entry XAD[next - 1]; |
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h index 0e4dc4514c47..7863cf21afca 100644 --- a/fs/jfs/jfs_txnmgr.h +++ b/fs/jfs/jfs_txnmgr.h | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_TXNMGR | 18 | #ifndef _H_JFS_TXNMGR |
@@ -179,7 +179,7 @@ struct linelock { | |||
179 | /* (8) */ | 179 | /* (8) */ |
180 | 180 | ||
181 | struct lv lv[20]; /* 40: */ | 181 | struct lv lv[20]; /* 40: */ |
182 | }; /* (48) */ | 182 | }; /* (48) */ |
183 | 183 | ||
184 | #define dt_lock linelock | 184 | #define dt_lock linelock |
185 | 185 | ||
@@ -211,8 +211,8 @@ struct xtlock { | |||
211 | * at tlock.lock/linelock: watch for alignment; | 211 | * at tlock.lock/linelock: watch for alignment; |
212 | * N.B. next field may be set by linelock, and should not | 212 | * N.B. next field may be set by linelock, and should not |
213 | * be modified by maplock; | 213 | * be modified by maplock; |
214 | * N.B. index of the first pxdlock specifies index of next | 214 | * N.B. index of the first pxdlock specifies index of next |
215 | * free maplock (i.e., number of maplock) in the tlock; | 215 | * free maplock (i.e., number of maplock) in the tlock; |
216 | */ | 216 | */ |
217 | struct maplock { | 217 | struct maplock { |
218 | lid_t next; /* 2: */ | 218 | lid_t next; /* 2: */ |
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index 21eaf7ac0fcb..a386f48c73fc 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
@@ -22,8 +22,8 @@ | |||
22 | * note: file system in transition to aggregate/fileset: | 22 | * note: file system in transition to aggregate/fileset: |
23 | * (ref. jfs_mount.c) | 23 | * (ref. jfs_mount.c) |
24 | * | 24 | * |
25 | * file system unmount is interpreted as unmount of the single/only | 25 | * file system unmount is interpreted as unmount of the single/only |
26 | * fileset in the aggregate and, if unmount of the last fileset, | 26 | * fileset in the aggregate and, if unmount of the last fileset, |
27 | * as unmount of the aggregate; | 27 | * as unmount of the aggregate; |
28 | */ | 28 | */ |
29 | 29 | ||
@@ -60,13 +60,13 @@ int jfs_umount(struct super_block *sb) | |||
60 | jfs_info("UnMount JFS: sb:0x%p", sb); | 60 | jfs_info("UnMount JFS: sb:0x%p", sb); |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * update superblock and close log | 63 | * update superblock and close log |
64 | * | 64 | * |
65 | * if mounted read-write and log based recovery was enabled | 65 | * if mounted read-write and log based recovery was enabled |
66 | */ | 66 | */ |
67 | if ((log = sbi->log)) | 67 | if ((log = sbi->log)) |
68 | /* | 68 | /* |
69 | * Wait for outstanding transactions to be written to log: | 69 | * Wait for outstanding transactions to be written to log: |
70 | */ | 70 | */ |
71 | jfs_flush_journal(log, 2); | 71 | jfs_flush_journal(log, 2); |
72 | 72 | ||
@@ -112,17 +112,17 @@ int jfs_umount(struct super_block *sb) | |||
112 | 112 | ||
113 | /* | 113 | /* |
114 | * ensure all file system file pages are propagated to their | 114 | * ensure all file system file pages are propagated to their |
115 | * home blocks on disk (and their in-memory buffer pages are | 115 | * home blocks on disk (and their in-memory buffer pages are |
116 | * invalidated) BEFORE updating file system superblock state | 116 | * invalidated) BEFORE updating file system superblock state |
117 | * (to signify file system is unmounted cleanly, and thus in | 117 | * (to signify file system is unmounted cleanly, and thus in |
118 | * consistent state) and log superblock active file system | 118 | * consistent state) and log superblock active file system |
119 | * list (to signify skip logredo()). | 119 | * list (to signify skip logredo()). |
120 | */ | 120 | */ |
121 | if (log) { /* log = NULL if read-only mount */ | 121 | if (log) { /* log = NULL if read-only mount */ |
122 | updateSuper(sb, FM_CLEAN); | 122 | updateSuper(sb, FM_CLEAN); |
123 | 123 | ||
124 | /* | 124 | /* |
125 | * close log: | 125 | * close log: |
126 | * | 126 | * |
127 | * remove file system from log active file system list. | 127 | * remove file system from log active file system list. |
128 | */ | 128 | */ |
@@ -142,7 +142,7 @@ int jfs_umount_rw(struct super_block *sb) | |||
142 | return 0; | 142 | return 0; |
143 | 143 | ||
144 | /* | 144 | /* |
145 | * close log: | 145 | * close log: |
146 | * | 146 | * |
147 | * remove file system from log active file system list. | 147 | * remove file system from log active file system list. |
148 | */ | 148 | */ |
diff --git a/fs/jfs/jfs_unicode.c b/fs/jfs/jfs_unicode.c index f327decfb155..c7de6f5bbefc 100644 --- a/fs/jfs/jfs_unicode.c +++ b/fs/jfs/jfs_unicode.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
@@ -57,8 +57,8 @@ int jfs_strfromUCS_le(char *to, const __le16 * from, | |||
57 | warn--; | 57 | warn--; |
58 | warn_again--; | 58 | warn_again--; |
59 | printk(KERN_ERR | 59 | printk(KERN_ERR |
60 | "non-latin1 character 0x%x found in JFS file name\n", | 60 | "non-latin1 character 0x%x found in JFS file name\n", |
61 | le16_to_cpu(from[i])); | 61 | le16_to_cpu(from[i])); |
62 | printk(KERN_ERR | 62 | printk(KERN_ERR |
63 | "mount with iocharset=utf8 to access\n"); | 63 | "mount with iocharset=utf8 to access\n"); |
64 | } | 64 | } |
@@ -124,7 +124,7 @@ int get_UCSname(struct component_name * uniName, struct dentry *dentry) | |||
124 | kmalloc((length + 1) * sizeof(wchar_t), GFP_NOFS); | 124 | kmalloc((length + 1) * sizeof(wchar_t), GFP_NOFS); |
125 | 125 | ||
126 | if (uniName->name == NULL) | 126 | if (uniName->name == NULL) |
127 | return -ENOSPC; | 127 | return -ENOMEM; |
128 | 128 | ||
129 | uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name, | 129 | uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name, |
130 | length, nls_tab); | 130 | length, nls_tab); |
diff --git a/fs/jfs/jfs_unicode.h b/fs/jfs/jfs_unicode.h index 69e25ebe87ac..3fbb3a225590 100644 --- a/fs/jfs/jfs_unicode.h +++ b/fs/jfs/jfs_unicode.h | |||
@@ -1,19 +1,19 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2000-2002 | 2 | * Copyright (C) International Business Machines Corp., 2000-2002 |
3 | * Portions Copyright (c) Christoph Hellwig, 2001-2002 | 3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | #ifndef _H_JFS_UNICODE | 19 | #ifndef _H_JFS_UNICODE |
diff --git a/fs/jfs/jfs_uniupr.c b/fs/jfs/jfs_uniupr.c index 4ab185d26308..cfe50666d312 100644 --- a/fs/jfs/jfs_uniupr.c +++ b/fs/jfs/jfs_uniupr.c | |||
@@ -1,18 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2000-2002 | 2 | * Copyright (C) International Business Machines Corp., 2000-2002 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h index 25e9990bccd1..88b6cc535bf2 100644 --- a/fs/jfs/jfs_xattr.h +++ b/fs/jfs/jfs_xattr.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2000-2002 | 2 | * Copyright (C) International Business Machines Corp., 2000-2002 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index c92307d3a57e..e98eb03e5310 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | /* | 18 | /* |
@@ -2428,7 +2428,7 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p); | |||
2428 | * return: | 2428 | * return: |
2429 | */ | 2429 | */ |
2430 | int xtAppend(tid_t tid, /* transaction id */ | 2430 | int xtAppend(tid_t tid, /* transaction id */ |
2431 | struct inode *ip, int xflag, s64 xoff, s32 maxblocks, | 2431 | struct inode *ip, int xflag, s64 xoff, s32 maxblocks, |
2432 | s32 * xlenp, /* (in/out) */ | 2432 | s32 * xlenp, /* (in/out) */ |
2433 | s64 * xaddrp, /* (in/out) */ | 2433 | s64 * xaddrp, /* (in/out) */ |
2434 | int flag) | 2434 | int flag) |
@@ -2499,7 +2499,7 @@ int xtAppend(tid_t tid, /* transaction id */ | |||
2499 | pxdlist.maxnpxd = pxdlist.npxd = 0; | 2499 | pxdlist.maxnpxd = pxdlist.npxd = 0; |
2500 | pxd = &pxdlist.pxd[0]; | 2500 | pxd = &pxdlist.pxd[0]; |
2501 | nblocks = JFS_SBI(ip->i_sb)->nbperpage; | 2501 | nblocks = JFS_SBI(ip->i_sb)->nbperpage; |
2502 | for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { | 2502 | for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { |
2503 | if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { | 2503 | if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { |
2504 | PXDaddress(pxd, xaddr); | 2504 | PXDaddress(pxd, xaddr); |
2505 | PXDlength(pxd, nblocks); | 2505 | PXDlength(pxd, nblocks); |
@@ -2514,7 +2514,7 @@ int xtAppend(tid_t tid, /* transaction id */ | |||
2514 | goto out; | 2514 | goto out; |
2515 | } | 2515 | } |
2516 | 2516 | ||
2517 | xlen = min(xlen, maxblocks); | 2517 | xlen = min(xlen, maxblocks); |
2518 | 2518 | ||
2519 | /* | 2519 | /* |
2520 | * allocate data extent requested | 2520 | * allocate data extent requested |
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h index af668a80b40f..164f6f2b1019 100644 --- a/fs/jfs/jfs_xtree.h +++ b/fs/jfs/jfs_xtree.h | |||
@@ -1,18 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) International Business Machines Corp., 2000-2002 | 2 | * Copyright (C) International Business Machines Corp., 2000-2002 |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #ifndef _H_JFS_XTREE | 18 | #ifndef _H_JFS_XTREE |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index b8d16a6aa88f..a6a8c16c872c 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
@@ -41,7 +41,7 @@ static s64 commitZeroLink(tid_t, struct inode *); | |||
41 | /* | 41 | /* |
42 | * NAME: free_ea_wmap(inode) | 42 | * NAME: free_ea_wmap(inode) |
43 | * | 43 | * |
44 | * FUNCTION: free uncommitted extended attributes from working map | 44 | * FUNCTION: free uncommitted extended attributes from working map |
45 | * | 45 | * |
46 | */ | 46 | */ |
47 | static inline void free_ea_wmap(struct inode *inode) | 47 | static inline void free_ea_wmap(struct inode *inode) |
@@ -62,7 +62,7 @@ static inline void free_ea_wmap(struct inode *inode) | |||
62 | * FUNCTION: create a regular file in the parent directory <dip> | 62 | * FUNCTION: create a regular file in the parent directory <dip> |
63 | * with name = <from dentry> and mode = <mode> | 63 | * with name = <from dentry> and mode = <mode> |
64 | * | 64 | * |
65 | * PARAMETER: dip - parent directory vnode | 65 | * PARAMETER: dip - parent directory vnode |
66 | * dentry - dentry of new file | 66 | * dentry - dentry of new file |
67 | * mode - create mode (rwxrwxrwx). | 67 | * mode - create mode (rwxrwxrwx). |
68 | * nd- nd struct | 68 | * nd- nd struct |
@@ -97,8 +97,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, | |||
97 | * begin the transaction before we search the directory. | 97 | * begin the transaction before we search the directory. |
98 | */ | 98 | */ |
99 | ip = ialloc(dip, mode); | 99 | ip = ialloc(dip, mode); |
100 | if (ip == NULL) { | 100 | if (IS_ERR(ip)) { |
101 | rc = -ENOSPC; | 101 | rc = PTR_ERR(ip); |
102 | goto out2; | 102 | goto out2; |
103 | } | 103 | } |
104 | 104 | ||
@@ -190,7 +190,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, | |||
190 | * FUNCTION: create a child directory in the parent directory <dip> | 190 | * FUNCTION: create a child directory in the parent directory <dip> |
191 | * with name = <from dentry> and mode = <mode> | 191 | * with name = <from dentry> and mode = <mode> |
192 | * | 192 | * |
193 | * PARAMETER: dip - parent directory vnode | 193 | * PARAMETER: dip - parent directory vnode |
194 | * dentry - dentry of child directory | 194 | * dentry - dentry of child directory |
195 | * mode - create mode (rwxrwxrwx). | 195 | * mode - create mode (rwxrwxrwx). |
196 | * | 196 | * |
@@ -231,8 +231,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) | |||
231 | * begin the transaction before we search the directory. | 231 | * begin the transaction before we search the directory. |
232 | */ | 232 | */ |
233 | ip = ialloc(dip, S_IFDIR | mode); | 233 | ip = ialloc(dip, S_IFDIR | mode); |
234 | if (ip == NULL) { | 234 | if (IS_ERR(ip)) { |
235 | rc = -ENOSPC; | 235 | rc = PTR_ERR(ip); |
236 | goto out2; | 236 | goto out2; |
237 | } | 237 | } |
238 | 238 | ||
@@ -324,7 +324,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) | |||
324 | * | 324 | * |
325 | * FUNCTION: remove a link to child directory | 325 | * FUNCTION: remove a link to child directory |
326 | * | 326 | * |
327 | * PARAMETER: dip - parent inode | 327 | * PARAMETER: dip - parent inode |
328 | * dentry - child directory dentry | 328 | * dentry - child directory dentry |
329 | * | 329 | * |
330 | * RETURN: -EINVAL - if name is . or .. | 330 | * RETURN: -EINVAL - if name is . or .. |
@@ -332,10 +332,10 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) | |||
332 | * errors from subroutines | 332 | * errors from subroutines |
333 | * | 333 | * |
334 | * note: | 334 | * note: |
335 | * if other threads have the directory open when the last link | 335 | * if other threads have the directory open when the last link |
336 | * is removed, the "." and ".." entries, if present, are removed before | 336 | * is removed, the "." and ".." entries, if present, are removed before |
337 | * rmdir() returns and no new entries may be created in the directory, | 337 | * rmdir() returns and no new entries may be created in the directory, |
338 | * but the directory is not removed until the last reference to | 338 | * but the directory is not removed until the last reference to |
339 | * the directory is released (cf. unlink() of regular file). | 339 | * the directory is released (cf. unlink() of regular file). |
340 | */ | 340 | */ |
341 | static int jfs_rmdir(struct inode *dip, struct dentry *dentry) | 341 | static int jfs_rmdir(struct inode *dip, struct dentry *dentry) |
@@ -446,11 +446,11 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) | |||
446 | /* | 446 | /* |
447 | * NAME: jfs_unlink(dip, dentry) | 447 | * NAME: jfs_unlink(dip, dentry) |
448 | * | 448 | * |
449 | * FUNCTION: remove a link to object <vp> named by <name> | 449 | * FUNCTION: remove a link to object <vp> named by <name> |
450 | * from parent directory <dvp> | 450 | * from parent directory <dvp> |
451 | * | 451 | * |
452 | * PARAMETER: dip - inode of parent directory | 452 | * PARAMETER: dip - inode of parent directory |
453 | * dentry - dentry of object to be removed | 453 | * dentry - dentry of object to be removed |
454 | * | 454 | * |
455 | * RETURN: errors from subroutines | 455 | * RETURN: errors from subroutines |
456 | * | 456 | * |
@@ -598,7 +598,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) | |||
598 | * | 598 | * |
599 | * FUNCTION: for non-directory, called by jfs_remove(), | 599 | * FUNCTION: for non-directory, called by jfs_remove(), |
600 | * truncate a regular file, directory or symbolic | 600 | * truncate a regular file, directory or symbolic |
601 | * link to zero length. return 0 if type is not | 601 | * link to zero length. return 0 if type is not |
602 | * one of these. | 602 | * one of these. |
603 | * | 603 | * |
604 | * if the file is currently associated with a VM segment | 604 | * if the file is currently associated with a VM segment |
@@ -608,7 +608,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) | |||
608 | * map by ctrunc1. | 608 | * map by ctrunc1. |
609 | * if there is no VM segment on entry, the resources are | 609 | * if there is no VM segment on entry, the resources are |
610 | * freed in both work and permanent map. | 610 | * freed in both work and permanent map. |
611 | * (? for temporary file - memory object is cached even | 611 | * (? for temporary file - memory object is cached even |
612 | * after no reference: | 612 | * after no reference: |
613 | * reference count > 0 - ) | 613 | * reference count > 0 - ) |
614 | * | 614 | * |
@@ -662,7 +662,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip) | |||
662 | 662 | ||
663 | /* | 663 | /* |
664 | * free xtree/data (truncate to zero length): | 664 | * free xtree/data (truncate to zero length): |
665 | * free xtree/data pages from cache if COMMIT_PWMAP, | 665 | * free xtree/data pages from cache if COMMIT_PWMAP, |
666 | * free xtree/data blocks from persistent block map, and | 666 | * free xtree/data blocks from persistent block map, and |
667 | * free xtree/data blocks from working block map if COMMIT_PWMAP; | 667 | * free xtree/data blocks from working block map if COMMIT_PWMAP; |
668 | */ | 668 | */ |
@@ -677,7 +677,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip) | |||
677 | * NAME: jfs_free_zero_link() | 677 | * NAME: jfs_free_zero_link() |
678 | * | 678 | * |
679 | * FUNCTION: for non-directory, called by iClose(), | 679 | * FUNCTION: for non-directory, called by iClose(), |
680 | * free resources of a file from cache and WORKING map | 680 | * free resources of a file from cache and WORKING map |
681 | * for a file previously committed with zero link count | 681 | * for a file previously committed with zero link count |
682 | * while associated with a pager object, | 682 | * while associated with a pager object, |
683 | * | 683 | * |
@@ -762,7 +762,7 @@ void jfs_free_zero_link(struct inode *ip) | |||
762 | * FUNCTION: create a link to <vp> by the name = <name> | 762 | * FUNCTION: create a link to <vp> by the name = <name> |
763 | * in the parent directory <dvp> | 763 | * in the parent directory <dvp> |
764 | * | 764 | * |
765 | * PARAMETER: vp - target object | 765 | * PARAMETER: vp - target object |
766 | * dvp - parent directory of new link | 766 | * dvp - parent directory of new link |
767 | * name - name of new link to target object | 767 | * name - name of new link to target object |
768 | * crp - credential | 768 | * crp - credential |
@@ -858,8 +858,8 @@ static int jfs_link(struct dentry *old_dentry, | |||
858 | * in directory <dip> | 858 | * in directory <dip> |
859 | * | 859 | * |
860 | * PARAMETER: dip - parent directory vnode | 860 | * PARAMETER: dip - parent directory vnode |
861 | * dentry - dentry of symbolic link | 861 | * dentry - dentry of symbolic link |
862 | * name - the path name of the existing object | 862 | * name - the path name of the existing object |
863 | * that will be the source of the link | 863 | * that will be the source of the link |
864 | * | 864 | * |
865 | * RETURN: errors from subroutines | 865 | * RETURN: errors from subroutines |
@@ -906,8 +906,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
906 | * (iAlloc() returns new, locked inode) | 906 | * (iAlloc() returns new, locked inode) |
907 | */ | 907 | */ |
908 | ip = ialloc(dip, S_IFLNK | 0777); | 908 | ip = ialloc(dip, S_IFLNK | 0777); |
909 | if (ip == NULL) { | 909 | if (IS_ERR(ip)) { |
910 | rc = -ENOSPC; | 910 | rc = PTR_ERR(ip); |
911 | goto out2; | 911 | goto out2; |
912 | } | 912 | } |
913 | 913 | ||
@@ -926,7 +926,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
926 | tblk->u.ixpxd = JFS_IP(ip)->ixpxd; | 926 | tblk->u.ixpxd = JFS_IP(ip)->ixpxd; |
927 | 927 | ||
928 | /* fix symlink access permission | 928 | /* fix symlink access permission |
929 | * (dir_create() ANDs in the u.u_cmask, | 929 | * (dir_create() ANDs in the u.u_cmask, |
930 | * but symlinks really need to be 777 access) | 930 | * but symlinks really need to be 777 access) |
931 | */ | 931 | */ |
932 | ip->i_mode |= 0777; | 932 | ip->i_mode |= 0777; |
@@ -967,7 +967,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
967 | ip->i_mapping->a_ops = &jfs_aops; | 967 | ip->i_mapping->a_ops = &jfs_aops; |
968 | 968 | ||
969 | /* | 969 | /* |
970 | * even though the data of symlink object (source | 970 | * even though the data of symlink object (source |
971 | * path name) is treated as non-journaled user data, | 971 | * path name) is treated as non-journaled user data, |
972 | * it is read/written thru buffer cache for performance. | 972 | * it is read/written thru buffer cache for performance. |
973 | */ | 973 | */ |
@@ -978,7 +978,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
978 | xlen = xsize >> JFS_SBI(sb)->l2bsize; | 978 | xlen = xsize >> JFS_SBI(sb)->l2bsize; |
979 | if ((rc = xtInsert(tid, ip, 0, 0, xlen, &xaddr, 0))) { | 979 | if ((rc = xtInsert(tid, ip, 0, 0, xlen, &xaddr, 0))) { |
980 | txAbort(tid, 0); | 980 | txAbort(tid, 0); |
981 | rc = -ENOSPC; | ||
982 | goto out3; | 981 | goto out3; |
983 | } | 982 | } |
984 | extent = xaddr; | 983 | extent = xaddr; |
@@ -1176,7 +1175,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1176 | /* free block resources */ | 1175 | /* free block resources */ |
1177 | if ((new_size = commitZeroLink(tid, new_ip)) < 0) { | 1176 | if ((new_size = commitZeroLink(tid, new_ip)) < 0) { |
1178 | txAbort(tid, 1); /* Marks FS Dirty */ | 1177 | txAbort(tid, 1); /* Marks FS Dirty */ |
1179 | rc = new_size; | 1178 | rc = new_size; |
1180 | goto out4; | 1179 | goto out4; |
1181 | } | 1180 | } |
1182 | tblk = tid_to_tblock(tid); | 1181 | tblk = tid_to_tblock(tid); |
@@ -1292,7 +1291,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1292 | new_size = xtTruncate_pmap(tid, new_ip, new_size); | 1291 | new_size = xtTruncate_pmap(tid, new_ip, new_size); |
1293 | if (new_size < 0) { | 1292 | if (new_size < 0) { |
1294 | txAbort(tid, 1); | 1293 | txAbort(tid, 1); |
1295 | rc = new_size; | 1294 | rc = new_size; |
1296 | } else | 1295 | } else |
1297 | rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC); | 1296 | rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC); |
1298 | txEnd(tid); | 1297 | txEnd(tid); |
@@ -1350,8 +1349,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, | |||
1350 | goto out; | 1349 | goto out; |
1351 | 1350 | ||
1352 | ip = ialloc(dir, mode); | 1351 | ip = ialloc(dir, mode); |
1353 | if (ip == NULL) { | 1352 | if (IS_ERR(ip)) { |
1354 | rc = -ENOSPC; | 1353 | rc = PTR_ERR(ip); |
1355 | goto out1; | 1354 | goto out1; |
1356 | } | 1355 | } |
1357 | jfs_ip = JFS_IP(ip); | 1356 | jfs_ip = JFS_IP(ip); |
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 45180361871c..79d625f3f733 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 143bcd1d5eaa..9c1c6e0e633d 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
@@ -82,7 +82,7 @@ static void jfs_handle_error(struct super_block *sb) | |||
82 | "as read-only\n", | 82 | "as read-only\n", |
83 | sb->s_id); | 83 | sb->s_id); |
84 | sb->s_flags |= MS_RDONLY; | 84 | sb->s_flags |= MS_RDONLY; |
85 | } | 85 | } |
86 | 86 | ||
87 | /* nothing is done for continue beyond marking the superblock dirty */ | 87 | /* nothing is done for continue beyond marking the superblock dirty */ |
88 | } | 88 | } |
@@ -422,7 +422,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
422 | 422 | ||
423 | sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); | 423 | sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); |
424 | if (!sbi) | 424 | if (!sbi) |
425 | return -ENOSPC; | 425 | return -ENOMEM; |
426 | sb->s_fs_info = sbi; | 426 | sb->s_fs_info = sbi; |
427 | sbi->sb = sb; | 427 | sbi->sb = sb; |
428 | sbi->uid = sbi->gid = sbi->umask = -1; | 428 | sbi->uid = sbi->gid = sbi->umask = -1; |
@@ -775,7 +775,7 @@ static int __init init_jfs_fs(void) | |||
775 | int rc; | 775 | int rc; |
776 | 776 | ||
777 | jfs_inode_cachep = | 777 | jfs_inode_cachep = |
778 | kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, | 778 | kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, |
779 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, | 779 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, |
780 | init_once, NULL); | 780 | init_once, NULL); |
781 | if (jfs_inode_cachep == NULL) | 781 | if (jfs_inode_cachep == NULL) |
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c index 16477b3835e1..cee43f36f51d 100644 --- a/fs/jfs/symlink.c +++ b/fs/jfs/symlink.c | |||
@@ -3,16 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
12 | * the GNU General Public License for more details. | 12 | * the GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 7a10e1928961..4c7985ebca92 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c | |||
@@ -4,16 +4,16 @@ | |||
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
13 | * the GNU General Public License for more details. | 13 | * the GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
@@ -57,7 +57,7 @@ | |||
57 | * | 57 | * |
58 | * 0 4 4 + EA_SIZE(ea1) | 58 | * 0 4 4 + EA_SIZE(ea1) |
59 | * +------------+-------------------+--------------------+----- | 59 | * +------------+-------------------+--------------------+----- |
60 | * | Overall EA | First FEA Element | Second FEA Element | ..... | 60 | * | Overall EA | First FEA Element | Second FEA Element | ..... |
61 | * | List Size | | | | 61 | * | List Size | | | |
62 | * +------------+-------------------+--------------------+----- | 62 | * +------------+-------------------+--------------------+----- |
63 | * | 63 | * |
@@ -155,9 +155,9 @@ static void ea_release(struct inode *inode, struct ea_buffer *ea_buf); | |||
155 | 155 | ||
156 | /* | 156 | /* |
157 | * NAME: ea_write_inline | 157 | * NAME: ea_write_inline |
158 | * | 158 | * |
159 | * FUNCTION: Attempt to write an EA inline if area is available | 159 | * FUNCTION: Attempt to write an EA inline if area is available |
160 | * | 160 | * |
161 | * PRE CONDITIONS: | 161 | * PRE CONDITIONS: |
162 | * Already verified that the specified EA is small enough to fit inline | 162 | * Already verified that the specified EA is small enough to fit inline |
163 | * | 163 | * |
@@ -216,10 +216,10 @@ static int ea_write_inline(struct inode *ip, struct jfs_ea_list *ealist, | |||
216 | 216 | ||
217 | /* | 217 | /* |
218 | * NAME: ea_write | 218 | * NAME: ea_write |
219 | * | 219 | * |
220 | * FUNCTION: Write an EA for an inode | 220 | * FUNCTION: Write an EA for an inode |
221 | * | 221 | * |
222 | * PRE CONDITIONS: EA has been verified | 222 | * PRE CONDITIONS: EA has been verified |
223 | * | 223 | * |
224 | * PARAMETERS: | 224 | * PARAMETERS: |
225 | * ip - Inode pointer | 225 | * ip - Inode pointer |
@@ -340,9 +340,9 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size, | |||
340 | 340 | ||
341 | /* | 341 | /* |
342 | * NAME: ea_read_inline | 342 | * NAME: ea_read_inline |
343 | * | 343 | * |
344 | * FUNCTION: Read an inlined EA into user's buffer | 344 | * FUNCTION: Read an inlined EA into user's buffer |
345 | * | 345 | * |
346 | * PARAMETERS: | 346 | * PARAMETERS: |
347 | * ip - Inode pointer | 347 | * ip - Inode pointer |
348 | * ealist - Pointer to buffer to fill in with EA | 348 | * ealist - Pointer to buffer to fill in with EA |
@@ -372,9 +372,9 @@ static int ea_read_inline(struct inode *ip, struct jfs_ea_list *ealist) | |||
372 | 372 | ||
373 | /* | 373 | /* |
374 | * NAME: ea_read | 374 | * NAME: ea_read |
375 | * | 375 | * |
376 | * FUNCTION: copy EA data into user's buffer | 376 | * FUNCTION: copy EA data into user's buffer |
377 | * | 377 | * |
378 | * PARAMETERS: | 378 | * PARAMETERS: |
379 | * ip - Inode pointer | 379 | * ip - Inode pointer |
380 | * ealist - Pointer to buffer to fill in with EA | 380 | * ealist - Pointer to buffer to fill in with EA |
@@ -406,7 +406,7 @@ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist) | |||
406 | return -EIO; | 406 | return -EIO; |
407 | } | 407 | } |
408 | 408 | ||
409 | /* | 409 | /* |
410 | * Figure out how many blocks were allocated when this EA list was | 410 | * Figure out how many blocks were allocated when this EA list was |
411 | * originally written to disk. | 411 | * originally written to disk. |
412 | */ | 412 | */ |
@@ -443,14 +443,14 @@ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist) | |||
443 | 443 | ||
444 | /* | 444 | /* |
445 | * NAME: ea_get | 445 | * NAME: ea_get |
446 | * | 446 | * |
447 | * FUNCTION: Returns buffer containing existing extended attributes. | 447 | * FUNCTION: Returns buffer containing existing extended attributes. |
448 | * The size of the buffer will be the larger of the existing | 448 | * The size of the buffer will be the larger of the existing |
449 | * attributes size, or min_size. | 449 | * attributes size, or min_size. |
450 | * | 450 | * |
451 | * The buffer, which may be inlined in the inode or in the | 451 | * The buffer, which may be inlined in the inode or in the |
452 | * page cache must be released by calling ea_release or ea_put | 452 | * page cache must be released by calling ea_release or ea_put |
453 | * | 453 | * |
454 | * PARAMETERS: | 454 | * PARAMETERS: |
455 | * inode - Inode pointer | 455 | * inode - Inode pointer |
456 | * ea_buf - Structure to be populated with ealist and its metadata | 456 | * ea_buf - Structure to be populated with ealist and its metadata |
@@ -1054,7 +1054,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) | |||
1054 | 1054 | ||
1055 | /* compute required size of list */ | 1055 | /* compute required size of list */ |
1056 | for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { | 1056 | for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { |
1057 | if (can_list(ea)) | 1057 | if (can_list(ea)) |
1058 | size += name_size(ea) + 1; | 1058 | size += name_size(ea) + 1; |
1059 | } | 1059 | } |
1060 | 1060 | ||
@@ -1069,7 +1069,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) | |||
1069 | /* Copy attribute names to buffer */ | 1069 | /* Copy attribute names to buffer */ |
1070 | buffer = data; | 1070 | buffer = data; |
1071 | for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { | 1071 | for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { |
1072 | if (can_list(ea)) { | 1072 | if (can_list(ea)) { |
1073 | int namelen = copy_name(buffer, ea); | 1073 | int namelen = copy_name(buffer, ea); |
1074 | buffer += namelen + 1; | 1074 | buffer += namelen + 1; |
1075 | } | 1075 | } |
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index f95cc3f3c42d..e8c7765419e8 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c | |||
@@ -144,42 +144,12 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) | |||
144 | */ | 144 | */ |
145 | 145 | ||
146 | /* | 146 | /* |
147 | * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number, | ||
148 | * that we mark locks for reclaiming, and that we bump the pseudo NSM state. | ||
149 | */ | ||
150 | static void nlmclnt_prepare_reclaim(struct nlm_host *host) | ||
151 | { | ||
152 | down_write(&host->h_rwsem); | ||
153 | host->h_monitored = 0; | ||
154 | host->h_state++; | ||
155 | host->h_nextrebind = 0; | ||
156 | nlm_rebind_host(host); | ||
157 | |||
158 | /* | ||
159 | * Mark the locks for reclaiming. | ||
160 | */ | ||
161 | list_splice_init(&host->h_granted, &host->h_reclaim); | ||
162 | |||
163 | dprintk("NLM: reclaiming locks for host %s\n", host->h_name); | ||
164 | } | ||
165 | |||
166 | static void nlmclnt_finish_reclaim(struct nlm_host *host) | ||
167 | { | ||
168 | host->h_reclaiming = 0; | ||
169 | up_write(&host->h_rwsem); | ||
170 | dprintk("NLM: done reclaiming locks for host %s", host->h_name); | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * Reclaim all locks on server host. We do this by spawning a separate | 147 | * Reclaim all locks on server host. We do this by spawning a separate |
175 | * reclaimer thread. | 148 | * reclaimer thread. |
176 | */ | 149 | */ |
177 | void | 150 | void |
178 | nlmclnt_recovery(struct nlm_host *host, u32 newstate) | 151 | nlmclnt_recovery(struct nlm_host *host) |
179 | { | 152 | { |
180 | if (host->h_nsmstate == newstate) | ||
181 | return; | ||
182 | host->h_nsmstate = newstate; | ||
183 | if (!host->h_reclaiming++) { | 153 | if (!host->h_reclaiming++) { |
184 | nlm_get_host(host); | 154 | nlm_get_host(host); |
185 | __module_get(THIS_MODULE); | 155 | __module_get(THIS_MODULE); |
@@ -199,18 +169,30 @@ reclaimer(void *ptr) | |||
199 | daemonize("%s-reclaim", host->h_name); | 169 | daemonize("%s-reclaim", host->h_name); |
200 | allow_signal(SIGKILL); | 170 | allow_signal(SIGKILL); |
201 | 171 | ||
172 | down_write(&host->h_rwsem); | ||
173 | |||
202 | /* This one ensures that our parent doesn't terminate while the | 174 | /* This one ensures that our parent doesn't terminate while the |
203 | * reclaim is in progress */ | 175 | * reclaim is in progress */ |
204 | lock_kernel(); | 176 | lock_kernel(); |
205 | lockd_up(); | 177 | lockd_up(0); /* note: this cannot fail as lockd is already running */ |
178 | |||
179 | dprintk("lockd: reclaiming locks for host %s", host->h_name); | ||
206 | 180 | ||
207 | nlmclnt_prepare_reclaim(host); | ||
208 | /* First, reclaim all locks that have been marked. */ | ||
209 | restart: | 181 | restart: |
210 | nsmstate = host->h_nsmstate; | 182 | nsmstate = host->h_nsmstate; |
183 | |||
184 | /* Force a portmap getport - the peer's lockd will | ||
185 | * most likely end up on a different port. | ||
186 | */ | ||
187 | host->h_nextrebind = jiffies; | ||
188 | nlm_rebind_host(host); | ||
189 | |||
190 | /* First, reclaim all locks that have been granted. */ | ||
191 | list_splice_init(&host->h_granted, &host->h_reclaim); | ||
211 | list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { | 192 | list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { |
212 | list_del_init(&fl->fl_u.nfs_fl.list); | 193 | list_del_init(&fl->fl_u.nfs_fl.list); |
213 | 194 | ||
195 | /* Why are we leaking memory here? --okir */ | ||
214 | if (signalled()) | 196 | if (signalled()) |
215 | continue; | 197 | continue; |
216 | if (nlmclnt_reclaim(host, fl) != 0) | 198 | if (nlmclnt_reclaim(host, fl) != 0) |
@@ -218,11 +200,13 @@ restart: | |||
218 | list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); | 200 | list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); |
219 | if (host->h_nsmstate != nsmstate) { | 201 | if (host->h_nsmstate != nsmstate) { |
220 | /* Argh! The server rebooted again! */ | 202 | /* Argh! The server rebooted again! */ |
221 | list_splice_init(&host->h_granted, &host->h_reclaim); | ||
222 | goto restart; | 203 | goto restart; |
223 | } | 204 | } |
224 | } | 205 | } |
225 | nlmclnt_finish_reclaim(host); | 206 | |
207 | host->h_reclaiming = 0; | ||
208 | up_write(&host->h_rwsem); | ||
209 | dprintk("NLM: done reclaiming locks for host %s", host->h_name); | ||
226 | 210 | ||
227 | /* Now, wake up all processes that sleep on a blocked lock */ | 211 | /* Now, wake up all processes that sleep on a blocked lock */ |
228 | list_for_each_entry(block, &nlm_blocked, b_list) { | 212 | list_for_each_entry(block, &nlm_blocked, b_list) { |
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 271e2165fff6..3d84f600b633 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -36,14 +36,14 @@ static const struct rpc_call_ops nlmclnt_cancel_ops; | |||
36 | /* | 36 | /* |
37 | * Cookie counter for NLM requests | 37 | * Cookie counter for NLM requests |
38 | */ | 38 | */ |
39 | static u32 nlm_cookie = 0x1234; | 39 | static atomic_t nlm_cookie = ATOMIC_INIT(0x1234); |
40 | 40 | ||
41 | static inline void nlmclnt_next_cookie(struct nlm_cookie *c) | 41 | void nlmclnt_next_cookie(struct nlm_cookie *c) |
42 | { | 42 | { |
43 | memcpy(c->data, &nlm_cookie, 4); | 43 | u32 cookie = atomic_inc_return(&nlm_cookie); |
44 | memset(c->data+4, 0, 4); | 44 | |
45 | memcpy(c->data, &cookie, 4); | ||
45 | c->len=4; | 46 | c->len=4; |
46 | nlm_cookie++; | ||
47 | } | 47 | } |
48 | 48 | ||
49 | static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner) | 49 | static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner) |
@@ -129,11 +129,11 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) | |||
129 | nlmclnt_next_cookie(&argp->cookie); | 129 | nlmclnt_next_cookie(&argp->cookie); |
130 | argp->state = nsm_local_state; | 130 | argp->state = nsm_local_state; |
131 | memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh)); | 131 | memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh)); |
132 | lock->caller = system_utsname.nodename; | 132 | lock->caller = utsname()->nodename; |
133 | lock->oh.data = req->a_owner; | 133 | lock->oh.data = req->a_owner; |
134 | lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", | 134 | lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", |
135 | (unsigned int)fl->fl_u.nfs_fl.owner->pid, | 135 | (unsigned int)fl->fl_u.nfs_fl.owner->pid, |
136 | system_utsname.nodename); | 136 | utsname()->nodename); |
137 | lock->svid = fl->fl_u.nfs_fl.owner->pid; | 137 | lock->svid = fl->fl_u.nfs_fl.owner->pid; |
138 | lock->fl.fl_start = fl->fl_start; | 138 | lock->fl.fl_start = fl->fl_start; |
139 | lock->fl.fl_end = fl->fl_end; | 139 | lock->fl.fl_end = fl->fl_end; |
@@ -153,6 +153,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) | |||
153 | { | 153 | { |
154 | struct rpc_clnt *client = NFS_CLIENT(inode); | 154 | struct rpc_clnt *client = NFS_CLIENT(inode); |
155 | struct sockaddr_in addr; | 155 | struct sockaddr_in addr; |
156 | struct nfs_server *nfssrv = NFS_SERVER(inode); | ||
156 | struct nlm_host *host; | 157 | struct nlm_host *host; |
157 | struct nlm_rqst *call; | 158 | struct nlm_rqst *call; |
158 | sigset_t oldset; | 159 | sigset_t oldset; |
@@ -166,7 +167,9 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) | |||
166 | } | 167 | } |
167 | 168 | ||
168 | rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr)); | 169 | rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr)); |
169 | host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers); | 170 | host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers, |
171 | nfssrv->nfs_client->cl_hostname, | ||
172 | strlen(nfssrv->nfs_client->cl_hostname)); | ||
170 | if (host == NULL) | 173 | if (host == NULL) |
171 | return -ENOLCK; | 174 | return -ENOLCK; |
172 | 175 | ||
@@ -499,7 +502,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) | |||
499 | unsigned char fl_flags = fl->fl_flags; | 502 | unsigned char fl_flags = fl->fl_flags; |
500 | int status = -ENOLCK; | 503 | int status = -ENOLCK; |
501 | 504 | ||
502 | if (!host->h_monitored && nsm_monitor(host) < 0) { | 505 | if (nsm_monitor(host) < 0) { |
503 | printk(KERN_NOTICE "lockd: failed to monitor %s\n", | 506 | printk(KERN_NOTICE "lockd: failed to monitor %s\n", |
504 | host->h_name); | 507 | host->h_name); |
505 | goto out; | 508 | goto out; |
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index a0d0b58ce7a4..fb24a9730345 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -27,46 +27,60 @@ | |||
27 | #define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) | 27 | #define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) |
28 | #define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) | 28 | #define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) |
29 | 29 | ||
30 | static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH]; | 30 | static struct hlist_head nlm_hosts[NLM_HOST_NRHASH]; |
31 | static unsigned long next_gc; | 31 | static unsigned long next_gc; |
32 | static int nrhosts; | 32 | static int nrhosts; |
33 | static DEFINE_MUTEX(nlm_host_mutex); | 33 | static DEFINE_MUTEX(nlm_host_mutex); |
34 | 34 | ||
35 | 35 | ||
36 | static void nlm_gc_hosts(void); | 36 | static void nlm_gc_hosts(void); |
37 | static struct nsm_handle * __nsm_find(const struct sockaddr_in *, | ||
38 | const char *, int, int); | ||
37 | 39 | ||
38 | /* | 40 | /* |
39 | * Find an NLM server handle in the cache. If there is none, create it. | 41 | * Find an NLM server handle in the cache. If there is none, create it. |
40 | */ | 42 | */ |
41 | struct nlm_host * | 43 | struct nlm_host * |
42 | nlmclnt_lookup_host(struct sockaddr_in *sin, int proto, int version) | 44 | nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, |
45 | const char *hostname, int hostname_len) | ||
43 | { | 46 | { |
44 | return nlm_lookup_host(0, sin, proto, version); | 47 | return nlm_lookup_host(0, sin, proto, version, |
48 | hostname, hostname_len); | ||
45 | } | 49 | } |
46 | 50 | ||
47 | /* | 51 | /* |
48 | * Find an NLM client handle in the cache. If there is none, create it. | 52 | * Find an NLM client handle in the cache. If there is none, create it. |
49 | */ | 53 | */ |
50 | struct nlm_host * | 54 | struct nlm_host * |
51 | nlmsvc_lookup_host(struct svc_rqst *rqstp) | 55 | nlmsvc_lookup_host(struct svc_rqst *rqstp, |
56 | const char *hostname, int hostname_len) | ||
52 | { | 57 | { |
53 | return nlm_lookup_host(1, &rqstp->rq_addr, | 58 | return nlm_lookup_host(1, &rqstp->rq_addr, |
54 | rqstp->rq_prot, rqstp->rq_vers); | 59 | rqstp->rq_prot, rqstp->rq_vers, |
60 | hostname, hostname_len); | ||
55 | } | 61 | } |
56 | 62 | ||
57 | /* | 63 | /* |
58 | * Common host lookup routine for server & client | 64 | * Common host lookup routine for server & client |
59 | */ | 65 | */ |
60 | struct nlm_host * | 66 | struct nlm_host * |
61 | nlm_lookup_host(int server, struct sockaddr_in *sin, | 67 | nlm_lookup_host(int server, const struct sockaddr_in *sin, |
62 | int proto, int version) | 68 | int proto, int version, |
69 | const char *hostname, | ||
70 | int hostname_len) | ||
63 | { | 71 | { |
64 | struct nlm_host *host, **hp; | 72 | struct hlist_head *chain; |
65 | u32 addr; | 73 | struct hlist_node *pos; |
74 | struct nlm_host *host; | ||
75 | struct nsm_handle *nsm = NULL; | ||
66 | int hash; | 76 | int hash; |
67 | 77 | ||
68 | dprintk("lockd: nlm_lookup_host(%08x, p=%d, v=%d)\n", | 78 | dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n", |
69 | (unsigned)(sin? ntohl(sin->sin_addr.s_addr) : 0), proto, version); | 79 | NIPQUAD(sin->sin_addr.s_addr), proto, version, |
80 | server? "server" : "client", | ||
81 | hostname_len, | ||
82 | hostname? hostname : "<none>"); | ||
83 | |||
70 | 84 | ||
71 | hash = NLM_ADDRHASH(sin->sin_addr.s_addr); | 85 | hash = NLM_ADDRHASH(sin->sin_addr.s_addr); |
72 | 86 | ||
@@ -76,7 +90,22 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, | |||
76 | if (time_after_eq(jiffies, next_gc)) | 90 | if (time_after_eq(jiffies, next_gc)) |
77 | nlm_gc_hosts(); | 91 | nlm_gc_hosts(); |
78 | 92 | ||
79 | for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) { | 93 | /* We may keep several nlm_host objects for a peer, because each |
94 | * nlm_host is identified by | ||
95 | * (address, protocol, version, server/client) | ||
96 | * We could probably simplify this a little by putting all those | ||
97 | * different NLM rpc_clients into one single nlm_host object. | ||
98 | * This would allow us to have one nlm_host per address. | ||
99 | */ | ||
100 | chain = &nlm_hosts[hash]; | ||
101 | hlist_for_each_entry(host, pos, chain, h_hash) { | ||
102 | if (!nlm_cmp_addr(&host->h_addr, sin)) | ||
103 | continue; | ||
104 | |||
105 | /* See if we have an NSM handle for this client */ | ||
106 | if (!nsm) | ||
107 | nsm = host->h_nsmhandle; | ||
108 | |||
80 | if (host->h_proto != proto) | 109 | if (host->h_proto != proto) |
81 | continue; | 110 | continue; |
82 | if (host->h_version != version) | 111 | if (host->h_version != version) |
@@ -84,28 +113,30 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, | |||
84 | if (host->h_server != server) | 113 | if (host->h_server != server) |
85 | continue; | 114 | continue; |
86 | 115 | ||
87 | if (nlm_cmp_addr(&host->h_addr, sin)) { | 116 | /* Move to head of hash chain. */ |
88 | if (hp != nlm_hosts + hash) { | 117 | hlist_del(&host->h_hash); |
89 | *hp = host->h_next; | 118 | hlist_add_head(&host->h_hash, chain); |
90 | host->h_next = nlm_hosts[hash]; | ||
91 | nlm_hosts[hash] = host; | ||
92 | } | ||
93 | nlm_get_host(host); | ||
94 | mutex_unlock(&nlm_host_mutex); | ||
95 | return host; | ||
96 | } | ||
97 | } | ||
98 | 119 | ||
99 | /* Ooops, no host found, create it */ | 120 | nlm_get_host(host); |
100 | dprintk("lockd: creating host entry\n"); | 121 | goto out; |
122 | } | ||
123 | if (nsm) | ||
124 | atomic_inc(&nsm->sm_count); | ||
101 | 125 | ||
102 | host = kzalloc(sizeof(*host), GFP_KERNEL); | 126 | host = NULL; |
103 | if (!host) | ||
104 | goto nohost; | ||
105 | 127 | ||
106 | addr = sin->sin_addr.s_addr; | 128 | /* Sadly, the host isn't in our hash table yet. See if |
107 | sprintf(host->h_name, "%u.%u.%u.%u", NIPQUAD(addr)); | 129 | * we have an NSM handle for it. If not, create one. |
130 | */ | ||
131 | if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len))) | ||
132 | goto out; | ||
108 | 133 | ||
134 | host = kzalloc(sizeof(*host), GFP_KERNEL); | ||
135 | if (!host) { | ||
136 | nsm_release(nsm); | ||
137 | goto out; | ||
138 | } | ||
139 | host->h_name = nsm->sm_name; | ||
109 | host->h_addr = *sin; | 140 | host->h_addr = *sin; |
110 | host->h_addr.sin_port = 0; /* ouch! */ | 141 | host->h_addr.sin_port = 0; /* ouch! */ |
111 | host->h_version = version; | 142 | host->h_version = version; |
@@ -119,9 +150,9 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, | |||
119 | init_rwsem(&host->h_rwsem); | 150 | init_rwsem(&host->h_rwsem); |
120 | host->h_state = 0; /* pseudo NSM state */ | 151 | host->h_state = 0; /* pseudo NSM state */ |
121 | host->h_nsmstate = 0; /* real NSM state */ | 152 | host->h_nsmstate = 0; /* real NSM state */ |
153 | host->h_nsmhandle = nsm; | ||
122 | host->h_server = server; | 154 | host->h_server = server; |
123 | host->h_next = nlm_hosts[hash]; | 155 | hlist_add_head(&host->h_hash, chain); |
124 | nlm_hosts[hash] = host; | ||
125 | INIT_LIST_HEAD(&host->h_lockowners); | 156 | INIT_LIST_HEAD(&host->h_lockowners); |
126 | spin_lock_init(&host->h_lock); | 157 | spin_lock_init(&host->h_lock); |
127 | INIT_LIST_HEAD(&host->h_granted); | 158 | INIT_LIST_HEAD(&host->h_granted); |
@@ -130,35 +161,39 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, | |||
130 | if (++nrhosts > NLM_HOST_MAX) | 161 | if (++nrhosts > NLM_HOST_MAX) |
131 | next_gc = 0; | 162 | next_gc = 0; |
132 | 163 | ||
133 | nohost: | 164 | out: |
134 | mutex_unlock(&nlm_host_mutex); | 165 | mutex_unlock(&nlm_host_mutex); |
135 | return host; | 166 | return host; |
136 | } | 167 | } |
137 | 168 | ||
138 | struct nlm_host * | 169 | /* |
139 | nlm_find_client(void) | 170 | * Destroy a host |
171 | */ | ||
172 | static void | ||
173 | nlm_destroy_host(struct nlm_host *host) | ||
140 | { | 174 | { |
141 | /* find a nlm_host for a client for which h_killed == 0. | 175 | struct rpc_clnt *clnt; |
142 | * and return it | 176 | |
177 | BUG_ON(!list_empty(&host->h_lockowners)); | ||
178 | BUG_ON(atomic_read(&host->h_count)); | ||
179 | |||
180 | /* | ||
181 | * Release NSM handle and unmonitor host. | ||
143 | */ | 182 | */ |
144 | int hash; | 183 | nsm_unmonitor(host); |
145 | mutex_lock(&nlm_host_mutex); | 184 | |
146 | for (hash = 0 ; hash < NLM_HOST_NRHASH; hash++) { | 185 | if ((clnt = host->h_rpcclnt) != NULL) { |
147 | struct nlm_host *host, **hp; | 186 | if (atomic_read(&clnt->cl_users)) { |
148 | for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) { | 187 | printk(KERN_WARNING |
149 | if (host->h_server && | 188 | "lockd: active RPC handle\n"); |
150 | host->h_killed == 0) { | 189 | clnt->cl_dead = 1; |
151 | nlm_get_host(host); | 190 | } else { |
152 | mutex_unlock(&nlm_host_mutex); | 191 | rpc_destroy_client(host->h_rpcclnt); |
153 | return host; | ||
154 | } | ||
155 | } | 192 | } |
156 | } | 193 | } |
157 | mutex_unlock(&nlm_host_mutex); | 194 | kfree(host); |
158 | return NULL; | ||
159 | } | 195 | } |
160 | 196 | ||
161 | |||
162 | /* | 197 | /* |
163 | * Create the NLM RPC client for an NLM peer | 198 | * Create the NLM RPC client for an NLM peer |
164 | */ | 199 | */ |
@@ -260,22 +295,82 @@ void nlm_release_host(struct nlm_host *host) | |||
260 | } | 295 | } |
261 | 296 | ||
262 | /* | 297 | /* |
298 | * We were notified that the host indicated by address &sin | ||
299 | * has rebooted. | ||
300 | * Release all resources held by that peer. | ||
301 | */ | ||
302 | void nlm_host_rebooted(const struct sockaddr_in *sin, | ||
303 | const char *hostname, int hostname_len, | ||
304 | u32 new_state) | ||
305 | { | ||
306 | struct hlist_head *chain; | ||
307 | struct hlist_node *pos; | ||
308 | struct nsm_handle *nsm; | ||
309 | struct nlm_host *host; | ||
310 | |||
311 | dprintk("lockd: nlm_host_rebooted(%s, %u.%u.%u.%u)\n", | ||
312 | hostname, NIPQUAD(sin->sin_addr)); | ||
313 | |||
314 | /* Find the NSM handle for this peer */ | ||
315 | if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0))) | ||
316 | return; | ||
317 | |||
318 | /* When reclaiming locks on this peer, make sure that | ||
319 | * we set up a new notification */ | ||
320 | nsm->sm_monitored = 0; | ||
321 | |||
322 | /* Mark all hosts tied to this NSM state as having rebooted. | ||
323 | * We run the loop repeatedly, because we drop the host table | ||
324 | * lock for this. | ||
325 | * To avoid processing a host several times, we match the nsmstate. | ||
326 | */ | ||
327 | again: mutex_lock(&nlm_host_mutex); | ||
328 | for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { | ||
329 | hlist_for_each_entry(host, pos, chain, h_hash) { | ||
330 | if (host->h_nsmhandle == nsm | ||
331 | && host->h_nsmstate != new_state) { | ||
332 | host->h_nsmstate = new_state; | ||
333 | host->h_state++; | ||
334 | |||
335 | nlm_get_host(host); | ||
336 | mutex_unlock(&nlm_host_mutex); | ||
337 | |||
338 | if (host->h_server) { | ||
339 | /* We're server for this guy, just ditch | ||
340 | * all the locks he held. */ | ||
341 | nlmsvc_free_host_resources(host); | ||
342 | } else { | ||
343 | /* He's the server, initiate lock recovery. */ | ||
344 | nlmclnt_recovery(host); | ||
345 | } | ||
346 | |||
347 | nlm_release_host(host); | ||
348 | goto again; | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | |||
353 | mutex_unlock(&nlm_host_mutex); | ||
354 | } | ||
355 | |||
356 | /* | ||
263 | * Shut down the hosts module. | 357 | * Shut down the hosts module. |
264 | * Note that this routine is called only at server shutdown time. | 358 | * Note that this routine is called only at server shutdown time. |
265 | */ | 359 | */ |
266 | void | 360 | void |
267 | nlm_shutdown_hosts(void) | 361 | nlm_shutdown_hosts(void) |
268 | { | 362 | { |
363 | struct hlist_head *chain; | ||
364 | struct hlist_node *pos; | ||
269 | struct nlm_host *host; | 365 | struct nlm_host *host; |
270 | int i; | ||
271 | 366 | ||
272 | dprintk("lockd: shutting down host module\n"); | 367 | dprintk("lockd: shutting down host module\n"); |
273 | mutex_lock(&nlm_host_mutex); | 368 | mutex_lock(&nlm_host_mutex); |
274 | 369 | ||
275 | /* First, make all hosts eligible for gc */ | 370 | /* First, make all hosts eligible for gc */ |
276 | dprintk("lockd: nuking all hosts...\n"); | 371 | dprintk("lockd: nuking all hosts...\n"); |
277 | for (i = 0; i < NLM_HOST_NRHASH; i++) { | 372 | for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { |
278 | for (host = nlm_hosts[i]; host; host = host->h_next) | 373 | hlist_for_each_entry(host, pos, chain, h_hash) |
279 | host->h_expires = jiffies - 1; | 374 | host->h_expires = jiffies - 1; |
280 | } | 375 | } |
281 | 376 | ||
@@ -287,8 +382,8 @@ nlm_shutdown_hosts(void) | |||
287 | if (nrhosts) { | 382 | if (nrhosts) { |
288 | printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); | 383 | printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); |
289 | dprintk("lockd: %d hosts left:\n", nrhosts); | 384 | dprintk("lockd: %d hosts left:\n", nrhosts); |
290 | for (i = 0; i < NLM_HOST_NRHASH; i++) { | 385 | for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { |
291 | for (host = nlm_hosts[i]; host; host = host->h_next) { | 386 | hlist_for_each_entry(host, pos, chain, h_hash) { |
292 | dprintk(" %s (cnt %d use %d exp %ld)\n", | 387 | dprintk(" %s (cnt %d use %d exp %ld)\n", |
293 | host->h_name, atomic_read(&host->h_count), | 388 | host->h_name, atomic_read(&host->h_count), |
294 | host->h_inuse, host->h_expires); | 389 | host->h_inuse, host->h_expires); |
@@ -305,45 +400,32 @@ nlm_shutdown_hosts(void) | |||
305 | static void | 400 | static void |
306 | nlm_gc_hosts(void) | 401 | nlm_gc_hosts(void) |
307 | { | 402 | { |
308 | struct nlm_host **q, *host; | 403 | struct hlist_head *chain; |
309 | struct rpc_clnt *clnt; | 404 | struct hlist_node *pos, *next; |
310 | int i; | 405 | struct nlm_host *host; |
311 | 406 | ||
312 | dprintk("lockd: host garbage collection\n"); | 407 | dprintk("lockd: host garbage collection\n"); |
313 | for (i = 0; i < NLM_HOST_NRHASH; i++) { | 408 | for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { |
314 | for (host = nlm_hosts[i]; host; host = host->h_next) | 409 | hlist_for_each_entry(host, pos, chain, h_hash) |
315 | host->h_inuse = 0; | 410 | host->h_inuse = 0; |
316 | } | 411 | } |
317 | 412 | ||
318 | /* Mark all hosts that hold locks, blocks or shares */ | 413 | /* Mark all hosts that hold locks, blocks or shares */ |
319 | nlmsvc_mark_resources(); | 414 | nlmsvc_mark_resources(); |
320 | 415 | ||
321 | for (i = 0; i < NLM_HOST_NRHASH; i++) { | 416 | for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { |
322 | q = &nlm_hosts[i]; | 417 | hlist_for_each_entry_safe(host, pos, next, chain, h_hash) { |
323 | while ((host = *q) != NULL) { | ||
324 | if (atomic_read(&host->h_count) || host->h_inuse | 418 | if (atomic_read(&host->h_count) || host->h_inuse |
325 | || time_before(jiffies, host->h_expires)) { | 419 | || time_before(jiffies, host->h_expires)) { |
326 | dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n", | 420 | dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n", |
327 | host->h_name, atomic_read(&host->h_count), | 421 | host->h_name, atomic_read(&host->h_count), |
328 | host->h_inuse, host->h_expires); | 422 | host->h_inuse, host->h_expires); |
329 | q = &host->h_next; | ||
330 | continue; | 423 | continue; |
331 | } | 424 | } |
332 | dprintk("lockd: delete host %s\n", host->h_name); | 425 | dprintk("lockd: delete host %s\n", host->h_name); |
333 | *q = host->h_next; | 426 | hlist_del_init(&host->h_hash); |
334 | /* Don't unmonitor hosts that have been invalidated */ | 427 | |
335 | if (host->h_monitored && !host->h_killed) | 428 | nlm_destroy_host(host); |
336 | nsm_unmonitor(host); | ||
337 | if ((clnt = host->h_rpcclnt) != NULL) { | ||
338 | if (atomic_read(&clnt->cl_users)) { | ||
339 | printk(KERN_WARNING | ||
340 | "lockd: active RPC handle\n"); | ||
341 | clnt->cl_dead = 1; | ||
342 | } else { | ||
343 | rpc_destroy_client(host->h_rpcclnt); | ||
344 | } | ||
345 | } | ||
346 | kfree(host); | ||
347 | nrhosts--; | 429 | nrhosts--; |
348 | } | 430 | } |
349 | } | 431 | } |
@@ -351,3 +433,88 @@ nlm_gc_hosts(void) | |||
351 | next_gc = jiffies + NLM_HOST_COLLECT; | 433 | next_gc = jiffies + NLM_HOST_COLLECT; |
352 | } | 434 | } |
353 | 435 | ||
436 | |||
437 | /* | ||
438 | * Manage NSM handles | ||
439 | */ | ||
440 | static LIST_HEAD(nsm_handles); | ||
441 | static DEFINE_MUTEX(nsm_mutex); | ||
442 | |||
443 | static struct nsm_handle * | ||
444 | __nsm_find(const struct sockaddr_in *sin, | ||
445 | const char *hostname, int hostname_len, | ||
446 | int create) | ||
447 | { | ||
448 | struct nsm_handle *nsm = NULL; | ||
449 | struct list_head *pos; | ||
450 | |||
451 | if (!sin) | ||
452 | return NULL; | ||
453 | |||
454 | if (hostname && memchr(hostname, '/', hostname_len) != NULL) { | ||
455 | if (printk_ratelimit()) { | ||
456 | printk(KERN_WARNING "Invalid hostname \"%.*s\" " | ||
457 | "in NFS lock request\n", | ||
458 | hostname_len, hostname); | ||
459 | } | ||
460 | return NULL; | ||
461 | } | ||
462 | |||
463 | mutex_lock(&nsm_mutex); | ||
464 | list_for_each(pos, &nsm_handles) { | ||
465 | nsm = list_entry(pos, struct nsm_handle, sm_link); | ||
466 | |||
467 | if (hostname && nsm_use_hostnames) { | ||
468 | if (strlen(nsm->sm_name) != hostname_len | ||
469 | || memcmp(nsm->sm_name, hostname, hostname_len)) | ||
470 | continue; | ||
471 | } else if (!nlm_cmp_addr(&nsm->sm_addr, sin)) | ||
472 | continue; | ||
473 | atomic_inc(&nsm->sm_count); | ||
474 | goto out; | ||
475 | } | ||
476 | |||
477 | if (!create) { | ||
478 | nsm = NULL; | ||
479 | goto out; | ||
480 | } | ||
481 | |||
482 | nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL); | ||
483 | if (nsm != NULL) { | ||
484 | nsm->sm_addr = *sin; | ||
485 | nsm->sm_name = (char *) (nsm + 1); | ||
486 | memcpy(nsm->sm_name, hostname, hostname_len); | ||
487 | nsm->sm_name[hostname_len] = '\0'; | ||
488 | atomic_set(&nsm->sm_count, 1); | ||
489 | |||
490 | list_add(&nsm->sm_link, &nsm_handles); | ||
491 | } | ||
492 | |||
493 | out: | ||
494 | mutex_unlock(&nsm_mutex); | ||
495 | return nsm; | ||
496 | } | ||
497 | |||
498 | struct nsm_handle * | ||
499 | nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len) | ||
500 | { | ||
501 | return __nsm_find(sin, hostname, hostname_len, 1); | ||
502 | } | ||
503 | |||
504 | /* | ||
505 | * Release an NSM handle | ||
506 | */ | ||
507 | void | ||
508 | nsm_release(struct nsm_handle *nsm) | ||
509 | { | ||
510 | if (!nsm) | ||
511 | return; | ||
512 | if (atomic_dec_and_test(&nsm->sm_count)) { | ||
513 | mutex_lock(&nsm_mutex); | ||
514 | if (atomic_read(&nsm->sm_count) == 0) { | ||
515 | list_del(&nsm->sm_link); | ||
516 | kfree(nsm); | ||
517 | } | ||
518 | mutex_unlock(&nsm_mutex); | ||
519 | } | ||
520 | } | ||
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 5954dcb497e4..e0179f8c327f 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -24,13 +24,13 @@ static struct rpc_program nsm_program; | |||
24 | /* | 24 | /* |
25 | * Local NSM state | 25 | * Local NSM state |
26 | */ | 26 | */ |
27 | u32 nsm_local_state; | 27 | int nsm_local_state; |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * Common procedure for SM_MON/SM_UNMON calls | 30 | * Common procedure for SM_MON/SM_UNMON calls |
31 | */ | 31 | */ |
32 | static int | 32 | static int |
33 | nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) | 33 | nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) |
34 | { | 34 | { |
35 | struct rpc_clnt *clnt; | 35 | struct rpc_clnt *clnt; |
36 | int status; | 36 | int status; |
@@ -46,10 +46,11 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) | |||
46 | goto out; | 46 | goto out; |
47 | } | 47 | } |
48 | 48 | ||
49 | args.addr = host->h_addr.sin_addr.s_addr; | 49 | memset(&args, 0, sizeof(args)); |
50 | args.proto= (host->h_proto<<1) | host->h_server; | 50 | args.mon_name = nsm->sm_name; |
51 | args.addr = nsm->sm_addr.sin_addr.s_addr; | ||
51 | args.prog = NLM_PROGRAM; | 52 | args.prog = NLM_PROGRAM; |
52 | args.vers = host->h_version; | 53 | args.vers = 3; |
53 | args.proc = NLMPROC_NSM_NOTIFY; | 54 | args.proc = NLMPROC_NSM_NOTIFY; |
54 | memset(res, 0, sizeof(*res)); | 55 | memset(res, 0, sizeof(*res)); |
55 | 56 | ||
@@ -70,17 +71,22 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) | |||
70 | int | 71 | int |
71 | nsm_monitor(struct nlm_host *host) | 72 | nsm_monitor(struct nlm_host *host) |
72 | { | 73 | { |
74 | struct nsm_handle *nsm = host->h_nsmhandle; | ||
73 | struct nsm_res res; | 75 | struct nsm_res res; |
74 | int status; | 76 | int status; |
75 | 77 | ||
76 | dprintk("lockd: nsm_monitor(%s)\n", host->h_name); | 78 | dprintk("lockd: nsm_monitor(%s)\n", host->h_name); |
79 | BUG_ON(nsm == NULL); | ||
77 | 80 | ||
78 | status = nsm_mon_unmon(host, SM_MON, &res); | 81 | if (nsm->sm_monitored) |
82 | return 0; | ||
83 | |||
84 | status = nsm_mon_unmon(nsm, SM_MON, &res); | ||
79 | 85 | ||
80 | if (status < 0 || res.status != 0) | 86 | if (status < 0 || res.status != 0) |
81 | printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name); | 87 | printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name); |
82 | else | 88 | else |
83 | host->h_monitored = 1; | 89 | nsm->sm_monitored = 1; |
84 | return status; | 90 | return status; |
85 | } | 91 | } |
86 | 92 | ||
@@ -90,16 +96,26 @@ nsm_monitor(struct nlm_host *host) | |||
90 | int | 96 | int |
91 | nsm_unmonitor(struct nlm_host *host) | 97 | nsm_unmonitor(struct nlm_host *host) |
92 | { | 98 | { |
99 | struct nsm_handle *nsm = host->h_nsmhandle; | ||
93 | struct nsm_res res; | 100 | struct nsm_res res; |
94 | int status; | 101 | int status = 0; |
95 | 102 | ||
96 | dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name); | 103 | if (nsm == NULL) |
97 | 104 | return 0; | |
98 | status = nsm_mon_unmon(host, SM_UNMON, &res); | 105 | host->h_nsmhandle = NULL; |
99 | if (status < 0) | 106 | |
100 | printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", host->h_name); | 107 | if (atomic_read(&nsm->sm_count) == 1 |
101 | else | 108 | && nsm->sm_monitored && !nsm->sm_sticky) { |
102 | host->h_monitored = 0; | 109 | dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name); |
110 | |||
111 | status = nsm_mon_unmon(nsm, SM_UNMON, &res); | ||
112 | if (status < 0) | ||
113 | printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", | ||
114 | host->h_name); | ||
115 | else | ||
116 | nsm->sm_monitored = 0; | ||
117 | } | ||
118 | nsm_release(nsm); | ||
103 | return status; | 119 | return status; |
104 | } | 120 | } |
105 | 121 | ||
@@ -135,7 +151,7 @@ nsm_create(void) | |||
135 | static u32 * | 151 | static u32 * |
136 | xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) | 152 | xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) |
137 | { | 153 | { |
138 | char buffer[20]; | 154 | char buffer[20], *name; |
139 | 155 | ||
140 | /* | 156 | /* |
141 | * Use the dotted-quad IP address of the remote host as | 157 | * Use the dotted-quad IP address of the remote host as |
@@ -143,9 +159,14 @@ xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) | |||
143 | * hostname first for whatever remote hostname it receives, | 159 | * hostname first for whatever remote hostname it receives, |
144 | * so this works alright. | 160 | * so this works alright. |
145 | */ | 161 | */ |
146 | sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr)); | 162 | if (nsm_use_hostnames) { |
147 | if (!(p = xdr_encode_string(p, buffer)) | 163 | name = argp->mon_name; |
148 | || !(p = xdr_encode_string(p, system_utsname.nodename))) | 164 | } else { |
165 | sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr)); | ||
166 | name = buffer; | ||
167 | } | ||
168 | if (!(p = xdr_encode_string(p, name)) | ||
169 | || !(p = xdr_encode_string(p, utsname()->nodename))) | ||
149 | return ERR_PTR(-EIO); | 170 | return ERR_PTR(-EIO); |
150 | *p++ = htonl(argp->prog); | 171 | *p++ = htonl(argp->prog); |
151 | *p++ = htonl(argp->vers); | 172 | *p++ = htonl(argp->vers); |
@@ -160,9 +181,11 @@ xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) | |||
160 | p = xdr_encode_common(rqstp, p, argp); | 181 | p = xdr_encode_common(rqstp, p, argp); |
161 | if (IS_ERR(p)) | 182 | if (IS_ERR(p)) |
162 | return PTR_ERR(p); | 183 | return PTR_ERR(p); |
184 | |||
185 | /* Surprise - there may even be room for an IPv6 address now */ | ||
163 | *p++ = argp->addr; | 186 | *p++ = argp->addr; |
164 | *p++ = argp->vers; | 187 | *p++ = 0; |
165 | *p++ = argp->proto; | 188 | *p++ = 0; |
166 | *p++ = 0; | 189 | *p++ = 0; |
167 | rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); | 190 | rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); |
168 | return 0; | 191 | return 0; |
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 9a991b52c647..634139232aaf 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -31,7 +31,9 @@ | |||
31 | #include <linux/sunrpc/clnt.h> | 31 | #include <linux/sunrpc/clnt.h> |
32 | #include <linux/sunrpc/svc.h> | 32 | #include <linux/sunrpc/svc.h> |
33 | #include <linux/sunrpc/svcsock.h> | 33 | #include <linux/sunrpc/svcsock.h> |
34 | #include <net/ip.h> | ||
34 | #include <linux/lockd/lockd.h> | 35 | #include <linux/lockd/lockd.h> |
36 | #include <linux/lockd/sm_inter.h> | ||
35 | #include <linux/nfs.h> | 37 | #include <linux/nfs.h> |
36 | 38 | ||
37 | #define NLMDBG_FACILITY NLMDBG_SVC | 39 | #define NLMDBG_FACILITY NLMDBG_SVC |
@@ -46,6 +48,7 @@ EXPORT_SYMBOL(nlmsvc_ops); | |||
46 | static DEFINE_MUTEX(nlmsvc_mutex); | 48 | static DEFINE_MUTEX(nlmsvc_mutex); |
47 | static unsigned int nlmsvc_users; | 49 | static unsigned int nlmsvc_users; |
48 | static pid_t nlmsvc_pid; | 50 | static pid_t nlmsvc_pid; |
51 | static struct svc_serv *nlmsvc_serv; | ||
49 | int nlmsvc_grace_period; | 52 | int nlmsvc_grace_period; |
50 | unsigned long nlmsvc_timeout; | 53 | unsigned long nlmsvc_timeout; |
51 | 54 | ||
@@ -59,6 +62,7 @@ static DECLARE_WAIT_QUEUE_HEAD(lockd_exit); | |||
59 | static unsigned long nlm_grace_period; | 62 | static unsigned long nlm_grace_period; |
60 | static unsigned long nlm_timeout = LOCKD_DFLT_TIMEO; | 63 | static unsigned long nlm_timeout = LOCKD_DFLT_TIMEO; |
61 | static int nlm_udpport, nlm_tcpport; | 64 | static int nlm_udpport, nlm_tcpport; |
65 | int nsm_use_hostnames = 0; | ||
62 | 66 | ||
63 | /* | 67 | /* |
64 | * Constants needed for the sysctl interface. | 68 | * Constants needed for the sysctl interface. |
@@ -96,7 +100,6 @@ static inline void clear_grace_period(void) | |||
96 | static void | 100 | static void |
97 | lockd(struct svc_rqst *rqstp) | 101 | lockd(struct svc_rqst *rqstp) |
98 | { | 102 | { |
99 | struct svc_serv *serv = rqstp->rq_server; | ||
100 | int err = 0; | 103 | int err = 0; |
101 | unsigned long grace_period_expire; | 104 | unsigned long grace_period_expire; |
102 | 105 | ||
@@ -112,6 +115,7 @@ lockd(struct svc_rqst *rqstp) | |||
112 | * Let our maker know we're running. | 115 | * Let our maker know we're running. |
113 | */ | 116 | */ |
114 | nlmsvc_pid = current->pid; | 117 | nlmsvc_pid = current->pid; |
118 | nlmsvc_serv = rqstp->rq_server; | ||
115 | complete(&lockd_start_done); | 119 | complete(&lockd_start_done); |
116 | 120 | ||
117 | daemonize("lockd"); | 121 | daemonize("lockd"); |
@@ -161,7 +165,7 @@ lockd(struct svc_rqst *rqstp) | |||
161 | * Find a socket with data available and call its | 165 | * Find a socket with data available and call its |
162 | * recvfrom routine. | 166 | * recvfrom routine. |
163 | */ | 167 | */ |
164 | err = svc_recv(serv, rqstp, timeout); | 168 | err = svc_recv(rqstp, timeout); |
165 | if (err == -EAGAIN || err == -EINTR) | 169 | if (err == -EAGAIN || err == -EINTR) |
166 | continue; | 170 | continue; |
167 | if (err < 0) { | 171 | if (err < 0) { |
@@ -174,7 +178,7 @@ lockd(struct svc_rqst *rqstp) | |||
174 | dprintk("lockd: request from %08x\n", | 178 | dprintk("lockd: request from %08x\n", |
175 | (unsigned)ntohl(rqstp->rq_addr.sin_addr.s_addr)); | 179 | (unsigned)ntohl(rqstp->rq_addr.sin_addr.s_addr)); |
176 | 180 | ||
177 | svc_process(serv, rqstp); | 181 | svc_process(rqstp); |
178 | 182 | ||
179 | } | 183 | } |
180 | 184 | ||
@@ -189,6 +193,7 @@ lockd(struct svc_rqst *rqstp) | |||
189 | nlmsvc_invalidate_all(); | 193 | nlmsvc_invalidate_all(); |
190 | nlm_shutdown_hosts(); | 194 | nlm_shutdown_hosts(); |
191 | nlmsvc_pid = 0; | 195 | nlmsvc_pid = 0; |
196 | nlmsvc_serv = NULL; | ||
192 | } else | 197 | } else |
193 | printk(KERN_DEBUG | 198 | printk(KERN_DEBUG |
194 | "lockd: new process, skipping host shutdown\n"); | 199 | "lockd: new process, skipping host shutdown\n"); |
@@ -205,54 +210,77 @@ lockd(struct svc_rqst *rqstp) | |||
205 | module_put_and_exit(0); | 210 | module_put_and_exit(0); |
206 | } | 211 | } |
207 | 212 | ||
213 | |||
214 | static int find_socket(struct svc_serv *serv, int proto) | ||
215 | { | ||
216 | struct svc_sock *svsk; | ||
217 | int found = 0; | ||
218 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) | ||
219 | if (svsk->sk_sk->sk_protocol == proto) { | ||
220 | found = 1; | ||
221 | break; | ||
222 | } | ||
223 | return found; | ||
224 | } | ||
225 | |||
226 | static int make_socks(struct svc_serv *serv, int proto) | ||
227 | { | ||
228 | /* Make any sockets that are needed but not present. | ||
229 | * If nlm_udpport or nlm_tcpport were set as module | ||
230 | * options, make those sockets unconditionally | ||
231 | */ | ||
232 | static int warned; | ||
233 | int err = 0; | ||
234 | if (proto == IPPROTO_UDP || nlm_udpport) | ||
235 | if (!find_socket(serv, IPPROTO_UDP)) | ||
236 | err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport); | ||
237 | if (err == 0 && (proto == IPPROTO_TCP || nlm_tcpport)) | ||
238 | if (!find_socket(serv, IPPROTO_TCP)) | ||
239 | err= svc_makesock(serv, IPPROTO_TCP, nlm_tcpport); | ||
240 | if (!err) | ||
241 | warned = 0; | ||
242 | else if (warned++ == 0) | ||
243 | printk(KERN_WARNING | ||
244 | "lockd_up: makesock failed, error=%d\n", err); | ||
245 | return err; | ||
246 | } | ||
247 | |||
208 | /* | 248 | /* |
209 | * Bring up the lockd process if it's not already up. | 249 | * Bring up the lockd process if it's not already up. |
210 | */ | 250 | */ |
211 | int | 251 | int |
212 | lockd_up(void) | 252 | lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ |
213 | { | 253 | { |
214 | static int warned; | ||
215 | struct svc_serv * serv; | 254 | struct svc_serv * serv; |
216 | int error = 0; | 255 | int error = 0; |
217 | 256 | ||
218 | mutex_lock(&nlmsvc_mutex); | 257 | mutex_lock(&nlmsvc_mutex); |
219 | /* | 258 | /* |
220 | * Unconditionally increment the user count ... this is | ||
221 | * the number of clients who _want_ a lockd process. | ||
222 | */ | ||
223 | nlmsvc_users++; | ||
224 | /* | ||
225 | * Check whether we're already up and running. | 259 | * Check whether we're already up and running. |
226 | */ | 260 | */ |
227 | if (nlmsvc_pid) | 261 | if (nlmsvc_pid) { |
262 | if (proto) | ||
263 | error = make_socks(nlmsvc_serv, proto); | ||
228 | goto out; | 264 | goto out; |
265 | } | ||
229 | 266 | ||
230 | /* | 267 | /* |
231 | * Sanity check: if there's no pid, | 268 | * Sanity check: if there's no pid, |
232 | * we should be the first user ... | 269 | * we should be the first user ... |
233 | */ | 270 | */ |
234 | if (nlmsvc_users > 1) | 271 | if (nlmsvc_users) |
235 | printk(KERN_WARNING | 272 | printk(KERN_WARNING |
236 | "lockd_up: no pid, %d users??\n", nlmsvc_users); | 273 | "lockd_up: no pid, %d users??\n", nlmsvc_users); |
237 | 274 | ||
238 | error = -ENOMEM; | 275 | error = -ENOMEM; |
239 | serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE); | 276 | serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); |
240 | if (!serv) { | 277 | if (!serv) { |
241 | printk(KERN_WARNING "lockd_up: create service failed\n"); | 278 | printk(KERN_WARNING "lockd_up: create service failed\n"); |
242 | goto out; | 279 | goto out; |
243 | } | 280 | } |
244 | 281 | ||
245 | if ((error = svc_makesock(serv, IPPROTO_UDP, nlm_udpport)) < 0 | 282 | if ((error = make_socks(serv, proto)) < 0) |
246 | #ifdef CONFIG_NFSD_TCP | ||
247 | || (error = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport)) < 0 | ||
248 | #endif | ||
249 | ) { | ||
250 | if (warned++ == 0) | ||
251 | printk(KERN_WARNING | ||
252 | "lockd_up: makesock failed, error=%d\n", error); | ||
253 | goto destroy_and_out; | 283 | goto destroy_and_out; |
254 | } | ||
255 | warned = 0; | ||
256 | 284 | ||
257 | /* | 285 | /* |
258 | * Create the kernel thread and wait for it to start. | 286 | * Create the kernel thread and wait for it to start. |
@@ -272,6 +300,8 @@ lockd_up(void) | |||
272 | destroy_and_out: | 300 | destroy_and_out: |
273 | svc_destroy(serv); | 301 | svc_destroy(serv); |
274 | out: | 302 | out: |
303 | if (!error) | ||
304 | nlmsvc_users++; | ||
275 | mutex_unlock(&nlmsvc_mutex); | 305 | mutex_unlock(&nlmsvc_mutex); |
276 | return error; | 306 | return error; |
277 | } | 307 | } |
@@ -367,6 +397,22 @@ static ctl_table nlm_sysctls[] = { | |||
367 | .extra1 = (int *) &nlm_port_min, | 397 | .extra1 = (int *) &nlm_port_min, |
368 | .extra2 = (int *) &nlm_port_max, | 398 | .extra2 = (int *) &nlm_port_max, |
369 | }, | 399 | }, |
400 | { | ||
401 | .ctl_name = CTL_UNNUMBERED, | ||
402 | .procname = "nsm_use_hostnames", | ||
403 | .data = &nsm_use_hostnames, | ||
404 | .maxlen = sizeof(int), | ||
405 | .mode = 0644, | ||
406 | .proc_handler = &proc_dointvec, | ||
407 | }, | ||
408 | { | ||
409 | .ctl_name = CTL_UNNUMBERED, | ||
410 | .procname = "nsm_local_state", | ||
411 | .data = &nsm_local_state, | ||
412 | .maxlen = sizeof(int), | ||
413 | .mode = 0644, | ||
414 | .proc_handler = &proc_dointvec, | ||
415 | }, | ||
370 | { .ctl_name = 0 } | 416 | { .ctl_name = 0 } |
371 | }; | 417 | }; |
372 | 418 | ||
@@ -455,6 +501,7 @@ module_param_call(nlm_udpport, param_set_port, param_get_int, | |||
455 | &nlm_udpport, 0644); | 501 | &nlm_udpport, 0644); |
456 | module_param_call(nlm_tcpport, param_set_port, param_get_int, | 502 | module_param_call(nlm_tcpport, param_set_port, param_get_int, |
457 | &nlm_tcpport, 0644); | 503 | &nlm_tcpport, 0644); |
504 | module_param(nsm_use_hostnames, bool, 0644); | ||
458 | 505 | ||
459 | /* | 506 | /* |
460 | * Initialising and terminating the module. | 507 | * Initialising and terminating the module. |
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index a2dd9ccb9b32..fa370f6eb07b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c | |||
@@ -38,8 +38,8 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
38 | return nlm_lck_denied_nolocks; | 38 | return nlm_lck_denied_nolocks; |
39 | 39 | ||
40 | /* Obtain host handle */ | 40 | /* Obtain host handle */ |
41 | if (!(host = nlmsvc_lookup_host(rqstp)) | 41 | if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len)) |
42 | || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0)) | 42 | || (argp->monitor && nsm_monitor(host) < 0)) |
43 | goto no_locks; | 43 | goto no_locks; |
44 | *hostp = host; | 44 | *hostp = host; |
45 | 45 | ||
@@ -260,7 +260,9 @@ static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *a | |||
260 | struct nlm_rqst *call; | 260 | struct nlm_rqst *call; |
261 | int stat; | 261 | int stat; |
262 | 262 | ||
263 | host = nlmsvc_lookup_host(rqstp); | 263 | host = nlmsvc_lookup_host(rqstp, |
264 | argp->lock.caller, | ||
265 | argp->lock.len); | ||
264 | if (host == NULL) | 266 | if (host == NULL) |
265 | return rpc_system_err; | 267 | return rpc_system_err; |
266 | 268 | ||
@@ -420,10 +422,6 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, | |||
420 | void *resp) | 422 | void *resp) |
421 | { | 423 | { |
422 | struct sockaddr_in saddr = rqstp->rq_addr; | 424 | struct sockaddr_in saddr = rqstp->rq_addr; |
423 | int vers = argp->vers; | ||
424 | int prot = argp->proto >> 1; | ||
425 | |||
426 | struct nlm_host *host; | ||
427 | 425 | ||
428 | dprintk("lockd: SM_NOTIFY called\n"); | 426 | dprintk("lockd: SM_NOTIFY called\n"); |
429 | if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) | 427 | if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) |
@@ -438,21 +436,10 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, | |||
438 | /* Obtain the host pointer for this NFS server and try to | 436 | /* Obtain the host pointer for this NFS server and try to |
439 | * reclaim all locks we hold on this server. | 437 | * reclaim all locks we hold on this server. |
440 | */ | 438 | */ |
439 | memset(&saddr, 0, sizeof(saddr)); | ||
441 | saddr.sin_addr.s_addr = argp->addr; | 440 | saddr.sin_addr.s_addr = argp->addr; |
441 | nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); | ||
442 | 442 | ||
443 | if ((argp->proto & 1)==0) { | ||
444 | if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) { | ||
445 | nlmclnt_recovery(host, argp->state); | ||
446 | nlm_release_host(host); | ||
447 | } | ||
448 | } else { | ||
449 | /* If we run on an NFS server, delete all locks held by the client */ | ||
450 | |||
451 | if ((host = nlm_lookup_host(1, &saddr, prot, vers)) != NULL) { | ||
452 | nlmsvc_free_host_resources(host); | ||
453 | nlm_release_host(host); | ||
454 | } | ||
455 | } | ||
456 | return rpc_success; | 443 | return rpc_success; |
457 | } | 444 | } |
458 | 445 | ||
@@ -468,7 +455,7 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, | |||
468 | 455 | ||
469 | dprintk("lockd: GRANTED_RES called\n"); | 456 | dprintk("lockd: GRANTED_RES called\n"); |
470 | 457 | ||
471 | nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); | 458 | nlmsvc_grant_reply(&argp->cookie, argp->status); |
472 | return rpc_success; | 459 | return rpc_success; |
473 | } | 460 | } |
474 | 461 | ||
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index c9d419703cf3..814c6064c9e0 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -40,7 +40,7 @@ | |||
40 | 40 | ||
41 | static void nlmsvc_release_block(struct nlm_block *block); | 41 | static void nlmsvc_release_block(struct nlm_block *block); |
42 | static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); | 42 | static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); |
43 | static int nlmsvc_remove_block(struct nlm_block *block); | 43 | static void nlmsvc_remove_block(struct nlm_block *block); |
44 | 44 | ||
45 | static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); | 45 | static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); |
46 | static void nlmsvc_freegrantargs(struct nlm_rqst *call); | 46 | static void nlmsvc_freegrantargs(struct nlm_rqst *call); |
@@ -49,7 +49,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops; | |||
49 | /* | 49 | /* |
50 | * The list of blocked locks to retry | 50 | * The list of blocked locks to retry |
51 | */ | 51 | */ |
52 | static struct nlm_block * nlm_blocked; | 52 | static LIST_HEAD(nlm_blocked); |
53 | 53 | ||
54 | /* | 54 | /* |
55 | * Insert a blocked lock into the global list | 55 | * Insert a blocked lock into the global list |
@@ -57,48 +57,44 @@ static struct nlm_block * nlm_blocked; | |||
57 | static void | 57 | static void |
58 | nlmsvc_insert_block(struct nlm_block *block, unsigned long when) | 58 | nlmsvc_insert_block(struct nlm_block *block, unsigned long when) |
59 | { | 59 | { |
60 | struct nlm_block **bp, *b; | 60 | struct nlm_block *b; |
61 | struct list_head *pos; | ||
61 | 62 | ||
62 | dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when); | 63 | dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when); |
63 | kref_get(&block->b_count); | 64 | if (list_empty(&block->b_list)) { |
64 | if (block->b_queued) | 65 | kref_get(&block->b_count); |
65 | nlmsvc_remove_block(block); | 66 | } else { |
66 | bp = &nlm_blocked; | 67 | list_del_init(&block->b_list); |
68 | } | ||
69 | |||
70 | pos = &nlm_blocked; | ||
67 | if (when != NLM_NEVER) { | 71 | if (when != NLM_NEVER) { |
68 | if ((when += jiffies) == NLM_NEVER) | 72 | if ((when += jiffies) == NLM_NEVER) |
69 | when ++; | 73 | when ++; |
70 | while ((b = *bp) && time_before_eq(b->b_when,when) && b->b_when != NLM_NEVER) | 74 | list_for_each(pos, &nlm_blocked) { |
71 | bp = &b->b_next; | 75 | b = list_entry(pos, struct nlm_block, b_list); |
72 | } else | 76 | if (time_after(b->b_when,when) || b->b_when == NLM_NEVER) |
73 | while ((b = *bp) != 0) | 77 | break; |
74 | bp = &b->b_next; | 78 | } |
79 | /* On normal exit from the loop, pos == &nlm_blocked, | ||
80 | * so we will be adding to the end of the list - good | ||
81 | */ | ||
82 | } | ||
75 | 83 | ||
76 | block->b_queued = 1; | 84 | list_add_tail(&block->b_list, pos); |
77 | block->b_when = when; | 85 | block->b_when = when; |
78 | block->b_next = b; | ||
79 | *bp = block; | ||
80 | } | 86 | } |
81 | 87 | ||
82 | /* | 88 | /* |
83 | * Remove a block from the global list | 89 | * Remove a block from the global list |
84 | */ | 90 | */ |
85 | static int | 91 | static inline void |
86 | nlmsvc_remove_block(struct nlm_block *block) | 92 | nlmsvc_remove_block(struct nlm_block *block) |
87 | { | 93 | { |
88 | struct nlm_block **bp, *b; | 94 | if (!list_empty(&block->b_list)) { |
89 | 95 | list_del_init(&block->b_list); | |
90 | if (!block->b_queued) | 96 | nlmsvc_release_block(block); |
91 | return 1; | ||
92 | for (bp = &nlm_blocked; (b = *bp) != 0; bp = &b->b_next) { | ||
93 | if (b == block) { | ||
94 | *bp = block->b_next; | ||
95 | block->b_queued = 0; | ||
96 | nlmsvc_release_block(block); | ||
97 | return 1; | ||
98 | } | ||
99 | } | 97 | } |
100 | |||
101 | return 0; | ||
102 | } | 98 | } |
103 | 99 | ||
104 | /* | 100 | /* |
@@ -107,14 +103,14 @@ nlmsvc_remove_block(struct nlm_block *block) | |||
107 | static struct nlm_block * | 103 | static struct nlm_block * |
108 | nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock) | 104 | nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock) |
109 | { | 105 | { |
110 | struct nlm_block **head, *block; | 106 | struct nlm_block *block; |
111 | struct file_lock *fl; | 107 | struct file_lock *fl; |
112 | 108 | ||
113 | dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n", | 109 | dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n", |
114 | file, lock->fl.fl_pid, | 110 | file, lock->fl.fl_pid, |
115 | (long long)lock->fl.fl_start, | 111 | (long long)lock->fl.fl_start, |
116 | (long long)lock->fl.fl_end, lock->fl.fl_type); | 112 | (long long)lock->fl.fl_end, lock->fl.fl_type); |
117 | for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) { | 113 | list_for_each_entry(block, &nlm_blocked, b_list) { |
118 | fl = &block->b_call->a_args.lock.fl; | 114 | fl = &block->b_call->a_args.lock.fl; |
119 | dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n", | 115 | dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n", |
120 | block->b_file, fl->fl_pid, | 116 | block->b_file, fl->fl_pid, |
@@ -143,20 +139,20 @@ static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b) | |||
143 | * Find a block with a given NLM cookie. | 139 | * Find a block with a given NLM cookie. |
144 | */ | 140 | */ |
145 | static inline struct nlm_block * | 141 | static inline struct nlm_block * |
146 | nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) | 142 | nlmsvc_find_block(struct nlm_cookie *cookie) |
147 | { | 143 | { |
148 | struct nlm_block *block; | 144 | struct nlm_block *block; |
149 | 145 | ||
150 | for (block = nlm_blocked; block; block = block->b_next) { | 146 | list_for_each_entry(block, &nlm_blocked, b_list) { |
151 | dprintk("cookie: head of blocked queue %p, block %p\n", | 147 | if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)) |
152 | nlm_blocked, block); | 148 | goto found; |
153 | if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie) | ||
154 | && nlm_cmp_addr(sin, &block->b_host->h_addr)) | ||
155 | break; | ||
156 | } | 149 | } |
157 | 150 | ||
158 | if (block != NULL) | 151 | return NULL; |
159 | kref_get(&block->b_count); | 152 | |
153 | found: | ||
154 | dprintk("nlmsvc_find_block(%s): block=%p\n", nlmdbg_cookie2a(cookie), block); | ||
155 | kref_get(&block->b_count); | ||
160 | return block; | 156 | return block; |
161 | } | 157 | } |
162 | 158 | ||
@@ -169,6 +165,11 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) | |||
169 | * request, but (as I found out later) that's because some implementations | 165 | * request, but (as I found out later) that's because some implementations |
170 | * do just this. Never mind the standards comittees, they support our | 166 | * do just this. Never mind the standards comittees, they support our |
171 | * logging industries. | 167 | * logging industries. |
168 | * | ||
169 | * 10 years later: I hope we can safely ignore these old and broken | ||
170 | * clients by now. Let's fix this so we can uniquely identify an incoming | ||
171 | * GRANTED_RES message by cookie, without having to rely on the client's IP | ||
172 | * address. --okir | ||
172 | */ | 173 | */ |
173 | static inline struct nlm_block * | 174 | static inline struct nlm_block * |
174 | nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, | 175 | nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, |
@@ -179,7 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, | |||
179 | struct nlm_rqst *call = NULL; | 180 | struct nlm_rqst *call = NULL; |
180 | 181 | ||
181 | /* Create host handle for callback */ | 182 | /* Create host handle for callback */ |
182 | host = nlmsvc_lookup_host(rqstp); | 183 | host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len); |
183 | if (host == NULL) | 184 | if (host == NULL) |
184 | return NULL; | 185 | return NULL; |
185 | 186 | ||
@@ -192,6 +193,8 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, | |||
192 | if (block == NULL) | 193 | if (block == NULL) |
193 | goto failed; | 194 | goto failed; |
194 | kref_init(&block->b_count); | 195 | kref_init(&block->b_count); |
196 | INIT_LIST_HEAD(&block->b_list); | ||
197 | INIT_LIST_HEAD(&block->b_flist); | ||
195 | 198 | ||
196 | if (!nlmsvc_setgrantargs(call, lock)) | 199 | if (!nlmsvc_setgrantargs(call, lock)) |
197 | goto failed_free; | 200 | goto failed_free; |
@@ -199,7 +202,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, | |||
199 | /* Set notifier function for VFS, and init args */ | 202 | /* Set notifier function for VFS, and init args */ |
200 | call->a_args.lock.fl.fl_flags |= FL_SLEEP; | 203 | call->a_args.lock.fl.fl_flags |= FL_SLEEP; |
201 | call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; | 204 | call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; |
202 | call->a_args.cookie = *cookie; /* see above */ | 205 | nlmclnt_next_cookie(&call->a_args.cookie); |
203 | 206 | ||
204 | dprintk("lockd: created block %p...\n", block); | 207 | dprintk("lockd: created block %p...\n", block); |
205 | 208 | ||
@@ -210,8 +213,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, | |||
210 | file->f_count++; | 213 | file->f_count++; |
211 | 214 | ||
212 | /* Add to file's list of blocks */ | 215 | /* Add to file's list of blocks */ |
213 | block->b_fnext = file->f_blocks; | 216 | list_add(&block->b_flist, &file->f_blocks); |
214 | file->f_blocks = block; | ||
215 | 217 | ||
216 | /* Set up RPC arguments for callback */ | 218 | /* Set up RPC arguments for callback */ |
217 | block->b_call = call; | 219 | block->b_call = call; |
@@ -248,19 +250,13 @@ static void nlmsvc_free_block(struct kref *kref) | |||
248 | { | 250 | { |
249 | struct nlm_block *block = container_of(kref, struct nlm_block, b_count); | 251 | struct nlm_block *block = container_of(kref, struct nlm_block, b_count); |
250 | struct nlm_file *file = block->b_file; | 252 | struct nlm_file *file = block->b_file; |
251 | struct nlm_block **bp; | ||
252 | 253 | ||
253 | dprintk("lockd: freeing block %p...\n", block); | 254 | dprintk("lockd: freeing block %p...\n", block); |
254 | 255 | ||
255 | down(&file->f_sema); | ||
256 | /* Remove block from file's list of blocks */ | 256 | /* Remove block from file's list of blocks */ |
257 | for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) { | 257 | mutex_lock(&file->f_mutex); |
258 | if (*bp == block) { | 258 | list_del_init(&block->b_flist); |
259 | *bp = block->b_fnext; | 259 | mutex_unlock(&file->f_mutex); |
260 | break; | ||
261 | } | ||
262 | } | ||
263 | up(&file->f_sema); | ||
264 | 260 | ||
265 | nlmsvc_freegrantargs(block->b_call); | 261 | nlmsvc_freegrantargs(block->b_call); |
266 | nlm_release_call(block->b_call); | 262 | nlm_release_call(block->b_call); |
@@ -274,47 +270,32 @@ static void nlmsvc_release_block(struct nlm_block *block) | |||
274 | kref_put(&block->b_count, nlmsvc_free_block); | 270 | kref_put(&block->b_count, nlmsvc_free_block); |
275 | } | 271 | } |
276 | 272 | ||
277 | static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file) | 273 | /* |
278 | { | 274 | * Loop over all blocks and delete blocks held by |
279 | struct nlm_block *block; | 275 | * a matching host. |
280 | 276 | */ | |
281 | down(&file->f_sema); | 277 | void nlmsvc_traverse_blocks(struct nlm_host *host, |
282 | for (block = file->f_blocks; block != NULL; block = block->b_fnext) | 278 | struct nlm_file *file, |
283 | block->b_host->h_inuse = 1; | 279 | nlm_host_match_fn_t match) |
284 | up(&file->f_sema); | ||
285 | } | ||
286 | |||
287 | static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file) | ||
288 | { | 280 | { |
289 | struct nlm_block *block; | 281 | struct nlm_block *block, *next; |
290 | 282 | ||
291 | restart: | 283 | restart: |
292 | down(&file->f_sema); | 284 | mutex_lock(&file->f_mutex); |
293 | for (block = file->f_blocks; block != NULL; block = block->b_fnext) { | 285 | list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) { |
294 | if (host != NULL && host != block->b_host) | 286 | if (!match(block->b_host, host)) |
295 | continue; | 287 | continue; |
296 | if (!block->b_queued) | 288 | /* Do not destroy blocks that are not on |
289 | * the global retry list - why? */ | ||
290 | if (list_empty(&block->b_list)) | ||
297 | continue; | 291 | continue; |
298 | kref_get(&block->b_count); | 292 | kref_get(&block->b_count); |
299 | up(&file->f_sema); | 293 | mutex_unlock(&file->f_mutex); |
300 | nlmsvc_unlink_block(block); | 294 | nlmsvc_unlink_block(block); |
301 | nlmsvc_release_block(block); | 295 | nlmsvc_release_block(block); |
302 | goto restart; | 296 | goto restart; |
303 | } | 297 | } |
304 | up(&file->f_sema); | 298 | mutex_unlock(&file->f_mutex); |
305 | } | ||
306 | |||
307 | /* | ||
308 | * Loop over all blocks and perform the action specified. | ||
309 | * (NLM_ACT_CHECK handled by nlmsvc_inspect_file). | ||
310 | */ | ||
311 | void | ||
312 | nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) | ||
313 | { | ||
314 | if (action == NLM_ACT_MARK) | ||
315 | nlmsvc_act_mark(host, file); | ||
316 | else | ||
317 | nlmsvc_act_unlock(host, file); | ||
318 | } | 299 | } |
319 | 300 | ||
320 | /* | 301 | /* |
@@ -325,7 +306,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) | |||
325 | { | 306 | { |
326 | locks_copy_lock(&call->a_args.lock.fl, &lock->fl); | 307 | locks_copy_lock(&call->a_args.lock.fl, &lock->fl); |
327 | memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh)); | 308 | memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh)); |
328 | call->a_args.lock.caller = system_utsname.nodename; | 309 | call->a_args.lock.caller = utsname()->nodename; |
329 | call->a_args.lock.oh.len = lock->oh.len; | 310 | call->a_args.lock.oh.len = lock->oh.len; |
330 | 311 | ||
331 | /* set default data area */ | 312 | /* set default data area */ |
@@ -373,7 +354,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
373 | lock->fl.fl_flags &= ~FL_SLEEP; | 354 | lock->fl.fl_flags &= ~FL_SLEEP; |
374 | again: | 355 | again: |
375 | /* Lock file against concurrent access */ | 356 | /* Lock file against concurrent access */ |
376 | down(&file->f_sema); | 357 | mutex_lock(&file->f_mutex); |
377 | /* Get existing block (in case client is busy-waiting) */ | 358 | /* Get existing block (in case client is busy-waiting) */ |
378 | block = nlmsvc_lookup_block(file, lock); | 359 | block = nlmsvc_lookup_block(file, lock); |
379 | if (block == NULL) { | 360 | if (block == NULL) { |
@@ -411,10 +392,10 @@ again: | |||
411 | 392 | ||
412 | /* If we don't have a block, create and initialize it. Then | 393 | /* If we don't have a block, create and initialize it. Then |
413 | * retry because we may have slept in kmalloc. */ | 394 | * retry because we may have slept in kmalloc. */ |
414 | /* We have to release f_sema as nlmsvc_create_block may try to | 395 | /* We have to release f_mutex as nlmsvc_create_block may try to |
415 | * to claim it while doing host garbage collection */ | 396 | * to claim it while doing host garbage collection */ |
416 | if (newblock == NULL) { | 397 | if (newblock == NULL) { |
417 | up(&file->f_sema); | 398 | mutex_unlock(&file->f_mutex); |
418 | dprintk("lockd: blocking on this lock (allocating).\n"); | 399 | dprintk("lockd: blocking on this lock (allocating).\n"); |
419 | if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie))) | 400 | if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie))) |
420 | return nlm_lck_denied_nolocks; | 401 | return nlm_lck_denied_nolocks; |
@@ -424,7 +405,7 @@ again: | |||
424 | /* Append to list of blocked */ | 405 | /* Append to list of blocked */ |
425 | nlmsvc_insert_block(newblock, NLM_NEVER); | 406 | nlmsvc_insert_block(newblock, NLM_NEVER); |
426 | out: | 407 | out: |
427 | up(&file->f_sema); | 408 | mutex_unlock(&file->f_mutex); |
428 | nlmsvc_release_block(newblock); | 409 | nlmsvc_release_block(newblock); |
429 | nlmsvc_release_block(block); | 410 | nlmsvc_release_block(block); |
430 | dprintk("lockd: nlmsvc_lock returned %u\n", ret); | 411 | dprintk("lockd: nlmsvc_lock returned %u\n", ret); |
@@ -451,6 +432,7 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, | |||
451 | (long long)conflock->fl.fl_start, | 432 | (long long)conflock->fl.fl_start, |
452 | (long long)conflock->fl.fl_end); | 433 | (long long)conflock->fl.fl_end); |
453 | conflock->caller = "somehost"; /* FIXME */ | 434 | conflock->caller = "somehost"; /* FIXME */ |
435 | conflock->len = strlen(conflock->caller); | ||
454 | conflock->oh.len = 0; /* don't return OH info */ | 436 | conflock->oh.len = 0; /* don't return OH info */ |
455 | conflock->svid = conflock->fl.fl_pid; | 437 | conflock->svid = conflock->fl.fl_pid; |
456 | return nlm_lck_denied; | 438 | return nlm_lck_denied; |
@@ -507,9 +489,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) | |||
507 | (long long)lock->fl.fl_start, | 489 | (long long)lock->fl.fl_start, |
508 | (long long)lock->fl.fl_end); | 490 | (long long)lock->fl.fl_end); |
509 | 491 | ||
510 | down(&file->f_sema); | 492 | mutex_lock(&file->f_mutex); |
511 | block = nlmsvc_lookup_block(file, lock); | 493 | block = nlmsvc_lookup_block(file, lock); |
512 | up(&file->f_sema); | 494 | mutex_unlock(&file->f_mutex); |
513 | if (block != NULL) { | 495 | if (block != NULL) { |
514 | status = nlmsvc_unlink_block(block); | 496 | status = nlmsvc_unlink_block(block); |
515 | nlmsvc_release_block(block); | 497 | nlmsvc_release_block(block); |
@@ -527,10 +509,10 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) | |||
527 | static void | 509 | static void |
528 | nlmsvc_notify_blocked(struct file_lock *fl) | 510 | nlmsvc_notify_blocked(struct file_lock *fl) |
529 | { | 511 | { |
530 | struct nlm_block **bp, *block; | 512 | struct nlm_block *block; |
531 | 513 | ||
532 | dprintk("lockd: VFS unblock notification for block %p\n", fl); | 514 | dprintk("lockd: VFS unblock notification for block %p\n", fl); |
533 | for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) { | 515 | list_for_each_entry(block, &nlm_blocked, b_list) { |
534 | if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { | 516 | if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { |
535 | nlmsvc_insert_block(block, 0); | 517 | nlmsvc_insert_block(block, 0); |
536 | svc_wake_up(block->b_daemon); | 518 | svc_wake_up(block->b_daemon); |
@@ -663,17 +645,14 @@ static const struct rpc_call_ops nlmsvc_grant_ops = { | |||
663 | * block. | 645 | * block. |
664 | */ | 646 | */ |
665 | void | 647 | void |
666 | nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status) | 648 | nlmsvc_grant_reply(struct nlm_cookie *cookie, u32 status) |
667 | { | 649 | { |
668 | struct nlm_block *block; | 650 | struct nlm_block *block; |
669 | struct nlm_file *file; | ||
670 | 651 | ||
671 | dprintk("grant_reply: looking for cookie %x, host (%08x), s=%d \n", | 652 | dprintk("grant_reply: looking for cookie %x, s=%d \n", |
672 | *(unsigned int *)(cookie->data), | 653 | *(unsigned int *)(cookie->data), status); |
673 | ntohl(rqstp->rq_addr.sin_addr.s_addr), status); | 654 | if (!(block = nlmsvc_find_block(cookie))) |
674 | if (!(block = nlmsvc_find_block(cookie, &rqstp->rq_addr))) | ||
675 | return; | 655 | return; |
676 | file = block->b_file; | ||
677 | 656 | ||
678 | if (block) { | 657 | if (block) { |
679 | if (status == NLM_LCK_DENIED_GRACE_PERIOD) { | 658 | if (status == NLM_LCK_DENIED_GRACE_PERIOD) { |
@@ -696,16 +675,19 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status | |||
696 | unsigned long | 675 | unsigned long |
697 | nlmsvc_retry_blocked(void) | 676 | nlmsvc_retry_blocked(void) |
698 | { | 677 | { |
699 | struct nlm_block *block; | 678 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; |
679 | struct nlm_block *block; | ||
680 | |||
681 | while (!list_empty(&nlm_blocked)) { | ||
682 | block = list_entry(nlm_blocked.next, struct nlm_block, b_list); | ||
700 | 683 | ||
701 | dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", | ||
702 | nlm_blocked, | ||
703 | nlm_blocked? nlm_blocked->b_when : 0); | ||
704 | while ((block = nlm_blocked) != 0) { | ||
705 | if (block->b_when == NLM_NEVER) | 684 | if (block->b_when == NLM_NEVER) |
706 | break; | 685 | break; |
707 | if (time_after(block->b_when,jiffies)) | 686 | if (time_after(block->b_when,jiffies)) { |
687 | timeout = block->b_when - jiffies; | ||
708 | break; | 688 | break; |
689 | } | ||
690 | |||
709 | dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", | 691 | dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", |
710 | block, block->b_when); | 692 | block, block->b_when); |
711 | kref_get(&block->b_count); | 693 | kref_get(&block->b_count); |
@@ -713,8 +695,5 @@ nlmsvc_retry_blocked(void) | |||
713 | nlmsvc_release_block(block); | 695 | nlmsvc_release_block(block); |
714 | } | 696 | } |
715 | 697 | ||
716 | if ((block = nlm_blocked) && block->b_when != NLM_NEVER) | 698 | return timeout; |
717 | return (block->b_when - jiffies); | ||
718 | |||
719 | return MAX_SCHEDULE_TIMEOUT; | ||
720 | } | 699 | } |
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index dbb66a3b5cd9..75b2c81bcb93 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c | |||
@@ -66,8 +66,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
66 | return nlm_lck_denied_nolocks; | 66 | return nlm_lck_denied_nolocks; |
67 | 67 | ||
68 | /* Obtain host handle */ | 68 | /* Obtain host handle */ |
69 | if (!(host = nlmsvc_lookup_host(rqstp)) | 69 | if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len)) |
70 | || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0)) | 70 | || (argp->monitor && nsm_monitor(host) < 0)) |
71 | goto no_locks; | 71 | goto no_locks; |
72 | *hostp = host; | 72 | *hostp = host; |
73 | 73 | ||
@@ -287,7 +287,9 @@ static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *ar | |||
287 | struct nlm_rqst *call; | 287 | struct nlm_rqst *call; |
288 | int stat; | 288 | int stat; |
289 | 289 | ||
290 | host = nlmsvc_lookup_host(rqstp); | 290 | host = nlmsvc_lookup_host(rqstp, |
291 | argp->lock.caller, | ||
292 | argp->lock.len); | ||
291 | if (host == NULL) | 293 | if (host == NULL) |
292 | return rpc_system_err; | 294 | return rpc_system_err; |
293 | 295 | ||
@@ -449,9 +451,6 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, | |||
449 | void *resp) | 451 | void *resp) |
450 | { | 452 | { |
451 | struct sockaddr_in saddr = rqstp->rq_addr; | 453 | struct sockaddr_in saddr = rqstp->rq_addr; |
452 | int vers = argp->vers; | ||
453 | int prot = argp->proto >> 1; | ||
454 | struct nlm_host *host; | ||
455 | 454 | ||
456 | dprintk("lockd: SM_NOTIFY called\n"); | 455 | dprintk("lockd: SM_NOTIFY called\n"); |
457 | if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) | 456 | if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) |
@@ -466,19 +465,9 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, | |||
466 | /* Obtain the host pointer for this NFS server and try to | 465 | /* Obtain the host pointer for this NFS server and try to |
467 | * reclaim all locks we hold on this server. | 466 | * reclaim all locks we hold on this server. |
468 | */ | 467 | */ |
468 | memset(&saddr, 0, sizeof(saddr)); | ||
469 | saddr.sin_addr.s_addr = argp->addr; | 469 | saddr.sin_addr.s_addr = argp->addr; |
470 | if ((argp->proto & 1)==0) { | 470 | nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); |
471 | if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) { | ||
472 | nlmclnt_recovery(host, argp->state); | ||
473 | nlm_release_host(host); | ||
474 | } | ||
475 | } else { | ||
476 | /* If we run on an NFS server, delete all locks held by the client */ | ||
477 | if ((host = nlm_lookup_host(1, &saddr, prot, vers)) != NULL) { | ||
478 | nlmsvc_free_host_resources(host); | ||
479 | nlm_release_host(host); | ||
480 | } | ||
481 | } | ||
482 | 471 | ||
483 | return rpc_success; | 472 | return rpc_success; |
484 | } | 473 | } |
@@ -495,7 +484,7 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, | |||
495 | 484 | ||
496 | dprintk("lockd: GRANTED_RES called\n"); | 485 | dprintk("lockd: GRANTED_RES called\n"); |
497 | 486 | ||
498 | nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); | 487 | nlmsvc_grant_reply(&argp->cookie, argp->status); |
499 | return rpc_success; | 488 | return rpc_success; |
500 | } | 489 | } |
501 | 490 | ||
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c index 27288c83da96..b9926ce8782e 100644 --- a/fs/lockd/svcshare.c +++ b/fs/lockd/svcshare.c | |||
@@ -85,24 +85,20 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file, | |||
85 | } | 85 | } |
86 | 86 | ||
87 | /* | 87 | /* |
88 | * Traverse all shares for a given file (and host). | 88 | * Traverse all shares for a given file, and delete |
89 | * NLM_ACT_CHECK is handled by nlmsvc_inspect_file. | 89 | * those owned by the given (type of) host |
90 | */ | 90 | */ |
91 | void | 91 | void nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, |
92 | nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action) | 92 | nlm_host_match_fn_t match) |
93 | { | 93 | { |
94 | struct nlm_share *share, **shpp; | 94 | struct nlm_share *share, **shpp; |
95 | 95 | ||
96 | shpp = &file->f_shares; | 96 | shpp = &file->f_shares; |
97 | while ((share = *shpp) != NULL) { | 97 | while ((share = *shpp) != NULL) { |
98 | if (action == NLM_ACT_MARK) | 98 | if (match(share->s_host, host)) { |
99 | share->s_host->h_inuse = 1; | 99 | *shpp = share->s_next; |
100 | else if (action == NLM_ACT_UNLOCK) { | 100 | kfree(share); |
101 | if (host == NULL || host == share->s_host) { | 101 | continue; |
102 | *shpp = share->s_next; | ||
103 | kfree(share); | ||
104 | continue; | ||
105 | } | ||
106 | } | 102 | } |
107 | shpp = &share->s_next; | 103 | shpp = &share->s_next; |
108 | } | 104 | } |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index a92dd98f8401..514f5f20701e 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
@@ -25,9 +25,9 @@ | |||
25 | /* | 25 | /* |
26 | * Global file hash table | 26 | * Global file hash table |
27 | */ | 27 | */ |
28 | #define FILE_HASH_BITS 5 | 28 | #define FILE_HASH_BITS 7 |
29 | #define FILE_NRHASH (1<<FILE_HASH_BITS) | 29 | #define FILE_NRHASH (1<<FILE_HASH_BITS) |
30 | static struct nlm_file * nlm_files[FILE_NRHASH]; | 30 | static struct hlist_head nlm_files[FILE_NRHASH]; |
31 | static DEFINE_MUTEX(nlm_file_mutex); | 31 | static DEFINE_MUTEX(nlm_file_mutex); |
32 | 32 | ||
33 | #ifdef NFSD_DEBUG | 33 | #ifdef NFSD_DEBUG |
@@ -82,6 +82,7 @@ u32 | |||
82 | nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, | 82 | nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, |
83 | struct nfs_fh *f) | 83 | struct nfs_fh *f) |
84 | { | 84 | { |
85 | struct hlist_node *pos; | ||
85 | struct nlm_file *file; | 86 | struct nlm_file *file; |
86 | unsigned int hash; | 87 | unsigned int hash; |
87 | u32 nfserr; | 88 | u32 nfserr; |
@@ -93,7 +94,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, | |||
93 | /* Lock file table */ | 94 | /* Lock file table */ |
94 | mutex_lock(&nlm_file_mutex); | 95 | mutex_lock(&nlm_file_mutex); |
95 | 96 | ||
96 | for (file = nlm_files[hash]; file; file = file->f_next) | 97 | hlist_for_each_entry(file, pos, &nlm_files[hash], f_list) |
97 | if (!nfs_compare_fh(&file->f_handle, f)) | 98 | if (!nfs_compare_fh(&file->f_handle, f)) |
98 | goto found; | 99 | goto found; |
99 | 100 | ||
@@ -105,8 +106,9 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, | |||
105 | goto out_unlock; | 106 | goto out_unlock; |
106 | 107 | ||
107 | memcpy(&file->f_handle, f, sizeof(struct nfs_fh)); | 108 | memcpy(&file->f_handle, f, sizeof(struct nfs_fh)); |
108 | file->f_hash = hash; | 109 | mutex_init(&file->f_mutex); |
109 | init_MUTEX(&file->f_sema); | 110 | INIT_HLIST_NODE(&file->f_list); |
111 | INIT_LIST_HEAD(&file->f_blocks); | ||
110 | 112 | ||
111 | /* Open the file. Note that this must not sleep for too long, else | 113 | /* Open the file. Note that this must not sleep for too long, else |
112 | * we would lock up lockd:-) So no NFS re-exports, folks. | 114 | * we would lock up lockd:-) So no NFS re-exports, folks. |
@@ -115,12 +117,11 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, | |||
115 | * the file. | 117 | * the file. |
116 | */ | 118 | */ |
117 | if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) { | 119 | if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) { |
118 | dprintk("lockd: open failed (nfserr %d)\n", ntohl(nfserr)); | 120 | dprintk("lockd: open failed (error %d)\n", nfserr); |
119 | goto out_free; | 121 | goto out_free; |
120 | } | 122 | } |
121 | 123 | ||
122 | file->f_next = nlm_files[hash]; | 124 | hlist_add_head(&file->f_list, &nlm_files[hash]); |
123 | nlm_files[hash] = file; | ||
124 | 125 | ||
125 | found: | 126 | found: |
126 | dprintk("lockd: found file %p (count %d)\n", file, file->f_count); | 127 | dprintk("lockd: found file %p (count %d)\n", file, file->f_count); |
@@ -149,22 +150,14 @@ out_free: | |||
149 | static inline void | 150 | static inline void |
150 | nlm_delete_file(struct nlm_file *file) | 151 | nlm_delete_file(struct nlm_file *file) |
151 | { | 152 | { |
152 | struct nlm_file **fp, *f; | ||
153 | |||
154 | nlm_debug_print_file("closing file", file); | 153 | nlm_debug_print_file("closing file", file); |
155 | 154 | if (!hlist_unhashed(&file->f_list)) { | |
156 | fp = nlm_files + file->f_hash; | 155 | hlist_del(&file->f_list); |
157 | while ((f = *fp) != NULL) { | 156 | nlmsvc_ops->fclose(file->f_file); |
158 | if (f == file) { | 157 | kfree(file); |
159 | *fp = file->f_next; | 158 | } else { |
160 | nlmsvc_ops->fclose(file->f_file); | 159 | printk(KERN_WARNING "lockd: attempt to release unknown file!\n"); |
161 | kfree(file); | ||
162 | return; | ||
163 | } | ||
164 | fp = &f->f_next; | ||
165 | } | 160 | } |
166 | |||
167 | printk(KERN_WARNING "lockd: attempt to release unknown file!\n"); | ||
168 | } | 161 | } |
169 | 162 | ||
170 | /* | 163 | /* |
@@ -172,7 +165,8 @@ nlm_delete_file(struct nlm_file *file) | |||
172 | * action. | 165 | * action. |
173 | */ | 166 | */ |
174 | static int | 167 | static int |
175 | nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action) | 168 | nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, |
169 | nlm_host_match_fn_t match) | ||
176 | { | 170 | { |
177 | struct inode *inode = nlmsvc_file_inode(file); | 171 | struct inode *inode = nlmsvc_file_inode(file); |
178 | struct file_lock *fl; | 172 | struct file_lock *fl; |
@@ -186,17 +180,11 @@ again: | |||
186 | 180 | ||
187 | /* update current lock count */ | 181 | /* update current lock count */ |
188 | file->f_locks++; | 182 | file->f_locks++; |
183 | |||
189 | lockhost = (struct nlm_host *) fl->fl_owner; | 184 | lockhost = (struct nlm_host *) fl->fl_owner; |
190 | if (action == NLM_ACT_MARK) | 185 | if (match(lockhost, host)) { |
191 | lockhost->h_inuse = 1; | ||
192 | else if (action == NLM_ACT_CHECK) | ||
193 | return 1; | ||
194 | else if (action == NLM_ACT_UNLOCK) { | ||
195 | struct file_lock lock = *fl; | 186 | struct file_lock lock = *fl; |
196 | 187 | ||
197 | if (host && lockhost != host) | ||
198 | continue; | ||
199 | |||
200 | lock.fl_type = F_UNLCK; | 188 | lock.fl_type = F_UNLCK; |
201 | lock.fl_start = 0; | 189 | lock.fl_start = 0; |
202 | lock.fl_end = OFFSET_MAX; | 190 | lock.fl_end = OFFSET_MAX; |
@@ -213,53 +201,66 @@ again: | |||
213 | } | 201 | } |
214 | 202 | ||
215 | /* | 203 | /* |
216 | * Operate on a single file | 204 | * Inspect a single file |
217 | */ | 205 | */ |
218 | static inline int | 206 | static inline int |
219 | nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action) | 207 | nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, nlm_host_match_fn_t match) |
220 | { | 208 | { |
221 | if (action == NLM_ACT_CHECK) { | 209 | nlmsvc_traverse_blocks(host, file, match); |
222 | /* Fast path for mark and sweep garbage collection */ | 210 | nlmsvc_traverse_shares(host, file, match); |
223 | if (file->f_count || file->f_blocks || file->f_shares) | 211 | return nlm_traverse_locks(host, file, match); |
212 | } | ||
213 | |||
214 | /* | ||
215 | * Quick check whether there are still any locks, blocks or | ||
216 | * shares on a given file. | ||
217 | */ | ||
218 | static inline int | ||
219 | nlm_file_inuse(struct nlm_file *file) | ||
220 | { | ||
221 | struct inode *inode = nlmsvc_file_inode(file); | ||
222 | struct file_lock *fl; | ||
223 | |||
224 | if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) | ||
225 | return 1; | ||
226 | |||
227 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { | ||
228 | if (fl->fl_lmops == &nlmsvc_lock_operations) | ||
224 | return 1; | 229 | return 1; |
225 | } else { | ||
226 | nlmsvc_traverse_blocks(host, file, action); | ||
227 | nlmsvc_traverse_shares(host, file, action); | ||
228 | } | 230 | } |
229 | return nlm_traverse_locks(host, file, action); | 231 | file->f_locks = 0; |
232 | return 0; | ||
230 | } | 233 | } |
231 | 234 | ||
232 | /* | 235 | /* |
233 | * Loop over all files in the file table. | 236 | * Loop over all files in the file table. |
234 | */ | 237 | */ |
235 | static int | 238 | static int |
236 | nlm_traverse_files(struct nlm_host *host, int action) | 239 | nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match) |
237 | { | 240 | { |
238 | struct nlm_file *file, **fp; | 241 | struct hlist_node *pos, *next; |
242 | struct nlm_file *file; | ||
239 | int i, ret = 0; | 243 | int i, ret = 0; |
240 | 244 | ||
241 | mutex_lock(&nlm_file_mutex); | 245 | mutex_lock(&nlm_file_mutex); |
242 | for (i = 0; i < FILE_NRHASH; i++) { | 246 | for (i = 0; i < FILE_NRHASH; i++) { |
243 | fp = nlm_files + i; | 247 | hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) { |
244 | while ((file = *fp) != NULL) { | ||
245 | file->f_count++; | 248 | file->f_count++; |
246 | mutex_unlock(&nlm_file_mutex); | 249 | mutex_unlock(&nlm_file_mutex); |
247 | 250 | ||
248 | /* Traverse locks, blocks and shares of this file | 251 | /* Traverse locks, blocks and shares of this file |
249 | * and update file->f_locks count */ | 252 | * and update file->f_locks count */ |
250 | if (nlm_inspect_file(host, file, action)) | 253 | if (nlm_inspect_file(host, file, match)) |
251 | ret = 1; | 254 | ret = 1; |
252 | 255 | ||
253 | mutex_lock(&nlm_file_mutex); | 256 | mutex_lock(&nlm_file_mutex); |
254 | file->f_count--; | 257 | file->f_count--; |
255 | /* No more references to this file. Let go of it. */ | 258 | /* No more references to this file. Let go of it. */ |
256 | if (!file->f_blocks && !file->f_locks | 259 | if (list_empty(&file->f_blocks) && !file->f_locks |
257 | && !file->f_shares && !file->f_count) { | 260 | && !file->f_shares && !file->f_count) { |
258 | *fp = file->f_next; | 261 | hlist_del(&file->f_list); |
259 | nlmsvc_ops->fclose(file->f_file); | 262 | nlmsvc_ops->fclose(file->f_file); |
260 | kfree(file); | 263 | kfree(file); |
261 | } else { | ||
262 | fp = &file->f_next; | ||
263 | } | 264 | } |
264 | } | 265 | } |
265 | } | 266 | } |
@@ -286,23 +287,54 @@ nlm_release_file(struct nlm_file *file) | |||
286 | mutex_lock(&nlm_file_mutex); | 287 | mutex_lock(&nlm_file_mutex); |
287 | 288 | ||
288 | /* If there are no more locks etc, delete the file */ | 289 | /* If there are no more locks etc, delete the file */ |
289 | if(--file->f_count == 0) { | 290 | if (--file->f_count == 0 && !nlm_file_inuse(file)) |
290 | if(!nlm_inspect_file(NULL, file, NLM_ACT_CHECK)) | 291 | nlm_delete_file(file); |
291 | nlm_delete_file(file); | ||
292 | } | ||
293 | 292 | ||
294 | mutex_unlock(&nlm_file_mutex); | 293 | mutex_unlock(&nlm_file_mutex); |
295 | } | 294 | } |
296 | 295 | ||
297 | /* | 296 | /* |
297 | * Helpers function for resource traversal | ||
298 | * | ||
299 | * nlmsvc_mark_host: | ||
300 | * used by the garbage collector; simply sets h_inuse. | ||
301 | * Always returns 0. | ||
302 | * | ||
303 | * nlmsvc_same_host: | ||
304 | * returns 1 iff the two hosts match. Used to release | ||
305 | * all resources bound to a specific host. | ||
306 | * | ||
307 | * nlmsvc_is_client: | ||
308 | * returns 1 iff the host is a client. | ||
309 | * Used by nlmsvc_invalidate_all | ||
310 | */ | ||
311 | static int | ||
312 | nlmsvc_mark_host(struct nlm_host *host, struct nlm_host *dummy) | ||
313 | { | ||
314 | host->h_inuse = 1; | ||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | static int | ||
319 | nlmsvc_same_host(struct nlm_host *host, struct nlm_host *other) | ||
320 | { | ||
321 | return host == other; | ||
322 | } | ||
323 | |||
324 | static int | ||
325 | nlmsvc_is_client(struct nlm_host *host, struct nlm_host *dummy) | ||
326 | { | ||
327 | return host->h_server; | ||
328 | } | ||
329 | |||
330 | /* | ||
298 | * Mark all hosts that still hold resources | 331 | * Mark all hosts that still hold resources |
299 | */ | 332 | */ |
300 | void | 333 | void |
301 | nlmsvc_mark_resources(void) | 334 | nlmsvc_mark_resources(void) |
302 | { | 335 | { |
303 | dprintk("lockd: nlmsvc_mark_resources\n"); | 336 | dprintk("lockd: nlmsvc_mark_resources\n"); |
304 | 337 | nlm_traverse_files(NULL, nlmsvc_mark_host); | |
305 | nlm_traverse_files(NULL, NLM_ACT_MARK); | ||
306 | } | 338 | } |
307 | 339 | ||
308 | /* | 340 | /* |
@@ -313,23 +345,25 @@ nlmsvc_free_host_resources(struct nlm_host *host) | |||
313 | { | 345 | { |
314 | dprintk("lockd: nlmsvc_free_host_resources\n"); | 346 | dprintk("lockd: nlmsvc_free_host_resources\n"); |
315 | 347 | ||
316 | if (nlm_traverse_files(host, NLM_ACT_UNLOCK)) | 348 | if (nlm_traverse_files(host, nlmsvc_same_host)) { |
317 | printk(KERN_WARNING | 349 | printk(KERN_WARNING |
318 | "lockd: couldn't remove all locks held by %s", | 350 | "lockd: couldn't remove all locks held by %s\n", |
319 | host->h_name); | 351 | host->h_name); |
352 | BUG(); | ||
353 | } | ||
320 | } | 354 | } |
321 | 355 | ||
322 | /* | 356 | /* |
323 | * delete all hosts structs for clients | 357 | * Remove all locks held for clients |
324 | */ | 358 | */ |
325 | void | 359 | void |
326 | nlmsvc_invalidate_all(void) | 360 | nlmsvc_invalidate_all(void) |
327 | { | 361 | { |
328 | struct nlm_host *host; | 362 | /* Release all locks held by NFS clients. |
329 | while ((host = nlm_find_client()) != NULL) { | 363 | * Previously, the code would call |
330 | nlmsvc_free_host_resources(host); | 364 | * nlmsvc_free_host_resources for each client in |
331 | host->h_expires = 0; | 365 | * turn, which is about as inefficient as it gets. |
332 | host->h_killed = 1; | 366 | * Now we just do it once in nlm_traverse_files. |
333 | nlm_release_host(host); | 367 | */ |
334 | } | 368 | nlm_traverse_files(NULL, nlmsvc_is_client); |
335 | } | 369 | } |
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 033ea4ac2c30..61c46facf257 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c | |||
@@ -515,7 +515,7 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) | |||
515 | */ | 515 | */ |
516 | #define NLM_void_sz 0 | 516 | #define NLM_void_sz 0 |
517 | #define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) | 517 | #define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) |
518 | #define NLM_caller_sz 1+XDR_QUADLEN(sizeof(system_utsname.nodename)) | 518 | #define NLM_caller_sz 1+XDR_QUADLEN(sizeof(utsname()->nodename)) |
519 | #define NLM_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) | 519 | #define NLM_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) |
520 | /* #define NLM_owner_sz 1+XDR_QUADLEN(NLM_MAXOWNER) */ | 520 | /* #define NLM_owner_sz 1+XDR_QUADLEN(NLM_MAXOWNER) */ |
521 | #define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE) | 521 | #define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE) |
diff --git a/fs/locks.c b/fs/locks.c index 21dfadfca2bc..e0b6a80649a0 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -1514,7 +1514,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | |||
1514 | goto out_unlock; | 1514 | goto out_unlock; |
1515 | } | 1515 | } |
1516 | 1516 | ||
1517 | error = f_setown(filp, current->pid, 0); | 1517 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); |
1518 | out_unlock: | 1518 | out_unlock: |
1519 | unlock_kernel(); | 1519 | unlock_kernel(); |
1520 | return error; | 1520 | return error; |
diff --git a/fs/namespace.c b/fs/namespace.c index 66d921e14fee..55442a6cf221 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -133,7 +133,7 @@ struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) | |||
133 | 133 | ||
134 | static inline int check_mnt(struct vfsmount *mnt) | 134 | static inline int check_mnt(struct vfsmount *mnt) |
135 | { | 135 | { |
136 | return mnt->mnt_namespace == current->namespace; | 136 | return mnt->mnt_namespace == current->nsproxy->namespace; |
137 | } | 137 | } |
138 | 138 | ||
139 | static void touch_namespace(struct namespace *ns) | 139 | static void touch_namespace(struct namespace *ns) |
@@ -830,7 +830,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
830 | if (parent_nd) { | 830 | if (parent_nd) { |
831 | detach_mnt(source_mnt, parent_nd); | 831 | detach_mnt(source_mnt, parent_nd); |
832 | attach_mnt(source_mnt, nd); | 832 | attach_mnt(source_mnt, nd); |
833 | touch_namespace(current->namespace); | 833 | touch_namespace(current->nsproxy->namespace); |
834 | } else { | 834 | } else { |
835 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); | 835 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); |
836 | commit_tree(source_mnt); | 836 | commit_tree(source_mnt); |
@@ -1441,7 +1441,7 @@ dput_out: | |||
1441 | */ | 1441 | */ |
1442 | struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs) | 1442 | struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs) |
1443 | { | 1443 | { |
1444 | struct namespace *namespace = tsk->namespace; | 1444 | struct namespace *namespace = tsk->nsproxy->namespace; |
1445 | struct namespace *new_ns; | 1445 | struct namespace *new_ns; |
1446 | struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; | 1446 | struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; |
1447 | struct vfsmount *p, *q; | 1447 | struct vfsmount *p, *q; |
@@ -1508,7 +1508,7 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs) | |||
1508 | 1508 | ||
1509 | int copy_namespace(int flags, struct task_struct *tsk) | 1509 | int copy_namespace(int flags, struct task_struct *tsk) |
1510 | { | 1510 | { |
1511 | struct namespace *namespace = tsk->namespace; | 1511 | struct namespace *namespace = tsk->nsproxy->namespace; |
1512 | struct namespace *new_ns; | 1512 | struct namespace *new_ns; |
1513 | int err = 0; | 1513 | int err = 0; |
1514 | 1514 | ||
@@ -1531,7 +1531,7 @@ int copy_namespace(int flags, struct task_struct *tsk) | |||
1531 | goto out; | 1531 | goto out; |
1532 | } | 1532 | } |
1533 | 1533 | ||
1534 | tsk->namespace = new_ns; | 1534 | tsk->nsproxy->namespace = new_ns; |
1535 | 1535 | ||
1536 | out: | 1536 | out: |
1537 | put_namespace(namespace); | 1537 | put_namespace(namespace); |
@@ -1754,7 +1754,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root, | |||
1754 | detach_mnt(user_nd.mnt, &root_parent); | 1754 | detach_mnt(user_nd.mnt, &root_parent); |
1755 | attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */ | 1755 | attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */ |
1756 | attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */ | 1756 | attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */ |
1757 | touch_namespace(current->namespace); | 1757 | touch_namespace(current->nsproxy->namespace); |
1758 | spin_unlock(&vfsmount_lock); | 1758 | spin_unlock(&vfsmount_lock); |
1759 | chroot_fs_refs(&user_nd, &new_nd); | 1759 | chroot_fs_refs(&user_nd, &new_nd); |
1760 | security_sb_post_pivotroot(&user_nd, &new_nd); | 1760 | security_sb_post_pivotroot(&user_nd, &new_nd); |
@@ -1780,7 +1780,6 @@ static void __init init_mount_tree(void) | |||
1780 | { | 1780 | { |
1781 | struct vfsmount *mnt; | 1781 | struct vfsmount *mnt; |
1782 | struct namespace *namespace; | 1782 | struct namespace *namespace; |
1783 | struct task_struct *g, *p; | ||
1784 | 1783 | ||
1785 | mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); | 1784 | mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); |
1786 | if (IS_ERR(mnt)) | 1785 | if (IS_ERR(mnt)) |
@@ -1796,13 +1795,8 @@ static void __init init_mount_tree(void) | |||
1796 | namespace->root = mnt; | 1795 | namespace->root = mnt; |
1797 | mnt->mnt_namespace = namespace; | 1796 | mnt->mnt_namespace = namespace; |
1798 | 1797 | ||
1799 | init_task.namespace = namespace; | 1798 | init_task.nsproxy->namespace = namespace; |
1800 | read_lock(&tasklist_lock); | 1799 | get_namespace(namespace); |
1801 | do_each_thread(g, p) { | ||
1802 | get_namespace(namespace); | ||
1803 | p->namespace = namespace; | ||
1804 | } while_each_thread(g, p); | ||
1805 | read_unlock(&tasklist_lock); | ||
1806 | 1800 | ||
1807 | set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root); | 1801 | set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root); |
1808 | set_fs_root(current->fs, namespace->root, namespace->root->mnt_root); | 1802 | set_fs_root(current->fs, namespace->root, namespace->root->mnt_root); |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a3ee11364db0..7933e2e99dbc 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -58,7 +58,6 @@ module_param_call(callback_tcpport, param_set_port, param_get_int, | |||
58 | */ | 58 | */ |
59 | static void nfs_callback_svc(struct svc_rqst *rqstp) | 59 | static void nfs_callback_svc(struct svc_rqst *rqstp) |
60 | { | 60 | { |
61 | struct svc_serv *serv = rqstp->rq_server; | ||
62 | int err; | 61 | int err; |
63 | 62 | ||
64 | __module_get(THIS_MODULE); | 63 | __module_get(THIS_MODULE); |
@@ -80,7 +79,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) | |||
80 | /* | 79 | /* |
81 | * Listen for a request on the socket | 80 | * Listen for a request on the socket |
82 | */ | 81 | */ |
83 | err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT); | 82 | err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT); |
84 | if (err == -EAGAIN || err == -EINTR) | 83 | if (err == -EAGAIN || err == -EINTR) |
85 | continue; | 84 | continue; |
86 | if (err < 0) { | 85 | if (err < 0) { |
@@ -91,7 +90,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) | |||
91 | } | 90 | } |
92 | dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__, | 91 | dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__, |
93 | NIPQUAD(rqstp->rq_addr.sin_addr.s_addr)); | 92 | NIPQUAD(rqstp->rq_addr.sin_addr.s_addr)); |
94 | svc_process(serv, rqstp); | 93 | svc_process(rqstp); |
95 | } | 94 | } |
96 | 95 | ||
97 | svc_exit_thread(rqstp); | 96 | svc_exit_thread(rqstp); |
@@ -116,7 +115,7 @@ int nfs_callback_up(void) | |||
116 | goto out; | 115 | goto out; |
117 | init_completion(&nfs_callback_info.started); | 116 | init_completion(&nfs_callback_info.started); |
118 | init_completion(&nfs_callback_info.stopped); | 117 | init_completion(&nfs_callback_info.stopped); |
119 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE); | 118 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); |
120 | ret = -ENOMEM; | 119 | ret = -ENOMEM; |
121 | if (!serv) | 120 | if (!serv) |
122 | goto out_err; | 121 | goto out_err; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ec1938d4b814..8106f3b29e4a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -460,7 +460,8 @@ static int nfs_start_lockd(struct nfs_server *server) | |||
460 | goto out; | 460 | goto out; |
461 | if (server->flags & NFS_MOUNT_NONLM) | 461 | if (server->flags & NFS_MOUNT_NONLM) |
462 | goto out; | 462 | goto out; |
463 | error = lockd_up(); | 463 | error = lockd_up((server->flags & NFS_MOUNT_TCP) ? |
464 | IPPROTO_TCP : IPPROTO_UDP); | ||
464 | if (error < 0) | 465 | if (error < 0) |
465 | server->flags |= NFS_MOUNT_NONLM; | 466 | server->flags |= NFS_MOUNT_NONLM; |
466 | else | 467 | else |
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index c0a754ecdee6..1d656a645199 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
@@ -312,7 +312,7 @@ static int __init root_nfs_name(char *name) | |||
312 | /* Override them by options set on kernel command-line */ | 312 | /* Override them by options set on kernel command-line */ |
313 | root_nfs_parse(name, buf); | 313 | root_nfs_parse(name, buf); |
314 | 314 | ||
315 | cp = system_utsname.nodename; | 315 | cp = utsname()->nodename; |
316 | if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { | 316 | if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { |
317 | printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); | 317 | printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); |
318 | return -1; | 318 | return -1; |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 01bc68c628ad..e13fa23bd108 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -319,12 +319,25 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) | |||
319 | 319 | ||
320 | static struct cache_head *export_table[EXPORT_HASHMAX]; | 320 | static struct cache_head *export_table[EXPORT_HASHMAX]; |
321 | 321 | ||
322 | static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) | ||
323 | { | ||
324 | int i; | ||
325 | |||
326 | for (i = 0; i < fsloc->locations_count; i++) { | ||
327 | kfree(fsloc->locations[i].path); | ||
328 | kfree(fsloc->locations[i].hosts); | ||
329 | } | ||
330 | kfree(fsloc->locations); | ||
331 | } | ||
332 | |||
322 | static void svc_export_put(struct kref *ref) | 333 | static void svc_export_put(struct kref *ref) |
323 | { | 334 | { |
324 | struct svc_export *exp = container_of(ref, struct svc_export, h.ref); | 335 | struct svc_export *exp = container_of(ref, struct svc_export, h.ref); |
325 | dput(exp->ex_dentry); | 336 | dput(exp->ex_dentry); |
326 | mntput(exp->ex_mnt); | 337 | mntput(exp->ex_mnt); |
327 | auth_domain_put(exp->ex_client); | 338 | auth_domain_put(exp->ex_client); |
339 | kfree(exp->ex_path); | ||
340 | nfsd4_fslocs_free(&exp->ex_fslocs); | ||
328 | kfree(exp); | 341 | kfree(exp); |
329 | } | 342 | } |
330 | 343 | ||
@@ -370,7 +383,7 @@ static int check_export(struct inode *inode, int flags) | |||
370 | */ | 383 | */ |
371 | if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && | 384 | if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && |
372 | !(flags & NFSEXP_FSID)) { | 385 | !(flags & NFSEXP_FSID)) { |
373 | dprintk("exp_export: export of non-dev fs without fsid"); | 386 | dprintk("exp_export: export of non-dev fs without fsid\n"); |
374 | return -EINVAL; | 387 | return -EINVAL; |
375 | } | 388 | } |
376 | if (!inode->i_sb->s_export_op) { | 389 | if (!inode->i_sb->s_export_op) { |
@@ -386,6 +399,69 @@ static int check_export(struct inode *inode, int flags) | |||
386 | 399 | ||
387 | } | 400 | } |
388 | 401 | ||
402 | #ifdef CONFIG_NFSD_V4 | ||
403 | |||
404 | static int | ||
405 | fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) | ||
406 | { | ||
407 | int len; | ||
408 | int migrated, i, err; | ||
409 | |||
410 | len = qword_get(mesg, buf, PAGE_SIZE); | ||
411 | if (len != 5 || memcmp(buf, "fsloc", 5)) | ||
412 | return 0; | ||
413 | |||
414 | /* listsize */ | ||
415 | err = get_int(mesg, &fsloc->locations_count); | ||
416 | if (err) | ||
417 | return err; | ||
418 | if (fsloc->locations_count > MAX_FS_LOCATIONS) | ||
419 | return -EINVAL; | ||
420 | if (fsloc->locations_count == 0) | ||
421 | return 0; | ||
422 | |||
423 | fsloc->locations = kzalloc(fsloc->locations_count | ||
424 | * sizeof(struct nfsd4_fs_location), GFP_KERNEL); | ||
425 | if (!fsloc->locations) | ||
426 | return -ENOMEM; | ||
427 | for (i=0; i < fsloc->locations_count; i++) { | ||
428 | /* colon separated host list */ | ||
429 | err = -EINVAL; | ||
430 | len = qword_get(mesg, buf, PAGE_SIZE); | ||
431 | if (len <= 0) | ||
432 | goto out_free_all; | ||
433 | err = -ENOMEM; | ||
434 | fsloc->locations[i].hosts = kstrdup(buf, GFP_KERNEL); | ||
435 | if (!fsloc->locations[i].hosts) | ||
436 | goto out_free_all; | ||
437 | err = -EINVAL; | ||
438 | /* slash separated path component list */ | ||
439 | len = qword_get(mesg, buf, PAGE_SIZE); | ||
440 | if (len <= 0) | ||
441 | goto out_free_all; | ||
442 | err = -ENOMEM; | ||
443 | fsloc->locations[i].path = kstrdup(buf, GFP_KERNEL); | ||
444 | if (!fsloc->locations[i].path) | ||
445 | goto out_free_all; | ||
446 | } | ||
447 | /* migrated */ | ||
448 | err = get_int(mesg, &migrated); | ||
449 | if (err) | ||
450 | goto out_free_all; | ||
451 | err = -EINVAL; | ||
452 | if (migrated < 0 || migrated > 1) | ||
453 | goto out_free_all; | ||
454 | fsloc->migrated = migrated; | ||
455 | return 0; | ||
456 | out_free_all: | ||
457 | nfsd4_fslocs_free(fsloc); | ||
458 | return err; | ||
459 | } | ||
460 | |||
461 | #else /* CONFIG_NFSD_V4 */ | ||
462 | static inline int fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) { return 0; } | ||
463 | #endif | ||
464 | |||
389 | static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | 465 | static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) |
390 | { | 466 | { |
391 | /* client path expiry [flags anonuid anongid fsid] */ | 467 | /* client path expiry [flags anonuid anongid fsid] */ |
@@ -398,6 +474,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
398 | int an_int; | 474 | int an_int; |
399 | 475 | ||
400 | nd.dentry = NULL; | 476 | nd.dentry = NULL; |
477 | exp.ex_path = NULL; | ||
401 | 478 | ||
402 | if (mesg[mlen-1] != '\n') | 479 | if (mesg[mlen-1] != '\n') |
403 | return -EINVAL; | 480 | return -EINVAL; |
@@ -428,6 +505,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
428 | exp.ex_client = dom; | 505 | exp.ex_client = dom; |
429 | exp.ex_mnt = nd.mnt; | 506 | exp.ex_mnt = nd.mnt; |
430 | exp.ex_dentry = nd.dentry; | 507 | exp.ex_dentry = nd.dentry; |
508 | exp.ex_path = kstrdup(buf, GFP_KERNEL); | ||
509 | err = -ENOMEM; | ||
510 | if (!exp.ex_path) | ||
511 | goto out; | ||
431 | 512 | ||
432 | /* expiry */ | 513 | /* expiry */ |
433 | err = -EINVAL; | 514 | err = -EINVAL; |
@@ -435,6 +516,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
435 | if (exp.h.expiry_time == 0) | 516 | if (exp.h.expiry_time == 0) |
436 | goto out; | 517 | goto out; |
437 | 518 | ||
519 | /* fs locations */ | ||
520 | exp.ex_fslocs.locations = NULL; | ||
521 | exp.ex_fslocs.locations_count = 0; | ||
522 | exp.ex_fslocs.migrated = 0; | ||
523 | |||
438 | /* flags */ | 524 | /* flags */ |
439 | err = get_int(&mesg, &an_int); | 525 | err = get_int(&mesg, &an_int); |
440 | if (err == -ENOENT) | 526 | if (err == -ENOENT) |
@@ -460,6 +546,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
460 | 546 | ||
461 | err = check_export(nd.dentry->d_inode, exp.ex_flags); | 547 | err = check_export(nd.dentry->d_inode, exp.ex_flags); |
462 | if (err) goto out; | 548 | if (err) goto out; |
549 | |||
550 | err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); | ||
551 | if (err) | ||
552 | goto out; | ||
463 | } | 553 | } |
464 | 554 | ||
465 | expp = svc_export_lookup(&exp); | 555 | expp = svc_export_lookup(&exp); |
@@ -473,6 +563,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
473 | else | 563 | else |
474 | exp_put(expp); | 564 | exp_put(expp); |
475 | out: | 565 | out: |
566 | kfree(exp.ex_path); | ||
476 | if (nd.dentry) | 567 | if (nd.dentry) |
477 | path_release(&nd); | 568 | path_release(&nd); |
478 | out_no_path: | 569 | out_no_path: |
@@ -482,7 +573,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
482 | return err; | 573 | return err; |
483 | } | 574 | } |
484 | 575 | ||
485 | static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong); | 576 | static void exp_flags(struct seq_file *m, int flag, int fsid, |
577 | uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); | ||
486 | 578 | ||
487 | static int svc_export_show(struct seq_file *m, | 579 | static int svc_export_show(struct seq_file *m, |
488 | struct cache_detail *cd, | 580 | struct cache_detail *cd, |
@@ -501,8 +593,8 @@ static int svc_export_show(struct seq_file *m, | |||
501 | seq_putc(m, '('); | 593 | seq_putc(m, '('); |
502 | if (test_bit(CACHE_VALID, &h->flags) && | 594 | if (test_bit(CACHE_VALID, &h->flags) && |
503 | !test_bit(CACHE_NEGATIVE, &h->flags)) | 595 | !test_bit(CACHE_NEGATIVE, &h->flags)) |
504 | exp_flags(m, exp->ex_flags, exp->ex_fsid, | 596 | exp_flags(m, exp->ex_flags, exp->ex_fsid, |
505 | exp->ex_anon_uid, exp->ex_anon_gid); | 597 | exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs); |
506 | seq_puts(m, ")\n"); | 598 | seq_puts(m, ")\n"); |
507 | return 0; | 599 | return 0; |
508 | } | 600 | } |
@@ -524,6 +616,10 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) | |||
524 | new->ex_client = item->ex_client; | 616 | new->ex_client = item->ex_client; |
525 | new->ex_dentry = dget(item->ex_dentry); | 617 | new->ex_dentry = dget(item->ex_dentry); |
526 | new->ex_mnt = mntget(item->ex_mnt); | 618 | new->ex_mnt = mntget(item->ex_mnt); |
619 | new->ex_path = NULL; | ||
620 | new->ex_fslocs.locations = NULL; | ||
621 | new->ex_fslocs.locations_count = 0; | ||
622 | new->ex_fslocs.migrated = 0; | ||
527 | } | 623 | } |
528 | 624 | ||
529 | static void export_update(struct cache_head *cnew, struct cache_head *citem) | 625 | static void export_update(struct cache_head *cnew, struct cache_head *citem) |
@@ -535,6 +631,14 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) | |||
535 | new->ex_anon_uid = item->ex_anon_uid; | 631 | new->ex_anon_uid = item->ex_anon_uid; |
536 | new->ex_anon_gid = item->ex_anon_gid; | 632 | new->ex_anon_gid = item->ex_anon_gid; |
537 | new->ex_fsid = item->ex_fsid; | 633 | new->ex_fsid = item->ex_fsid; |
634 | new->ex_path = item->ex_path; | ||
635 | item->ex_path = NULL; | ||
636 | new->ex_fslocs.locations = item->ex_fslocs.locations; | ||
637 | item->ex_fslocs.locations = NULL; | ||
638 | new->ex_fslocs.locations_count = item->ex_fslocs.locations_count; | ||
639 | item->ex_fslocs.locations_count = 0; | ||
640 | new->ex_fslocs.migrated = item->ex_fslocs.migrated; | ||
641 | item->ex_fslocs.migrated = 0; | ||
538 | } | 642 | } |
539 | 643 | ||
540 | static struct cache_head *svc_export_alloc(void) | 644 | static struct cache_head *svc_export_alloc(void) |
@@ -1048,36 +1152,28 @@ int | |||
1048 | exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, | 1152 | exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, |
1049 | struct cache_req *creq) | 1153 | struct cache_req *creq) |
1050 | { | 1154 | { |
1051 | struct svc_expkey *fsid_key; | ||
1052 | struct svc_export *exp; | 1155 | struct svc_export *exp; |
1053 | int rv; | 1156 | int rv; |
1054 | u32 fsidv[2]; | 1157 | u32 fsidv[2]; |
1055 | 1158 | ||
1056 | mk_fsid_v1(fsidv, 0); | 1159 | mk_fsid_v1(fsidv, 0); |
1057 | 1160 | ||
1058 | fsid_key = exp_find_key(clp, 1, fsidv, creq); | 1161 | exp = exp_find(clp, 1, fsidv, creq); |
1059 | if (IS_ERR(fsid_key) && PTR_ERR(fsid_key) == -EAGAIN) | 1162 | if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN) |
1060 | return nfserr_dropit; | 1163 | return nfserr_dropit; |
1061 | if (!fsid_key || IS_ERR(fsid_key)) | ||
1062 | return nfserr_perm; | ||
1063 | |||
1064 | exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq); | ||
1065 | if (exp == NULL) | 1164 | if (exp == NULL) |
1066 | rv = nfserr_perm; | 1165 | return nfserr_perm; |
1067 | else if (IS_ERR(exp)) | 1166 | else if (IS_ERR(exp)) |
1068 | rv = nfserrno(PTR_ERR(exp)); | 1167 | return nfserrno(PTR_ERR(exp)); |
1069 | else { | 1168 | rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); |
1070 | rv = fh_compose(fhp, exp, | 1169 | exp_put(exp); |
1071 | fsid_key->ek_dentry, NULL); | ||
1072 | exp_put(exp); | ||
1073 | } | ||
1074 | cache_put(&fsid_key->h, &svc_expkey_cache); | ||
1075 | return rv; | 1170 | return rv; |
1076 | } | 1171 | } |
1077 | 1172 | ||
1078 | /* Iterator */ | 1173 | /* Iterator */ |
1079 | 1174 | ||
1080 | static void *e_start(struct seq_file *m, loff_t *pos) | 1175 | static void *e_start(struct seq_file *m, loff_t *pos) |
1176 | __acquires(svc_export_cache.hash_lock) | ||
1081 | { | 1177 | { |
1082 | loff_t n = *pos; | 1178 | loff_t n = *pos; |
1083 | unsigned hash, export; | 1179 | unsigned hash, export; |
@@ -1086,7 +1182,7 @@ static void *e_start(struct seq_file *m, loff_t *pos) | |||
1086 | exp_readlock(); | 1182 | exp_readlock(); |
1087 | read_lock(&svc_export_cache.hash_lock); | 1183 | read_lock(&svc_export_cache.hash_lock); |
1088 | if (!n--) | 1184 | if (!n--) |
1089 | return (void *)1; | 1185 | return SEQ_START_TOKEN; |
1090 | hash = n >> 32; | 1186 | hash = n >> 32; |
1091 | export = n & ((1LL<<32) - 1); | 1187 | export = n & ((1LL<<32) - 1); |
1092 | 1188 | ||
@@ -1110,7 +1206,7 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos) | |||
1110 | struct cache_head *ch = p; | 1206 | struct cache_head *ch = p; |
1111 | int hash = (*pos >> 32); | 1207 | int hash = (*pos >> 32); |
1112 | 1208 | ||
1113 | if (p == (void *)1) | 1209 | if (p == SEQ_START_TOKEN) |
1114 | hash = 0; | 1210 | hash = 0; |
1115 | else if (ch->next == NULL) { | 1211 | else if (ch->next == NULL) { |
1116 | hash++; | 1212 | hash++; |
@@ -1131,6 +1227,7 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos) | |||
1131 | } | 1227 | } |
1132 | 1228 | ||
1133 | static void e_stop(struct seq_file *m, void *p) | 1229 | static void e_stop(struct seq_file *m, void *p) |
1230 | __releases(svc_export_cache.hash_lock) | ||
1134 | { | 1231 | { |
1135 | read_unlock(&svc_export_cache.hash_lock); | 1232 | read_unlock(&svc_export_cache.hash_lock); |
1136 | exp_readunlock(); | 1233 | exp_readunlock(); |
@@ -1156,7 +1253,8 @@ static struct flags { | |||
1156 | { 0, {"", ""}} | 1253 | { 0, {"", ""}} |
1157 | }; | 1254 | }; |
1158 | 1255 | ||
1159 | static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong) | 1256 | static void exp_flags(struct seq_file *m, int flag, int fsid, |
1257 | uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) | ||
1160 | { | 1258 | { |
1161 | int first = 0; | 1259 | int first = 0; |
1162 | struct flags *flg; | 1260 | struct flags *flg; |
@@ -1172,21 +1270,34 @@ static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t | |||
1172 | seq_printf(m, "%sanonuid=%d", first++?",":"", anonu); | 1270 | seq_printf(m, "%sanonuid=%d", first++?",":"", anonu); |
1173 | if (anong != (gid_t)-2 && anong != (0x10000-2)) | 1271 | if (anong != (gid_t)-2 && anong != (0x10000-2)) |
1174 | seq_printf(m, "%sanongid=%d", first++?",":"", anong); | 1272 | seq_printf(m, "%sanongid=%d", first++?",":"", anong); |
1273 | if (fsloc && fsloc->locations_count > 0) { | ||
1274 | char *loctype = (fsloc->migrated) ? "refer" : "replicas"; | ||
1275 | int i; | ||
1276 | |||
1277 | seq_printf(m, "%s%s=", first++?",":"", loctype); | ||
1278 | seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\"); | ||
1279 | seq_putc(m, '@'); | ||
1280 | seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\"); | ||
1281 | for (i = 1; i < fsloc->locations_count; i++) { | ||
1282 | seq_putc(m, ';'); | ||
1283 | seq_escape(m, fsloc->locations[i].path, ",;@ \t\n\\"); | ||
1284 | seq_putc(m, '@'); | ||
1285 | seq_escape(m, fsloc->locations[i].hosts, ",;@ \t\n\\"); | ||
1286 | } | ||
1287 | } | ||
1175 | } | 1288 | } |
1176 | 1289 | ||
1177 | static int e_show(struct seq_file *m, void *p) | 1290 | static int e_show(struct seq_file *m, void *p) |
1178 | { | 1291 | { |
1179 | struct cache_head *cp = p; | 1292 | struct cache_head *cp = p; |
1180 | struct svc_export *exp = container_of(cp, struct svc_export, h); | 1293 | struct svc_export *exp = container_of(cp, struct svc_export, h); |
1181 | svc_client *clp; | ||
1182 | 1294 | ||
1183 | if (p == (void *)1) { | 1295 | if (p == SEQ_START_TOKEN) { |
1184 | seq_puts(m, "# Version 1.1\n"); | 1296 | seq_puts(m, "# Version 1.1\n"); |
1185 | seq_puts(m, "# Path Client(Flags) # IPs\n"); | 1297 | seq_puts(m, "# Path Client(Flags) # IPs\n"); |
1186 | return 0; | 1298 | return 0; |
1187 | } | 1299 | } |
1188 | 1300 | ||
1189 | clp = exp->ex_client; | ||
1190 | cache_get(&exp->h); | 1301 | cache_get(&exp->h); |
1191 | if (cache_check(&svc_export_cache, &exp->h, NULL)) | 1302 | if (cache_check(&svc_export_cache, &exp->h, NULL)) |
1192 | return 0; | 1303 | return 0; |
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index fc95c4df6693..9187755661df 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nfsd/nfsacl.c | 2 | * linux/fs/nfsd/nfs2acl.c |
3 | * | 3 | * |
4 | * Process version 2 NFSACL requests. | 4 | * Process version 2 NFSACL requests. |
5 | * | 5 | * |
@@ -241,7 +241,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, | |||
241 | 241 | ||
242 | rqstp->rq_res.page_len = w; | 242 | rqstp->rq_res.page_len = w; |
243 | while (w > 0) { | 243 | while (w > 0) { |
244 | if (!svc_take_res_page(rqstp)) | 244 | if (!rqstp->rq_respages[rqstp->rq_resused++]) |
245 | return 0; | 245 | return 0; |
246 | w -= PAGE_SIZE; | 246 | w -= PAGE_SIZE; |
247 | } | 247 | } |
@@ -333,4 +333,5 @@ struct svc_version nfsd_acl_version2 = { | |||
333 | .vs_proc = nfsd_acl_procedures2, | 333 | .vs_proc = nfsd_acl_procedures2, |
334 | .vs_dispatch = nfsd_dispatch, | 334 | .vs_dispatch = nfsd_dispatch, |
335 | .vs_xdrsize = NFS3_SVC_XDRSIZE, | 335 | .vs_xdrsize = NFS3_SVC_XDRSIZE, |
336 | .vs_hidden = 1, | ||
336 | }; | 337 | }; |
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 16e10c170aed..d4bdc00c1169 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c | |||
@@ -185,7 +185,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, | |||
185 | 185 | ||
186 | rqstp->rq_res.page_len = w; | 186 | rqstp->rq_res.page_len = w; |
187 | while (w > 0) { | 187 | while (w > 0) { |
188 | if (!svc_take_res_page(rqstp)) | 188 | if (!rqstp->rq_respages[rqstp->rq_resused++]) |
189 | return 0; | 189 | return 0; |
190 | w -= PAGE_SIZE; | 190 | w -= PAGE_SIZE; |
191 | } | 191 | } |
@@ -263,5 +263,6 @@ struct svc_version nfsd_acl_version3 = { | |||
263 | .vs_proc = nfsd_acl_procedures3, | 263 | .vs_proc = nfsd_acl_procedures3, |
264 | .vs_dispatch = nfsd_dispatch, | 264 | .vs_dispatch = nfsd_dispatch, |
265 | .vs_xdrsize = NFS3_SVC_XDRSIZE, | 265 | .vs_xdrsize = NFS3_SVC_XDRSIZE, |
266 | .vs_hidden = 1, | ||
266 | }; | 267 | }; |
267 | 268 | ||
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index f61142afea44..a5ebc7dbb384 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -160,6 +160,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, | |||
160 | struct nfsd3_readres *resp) | 160 | struct nfsd3_readres *resp) |
161 | { | 161 | { |
162 | int nfserr; | 162 | int nfserr; |
163 | u32 max_blocksize = svc_max_payload(rqstp); | ||
163 | 164 | ||
164 | dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", | 165 | dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", |
165 | SVCFH_fmt(&argp->fh), | 166 | SVCFH_fmt(&argp->fh), |
@@ -172,15 +173,15 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, | |||
172 | */ | 173 | */ |
173 | 174 | ||
174 | resp->count = argp->count; | 175 | resp->count = argp->count; |
175 | if (NFSSVC_MAXBLKSIZE < resp->count) | 176 | if (max_blocksize < resp->count) |
176 | resp->count = NFSSVC_MAXBLKSIZE; | 177 | resp->count = max_blocksize; |
177 | 178 | ||
178 | svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); | 179 | svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); |
179 | 180 | ||
180 | fh_copy(&resp->fh, &argp->fh); | 181 | fh_copy(&resp->fh, &argp->fh); |
181 | nfserr = nfsd_read(rqstp, &resp->fh, NULL, | 182 | nfserr = nfsd_read(rqstp, &resp->fh, NULL, |
182 | argp->offset, | 183 | argp->offset, |
183 | argp->vec, argp->vlen, | 184 | rqstp->rq_vec, argp->vlen, |
184 | &resp->count); | 185 | &resp->count); |
185 | if (nfserr == 0) { | 186 | if (nfserr == 0) { |
186 | struct inode *inode = resp->fh.fh_dentry->d_inode; | 187 | struct inode *inode = resp->fh.fh_dentry->d_inode; |
@@ -210,7 +211,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, | |||
210 | resp->committed = argp->stable; | 211 | resp->committed = argp->stable; |
211 | nfserr = nfsd_write(rqstp, &resp->fh, NULL, | 212 | nfserr = nfsd_write(rqstp, &resp->fh, NULL, |
212 | argp->offset, | 213 | argp->offset, |
213 | argp->vec, argp->vlen, | 214 | rqstp->rq_vec, argp->vlen, |
214 | argp->len, | 215 | argp->len, |
215 | &resp->committed); | 216 | &resp->committed); |
216 | resp->count = argp->count; | 217 | resp->count = argp->count; |
@@ -538,15 +539,16 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
538 | struct nfsd3_fsinfores *resp) | 539 | struct nfsd3_fsinfores *resp) |
539 | { | 540 | { |
540 | int nfserr; | 541 | int nfserr; |
542 | u32 max_blocksize = svc_max_payload(rqstp); | ||
541 | 543 | ||
542 | dprintk("nfsd: FSINFO(3) %s\n", | 544 | dprintk("nfsd: FSINFO(3) %s\n", |
543 | SVCFH_fmt(&argp->fh)); | 545 | SVCFH_fmt(&argp->fh)); |
544 | 546 | ||
545 | resp->f_rtmax = NFSSVC_MAXBLKSIZE; | 547 | resp->f_rtmax = max_blocksize; |
546 | resp->f_rtpref = NFSSVC_MAXBLKSIZE; | 548 | resp->f_rtpref = max_blocksize; |
547 | resp->f_rtmult = PAGE_SIZE; | 549 | resp->f_rtmult = PAGE_SIZE; |
548 | resp->f_wtmax = NFSSVC_MAXBLKSIZE; | 550 | resp->f_wtmax = max_blocksize; |
549 | resp->f_wtpref = NFSSVC_MAXBLKSIZE; | 551 | resp->f_wtpref = max_blocksize; |
550 | resp->f_wtmult = PAGE_SIZE; | 552 | resp->f_wtmult = PAGE_SIZE; |
551 | resp->f_dtpref = PAGE_SIZE; | 553 | resp->f_dtpref = PAGE_SIZE; |
552 | resp->f_maxfilesize = ~(u32) 0; | 554 | resp->f_maxfilesize = ~(u32) 0; |
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 243d94b9653a..247d518248bf 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c | |||
@@ -330,6 +330,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, | |||
330 | { | 330 | { |
331 | unsigned int len; | 331 | unsigned int len; |
332 | int v,pn; | 332 | int v,pn; |
333 | u32 max_blocksize = svc_max_payload(rqstp); | ||
333 | 334 | ||
334 | if (!(p = decode_fh(p, &args->fh)) | 335 | if (!(p = decode_fh(p, &args->fh)) |
335 | || !(p = xdr_decode_hyper(p, &args->offset))) | 336 | || !(p = xdr_decode_hyper(p, &args->offset))) |
@@ -337,17 +338,16 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, | |||
337 | 338 | ||
338 | len = args->count = ntohl(*p++); | 339 | len = args->count = ntohl(*p++); |
339 | 340 | ||
340 | if (len > NFSSVC_MAXBLKSIZE) | 341 | if (len > max_blocksize) |
341 | len = NFSSVC_MAXBLKSIZE; | 342 | len = max_blocksize; |
342 | 343 | ||
343 | /* set up the kvec */ | 344 | /* set up the kvec */ |
344 | v=0; | 345 | v=0; |
345 | while (len > 0) { | 346 | while (len > 0) { |
346 | pn = rqstp->rq_resused; | 347 | pn = rqstp->rq_resused++; |
347 | svc_take_page(rqstp); | 348 | rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); |
348 | args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); | 349 | rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; |
349 | args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; | 350 | len -= rqstp->rq_vec[v].iov_len; |
350 | len -= args->vec[v].iov_len; | ||
351 | v++; | 351 | v++; |
352 | } | 352 | } |
353 | args->vlen = v; | 353 | args->vlen = v; |
@@ -359,6 +359,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, | |||
359 | struct nfsd3_writeargs *args) | 359 | struct nfsd3_writeargs *args) |
360 | { | 360 | { |
361 | unsigned int len, v, hdr; | 361 | unsigned int len, v, hdr; |
362 | u32 max_blocksize = svc_max_payload(rqstp); | ||
362 | 363 | ||
363 | if (!(p = decode_fh(p, &args->fh)) | 364 | if (!(p = decode_fh(p, &args->fh)) |
364 | || !(p = xdr_decode_hyper(p, &args->offset))) | 365 | || !(p = xdr_decode_hyper(p, &args->offset))) |
@@ -373,22 +374,22 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, | |||
373 | rqstp->rq_arg.len - hdr < len) | 374 | rqstp->rq_arg.len - hdr < len) |
374 | return 0; | 375 | return 0; |
375 | 376 | ||
376 | args->vec[0].iov_base = (void*)p; | 377 | rqstp->rq_vec[0].iov_base = (void*)p; |
377 | args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; | 378 | rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; |
378 | 379 | ||
379 | if (len > NFSSVC_MAXBLKSIZE) | 380 | if (len > max_blocksize) |
380 | len = NFSSVC_MAXBLKSIZE; | 381 | len = max_blocksize; |
381 | v= 0; | 382 | v= 0; |
382 | while (len > args->vec[v].iov_len) { | 383 | while (len > rqstp->rq_vec[v].iov_len) { |
383 | len -= args->vec[v].iov_len; | 384 | len -= rqstp->rq_vec[v].iov_len; |
384 | v++; | 385 | v++; |
385 | args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); | 386 | rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]); |
386 | args->vec[v].iov_len = PAGE_SIZE; | 387 | rqstp->rq_vec[v].iov_len = PAGE_SIZE; |
387 | } | 388 | } |
388 | args->vec[v].iov_len = len; | 389 | rqstp->rq_vec[v].iov_len = len; |
389 | args->vlen = v+1; | 390 | args->vlen = v+1; |
390 | 391 | ||
391 | return args->count == args->len && args->vec[0].iov_len > 0; | 392 | return args->count == args->len && rqstp->rq_vec[0].iov_len > 0; |
392 | } | 393 | } |
393 | 394 | ||
394 | int | 395 | int |
@@ -446,11 +447,11 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p, | |||
446 | * This page appears in the rq_res.pages list, but as pages_len is always | 447 | * This page appears in the rq_res.pages list, but as pages_len is always |
447 | * 0, it won't get in the way | 448 | * 0, it won't get in the way |
448 | */ | 449 | */ |
449 | svc_take_page(rqstp); | ||
450 | len = ntohl(*p++); | 450 | len = ntohl(*p++); |
451 | if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) | 451 | if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) |
452 | return 0; | 452 | return 0; |
453 | args->tname = new = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); | 453 | args->tname = new = |
454 | page_address(rqstp->rq_respages[rqstp->rq_resused++]); | ||
454 | args->tlen = len; | 455 | args->tlen = len; |
455 | /* first copy and check from the first page */ | 456 | /* first copy and check from the first page */ |
456 | old = (char*)p; | 457 | old = (char*)p; |
@@ -522,8 +523,8 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, | |||
522 | { | 523 | { |
523 | if (!(p = decode_fh(p, &args->fh))) | 524 | if (!(p = decode_fh(p, &args->fh))) |
524 | return 0; | 525 | return 0; |
525 | svc_take_page(rqstp); | 526 | args->buffer = |
526 | args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); | 527 | page_address(rqstp->rq_respages[rqstp->rq_resused++]); |
527 | 528 | ||
528 | return xdr_argsize_check(rqstp, p); | 529 | return xdr_argsize_check(rqstp, p); |
529 | } | 530 | } |
@@ -554,8 +555,8 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, | |||
554 | if (args->count > PAGE_SIZE) | 555 | if (args->count > PAGE_SIZE) |
555 | args->count = PAGE_SIZE; | 556 | args->count = PAGE_SIZE; |
556 | 557 | ||
557 | svc_take_page(rqstp); | 558 | args->buffer = |
558 | args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); | 559 | page_address(rqstp->rq_respages[rqstp->rq_resused++]); |
559 | 560 | ||
560 | return xdr_argsize_check(rqstp, p); | 561 | return xdr_argsize_check(rqstp, p); |
561 | } | 562 | } |
@@ -565,6 +566,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, | |||
565 | struct nfsd3_readdirargs *args) | 566 | struct nfsd3_readdirargs *args) |
566 | { | 567 | { |
567 | int len, pn; | 568 | int len, pn; |
569 | u32 max_blocksize = svc_max_payload(rqstp); | ||
568 | 570 | ||
569 | if (!(p = decode_fh(p, &args->fh))) | 571 | if (!(p = decode_fh(p, &args->fh))) |
570 | return 0; | 572 | return 0; |
@@ -573,13 +575,12 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, | |||
573 | args->dircount = ntohl(*p++); | 575 | args->dircount = ntohl(*p++); |
574 | args->count = ntohl(*p++); | 576 | args->count = ntohl(*p++); |
575 | 577 | ||
576 | len = (args->count > NFSSVC_MAXBLKSIZE) ? NFSSVC_MAXBLKSIZE : | 578 | len = (args->count > max_blocksize) ? max_blocksize : |
577 | args->count; | 579 | args->count; |
578 | args->count = len; | 580 | args->count = len; |
579 | 581 | ||
580 | while (len > 0) { | 582 | while (len > 0) { |
581 | pn = rqstp->rq_resused; | 583 | pn = rqstp->rq_resused++; |
582 | svc_take_page(rqstp); | ||
583 | if (!args->buffer) | 584 | if (!args->buffer) |
584 | args->buffer = page_address(rqstp->rq_respages[pn]); | 585 | args->buffer = page_address(rqstp->rq_respages[pn]); |
585 | len -= PAGE_SIZE; | 586 | len -= PAGE_SIZE; |
@@ -668,7 +669,6 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, | |||
668 | rqstp->rq_res.page_len = resp->len; | 669 | rqstp->rq_res.page_len = resp->len; |
669 | if (resp->len & 3) { | 670 | if (resp->len & 3) { |
670 | /* need to pad the tail */ | 671 | /* need to pad the tail */ |
671 | rqstp->rq_restailpage = 0; | ||
672 | rqstp->rq_res.tail[0].iov_base = p; | 672 | rqstp->rq_res.tail[0].iov_base = p; |
673 | *p = 0; | 673 | *p = 0; |
674 | rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); | 674 | rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); |
@@ -693,7 +693,6 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p, | |||
693 | rqstp->rq_res.page_len = resp->count; | 693 | rqstp->rq_res.page_len = resp->count; |
694 | if (resp->count & 3) { | 694 | if (resp->count & 3) { |
695 | /* need to pad the tail */ | 695 | /* need to pad the tail */ |
696 | rqstp->rq_restailpage = 0; | ||
697 | rqstp->rq_res.tail[0].iov_base = p; | 696 | rqstp->rq_res.tail[0].iov_base = p; |
698 | *p = 0; | 697 | *p = 0; |
699 | rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); | 698 | rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); |
@@ -768,7 +767,6 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p, | |||
768 | rqstp->rq_res.page_len = (resp->count) << 2; | 767 | rqstp->rq_res.page_len = (resp->count) << 2; |
769 | 768 | ||
770 | /* add the 'tail' to the end of the 'head' page - page 0. */ | 769 | /* add the 'tail' to the end of the 'head' page - page 0. */ |
771 | rqstp->rq_restailpage = 0; | ||
772 | rqstp->rq_res.tail[0].iov_base = p; | 770 | rqstp->rq_res.tail[0].iov_base = p; |
773 | *p++ = 0; /* no more entries */ | 771 | *p++ = 0; /* no more entries */ |
774 | *p++ = htonl(resp->common.err == nfserr_eof); | 772 | *p++ = htonl(resp->common.err == nfserr_eof); |
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index edb107e61b91..5d94555cdc83 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -63,6 +63,8 @@ | |||
63 | #define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ | 63 | #define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ |
64 | | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE) | 64 | | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE) |
65 | 65 | ||
66 | #define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS | NFS4_ACE_IDENTIFIER_GROUP) | ||
67 | |||
66 | #define MASK_EQUAL(mask1, mask2) \ | 68 | #define MASK_EQUAL(mask1, mask2) \ |
67 | ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) | 69 | ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) |
68 | 70 | ||
@@ -96,24 +98,26 @@ deny_mask(u32 allow_mask, unsigned int flags) | |||
96 | /* XXX: modify functions to return NFS errors; they're only ever | 98 | /* XXX: modify functions to return NFS errors; they're only ever |
97 | * used by nfs code, after all.... */ | 99 | * used by nfs code, after all.... */ |
98 | 100 | ||
99 | static int | 101 | /* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the |
100 | mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) | 102 | * side of being more restrictive, so the mode bit mapping below is |
103 | * pessimistic. An optimistic version would be needed to handle DENY's, | ||
104 | but we expect to coalesce all ALLOWs and DENYs before mapping to mode | ||
105 | * bits. */ | ||
106 | |||
107 | static void | ||
108 | low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) | ||
101 | { | 109 | { |
102 | u32 ignore = 0; | 110 | u32 write_mode = NFS4_WRITE_MODE; |
103 | 111 | ||
104 | if (!(flags & NFS4_ACL_DIR)) | 112 | if (flags & NFS4_ACL_DIR) |
105 | ignore |= NFS4_ACE_DELETE_CHILD; /* ignore it */ | 113 | write_mode |= NFS4_ACE_DELETE_CHILD; |
106 | perm |= ignore; | ||
107 | *mode = 0; | 114 | *mode = 0; |
108 | if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE) | 115 | if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE) |
109 | *mode |= ACL_READ; | 116 | *mode |= ACL_READ; |
110 | if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE) | 117 | if ((perm & write_mode) == write_mode) |
111 | *mode |= ACL_WRITE; | 118 | *mode |= ACL_WRITE; |
112 | if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE) | 119 | if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE) |
113 | *mode |= ACL_EXECUTE; | 120 | *mode |= ACL_EXECUTE; |
114 | if (!MASK_EQUAL(perm, ignore|mask_from_posix(*mode, flags))) | ||
115 | return -EINVAL; | ||
116 | return 0; | ||
117 | } | 121 | } |
118 | 122 | ||
119 | struct ace_container { | 123 | struct ace_container { |
@@ -338,38 +342,6 @@ sort_pacl(struct posix_acl *pacl) | |||
338 | return; | 342 | return; |
339 | } | 343 | } |
340 | 344 | ||
341 | static int | ||
342 | write_pace(struct nfs4_ace *ace, struct posix_acl *pacl, | ||
343 | struct posix_acl_entry **pace, short tag, unsigned int flags) | ||
344 | { | ||
345 | struct posix_acl_entry *this = *pace; | ||
346 | |||
347 | if (*pace == pacl->a_entries + pacl->a_count) | ||
348 | return -EINVAL; /* fell off the end */ | ||
349 | (*pace)++; | ||
350 | this->e_tag = tag; | ||
351 | if (tag == ACL_USER_OBJ) | ||
352 | flags |= NFS4_ACL_OWNER; | ||
353 | if (mode_from_nfs4(ace->access_mask, &this->e_perm, flags)) | ||
354 | return -EINVAL; | ||
355 | this->e_id = (tag == ACL_USER || tag == ACL_GROUP ? | ||
356 | ace->who : ACL_UNDEFINED_ID); | ||
357 | return 0; | ||
358 | } | ||
359 | |||
360 | static struct nfs4_ace * | ||
361 | get_next_v4_ace(struct list_head **p, struct list_head *head) | ||
362 | { | ||
363 | struct nfs4_ace *ace; | ||
364 | |||
365 | *p = (*p)->next; | ||
366 | if (*p == head) | ||
367 | return NULL; | ||
368 | ace = list_entry(*p, struct nfs4_ace, l_ace); | ||
369 | |||
370 | return ace; | ||
371 | } | ||
372 | |||
373 | int | 345 | int |
374 | nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, | 346 | nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, |
375 | struct posix_acl **dpacl, unsigned int flags) | 347 | struct posix_acl **dpacl, unsigned int flags) |
@@ -385,42 +357,23 @@ nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, | |||
385 | goto out; | 357 | goto out; |
386 | 358 | ||
387 | error = nfs4_acl_split(acl, dacl); | 359 | error = nfs4_acl_split(acl, dacl); |
388 | if (error < 0) | 360 | if (error) |
389 | goto out_acl; | 361 | goto out_acl; |
390 | 362 | ||
391 | if (pacl != NULL) { | 363 | *pacl = _nfsv4_to_posix_one(acl, flags); |
392 | if (acl->naces == 0) { | 364 | if (IS_ERR(*pacl)) { |
393 | error = -ENODATA; | 365 | error = PTR_ERR(*pacl); |
394 | goto try_dpacl; | 366 | *pacl = NULL; |
395 | } | 367 | goto out_acl; |
396 | |||
397 | *pacl = _nfsv4_to_posix_one(acl, flags); | ||
398 | if (IS_ERR(*pacl)) { | ||
399 | error = PTR_ERR(*pacl); | ||
400 | *pacl = NULL; | ||
401 | goto out_acl; | ||
402 | } | ||
403 | } | 368 | } |
404 | 369 | ||
405 | try_dpacl: | 370 | *dpacl = _nfsv4_to_posix_one(dacl, flags); |
406 | if (dpacl != NULL) { | 371 | if (IS_ERR(*dpacl)) { |
407 | if (dacl->naces == 0) { | 372 | error = PTR_ERR(*dpacl); |
408 | if (pacl == NULL || *pacl == NULL) | 373 | *dpacl = NULL; |
409 | error = -ENODATA; | ||
410 | goto out_acl; | ||
411 | } | ||
412 | |||
413 | error = 0; | ||
414 | *dpacl = _nfsv4_to_posix_one(dacl, flags); | ||
415 | if (IS_ERR(*dpacl)) { | ||
416 | error = PTR_ERR(*dpacl); | ||
417 | *dpacl = NULL; | ||
418 | goto out_acl; | ||
419 | } | ||
420 | } | 374 | } |
421 | |||
422 | out_acl: | 375 | out_acl: |
423 | if (error && pacl) { | 376 | if (error) { |
424 | posix_acl_release(*pacl); | 377 | posix_acl_release(*pacl); |
425 | *pacl = NULL; | 378 | *pacl = NULL; |
426 | } | 379 | } |
@@ -429,349 +382,311 @@ out: | |||
429 | return error; | 382 | return error; |
430 | } | 383 | } |
431 | 384 | ||
385 | /* | ||
386 | * While processing the NFSv4 ACE, this maintains bitmasks representing | ||
387 | * which permission bits have been allowed and which denied to a given | ||
388 | * entity: */ | ||
389 | struct posix_ace_state { | ||
390 | u32 allow; | ||
391 | u32 deny; | ||
392 | }; | ||
393 | |||
394 | struct posix_user_ace_state { | ||
395 | uid_t uid; | ||
396 | struct posix_ace_state perms; | ||
397 | }; | ||
398 | |||
399 | struct posix_ace_state_array { | ||
400 | int n; | ||
401 | struct posix_user_ace_state aces[]; | ||
402 | }; | ||
403 | |||
404 | /* | ||
405 | * While processing the NFSv4 ACE, this maintains the partial permissions | ||
406 | * calculated so far: */ | ||
407 | |||
408 | struct posix_acl_state { | ||
409 | struct posix_ace_state owner; | ||
410 | struct posix_ace_state group; | ||
411 | struct posix_ace_state other; | ||
412 | struct posix_ace_state everyone; | ||
413 | struct posix_ace_state mask; /* Deny unused in this case */ | ||
414 | struct posix_ace_state_array *users; | ||
415 | struct posix_ace_state_array *groups; | ||
416 | }; | ||
417 | |||
432 | static int | 418 | static int |
433 | same_who(struct nfs4_ace *a, struct nfs4_ace *b) | 419 | init_state(struct posix_acl_state *state, int cnt) |
434 | { | 420 | { |
435 | return a->whotype == b->whotype && | 421 | int alloc; |
436 | (a->whotype != NFS4_ACL_WHO_NAMED || a->who == b->who); | 422 | |
423 | memset(state, 0, sizeof(struct posix_acl_state)); | ||
424 | /* | ||
425 | * In the worst case, each individual acl could be for a distinct | ||
426 | * named user or group, but we don't know which, so we allocate | ||
427 | * enough space for either: | ||
428 | */ | ||
429 | alloc = sizeof(struct posix_ace_state_array) | ||
430 | + cnt*sizeof(struct posix_ace_state); | ||
431 | state->users = kzalloc(alloc, GFP_KERNEL); | ||
432 | if (!state->users) | ||
433 | return -ENOMEM; | ||
434 | state->groups = kzalloc(alloc, GFP_KERNEL); | ||
435 | if (!state->groups) { | ||
436 | kfree(state->users); | ||
437 | return -ENOMEM; | ||
438 | } | ||
439 | return 0; | ||
437 | } | 440 | } |
438 | 441 | ||
439 | static int | 442 | static void |
440 | complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny, | 443 | free_state(struct posix_acl_state *state) { |
441 | unsigned int flags) | 444 | kfree(state->users); |
442 | { | 445 | kfree(state->groups); |
443 | int ignore = 0; | ||
444 | if (!(flags & NFS4_ACL_DIR)) | ||
445 | ignore |= NFS4_ACE_DELETE_CHILD; | ||
446 | return MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags), | ||
447 | ignore|deny->access_mask) && | ||
448 | allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && | ||
449 | deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE && | ||
450 | allow->flag == deny->flag && | ||
451 | same_who(allow, deny); | ||
452 | } | 446 | } |
453 | 447 | ||
454 | static inline int | 448 | static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_state *astate) |
455 | user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p, | ||
456 | struct posix_acl *pacl, struct posix_acl_entry **pace, | ||
457 | unsigned int flags) | ||
458 | { | 449 | { |
459 | int error = -EINVAL; | 450 | state->mask.allow |= astate->allow; |
460 | struct nfs4_ace *ace, *ace2; | ||
461 | |||
462 | ace = get_next_v4_ace(p, &n4acl->ace_head); | ||
463 | if (ace == NULL) | ||
464 | goto out; | ||
465 | if (ace2type(ace) != ACL_USER_OBJ) | ||
466 | goto out; | ||
467 | error = write_pace(ace, pacl, pace, ACL_USER_OBJ, flags); | ||
468 | if (error < 0) | ||
469 | goto out; | ||
470 | error = -EINVAL; | ||
471 | ace2 = get_next_v4_ace(p, &n4acl->ace_head); | ||
472 | if (ace2 == NULL) | ||
473 | goto out; | ||
474 | if (!complementary_ace_pair(ace, ace2, flags)) | ||
475 | goto out; | ||
476 | error = 0; | ||
477 | out: | ||
478 | return error; | ||
479 | } | 451 | } |
480 | 452 | ||
481 | static inline int | 453 | /* |
482 | users_from_v4(struct nfs4_acl *n4acl, struct list_head **p, | 454 | * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS, |
483 | struct nfs4_ace **mask_ace, | 455 | * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate |
484 | struct posix_acl *pacl, struct posix_acl_entry **pace, | 456 | * to traditional read/write/execute permissions. |
485 | unsigned int flags) | 457 | * |
486 | { | 458 | * It's problematic to reject acls that use certain mode bits, because it |
487 | int error = -EINVAL; | 459 | * places the burden on users to learn the rules about which bits one |
488 | struct nfs4_ace *ace, *ace2; | 460 | * particular server sets, without giving the user a lot of help--we return an |
461 | * error that could mean any number of different things. To make matters | ||
462 | * worse, the problematic bits might be introduced by some application that's | ||
463 | * automatically mapping from some other acl model. | ||
464 | * | ||
465 | * So wherever possible we accept anything, possibly erring on the side of | ||
466 | * denying more permissions than necessary. | ||
467 | * | ||
468 | * However we do reject *explicit* DENY's of a few bits representing | ||
469 | * permissions we could never deny: | ||
470 | */ | ||
489 | 471 | ||
490 | ace = get_next_v4_ace(p, &n4acl->ace_head); | 472 | static inline int check_deny(u32 mask, int isowner) |
491 | if (ace == NULL) | 473 | { |
492 | goto out; | 474 | if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL)) |
493 | while (ace2type(ace) == ACL_USER) { | 475 | return -EINVAL; |
494 | if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) | 476 | if (!isowner) |
495 | goto out; | 477 | return 0; |
496 | if (*mask_ace && | 478 | if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)) |
497 | !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) | 479 | return -EINVAL; |
498 | goto out; | 480 | return 0; |
499 | *mask_ace = ace; | ||
500 | ace = get_next_v4_ace(p, &n4acl->ace_head); | ||
501 | if (ace == NULL) | ||
502 | goto out; | ||
503 | if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) | ||
504 | goto out; | ||
505 | error = write_pace(ace, pacl, pace, ACL_USER, flags); | ||
506 | if (error < 0) | ||
507 | goto out; | ||
508 | error = -EINVAL; | ||
509 | ace2 = get_next_v4_ace(p, &n4acl->ace_head); | ||
510 | if (ace2 == NULL) | ||
511 | goto out; | ||
512 | if (!complementary_ace_pair(ace, ace2, flags)) | ||
513 | goto out; | ||
514 | if ((*mask_ace)->flag != ace2->flag || | ||
515 | !same_who(*mask_ace, ace2)) | ||
516 | goto out; | ||
517 | ace = get_next_v4_ace(p, &n4acl->ace_head); | ||
518 | if (ace == NULL) | ||
519 | goto out; | ||
520 | } | ||
521 | error = 0; | ||
522 | out: | ||
523 | return error; | ||
524 | } | 481 | } |
525 | 482 | ||
526 | static inline int | 483 | static struct posix_acl * |
527 | group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p, | 484 | posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) |
528 | struct nfs4_ace **mask_ace, | ||
529 | struct posix_acl *pacl, struct posix_acl_entry **pace, | ||
530 | unsigned int flags) | ||
531 | { | 485 | { |
532 | int error = -EINVAL; | 486 | struct posix_acl_entry *pace; |
533 | struct nfs4_ace *ace, *ace2; | 487 | struct posix_acl *pacl; |
534 | struct ace_container *ac; | 488 | int nace; |
535 | struct list_head group_l; | 489 | int i, error = 0; |
536 | |||
537 | INIT_LIST_HEAD(&group_l); | ||
538 | ace = list_entry(*p, struct nfs4_ace, l_ace); | ||
539 | |||
540 | /* group owner (mask and allow aces) */ | ||
541 | 490 | ||
542 | if (pacl->a_count != 3) { | 491 | nace = 4 + state->users->n + state->groups->n; |
543 | /* then the group owner should be preceded by mask */ | 492 | pacl = posix_acl_alloc(nace, GFP_KERNEL); |
544 | if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) | 493 | if (!pacl) |
545 | goto out; | 494 | return ERR_PTR(-ENOMEM); |
546 | if (*mask_ace && | ||
547 | !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) | ||
548 | goto out; | ||
549 | *mask_ace = ace; | ||
550 | ace = get_next_v4_ace(p, &n4acl->ace_head); | ||
551 | if (ace == NULL) | ||
552 | goto out; | ||
553 | 495 | ||
554 | if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace)) | 496 | pace = pacl->a_entries; |
555 | goto out; | 497 | pace->e_tag = ACL_USER_OBJ; |
498 | error = check_deny(state->owner.deny, 1); | ||
499 | if (error) | ||
500 | goto out_err; | ||
501 | low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); | ||
502 | pace->e_id = ACL_UNDEFINED_ID; | ||
503 | |||
504 | for (i=0; i < state->users->n; i++) { | ||
505 | pace++; | ||
506 | pace->e_tag = ACL_USER; | ||
507 | error = check_deny(state->users->aces[i].perms.deny, 0); | ||
508 | if (error) | ||
509 | goto out_err; | ||
510 | low_mode_from_nfs4(state->users->aces[i].perms.allow, | ||
511 | &pace->e_perm, flags); | ||
512 | pace->e_id = state->users->aces[i].uid; | ||
513 | add_to_mask(state, &state->users->aces[i].perms); | ||
556 | } | 514 | } |
557 | 515 | ||
558 | if (ace2type(ace) != ACL_GROUP_OBJ) | 516 | pace++; |
559 | goto out; | 517 | pace->e_tag = ACL_GROUP_OBJ; |
560 | 518 | error = check_deny(state->group.deny, 0); | |
561 | ac = kmalloc(sizeof(*ac), GFP_KERNEL); | 519 | if (error) |
562 | error = -ENOMEM; | 520 | goto out_err; |
563 | if (ac == NULL) | 521 | low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); |
564 | goto out; | 522 | pace->e_id = ACL_UNDEFINED_ID; |
565 | ac->ace = ace; | 523 | add_to_mask(state, &state->group); |
566 | list_add_tail(&ac->ace_l, &group_l); | 524 | |
567 | 525 | for (i=0; i < state->groups->n; i++) { | |
568 | error = -EINVAL; | 526 | pace++; |
569 | if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) | 527 | pace->e_tag = ACL_GROUP; |
570 | goto out; | 528 | error = check_deny(state->groups->aces[i].perms.deny, 0); |
571 | 529 | if (error) | |
572 | error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags); | 530 | goto out_err; |
573 | if (error < 0) | 531 | low_mode_from_nfs4(state->groups->aces[i].perms.allow, |
574 | goto out; | 532 | &pace->e_perm, flags); |
575 | 533 | pace->e_id = state->groups->aces[i].uid; | |
576 | error = -EINVAL; | 534 | add_to_mask(state, &state->groups->aces[i].perms); |
577 | ace = get_next_v4_ace(p, &n4acl->ace_head); | 535 | } |
578 | if (ace == NULL) | ||
579 | goto out; | ||
580 | |||
581 | /* groups (mask and allow aces) */ | ||
582 | |||
583 | while (ace2type(ace) == ACL_GROUP) { | ||
584 | if (*mask_ace == NULL) | ||
585 | goto out; | ||
586 | |||
587 | if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE || | ||
588 | !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) | ||
589 | goto out; | ||
590 | *mask_ace = ace; | ||
591 | 536 | ||
592 | ace = get_next_v4_ace(p, &n4acl->ace_head); | 537 | pace++; |
593 | if (ace == NULL) | 538 | pace->e_tag = ACL_MASK; |
594 | goto out; | 539 | low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); |
595 | ac = kmalloc(sizeof(*ac), GFP_KERNEL); | 540 | pace->e_id = ACL_UNDEFINED_ID; |
596 | error = -ENOMEM; | ||
597 | if (ac == NULL) | ||
598 | goto out; | ||
599 | error = -EINVAL; | ||
600 | if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE || | ||
601 | !same_who(ace, *mask_ace)) | ||
602 | goto out; | ||
603 | 541 | ||
604 | ac->ace = ace; | 542 | pace++; |
605 | list_add_tail(&ac->ace_l, &group_l); | 543 | pace->e_tag = ACL_OTHER; |
544 | error = check_deny(state->other.deny, 0); | ||
545 | if (error) | ||
546 | goto out_err; | ||
547 | low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); | ||
548 | pace->e_id = ACL_UNDEFINED_ID; | ||
606 | 549 | ||
607 | error = write_pace(ace, pacl, pace, ACL_GROUP, flags); | 550 | return pacl; |
608 | if (error < 0) | 551 | out_err: |
609 | goto out; | 552 | posix_acl_release(pacl); |
610 | error = -EINVAL; | 553 | return ERR_PTR(error); |
611 | ace = get_next_v4_ace(p, &n4acl->ace_head); | 554 | } |
612 | if (ace == NULL) | ||
613 | goto out; | ||
614 | } | ||
615 | 555 | ||
616 | /* group owner (deny ace) */ | 556 | static inline void allow_bits(struct posix_ace_state *astate, u32 mask) |
557 | { | ||
558 | /* Allow all bits in the mask not already denied: */ | ||
559 | astate->allow |= mask & ~astate->deny; | ||
560 | } | ||
617 | 561 | ||
618 | if (ace2type(ace) != ACL_GROUP_OBJ) | 562 | static inline void deny_bits(struct posix_ace_state *astate, u32 mask) |
619 | goto out; | 563 | { |
620 | ac = list_entry(group_l.next, struct ace_container, ace_l); | 564 | /* Deny all bits in the mask not already allowed: */ |
621 | ace2 = ac->ace; | 565 | astate->deny |= mask & ~astate->allow; |
622 | if (!complementary_ace_pair(ace2, ace, flags)) | 566 | } |
623 | goto out; | ||
624 | list_del(group_l.next); | ||
625 | kfree(ac); | ||
626 | 567 | ||
627 | /* groups (deny aces) */ | 568 | static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid) |
569 | { | ||
570 | int i; | ||
628 | 571 | ||
629 | while (!list_empty(&group_l)) { | 572 | for (i = 0; i < a->n; i++) |
630 | ace = get_next_v4_ace(p, &n4acl->ace_head); | 573 | if (a->aces[i].uid == uid) |
631 | if (ace == NULL) | 574 | return i; |
632 | goto out; | 575 | /* Not found: */ |
633 | if (ace2type(ace) != ACL_GROUP) | 576 | a->n++; |
634 | goto out; | 577 | a->aces[i].uid = uid; |
635 | ac = list_entry(group_l.next, struct ace_container, ace_l); | 578 | a->aces[i].perms.allow = state->everyone.allow; |
636 | ace2 = ac->ace; | 579 | a->aces[i].perms.deny = state->everyone.deny; |
637 | if (!complementary_ace_pair(ace2, ace, flags)) | ||
638 | goto out; | ||
639 | list_del(group_l.next); | ||
640 | kfree(ac); | ||
641 | } | ||
642 | 580 | ||
643 | ace = get_next_v4_ace(p, &n4acl->ace_head); | 581 | return i; |
644 | if (ace == NULL) | ||
645 | goto out; | ||
646 | if (ace2type(ace) != ACL_OTHER) | ||
647 | goto out; | ||
648 | error = 0; | ||
649 | out: | ||
650 | while (!list_empty(&group_l)) { | ||
651 | ac = list_entry(group_l.next, struct ace_container, ace_l); | ||
652 | list_del(group_l.next); | ||
653 | kfree(ac); | ||
654 | } | ||
655 | return error; | ||
656 | } | 582 | } |
657 | 583 | ||
658 | static inline int | 584 | static void deny_bits_array(struct posix_ace_state_array *a, u32 mask) |
659 | mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p, | ||
660 | struct nfs4_ace **mask_ace, | ||
661 | struct posix_acl *pacl, struct posix_acl_entry **pace, | ||
662 | unsigned int flags) | ||
663 | { | 585 | { |
664 | int error = -EINVAL; | 586 | int i; |
665 | struct nfs4_ace *ace; | ||
666 | 587 | ||
667 | ace = list_entry(*p, struct nfs4_ace, l_ace); | 588 | for (i=0; i < a->n; i++) |
668 | if (pacl->a_count != 3) { | 589 | deny_bits(&a->aces[i].perms, mask); |
669 | if (*mask_ace == NULL) | ||
670 | goto out; | ||
671 | (*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags); | ||
672 | write_pace(*mask_ace, pacl, pace, ACL_MASK, flags); | ||
673 | } | ||
674 | error = 0; | ||
675 | out: | ||
676 | return error; | ||
677 | } | 590 | } |
678 | 591 | ||
679 | static inline int | 592 | static void allow_bits_array(struct posix_ace_state_array *a, u32 mask) |
680 | other_from_v4(struct nfs4_acl *n4acl, struct list_head **p, | ||
681 | struct posix_acl *pacl, struct posix_acl_entry **pace, | ||
682 | unsigned int flags) | ||
683 | { | 593 | { |
684 | int error = -EINVAL; | 594 | int i; |
685 | struct nfs4_ace *ace, *ace2; | ||
686 | 595 | ||
687 | ace = list_entry(*p, struct nfs4_ace, l_ace); | 596 | for (i=0; i < a->n; i++) |
688 | if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) | 597 | allow_bits(&a->aces[i].perms, mask); |
689 | goto out; | ||
690 | error = write_pace(ace, pacl, pace, ACL_OTHER, flags); | ||
691 | if (error < 0) | ||
692 | goto out; | ||
693 | error = -EINVAL; | ||
694 | ace2 = get_next_v4_ace(p, &n4acl->ace_head); | ||
695 | if (ace2 == NULL) | ||
696 | goto out; | ||
697 | if (!complementary_ace_pair(ace, ace2, flags)) | ||
698 | goto out; | ||
699 | error = 0; | ||
700 | out: | ||
701 | return error; | ||
702 | } | 598 | } |
703 | 599 | ||
704 | static int | 600 | static void process_one_v4_ace(struct posix_acl_state *state, |
705 | calculate_posix_ace_count(struct nfs4_acl *n4acl) | 601 | struct nfs4_ace *ace) |
706 | { | 602 | { |
707 | if (n4acl->naces == 6) /* owner, owner group, and other only */ | 603 | u32 mask = ace->access_mask; |
708 | return 3; | 604 | int i; |
709 | else { /* Otherwise there must be a mask entry. */ | 605 | |
710 | /* Also, the remaining entries are for named users and | 606 | switch (ace2type(ace)) { |
711 | * groups, and come in threes (mask, allow, deny): */ | 607 | case ACL_USER_OBJ: |
712 | if (n4acl->naces < 7) | 608 | if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { |
713 | return -EINVAL; | 609 | allow_bits(&state->owner, mask); |
714 | if ((n4acl->naces - 7) % 3) | 610 | } else { |
715 | return -EINVAL; | 611 | deny_bits(&state->owner, mask); |
716 | return 4 + (n4acl->naces - 7)/3; | 612 | } |
613 | break; | ||
614 | case ACL_USER: | ||
615 | i = find_uid(state, state->users, ace->who); | ||
616 | if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { | ||
617 | allow_bits(&state->users->aces[i].perms, mask); | ||
618 | } else { | ||
619 | deny_bits(&state->users->aces[i].perms, mask); | ||
620 | mask = state->users->aces[i].perms.deny; | ||
621 | deny_bits(&state->owner, mask); | ||
622 | } | ||
623 | break; | ||
624 | case ACL_GROUP_OBJ: | ||
625 | if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { | ||
626 | allow_bits(&state->group, mask); | ||
627 | } else { | ||
628 | deny_bits(&state->group, mask); | ||
629 | mask = state->group.deny; | ||
630 | deny_bits(&state->owner, mask); | ||
631 | deny_bits(&state->everyone, mask); | ||
632 | deny_bits_array(state->users, mask); | ||
633 | deny_bits_array(state->groups, mask); | ||
634 | } | ||
635 | break; | ||
636 | case ACL_GROUP: | ||
637 | i = find_uid(state, state->groups, ace->who); | ||
638 | if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { | ||
639 | allow_bits(&state->groups->aces[i].perms, mask); | ||
640 | } else { | ||
641 | deny_bits(&state->groups->aces[i].perms, mask); | ||
642 | mask = state->groups->aces[i].perms.deny; | ||
643 | deny_bits(&state->owner, mask); | ||
644 | deny_bits(&state->group, mask); | ||
645 | deny_bits(&state->everyone, mask); | ||
646 | deny_bits_array(state->users, mask); | ||
647 | deny_bits_array(state->groups, mask); | ||
648 | } | ||
649 | break; | ||
650 | case ACL_OTHER: | ||
651 | if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { | ||
652 | allow_bits(&state->owner, mask); | ||
653 | allow_bits(&state->group, mask); | ||
654 | allow_bits(&state->other, mask); | ||
655 | allow_bits(&state->everyone, mask); | ||
656 | allow_bits_array(state->users, mask); | ||
657 | allow_bits_array(state->groups, mask); | ||
658 | } else { | ||
659 | deny_bits(&state->owner, mask); | ||
660 | deny_bits(&state->group, mask); | ||
661 | deny_bits(&state->other, mask); | ||
662 | deny_bits(&state->everyone, mask); | ||
663 | deny_bits_array(state->users, mask); | ||
664 | deny_bits_array(state->groups, mask); | ||
665 | } | ||
717 | } | 666 | } |
718 | } | 667 | } |
719 | 668 | ||
720 | |||
721 | static struct posix_acl * | 669 | static struct posix_acl * |
722 | _nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags) | 670 | _nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags) |
723 | { | 671 | { |
672 | struct posix_acl_state state; | ||
724 | struct posix_acl *pacl; | 673 | struct posix_acl *pacl; |
725 | int error = -EINVAL, nace = 0; | 674 | struct nfs4_ace *ace; |
726 | struct list_head *p; | 675 | int ret; |
727 | struct nfs4_ace *mask_ace = NULL; | ||
728 | struct posix_acl_entry *pace; | ||
729 | |||
730 | nace = calculate_posix_ace_count(n4acl); | ||
731 | if (nace < 0) | ||
732 | goto out_err; | ||
733 | |||
734 | pacl = posix_acl_alloc(nace, GFP_KERNEL); | ||
735 | error = -ENOMEM; | ||
736 | if (pacl == NULL) | ||
737 | goto out_err; | ||
738 | |||
739 | pace = &pacl->a_entries[0]; | ||
740 | p = &n4acl->ace_head; | ||
741 | |||
742 | error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags); | ||
743 | if (error) | ||
744 | goto out_acl; | ||
745 | |||
746 | error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags); | ||
747 | if (error) | ||
748 | goto out_acl; | ||
749 | 676 | ||
750 | error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace, | 677 | ret = init_state(&state, n4acl->naces); |
751 | flags); | 678 | if (ret) |
752 | if (error) | 679 | return ERR_PTR(ret); |
753 | goto out_acl; | ||
754 | 680 | ||
755 | error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags); | 681 | list_for_each_entry(ace, &n4acl->ace_head, l_ace) |
756 | if (error) | 682 | process_one_v4_ace(&state, ace); |
757 | goto out_acl; | ||
758 | error = other_from_v4(n4acl, &p, pacl, &pace, flags); | ||
759 | if (error) | ||
760 | goto out_acl; | ||
761 | 683 | ||
762 | error = -EINVAL; | 684 | pacl = posix_state_to_acl(&state, flags); |
763 | if (p->next != &n4acl->ace_head) | ||
764 | goto out_acl; | ||
765 | if (pace != pacl->a_entries + pacl->a_count) | ||
766 | goto out_acl; | ||
767 | 685 | ||
768 | sort_pacl(pacl); | 686 | free_state(&state); |
769 | 687 | ||
770 | return pacl; | 688 | if (!IS_ERR(pacl)) |
771 | out_acl: | 689 | sort_pacl(pacl); |
772 | posix_acl_release(pacl); | ||
773 | out_err: | ||
774 | pacl = ERR_PTR(error); | ||
775 | return pacl; | 690 | return pacl; |
776 | } | 691 | } |
777 | 692 | ||
@@ -785,22 +700,41 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) | |||
785 | list_for_each_safe(h, n, &acl->ace_head) { | 700 | list_for_each_safe(h, n, &acl->ace_head) { |
786 | ace = list_entry(h, struct nfs4_ace, l_ace); | 701 | ace = list_entry(h, struct nfs4_ace, l_ace); |
787 | 702 | ||
788 | if ((ace->flag & NFS4_INHERITANCE_FLAGS) | 703 | if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && |
789 | != NFS4_INHERITANCE_FLAGS) | 704 | ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) |
790 | continue; | 705 | return -EINVAL; |
791 | 706 | ||
792 | error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, | 707 | if (ace->flag & ~NFS4_SUPPORTED_FLAGS) |
793 | ace->access_mask, ace->whotype, ace->who); | 708 | return -EINVAL; |
794 | if (error < 0) | ||
795 | goto out; | ||
796 | 709 | ||
797 | list_del(h); | 710 | switch (ace->flag & NFS4_INHERITANCE_FLAGS) { |
798 | kfree(ace); | 711 | case 0: |
799 | acl->naces--; | 712 | /* Leave this ace in the effective acl: */ |
713 | continue; | ||
714 | case NFS4_INHERITANCE_FLAGS: | ||
715 | /* Add this ace to the default acl and remove it | ||
716 | * from the effective acl: */ | ||
717 | error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, | ||
718 | ace->access_mask, ace->whotype, ace->who); | ||
719 | if (error) | ||
720 | return error; | ||
721 | list_del(h); | ||
722 | kfree(ace); | ||
723 | acl->naces--; | ||
724 | break; | ||
725 | case NFS4_INHERITANCE_FLAGS & ~NFS4_ACE_INHERIT_ONLY_ACE: | ||
726 | /* Add this ace to the default, but leave it in | ||
727 | * the effective acl as well: */ | ||
728 | error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, | ||
729 | ace->access_mask, ace->whotype, ace->who); | ||
730 | if (error) | ||
731 | return error; | ||
732 | break; | ||
733 | default: | ||
734 | return -EINVAL; | ||
735 | } | ||
800 | } | 736 | } |
801 | 737 | return 0; | |
802 | out: | ||
803 | return error; | ||
804 | } | 738 | } |
805 | 739 | ||
806 | static short | 740 | static short |
@@ -930,23 +864,6 @@ nfs4_acl_write_who(int who, char *p) | |||
930 | return -1; | 864 | return -1; |
931 | } | 865 | } |
932 | 866 | ||
933 | static inline int | ||
934 | match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who) | ||
935 | { | ||
936 | switch (ace->whotype) { | ||
937 | case NFS4_ACL_WHO_NAMED: | ||
938 | return who == ace->who; | ||
939 | case NFS4_ACL_WHO_OWNER: | ||
940 | return who == owner; | ||
941 | case NFS4_ACL_WHO_GROUP: | ||
942 | return who == group; | ||
943 | case NFS4_ACL_WHO_EVERYONE: | ||
944 | return 1; | ||
945 | default: | ||
946 | return 0; | ||
947 | } | ||
948 | } | ||
949 | |||
950 | EXPORT_SYMBOL(nfs4_acl_new); | 867 | EXPORT_SYMBOL(nfs4_acl_new); |
951 | EXPORT_SYMBOL(nfs4_acl_free); | 868 | EXPORT_SYMBOL(nfs4_acl_free); |
952 | EXPORT_SYMBOL(nfs4_acl_add_ace); | 869 | EXPORT_SYMBOL(nfs4_acl_add_ace); |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 8583d99ee740..f6ca9fb3fc63 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -131,7 +131,7 @@ xdr_error: \ | |||
131 | #define READ_BUF(nbytes) do { \ | 131 | #define READ_BUF(nbytes) do { \ |
132 | p = xdr_inline_decode(xdr, nbytes); \ | 132 | p = xdr_inline_decode(xdr, nbytes); \ |
133 | if (!p) { \ | 133 | if (!p) { \ |
134 | dprintk("NFSD: %s: reply buffer overflowed in line %d.", \ | 134 | dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \ |
135 | __FUNCTION__, __LINE__); \ | 135 | __FUNCTION__, __LINE__); \ |
136 | return -EIO; \ | 136 | return -EIO; \ |
137 | } \ | 137 | } \ |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ee4eff27aedc..8333db12caca 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -600,7 +600,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_se | |||
600 | &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL); | 600 | &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL); |
601 | nfs4_unlock_state(); | 601 | nfs4_unlock_state(); |
602 | if (status) { | 602 | if (status) { |
603 | dprintk("NFSD: nfsd4_setattr: couldn't process stateid!"); | 603 | dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); |
604 | return status; | 604 | return status; |
605 | } | 605 | } |
606 | } | 606 | } |
@@ -646,7 +646,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ | |||
646 | *p++ = nfssvc_boot.tv_usec; | 646 | *p++ = nfssvc_boot.tv_usec; |
647 | 647 | ||
648 | status = nfsd_write(rqstp, current_fh, filp, write->wr_offset, | 648 | status = nfsd_write(rqstp, current_fh, filp, write->wr_offset, |
649 | write->wr_vec, write->wr_vlen, write->wr_buflen, | 649 | rqstp->rq_vec, write->wr_vlen, write->wr_buflen, |
650 | &write->wr_how_written); | 650 | &write->wr_how_written); |
651 | if (filp) | 651 | if (filp) |
652 | fput(filp); | 652 | fput(filp); |
@@ -802,13 +802,29 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
802 | * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH | 802 | * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH |
803 | * require a valid current filehandle | 803 | * require a valid current filehandle |
804 | */ | 804 | */ |
805 | if ((!current_fh->fh_dentry) && | 805 | if (!current_fh->fh_dentry) { |
806 | !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) || | 806 | if (!((op->opnum == OP_PUTFH) || |
807 | (op->opnum == OP_SETCLIENTID) || | 807 | (op->opnum == OP_PUTROOTFH) || |
808 | (op->opnum == OP_SETCLIENTID_CONFIRM) || | 808 | (op->opnum == OP_SETCLIENTID) || |
809 | (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) || | 809 | (op->opnum == OP_SETCLIENTID_CONFIRM) || |
810 | (op->opnum == OP_RELEASE_LOCKOWNER))) { | 810 | (op->opnum == OP_RENEW) || |
811 | op->status = nfserr_nofilehandle; | 811 | (op->opnum == OP_RESTOREFH) || |
812 | (op->opnum == OP_RELEASE_LOCKOWNER))) { | ||
813 | op->status = nfserr_nofilehandle; | ||
814 | goto encode_op; | ||
815 | } | ||
816 | } | ||
817 | /* Check must be done at start of each operation, except | ||
818 | * for GETATTR and ops not listed as returning NFS4ERR_MOVED | ||
819 | */ | ||
820 | else if (current_fh->fh_export->ex_fslocs.migrated && | ||
821 | !((op->opnum == OP_GETATTR) || | ||
822 | (op->opnum == OP_PUTROOTFH) || | ||
823 | (op->opnum == OP_PUTPUBFH) || | ||
824 | (op->opnum == OP_RENEW) || | ||
825 | (op->opnum == OP_SETCLIENTID) || | ||
826 | (op->opnum == OP_RELEASE_LOCKOWNER))) { | ||
827 | op->status = nfserr_moved; | ||
812 | goto encode_op; | 828 | goto encode_op; |
813 | } | 829 | } |
814 | switch (op->opnum) { | 830 | switch (op->opnum) { |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e35d7e52fdeb..1cbd2e4ee122 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -184,7 +184,7 @@ struct dentry_list_arg { | |||
184 | 184 | ||
185 | static int | 185 | static int |
186 | nfsd4_build_dentrylist(void *arg, const char *name, int namlen, | 186 | nfsd4_build_dentrylist(void *arg, const char *name, int namlen, |
187 | loff_t offset, ino_t ino, unsigned int d_type) | 187 | loff_t offset, u64 ino, unsigned int d_type) |
188 | { | 188 | { |
189 | struct dentry_list_arg *dla = arg; | 189 | struct dentry_list_arg *dla = arg; |
190 | struct list_head *dentries = &dla->dentries; | 190 | struct list_head *dentries = &dla->dentries; |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5446a0861d1d..41fc241b729a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -60,6 +60,14 @@ | |||
60 | 60 | ||
61 | #define NFSDDBG_FACILITY NFSDDBG_XDR | 61 | #define NFSDDBG_FACILITY NFSDDBG_XDR |
62 | 62 | ||
63 | /* | ||
64 | * As per referral draft, the fsid for a referral MUST be different from the fsid of the containing | ||
65 | * directory in order to indicate to the client that a filesystem boundary is present. | ||
66 | * We use a fixed fsid for a referral. | ||
67 | */ | ||
68 | #define NFS4_REFERRAL_FSID_MAJOR 0x8000000ULL | ||
69 | #define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL | ||
70 | |||
63 | static int | 71 | static int |
64 | check_filename(char *str, int len, int err) | 72 | check_filename(char *str, int len, int err) |
65 | { | 73 | { |
@@ -198,8 +206,7 @@ static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) | |||
198 | p = new; | 206 | p = new; |
199 | memcpy(p, argp->tmp, nbytes); | 207 | memcpy(p, argp->tmp, nbytes); |
200 | } else { | 208 | } else { |
201 | if (p != argp->tmpp) | 209 | BUG_ON(p != argp->tmpp); |
202 | BUG(); | ||
203 | argp->tmpp = NULL; | 210 | argp->tmpp = NULL; |
204 | } | 211 | } |
205 | if (defer_free(argp, kfree, p)) { | 212 | if (defer_free(argp, kfree, p)) { |
@@ -927,26 +934,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) | |||
927 | printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); | 934 | printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); |
928 | goto xdr_error; | 935 | goto xdr_error; |
929 | } | 936 | } |
930 | write->wr_vec[0].iov_base = p; | 937 | argp->rqstp->rq_vec[0].iov_base = p; |
931 | write->wr_vec[0].iov_len = avail; | 938 | argp->rqstp->rq_vec[0].iov_len = avail; |
932 | v = 0; | 939 | v = 0; |
933 | len = write->wr_buflen; | 940 | len = write->wr_buflen; |
934 | while (len > write->wr_vec[v].iov_len) { | 941 | while (len > argp->rqstp->rq_vec[v].iov_len) { |
935 | len -= write->wr_vec[v].iov_len; | 942 | len -= argp->rqstp->rq_vec[v].iov_len; |
936 | v++; | 943 | v++; |
937 | write->wr_vec[v].iov_base = page_address(argp->pagelist[0]); | 944 | argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]); |
938 | argp->pagelist++; | 945 | argp->pagelist++; |
939 | if (argp->pagelen >= PAGE_SIZE) { | 946 | if (argp->pagelen >= PAGE_SIZE) { |
940 | write->wr_vec[v].iov_len = PAGE_SIZE; | 947 | argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE; |
941 | argp->pagelen -= PAGE_SIZE; | 948 | argp->pagelen -= PAGE_SIZE; |
942 | } else { | 949 | } else { |
943 | write->wr_vec[v].iov_len = argp->pagelen; | 950 | argp->rqstp->rq_vec[v].iov_len = argp->pagelen; |
944 | argp->pagelen -= len; | 951 | argp->pagelen -= len; |
945 | } | 952 | } |
946 | } | 953 | } |
947 | argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len); | 954 | argp->end = (u32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len); |
948 | argp->p = (u32*) (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); | 955 | argp->p = (u32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); |
949 | write->wr_vec[v].iov_len = len; | 956 | argp->rqstp->rq_vec[v].iov_len = len; |
950 | write->wr_vlen = v+1; | 957 | write->wr_vlen = v+1; |
951 | 958 | ||
952 | DECODE_TAIL; | 959 | DECODE_TAIL; |
@@ -1224,6 +1231,119 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) | |||
1224 | stateowner->so_replay.rp_buflen); \ | 1231 | stateowner->so_replay.rp_buflen); \ |
1225 | } } while (0); | 1232 | } } while (0); |
1226 | 1233 | ||
1234 | /* Encode as an array of strings the string given with components | ||
1235 | * seperated @sep. | ||
1236 | */ | ||
1237 | static int nfsd4_encode_components(char sep, char *components, | ||
1238 | u32 **pp, int *buflen) | ||
1239 | { | ||
1240 | u32 *p = *pp; | ||
1241 | u32 *countp = p; | ||
1242 | int strlen, count=0; | ||
1243 | char *str, *end; | ||
1244 | |||
1245 | dprintk("nfsd4_encode_components(%s)\n", components); | ||
1246 | if ((*buflen -= 4) < 0) | ||
1247 | return nfserr_resource; | ||
1248 | WRITE32(0); /* We will fill this in with @count later */ | ||
1249 | end = str = components; | ||
1250 | while (*end) { | ||
1251 | for (; *end && (*end != sep); end++) | ||
1252 | ; /* Point to end of component */ | ||
1253 | strlen = end - str; | ||
1254 | if (strlen) { | ||
1255 | if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) | ||
1256 | return nfserr_resource; | ||
1257 | WRITE32(strlen); | ||
1258 | WRITEMEM(str, strlen); | ||
1259 | count++; | ||
1260 | } | ||
1261 | else | ||
1262 | end++; | ||
1263 | str = end; | ||
1264 | } | ||
1265 | *pp = p; | ||
1266 | p = countp; | ||
1267 | WRITE32(count); | ||
1268 | return 0; | ||
1269 | } | ||
1270 | |||
1271 | /* | ||
1272 | * encode a location element of a fs_locations structure | ||
1273 | */ | ||
1274 | static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, | ||
1275 | u32 **pp, int *buflen) | ||
1276 | { | ||
1277 | int status; | ||
1278 | u32 *p = *pp; | ||
1279 | |||
1280 | status = nfsd4_encode_components(':', location->hosts, &p, buflen); | ||
1281 | if (status) | ||
1282 | return status; | ||
1283 | status = nfsd4_encode_components('/', location->path, &p, buflen); | ||
1284 | if (status) | ||
1285 | return status; | ||
1286 | *pp = p; | ||
1287 | return 0; | ||
1288 | } | ||
1289 | |||
1290 | /* | ||
1291 | * Return the path to an export point in the pseudo filesystem namespace | ||
1292 | * Returned string is safe to use as long as the caller holds a reference | ||
1293 | * to @exp. | ||
1294 | */ | ||
1295 | static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp) | ||
1296 | { | ||
1297 | struct svc_fh tmp_fh; | ||
1298 | char *path, *rootpath; | ||
1299 | int stat; | ||
1300 | |||
1301 | fh_init(&tmp_fh, NFS4_FHSIZE); | ||
1302 | stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle); | ||
1303 | if (stat) | ||
1304 | return ERR_PTR(stat); | ||
1305 | rootpath = tmp_fh.fh_export->ex_path; | ||
1306 | |||
1307 | path = exp->ex_path; | ||
1308 | |||
1309 | if (strncmp(path, rootpath, strlen(rootpath))) { | ||
1310 | printk("nfsd: fs_locations failed;" | ||
1311 | "%s is not contained in %s\n", path, rootpath); | ||
1312 | return ERR_PTR(-EOPNOTSUPP); | ||
1313 | } | ||
1314 | |||
1315 | return path + strlen(rootpath); | ||
1316 | } | ||
1317 | |||
1318 | /* | ||
1319 | * encode a fs_locations structure | ||
1320 | */ | ||
1321 | static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp, | ||
1322 | struct svc_export *exp, | ||
1323 | u32 **pp, int *buflen) | ||
1324 | { | ||
1325 | int status, i; | ||
1326 | u32 *p = *pp; | ||
1327 | struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; | ||
1328 | char *root = nfsd4_path(rqstp, exp); | ||
1329 | |||
1330 | if (IS_ERR(root)) | ||
1331 | return PTR_ERR(root); | ||
1332 | status = nfsd4_encode_components('/', root, &p, buflen); | ||
1333 | if (status) | ||
1334 | return status; | ||
1335 | if ((*buflen -= 4) < 0) | ||
1336 | return nfserr_resource; | ||
1337 | WRITE32(fslocs->locations_count); | ||
1338 | for (i=0; i<fslocs->locations_count; i++) { | ||
1339 | status = nfsd4_encode_fs_location4(&fslocs->locations[i], | ||
1340 | &p, buflen); | ||
1341 | if (status) | ||
1342 | return status; | ||
1343 | } | ||
1344 | *pp = p; | ||
1345 | return 0; | ||
1346 | } | ||
1227 | 1347 | ||
1228 | static u32 nfs4_ftypes[16] = { | 1348 | static u32 nfs4_ftypes[16] = { |
1229 | NF4BAD, NF4FIFO, NF4CHR, NF4BAD, | 1349 | NF4BAD, NF4FIFO, NF4CHR, NF4BAD, |
@@ -1273,6 +1393,25 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, | |||
1273 | return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); | 1393 | return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); |
1274 | } | 1394 | } |
1275 | 1395 | ||
1396 | #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ | ||
1397 | FATTR4_WORD0_RDATTR_ERROR) | ||
1398 | #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID | ||
1399 | |||
1400 | static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) | ||
1401 | { | ||
1402 | /* As per referral draft: */ | ||
1403 | if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS || | ||
1404 | *bmval1 & ~WORD1_ABSENT_FS_ATTRS) { | ||
1405 | if (*bmval0 & FATTR4_WORD0_RDATTR_ERROR || | ||
1406 | *bmval0 & FATTR4_WORD0_FS_LOCATIONS) | ||
1407 | *rdattr_err = NFSERR_MOVED; | ||
1408 | else | ||
1409 | return nfserr_moved; | ||
1410 | } | ||
1411 | *bmval0 &= WORD0_ABSENT_FS_ATTRS; | ||
1412 | *bmval1 &= WORD1_ABSENT_FS_ATTRS; | ||
1413 | return 0; | ||
1414 | } | ||
1276 | 1415 | ||
1277 | /* | 1416 | /* |
1278 | * Note: @fhp can be NULL; in this case, we might have to compose the filehandle | 1417 | * Note: @fhp can be NULL; in this case, we might have to compose the filehandle |
@@ -1295,6 +1434,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1295 | u32 *attrlenp; | 1434 | u32 *attrlenp; |
1296 | u32 dummy; | 1435 | u32 dummy; |
1297 | u64 dummy64; | 1436 | u64 dummy64; |
1437 | u32 rdattr_err = 0; | ||
1298 | u32 *p = buffer; | 1438 | u32 *p = buffer; |
1299 | int status; | 1439 | int status; |
1300 | int aclsupport = 0; | 1440 | int aclsupport = 0; |
@@ -1304,6 +1444,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1304 | BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); | 1444 | BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); |
1305 | BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); | 1445 | BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); |
1306 | 1446 | ||
1447 | if (exp->ex_fslocs.migrated) { | ||
1448 | status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); | ||
1449 | if (status) | ||
1450 | goto out; | ||
1451 | } | ||
1452 | |||
1307 | status = vfs_getattr(exp->ex_mnt, dentry, &stat); | 1453 | status = vfs_getattr(exp->ex_mnt, dentry, &stat); |
1308 | if (status) | 1454 | if (status) |
1309 | goto out_nfserr; | 1455 | goto out_nfserr; |
@@ -1335,6 +1481,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1335 | goto out_nfserr; | 1481 | goto out_nfserr; |
1336 | } | 1482 | } |
1337 | } | 1483 | } |
1484 | if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { | ||
1485 | if (exp->ex_fslocs.locations == NULL) { | ||
1486 | bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS; | ||
1487 | } | ||
1488 | } | ||
1338 | if ((buflen -= 16) < 0) | 1489 | if ((buflen -= 16) < 0) |
1339 | goto out_resource; | 1490 | goto out_resource; |
1340 | 1491 | ||
@@ -1344,12 +1495,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1344 | attrlenp = p++; /* to be backfilled later */ | 1495 | attrlenp = p++; /* to be backfilled later */ |
1345 | 1496 | ||
1346 | if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { | 1497 | if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { |
1498 | u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0; | ||
1347 | if ((buflen -= 12) < 0) | 1499 | if ((buflen -= 12) < 0) |
1348 | goto out_resource; | 1500 | goto out_resource; |
1501 | if (!aclsupport) | ||
1502 | word0 &= ~FATTR4_WORD0_ACL; | ||
1503 | if (!exp->ex_fslocs.locations) | ||
1504 | word0 &= ~FATTR4_WORD0_FS_LOCATIONS; | ||
1349 | WRITE32(2); | 1505 | WRITE32(2); |
1350 | WRITE32(aclsupport ? | 1506 | WRITE32(word0); |
1351 | NFSD_SUPPORTED_ATTRS_WORD0 : | ||
1352 | NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL); | ||
1353 | WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); | 1507 | WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); |
1354 | } | 1508 | } |
1355 | if (bmval0 & FATTR4_WORD0_TYPE) { | 1509 | if (bmval0 & FATTR4_WORD0_TYPE) { |
@@ -1403,7 +1557,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1403 | if (bmval0 & FATTR4_WORD0_FSID) { | 1557 | if (bmval0 & FATTR4_WORD0_FSID) { |
1404 | if ((buflen -= 16) < 0) | 1558 | if ((buflen -= 16) < 0) |
1405 | goto out_resource; | 1559 | goto out_resource; |
1406 | if (is_fsid(fhp, rqstp->rq_reffh)) { | 1560 | if (exp->ex_fslocs.migrated) { |
1561 | WRITE64(NFS4_REFERRAL_FSID_MAJOR); | ||
1562 | WRITE64(NFS4_REFERRAL_FSID_MINOR); | ||
1563 | } else if (is_fsid(fhp, rqstp->rq_reffh)) { | ||
1407 | WRITE64((u64)exp->ex_fsid); | 1564 | WRITE64((u64)exp->ex_fsid); |
1408 | WRITE64((u64)0); | 1565 | WRITE64((u64)0); |
1409 | } else { | 1566 | } else { |
@@ -1426,7 +1583,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1426 | if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { | 1583 | if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { |
1427 | if ((buflen -= 4) < 0) | 1584 | if ((buflen -= 4) < 0) |
1428 | goto out_resource; | 1585 | goto out_resource; |
1429 | WRITE32(0); | 1586 | WRITE32(rdattr_err); |
1430 | } | 1587 | } |
1431 | if (bmval0 & FATTR4_WORD0_ACL) { | 1588 | if (bmval0 & FATTR4_WORD0_ACL) { |
1432 | struct nfs4_ace *ace; | 1589 | struct nfs4_ace *ace; |
@@ -1514,6 +1671,13 @@ out_acl: | |||
1514 | goto out_resource; | 1671 | goto out_resource; |
1515 | WRITE64((u64) statfs.f_files); | 1672 | WRITE64((u64) statfs.f_files); |
1516 | } | 1673 | } |
1674 | if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { | ||
1675 | status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen); | ||
1676 | if (status == nfserr_resource) | ||
1677 | goto out_resource; | ||
1678 | if (status) | ||
1679 | goto out; | ||
1680 | } | ||
1517 | if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { | 1681 | if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { |
1518 | if ((buflen -= 4) < 0) | 1682 | if ((buflen -= 4) < 0) |
1519 | goto out_resource; | 1683 | goto out_resource; |
@@ -1537,12 +1701,12 @@ out_acl: | |||
1537 | if (bmval0 & FATTR4_WORD0_MAXREAD) { | 1701 | if (bmval0 & FATTR4_WORD0_MAXREAD) { |
1538 | if ((buflen -= 8) < 0) | 1702 | if ((buflen -= 8) < 0) |
1539 | goto out_resource; | 1703 | goto out_resource; |
1540 | WRITE64((u64) NFSSVC_MAXBLKSIZE); | 1704 | WRITE64((u64) svc_max_payload(rqstp)); |
1541 | } | 1705 | } |
1542 | if (bmval0 & FATTR4_WORD0_MAXWRITE) { | 1706 | if (bmval0 & FATTR4_WORD0_MAXWRITE) { |
1543 | if ((buflen -= 8) < 0) | 1707 | if ((buflen -= 8) < 0) |
1544 | goto out_resource; | 1708 | goto out_resource; |
1545 | WRITE64((u64) NFSSVC_MAXBLKSIZE); | 1709 | WRITE64((u64) svc_max_payload(rqstp)); |
1546 | } | 1710 | } |
1547 | if (bmval1 & FATTR4_WORD1_MODE) { | 1711 | if (bmval1 & FATTR4_WORD1_MODE) { |
1548 | if ((buflen -= 4) < 0) | 1712 | if ((buflen -= 4) < 0) |
@@ -1846,7 +2010,6 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ge | |||
1846 | nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, | 2010 | nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, |
1847 | resp->p, &buflen, getattr->ga_bmval, | 2011 | resp->p, &buflen, getattr->ga_bmval, |
1848 | resp->rqstp); | 2012 | resp->rqstp); |
1849 | |||
1850 | if (!nfserr) | 2013 | if (!nfserr) |
1851 | resp->p += buflen; | 2014 | resp->p += buflen; |
1852 | return nfserr; | 2015 | return nfserr; |
@@ -2040,7 +2203,8 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct n | |||
2040 | } | 2203 | } |
2041 | 2204 | ||
2042 | static int | 2205 | static int |
2043 | nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read *read) | 2206 | nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, |
2207 | struct nfsd4_read *read) | ||
2044 | { | 2208 | { |
2045 | u32 eof; | 2209 | u32 eof; |
2046 | int v, pn; | 2210 | int v, pn; |
@@ -2055,31 +2219,33 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read | |||
2055 | 2219 | ||
2056 | RESERVE_SPACE(8); /* eof flag and byte count */ | 2220 | RESERVE_SPACE(8); /* eof flag and byte count */ |
2057 | 2221 | ||
2058 | maxcount = NFSSVC_MAXBLKSIZE; | 2222 | maxcount = svc_max_payload(resp->rqstp); |
2059 | if (maxcount > read->rd_length) | 2223 | if (maxcount > read->rd_length) |
2060 | maxcount = read->rd_length; | 2224 | maxcount = read->rd_length; |
2061 | 2225 | ||
2062 | len = maxcount; | 2226 | len = maxcount; |
2063 | v = 0; | 2227 | v = 0; |
2064 | while (len > 0) { | 2228 | while (len > 0) { |
2065 | pn = resp->rqstp->rq_resused; | 2229 | pn = resp->rqstp->rq_resused++; |
2066 | svc_take_page(resp->rqstp); | 2230 | resp->rqstp->rq_vec[v].iov_base = |
2067 | read->rd_iov[v].iov_base = page_address(resp->rqstp->rq_respages[pn]); | 2231 | page_address(resp->rqstp->rq_respages[pn]); |
2068 | read->rd_iov[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE; | 2232 | resp->rqstp->rq_vec[v].iov_len = |
2233 | len < PAGE_SIZE ? len : PAGE_SIZE; | ||
2069 | v++; | 2234 | v++; |
2070 | len -= PAGE_SIZE; | 2235 | len -= PAGE_SIZE; |
2071 | } | 2236 | } |
2072 | read->rd_vlen = v; | 2237 | read->rd_vlen = v; |
2073 | 2238 | ||
2074 | nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp, | 2239 | nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp, |
2075 | read->rd_offset, read->rd_iov, read->rd_vlen, | 2240 | read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, |
2076 | &maxcount); | 2241 | &maxcount); |
2077 | 2242 | ||
2078 | if (nfserr == nfserr_symlink) | 2243 | if (nfserr == nfserr_symlink) |
2079 | nfserr = nfserr_inval; | 2244 | nfserr = nfserr_inval; |
2080 | if (nfserr) | 2245 | if (nfserr) |
2081 | return nfserr; | 2246 | return nfserr; |
2082 | eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); | 2247 | eof = (read->rd_offset + maxcount >= |
2248 | read->rd_fhp->fh_dentry->d_inode->i_size); | ||
2083 | 2249 | ||
2084 | WRITE32(eof); | 2250 | WRITE32(eof); |
2085 | WRITE32(maxcount); | 2251 | WRITE32(maxcount); |
@@ -2089,7 +2255,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read | |||
2089 | resp->xbuf->page_len = maxcount; | 2255 | resp->xbuf->page_len = maxcount; |
2090 | 2256 | ||
2091 | /* Use rest of head for padding and remaining ops: */ | 2257 | /* Use rest of head for padding and remaining ops: */ |
2092 | resp->rqstp->rq_restailpage = 0; | ||
2093 | resp->xbuf->tail[0].iov_base = p; | 2258 | resp->xbuf->tail[0].iov_base = p; |
2094 | resp->xbuf->tail[0].iov_len = 0; | 2259 | resp->xbuf->tail[0].iov_len = 0; |
2095 | if (maxcount&3) { | 2260 | if (maxcount&3) { |
@@ -2114,8 +2279,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r | |||
2114 | if (resp->xbuf->page_len) | 2279 | if (resp->xbuf->page_len) |
2115 | return nfserr_resource; | 2280 | return nfserr_resource; |
2116 | 2281 | ||
2117 | svc_take_page(resp->rqstp); | 2282 | page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); |
2118 | page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); | ||
2119 | 2283 | ||
2120 | maxcount = PAGE_SIZE; | 2284 | maxcount = PAGE_SIZE; |
2121 | RESERVE_SPACE(4); | 2285 | RESERVE_SPACE(4); |
@@ -2139,7 +2303,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r | |||
2139 | resp->xbuf->page_len = maxcount; | 2303 | resp->xbuf->page_len = maxcount; |
2140 | 2304 | ||
2141 | /* Use rest of head for padding and remaining ops: */ | 2305 | /* Use rest of head for padding and remaining ops: */ |
2142 | resp->rqstp->rq_restailpage = 0; | ||
2143 | resp->xbuf->tail[0].iov_base = p; | 2306 | resp->xbuf->tail[0].iov_base = p; |
2144 | resp->xbuf->tail[0].iov_len = 0; | 2307 | resp->xbuf->tail[0].iov_len = 0; |
2145 | if (maxcount&3) { | 2308 | if (maxcount&3) { |
@@ -2190,8 +2353,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re | |||
2190 | goto err_no_verf; | 2353 | goto err_no_verf; |
2191 | } | 2354 | } |
2192 | 2355 | ||
2193 | svc_take_page(resp->rqstp); | 2356 | page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); |
2194 | page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); | ||
2195 | readdir->common.err = 0; | 2357 | readdir->common.err = 0; |
2196 | readdir->buflen = maxcount; | 2358 | readdir->buflen = maxcount; |
2197 | readdir->buffer = page; | 2359 | readdir->buffer = page; |
@@ -2216,10 +2378,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re | |||
2216 | p = readdir->buffer; | 2378 | p = readdir->buffer; |
2217 | *p++ = 0; /* no more entries */ | 2379 | *p++ = 0; /* no more entries */ |
2218 | *p++ = htonl(readdir->common.err == nfserr_eof); | 2380 | *p++ = htonl(readdir->common.err == nfserr_eof); |
2219 | resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); | 2381 | resp->xbuf->page_len = ((char*)p) - (char*)page_address( |
2382 | resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); | ||
2220 | 2383 | ||
2221 | /* Use rest of head for padding and remaining ops: */ | 2384 | /* Use rest of head for padding and remaining ops: */ |
2222 | resp->rqstp->rq_restailpage = 0; | ||
2223 | resp->xbuf->tail[0].iov_base = tailbase; | 2385 | resp->xbuf->tail[0].iov_base = tailbase; |
2224 | resp->xbuf->tail[0].iov_len = 0; | 2386 | resp->xbuf->tail[0].iov_len = 0; |
2225 | resp->p = resp->xbuf->tail[0].iov_base; | 2387 | resp->p = resp->xbuf->tail[0].iov_base; |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7046ac9cf97f..39aed901514b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -23,10 +23,14 @@ | |||
23 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
24 | #include <linux/init.h> | 24 | #include <linux/init.h> |
25 | #include <linux/string.h> | 25 | #include <linux/string.h> |
26 | #include <linux/smp_lock.h> | ||
27 | #include <linux/ctype.h> | ||
26 | 28 | ||
27 | #include <linux/nfs.h> | 29 | #include <linux/nfs.h> |
28 | #include <linux/nfsd_idmap.h> | 30 | #include <linux/nfsd_idmap.h> |
31 | #include <linux/lockd/bind.h> | ||
29 | #include <linux/sunrpc/svc.h> | 32 | #include <linux/sunrpc/svc.h> |
33 | #include <linux/sunrpc/svcsock.h> | ||
30 | #include <linux/nfsd/nfsd.h> | 34 | #include <linux/nfsd/nfsd.h> |
31 | #include <linux/nfsd/cache.h> | 35 | #include <linux/nfsd/cache.h> |
32 | #include <linux/nfsd/xdr.h> | 36 | #include <linux/nfsd/xdr.h> |
@@ -35,8 +39,6 @@ | |||
35 | 39 | ||
36 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
37 | 41 | ||
38 | unsigned int nfsd_versbits = ~0; | ||
39 | |||
40 | /* | 42 | /* |
41 | * We have a single directory with 9 nodes in it. | 43 | * We have a single directory with 9 nodes in it. |
42 | */ | 44 | */ |
@@ -52,7 +54,10 @@ enum { | |||
52 | NFSD_List, | 54 | NFSD_List, |
53 | NFSD_Fh, | 55 | NFSD_Fh, |
54 | NFSD_Threads, | 56 | NFSD_Threads, |
57 | NFSD_Pool_Threads, | ||
55 | NFSD_Versions, | 58 | NFSD_Versions, |
59 | NFSD_Ports, | ||
60 | NFSD_MaxBlkSize, | ||
56 | /* | 61 | /* |
57 | * The below MUST come last. Otherwise we leave a hole in nfsd_files[] | 62 | * The below MUST come last. Otherwise we leave a hole in nfsd_files[] |
58 | * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops | 63 | * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops |
@@ -75,7 +80,10 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size); | |||
75 | static ssize_t write_getfs(struct file *file, char *buf, size_t size); | 80 | static ssize_t write_getfs(struct file *file, char *buf, size_t size); |
76 | static ssize_t write_filehandle(struct file *file, char *buf, size_t size); | 81 | static ssize_t write_filehandle(struct file *file, char *buf, size_t size); |
77 | static ssize_t write_threads(struct file *file, char *buf, size_t size); | 82 | static ssize_t write_threads(struct file *file, char *buf, size_t size); |
83 | static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); | ||
78 | static ssize_t write_versions(struct file *file, char *buf, size_t size); | 84 | static ssize_t write_versions(struct file *file, char *buf, size_t size); |
85 | static ssize_t write_ports(struct file *file, char *buf, size_t size); | ||
86 | static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); | ||
79 | #ifdef CONFIG_NFSD_V4 | 87 | #ifdef CONFIG_NFSD_V4 |
80 | static ssize_t write_leasetime(struct file *file, char *buf, size_t size); | 88 | static ssize_t write_leasetime(struct file *file, char *buf, size_t size); |
81 | static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); | 89 | static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); |
@@ -91,7 +99,10 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { | |||
91 | [NFSD_Getfs] = write_getfs, | 99 | [NFSD_Getfs] = write_getfs, |
92 | [NFSD_Fh] = write_filehandle, | 100 | [NFSD_Fh] = write_filehandle, |
93 | [NFSD_Threads] = write_threads, | 101 | [NFSD_Threads] = write_threads, |
102 | [NFSD_Pool_Threads] = write_pool_threads, | ||
94 | [NFSD_Versions] = write_versions, | 103 | [NFSD_Versions] = write_versions, |
104 | [NFSD_Ports] = write_ports, | ||
105 | [NFSD_MaxBlkSize] = write_maxblksize, | ||
95 | #ifdef CONFIG_NFSD_V4 | 106 | #ifdef CONFIG_NFSD_V4 |
96 | [NFSD_Leasetime] = write_leasetime, | 107 | [NFSD_Leasetime] = write_leasetime, |
97 | [NFSD_RecoveryDir] = write_recoverydir, | 108 | [NFSD_RecoveryDir] = write_recoverydir, |
@@ -358,6 +369,72 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) | |||
358 | return strlen(buf); | 369 | return strlen(buf); |
359 | } | 370 | } |
360 | 371 | ||
372 | extern int nfsd_nrpools(void); | ||
373 | extern int nfsd_get_nrthreads(int n, int *); | ||
374 | extern int nfsd_set_nrthreads(int n, int *); | ||
375 | |||
376 | static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) | ||
377 | { | ||
378 | /* if size > 0, look for an array of number of threads per node | ||
379 | * and apply them then write out number of threads per node as reply | ||
380 | */ | ||
381 | char *mesg = buf; | ||
382 | int i; | ||
383 | int rv; | ||
384 | int len; | ||
385 | int npools = nfsd_nrpools(); | ||
386 | int *nthreads; | ||
387 | |||
388 | if (npools == 0) { | ||
389 | /* | ||
390 | * NFS is shut down. The admin can start it by | ||
391 | * writing to the threads file but NOT the pool_threads | ||
392 | * file, sorry. Report zero threads. | ||
393 | */ | ||
394 | strcpy(buf, "0\n"); | ||
395 | return strlen(buf); | ||
396 | } | ||
397 | |||
398 | nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL); | ||
399 | if (nthreads == NULL) | ||
400 | return -ENOMEM; | ||
401 | |||
402 | if (size > 0) { | ||
403 | for (i = 0; i < npools; i++) { | ||
404 | rv = get_int(&mesg, &nthreads[i]); | ||
405 | if (rv == -ENOENT) | ||
406 | break; /* fewer numbers than pools */ | ||
407 | if (rv) | ||
408 | goto out_free; /* syntax error */ | ||
409 | rv = -EINVAL; | ||
410 | if (nthreads[i] < 0) | ||
411 | goto out_free; | ||
412 | } | ||
413 | rv = nfsd_set_nrthreads(i, nthreads); | ||
414 | if (rv) | ||
415 | goto out_free; | ||
416 | } | ||
417 | |||
418 | rv = nfsd_get_nrthreads(npools, nthreads); | ||
419 | if (rv) | ||
420 | goto out_free; | ||
421 | |||
422 | mesg = buf; | ||
423 | size = SIMPLE_TRANSACTION_LIMIT; | ||
424 | for (i = 0; i < npools && size > 0; i++) { | ||
425 | snprintf(mesg, size, "%d%c", nthreads[i], (i == npools-1 ? '\n' : ' ')); | ||
426 | len = strlen(mesg); | ||
427 | size -= len; | ||
428 | mesg += len; | ||
429 | } | ||
430 | |||
431 | return (mesg-buf); | ||
432 | |||
433 | out_free: | ||
434 | kfree(nthreads); | ||
435 | return rv; | ||
436 | } | ||
437 | |||
361 | static ssize_t write_versions(struct file *file, char *buf, size_t size) | 438 | static ssize_t write_versions(struct file *file, char *buf, size_t size) |
362 | { | 439 | { |
363 | /* | 440 | /* |
@@ -372,6 +449,10 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) | |||
372 | 449 | ||
373 | if (size>0) { | 450 | if (size>0) { |
374 | if (nfsd_serv) | 451 | if (nfsd_serv) |
452 | /* Cannot change versions without updating | ||
453 | * nfsd_serv->sv_xdrsize, and reallocing | ||
454 | * rq_argp and rq_resp | ||
455 | */ | ||
375 | return -EBUSY; | 456 | return -EBUSY; |
376 | if (buf[size-1] != '\n') | 457 | if (buf[size-1] != '\n') |
377 | return -EINVAL; | 458 | return -EINVAL; |
@@ -390,10 +471,7 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) | |||
390 | case 2: | 471 | case 2: |
391 | case 3: | 472 | case 3: |
392 | case 4: | 473 | case 4: |
393 | if (sign != '-') | 474 | nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET); |
394 | NFSCTL_VERSET(nfsd_versbits, num); | ||
395 | else | ||
396 | NFSCTL_VERUNSET(nfsd_versbits, num); | ||
397 | break; | 475 | break; |
398 | default: | 476 | default: |
399 | return -EINVAL; | 477 | return -EINVAL; |
@@ -404,16 +482,15 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) | |||
404 | /* If all get turned off, turn them back on, as | 482 | /* If all get turned off, turn them back on, as |
405 | * having no versions is BAD | 483 | * having no versions is BAD |
406 | */ | 484 | */ |
407 | if ((nfsd_versbits & NFSCTL_VERALL)==0) | 485 | nfsd_reset_versions(); |
408 | nfsd_versbits = NFSCTL_VERALL; | ||
409 | } | 486 | } |
410 | /* Now write current state into reply buffer */ | 487 | /* Now write current state into reply buffer */ |
411 | len = 0; | 488 | len = 0; |
412 | sep = ""; | 489 | sep = ""; |
413 | for (num=2 ; num <= 4 ; num++) | 490 | for (num=2 ; num <= 4 ; num++) |
414 | if (NFSCTL_VERISSET(NFSCTL_VERALL, num)) { | 491 | if (nfsd_vers(num, NFSD_AVAIL)) { |
415 | len += sprintf(buf+len, "%s%c%d", sep, | 492 | len += sprintf(buf+len, "%s%c%d", sep, |
416 | NFSCTL_VERISSET(nfsd_versbits, num)?'+':'-', | 493 | nfsd_vers(num, NFSD_TEST)?'+':'-', |
417 | num); | 494 | num); |
418 | sep = " "; | 495 | sep = " "; |
419 | } | 496 | } |
@@ -421,6 +498,95 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) | |||
421 | return len; | 498 | return len; |
422 | } | 499 | } |
423 | 500 | ||
501 | static ssize_t write_ports(struct file *file, char *buf, size_t size) | ||
502 | { | ||
503 | if (size == 0) { | ||
504 | int len = 0; | ||
505 | lock_kernel(); | ||
506 | if (nfsd_serv) | ||
507 | len = svc_sock_names(buf, nfsd_serv, NULL); | ||
508 | unlock_kernel(); | ||
509 | return len; | ||
510 | } | ||
511 | /* Either a single 'fd' number is written, in which | ||
512 | * case it must be for a socket of a supported family/protocol, | ||
513 | * and we use it as an nfsd socket, or | ||
514 | * A '-' followed by the 'name' of a socket in which case | ||
515 | * we close the socket. | ||
516 | */ | ||
517 | if (isdigit(buf[0])) { | ||
518 | char *mesg = buf; | ||
519 | int fd; | ||
520 | int err; | ||
521 | err = get_int(&mesg, &fd); | ||
522 | if (err) | ||
523 | return -EINVAL; | ||
524 | if (fd < 0) | ||
525 | return -EINVAL; | ||
526 | err = nfsd_create_serv(); | ||
527 | if (!err) { | ||
528 | int proto = 0; | ||
529 | err = svc_addsock(nfsd_serv, fd, buf, &proto); | ||
530 | if (err >= 0) { | ||
531 | err = lockd_up(proto); | ||
532 | if (err < 0) | ||
533 | svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); | ||
534 | } | ||
535 | /* Decrease the count, but don't shutdown the | ||
536 | * the service | ||
537 | */ | ||
538 | lock_kernel(); | ||
539 | nfsd_serv->sv_nrthreads--; | ||
540 | unlock_kernel(); | ||
541 | } | ||
542 | return err < 0 ? err : 0; | ||
543 | } | ||
544 | if (buf[0] == '-') { | ||
545 | char *toclose = kstrdup(buf+1, GFP_KERNEL); | ||
546 | int len = 0; | ||
547 | if (!toclose) | ||
548 | return -ENOMEM; | ||
549 | lock_kernel(); | ||
550 | if (nfsd_serv) | ||
551 | len = svc_sock_names(buf, nfsd_serv, toclose); | ||
552 | unlock_kernel(); | ||
553 | if (len >= 0) | ||
554 | lockd_down(); | ||
555 | kfree(toclose); | ||
556 | return len; | ||
557 | } | ||
558 | return -EINVAL; | ||
559 | } | ||
560 | |||
561 | int nfsd_max_blksize; | ||
562 | |||
563 | static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) | ||
564 | { | ||
565 | char *mesg = buf; | ||
566 | if (size > 0) { | ||
567 | int bsize; | ||
568 | int rv = get_int(&mesg, &bsize); | ||
569 | if (rv) | ||
570 | return rv; | ||
571 | /* force bsize into allowed range and | ||
572 | * required alignment. | ||
573 | */ | ||
574 | if (bsize < 1024) | ||
575 | bsize = 1024; | ||
576 | if (bsize > NFSSVC_MAXBLKSIZE) | ||
577 | bsize = NFSSVC_MAXBLKSIZE; | ||
578 | bsize &= ~(1024-1); | ||
579 | lock_kernel(); | ||
580 | if (nfsd_serv && nfsd_serv->sv_nrthreads) { | ||
581 | unlock_kernel(); | ||
582 | return -EBUSY; | ||
583 | } | ||
584 | nfsd_max_blksize = bsize; | ||
585 | unlock_kernel(); | ||
586 | } | ||
587 | return sprintf(buf, "%d\n", nfsd_max_blksize); | ||
588 | } | ||
589 | |||
424 | #ifdef CONFIG_NFSD_V4 | 590 | #ifdef CONFIG_NFSD_V4 |
425 | extern time_t nfs4_leasetime(void); | 591 | extern time_t nfs4_leasetime(void); |
426 | 592 | ||
@@ -483,7 +649,10 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) | |||
483 | [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, | 649 | [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, |
484 | [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, | 650 | [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, |
485 | [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, | 651 | [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, |
652 | [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, | ||
486 | [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, | 653 | [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, |
654 | [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, | ||
655 | [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, | ||
487 | #ifdef CONFIG_NFSD_V4 | 656 | #ifdef CONFIG_NFSD_V4 |
488 | [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, | 657 | [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, |
489 | [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, | 658 | [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 06cd0db0f32b..9ee1dab5d44a 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -146,20 +146,20 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, | |||
146 | * status, 17 words for fattr, and 1 word for the byte count. | 146 | * status, 17 words for fattr, and 1 word for the byte count. |
147 | */ | 147 | */ |
148 | 148 | ||
149 | if (NFSSVC_MAXBLKSIZE < argp->count) { | 149 | if (NFSSVC_MAXBLKSIZE_V2 < argp->count) { |
150 | printk(KERN_NOTICE | 150 | printk(KERN_NOTICE |
151 | "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n", | 151 | "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n", |
152 | NIPQUAD(rqstp->rq_addr.sin_addr.s_addr), | 152 | NIPQUAD(rqstp->rq_addr.sin_addr.s_addr), |
153 | ntohs(rqstp->rq_addr.sin_port), | 153 | ntohs(rqstp->rq_addr.sin_port), |
154 | argp->count); | 154 | argp->count); |
155 | argp->count = NFSSVC_MAXBLKSIZE; | 155 | argp->count = NFSSVC_MAXBLKSIZE_V2; |
156 | } | 156 | } |
157 | svc_reserve(rqstp, (19<<2) + argp->count + 4); | 157 | svc_reserve(rqstp, (19<<2) + argp->count + 4); |
158 | 158 | ||
159 | resp->count = argp->count; | 159 | resp->count = argp->count; |
160 | nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, | 160 | nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, |
161 | argp->offset, | 161 | argp->offset, |
162 | argp->vec, argp->vlen, | 162 | rqstp->rq_vec, argp->vlen, |
163 | &resp->count); | 163 | &resp->count); |
164 | 164 | ||
165 | if (nfserr) return nfserr; | 165 | if (nfserr) return nfserr; |
@@ -185,7 +185,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, | |||
185 | 185 | ||
186 | nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, | 186 | nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, |
187 | argp->offset, | 187 | argp->offset, |
188 | argp->vec, argp->vlen, | 188 | rqstp->rq_vec, argp->vlen, |
189 | argp->len, | 189 | argp->len, |
190 | &stable); | 190 | &stable); |
191 | return nfsd_return_attrs(nfserr, resp); | 191 | return nfsd_return_attrs(nfserr, resp); |
@@ -225,7 +225,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, | |||
225 | nfserr = nfserr_exist; | 225 | nfserr = nfserr_exist; |
226 | if (isdotent(argp->name, argp->len)) | 226 | if (isdotent(argp->name, argp->len)) |
227 | goto done; | 227 | goto done; |
228 | fh_lock(dirfhp); | 228 | fh_lock_nested(dirfhp, I_MUTEX_PARENT); |
229 | dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); | 229 | dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); |
230 | if (IS_ERR(dchild)) { | 230 | if (IS_ERR(dchild)) { |
231 | nfserr = nfserrno(PTR_ERR(dchild)); | 231 | nfserr = nfserrno(PTR_ERR(dchild)); |
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = { | |||
553 | PROC(none, void, void, none, RC_NOCACHE, ST), | 553 | PROC(none, void, void, none, RC_NOCACHE, ST), |
554 | PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), | 554 | PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), |
555 | PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), | 555 | PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), |
556 | PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4), | 556 | PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4), |
557 | PROC(none, void, void, none, RC_NOCACHE, ST), | 557 | PROC(none, void, void, none, RC_NOCACHE, ST), |
558 | PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), | 558 | PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), |
559 | PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), | 559 | PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ec1decf29bab..6fa6340a5fb8 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -57,12 +57,6 @@ static atomic_t nfsd_busy; | |||
57 | static unsigned long nfsd_last_call; | 57 | static unsigned long nfsd_last_call; |
58 | static DEFINE_SPINLOCK(nfsd_call_lock); | 58 | static DEFINE_SPINLOCK(nfsd_call_lock); |
59 | 59 | ||
60 | struct nfsd_list { | ||
61 | struct list_head list; | ||
62 | struct task_struct *task; | ||
63 | }; | ||
64 | static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list); | ||
65 | |||
66 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | 60 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) |
67 | static struct svc_stat nfsd_acl_svcstats; | 61 | static struct svc_stat nfsd_acl_svcstats; |
68 | static struct svc_version * nfsd_acl_version[] = { | 62 | static struct svc_version * nfsd_acl_version[] = { |
@@ -117,6 +111,32 @@ struct svc_program nfsd_program = { | |||
117 | 111 | ||
118 | }; | 112 | }; |
119 | 113 | ||
114 | int nfsd_vers(int vers, enum vers_op change) | ||
115 | { | ||
116 | if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) | ||
117 | return -1; | ||
118 | switch(change) { | ||
119 | case NFSD_SET: | ||
120 | nfsd_versions[vers] = nfsd_version[vers]; | ||
121 | break; | ||
122 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | ||
123 | if (vers < NFSD_ACL_NRVERS) | ||
124 | nfsd_acl_version[vers] = nfsd_acl_version[vers]; | ||
125 | #endif | ||
126 | case NFSD_CLEAR: | ||
127 | nfsd_versions[vers] = NULL; | ||
128 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | ||
129 | if (vers < NFSD_ACL_NRVERS) | ||
130 | nfsd_acl_version[vers] = NULL; | ||
131 | #endif | ||
132 | break; | ||
133 | case NFSD_TEST: | ||
134 | return nfsd_versions[vers] != NULL; | ||
135 | case NFSD_AVAIL: | ||
136 | return nfsd_version[vers] != NULL; | ||
137 | } | ||
138 | return 0; | ||
139 | } | ||
120 | /* | 140 | /* |
121 | * Maximum number of nfsd processes | 141 | * Maximum number of nfsd processes |
122 | */ | 142 | */ |
@@ -130,16 +150,192 @@ int nfsd_nrthreads(void) | |||
130 | return nfsd_serv->sv_nrthreads; | 150 | return nfsd_serv->sv_nrthreads; |
131 | } | 151 | } |
132 | 152 | ||
153 | static int killsig; /* signal that was used to kill last nfsd */ | ||
154 | static void nfsd_last_thread(struct svc_serv *serv) | ||
155 | { | ||
156 | /* When last nfsd thread exits we need to do some clean-up */ | ||
157 | struct svc_sock *svsk; | ||
158 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) | ||
159 | lockd_down(); | ||
160 | nfsd_serv = NULL; | ||
161 | nfsd_racache_shutdown(); | ||
162 | nfs4_state_shutdown(); | ||
163 | |||
164 | printk(KERN_WARNING "nfsd: last server has exited\n"); | ||
165 | if (killsig != SIG_NOCLEAN) { | ||
166 | printk(KERN_WARNING "nfsd: unexporting all filesystems\n"); | ||
167 | nfsd_export_flush(); | ||
168 | } | ||
169 | } | ||
170 | |||
171 | void nfsd_reset_versions(void) | ||
172 | { | ||
173 | int found_one = 0; | ||
174 | int i; | ||
175 | |||
176 | for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { | ||
177 | if (nfsd_program.pg_vers[i]) | ||
178 | found_one = 1; | ||
179 | } | ||
180 | |||
181 | if (!found_one) { | ||
182 | for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) | ||
183 | nfsd_program.pg_vers[i] = nfsd_version[i]; | ||
184 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | ||
185 | for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) | ||
186 | nfsd_acl_program.pg_vers[i] = | ||
187 | nfsd_acl_version[i]; | ||
188 | #endif | ||
189 | } | ||
190 | } | ||
191 | |||
192 | int nfsd_create_serv(void) | ||
193 | { | ||
194 | int err = 0; | ||
195 | lock_kernel(); | ||
196 | if (nfsd_serv) { | ||
197 | svc_get(nfsd_serv); | ||
198 | unlock_kernel(); | ||
199 | return 0; | ||
200 | } | ||
201 | if (nfsd_max_blksize == 0) { | ||
202 | /* choose a suitable default */ | ||
203 | struct sysinfo i; | ||
204 | si_meminfo(&i); | ||
205 | /* Aim for 1/4096 of memory per thread | ||
206 | * This gives 1MB on 4Gig machines | ||
207 | * But only uses 32K on 128M machines. | ||
208 | * Bottom out at 8K on 32M and smaller. | ||
209 | * Of course, this is only a default. | ||
210 | */ | ||
211 | nfsd_max_blksize = NFSSVC_MAXBLKSIZE; | ||
212 | i.totalram <<= PAGE_SHIFT - 12; | ||
213 | while (nfsd_max_blksize > i.totalram && | ||
214 | nfsd_max_blksize >= 8*1024*2) | ||
215 | nfsd_max_blksize /= 2; | ||
216 | } | ||
217 | |||
218 | atomic_set(&nfsd_busy, 0); | ||
219 | nfsd_serv = svc_create_pooled(&nfsd_program, | ||
220 | NFSD_BUFSIZE - NFSSVC_MAXBLKSIZE + nfsd_max_blksize, | ||
221 | nfsd_last_thread, | ||
222 | nfsd, SIG_NOCLEAN, THIS_MODULE); | ||
223 | if (nfsd_serv == NULL) | ||
224 | err = -ENOMEM; | ||
225 | unlock_kernel(); | ||
226 | do_gettimeofday(&nfssvc_boot); /* record boot time */ | ||
227 | return err; | ||
228 | } | ||
229 | |||
230 | static int nfsd_init_socks(int port) | ||
231 | { | ||
232 | int error; | ||
233 | if (!list_empty(&nfsd_serv->sv_permsocks)) | ||
234 | return 0; | ||
235 | |||
236 | error = lockd_up(IPPROTO_UDP); | ||
237 | if (error >= 0) { | ||
238 | error = svc_makesock(nfsd_serv, IPPROTO_UDP, port); | ||
239 | if (error < 0) | ||
240 | lockd_down(); | ||
241 | } | ||
242 | if (error < 0) | ||
243 | return error; | ||
244 | |||
245 | #ifdef CONFIG_NFSD_TCP | ||
246 | error = lockd_up(IPPROTO_TCP); | ||
247 | if (error >= 0) { | ||
248 | error = svc_makesock(nfsd_serv, IPPROTO_TCP, port); | ||
249 | if (error < 0) | ||
250 | lockd_down(); | ||
251 | } | ||
252 | if (error < 0) | ||
253 | return error; | ||
254 | #endif | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | int nfsd_nrpools(void) | ||
259 | { | ||
260 | if (nfsd_serv == NULL) | ||
261 | return 0; | ||
262 | else | ||
263 | return nfsd_serv->sv_nrpools; | ||
264 | } | ||
265 | |||
266 | int nfsd_get_nrthreads(int n, int *nthreads) | ||
267 | { | ||
268 | int i = 0; | ||
269 | |||
270 | if (nfsd_serv != NULL) { | ||
271 | for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++) | ||
272 | nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads; | ||
273 | } | ||
274 | |||
275 | return 0; | ||
276 | } | ||
277 | |||
278 | int nfsd_set_nrthreads(int n, int *nthreads) | ||
279 | { | ||
280 | int i = 0; | ||
281 | int tot = 0; | ||
282 | int err = 0; | ||
283 | |||
284 | if (nfsd_serv == NULL || n <= 0) | ||
285 | return 0; | ||
286 | |||
287 | if (n > nfsd_serv->sv_nrpools) | ||
288 | n = nfsd_serv->sv_nrpools; | ||
289 | |||
290 | /* enforce a global maximum number of threads */ | ||
291 | tot = 0; | ||
292 | for (i = 0; i < n; i++) { | ||
293 | if (nthreads[i] > NFSD_MAXSERVS) | ||
294 | nthreads[i] = NFSD_MAXSERVS; | ||
295 | tot += nthreads[i]; | ||
296 | } | ||
297 | if (tot > NFSD_MAXSERVS) { | ||
298 | /* total too large: scale down requested numbers */ | ||
299 | for (i = 0; i < n && tot > 0; i++) { | ||
300 | int new = nthreads[i] * NFSD_MAXSERVS / tot; | ||
301 | tot -= (nthreads[i] - new); | ||
302 | nthreads[i] = new; | ||
303 | } | ||
304 | for (i = 0; i < n && tot > 0; i++) { | ||
305 | nthreads[i]--; | ||
306 | tot--; | ||
307 | } | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * There must always be a thread in pool 0; the admin | ||
312 | * can't shut down NFS completely using pool_threads. | ||
313 | */ | ||
314 | if (nthreads[0] == 0) | ||
315 | nthreads[0] = 1; | ||
316 | |||
317 | /* apply the new numbers */ | ||
318 | lock_kernel(); | ||
319 | svc_get(nfsd_serv); | ||
320 | for (i = 0; i < n; i++) { | ||
321 | err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i], | ||
322 | nthreads[i]); | ||
323 | if (err) | ||
324 | break; | ||
325 | } | ||
326 | svc_destroy(nfsd_serv); | ||
327 | unlock_kernel(); | ||
328 | |||
329 | return err; | ||
330 | } | ||
331 | |||
133 | int | 332 | int |
134 | nfsd_svc(unsigned short port, int nrservs) | 333 | nfsd_svc(unsigned short port, int nrservs) |
135 | { | 334 | { |
136 | int error; | 335 | int error; |
137 | int none_left, found_one, i; | ||
138 | struct list_head *victim; | ||
139 | 336 | ||
140 | lock_kernel(); | 337 | lock_kernel(); |
141 | dprintk("nfsd: creating service: vers 0x%x\n", | 338 | dprintk("nfsd: creating service\n"); |
142 | nfsd_versbits); | ||
143 | error = -EINVAL; | 339 | error = -EINVAL; |
144 | if (nrservs <= 0) | 340 | if (nrservs <= 0) |
145 | nrservs = 0; | 341 | nrservs = 0; |
@@ -153,91 +349,20 @@ nfsd_svc(unsigned short port, int nrservs) | |||
153 | error = nfs4_state_start(); | 349 | error = nfs4_state_start(); |
154 | if (error<0) | 350 | if (error<0) |
155 | goto out; | 351 | goto out; |
156 | if (!nfsd_serv) { | ||
157 | /* | ||
158 | * Use the nfsd_ctlbits to define which | ||
159 | * versions that will be advertised. | ||
160 | * If nfsd_ctlbits doesn't list any version, | ||
161 | * export them all. | ||
162 | */ | ||
163 | found_one = 0; | ||
164 | |||
165 | for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { | ||
166 | if (NFSCTL_VERISSET(nfsd_versbits, i)) { | ||
167 | nfsd_program.pg_vers[i] = nfsd_version[i]; | ||
168 | found_one = 1; | ||
169 | } else | ||
170 | nfsd_program.pg_vers[i] = NULL; | ||
171 | } | ||
172 | 352 | ||
173 | if (!found_one) { | 353 | nfsd_reset_versions(); |
174 | for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) | ||
175 | nfsd_program.pg_vers[i] = nfsd_version[i]; | ||
176 | } | ||
177 | 354 | ||
355 | error = nfsd_create_serv(); | ||
178 | 356 | ||
179 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | 357 | if (error) |
180 | found_one = 0; | 358 | goto out; |
181 | 359 | error = nfsd_init_socks(port); | |
182 | for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) { | 360 | if (error) |
183 | if (NFSCTL_VERISSET(nfsd_versbits, i)) { | 361 | goto failure; |
184 | nfsd_acl_program.pg_vers[i] = | ||
185 | nfsd_acl_version[i]; | ||
186 | found_one = 1; | ||
187 | } else | ||
188 | nfsd_acl_program.pg_vers[i] = NULL; | ||
189 | } | ||
190 | |||
191 | if (!found_one) { | ||
192 | for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) | ||
193 | nfsd_acl_program.pg_vers[i] = | ||
194 | nfsd_acl_version[i]; | ||
195 | } | ||
196 | #endif | ||
197 | |||
198 | atomic_set(&nfsd_busy, 0); | ||
199 | error = -ENOMEM; | ||
200 | nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE); | ||
201 | if (nfsd_serv == NULL) | ||
202 | goto out; | ||
203 | error = svc_makesock(nfsd_serv, IPPROTO_UDP, port); | ||
204 | if (error < 0) | ||
205 | goto failure; | ||
206 | 362 | ||
207 | #ifdef CONFIG_NFSD_TCP | 363 | error = svc_set_num_threads(nfsd_serv, NULL, nrservs); |
208 | error = svc_makesock(nfsd_serv, IPPROTO_TCP, port); | ||
209 | if (error < 0) | ||
210 | goto failure; | ||
211 | #endif | ||
212 | do_gettimeofday(&nfssvc_boot); /* record boot time */ | ||
213 | } else | ||
214 | nfsd_serv->sv_nrthreads++; | ||
215 | nrservs -= (nfsd_serv->sv_nrthreads-1); | ||
216 | while (nrservs > 0) { | ||
217 | nrservs--; | ||
218 | __module_get(THIS_MODULE); | ||
219 | error = svc_create_thread(nfsd, nfsd_serv); | ||
220 | if (error < 0) { | ||
221 | module_put(THIS_MODULE); | ||
222 | break; | ||
223 | } | ||
224 | } | ||
225 | victim = nfsd_list.next; | ||
226 | while (nrservs < 0 && victim != &nfsd_list) { | ||
227 | struct nfsd_list *nl = | ||
228 | list_entry(victim,struct nfsd_list, list); | ||
229 | victim = victim->next; | ||
230 | send_sig(SIG_NOCLEAN, nl->task, 1); | ||
231 | nrservs++; | ||
232 | } | ||
233 | failure: | 364 | failure: |
234 | none_left = (nfsd_serv->sv_nrthreads == 1); | ||
235 | svc_destroy(nfsd_serv); /* Release server */ | 365 | svc_destroy(nfsd_serv); /* Release server */ |
236 | if (none_left) { | ||
237 | nfsd_serv = NULL; | ||
238 | nfsd_racache_shutdown(); | ||
239 | nfs4_state_shutdown(); | ||
240 | } | ||
241 | out: | 366 | out: |
242 | unlock_kernel(); | 367 | unlock_kernel(); |
243 | return error; | 368 | return error; |
@@ -270,10 +395,8 @@ update_thread_usage(int busy_threads) | |||
270 | static void | 395 | static void |
271 | nfsd(struct svc_rqst *rqstp) | 396 | nfsd(struct svc_rqst *rqstp) |
272 | { | 397 | { |
273 | struct svc_serv *serv = rqstp->rq_server; | ||
274 | struct fs_struct *fsp; | 398 | struct fs_struct *fsp; |
275 | int err; | 399 | int err; |
276 | struct nfsd_list me; | ||
277 | sigset_t shutdown_mask, allowed_mask; | 400 | sigset_t shutdown_mask, allowed_mask; |
278 | 401 | ||
279 | /* Lock module and set up kernel thread */ | 402 | /* Lock module and set up kernel thread */ |
@@ -297,10 +420,7 @@ nfsd(struct svc_rqst *rqstp) | |||
297 | 420 | ||
298 | nfsdstats.th_cnt++; | 421 | nfsdstats.th_cnt++; |
299 | 422 | ||
300 | lockd_up(); /* start lockd */ | 423 | rqstp->rq_task = current; |
301 | |||
302 | me.task = current; | ||
303 | list_add(&me.list, &nfsd_list); | ||
304 | 424 | ||
305 | unlock_kernel(); | 425 | unlock_kernel(); |
306 | 426 | ||
@@ -322,8 +442,7 @@ nfsd(struct svc_rqst *rqstp) | |||
322 | * Find a socket with data available and call its | 442 | * Find a socket with data available and call its |
323 | * recvfrom routine. | 443 | * recvfrom routine. |
324 | */ | 444 | */ |
325 | while ((err = svc_recv(serv, rqstp, | 445 | while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN) |
326 | 60*60*HZ)) == -EAGAIN) | ||
327 | ; | 446 | ; |
328 | if (err < 0) | 447 | if (err < 0) |
329 | break; | 448 | break; |
@@ -336,7 +455,7 @@ nfsd(struct svc_rqst *rqstp) | |||
336 | /* Process request with signals blocked. */ | 455 | /* Process request with signals blocked. */ |
337 | sigprocmask(SIG_SETMASK, &allowed_mask, NULL); | 456 | sigprocmask(SIG_SETMASK, &allowed_mask, NULL); |
338 | 457 | ||
339 | svc_process(serv, rqstp); | 458 | svc_process(rqstp); |
340 | 459 | ||
341 | /* Unlock export hash tables */ | 460 | /* Unlock export hash tables */ |
342 | exp_readunlock(); | 461 | exp_readunlock(); |
@@ -353,29 +472,13 @@ nfsd(struct svc_rqst *rqstp) | |||
353 | if (sigismember(&current->pending.signal, signo) && | 472 | if (sigismember(&current->pending.signal, signo) && |
354 | !sigismember(&current->blocked, signo)) | 473 | !sigismember(&current->blocked, signo)) |
355 | break; | 474 | break; |
356 | err = signo; | 475 | killsig = signo; |
357 | } | 476 | } |
358 | /* Clear signals before calling lockd_down() and svc_exit_thread() */ | 477 | /* Clear signals before calling svc_exit_thread() */ |
359 | flush_signals(current); | 478 | flush_signals(current); |
360 | 479 | ||
361 | lock_kernel(); | 480 | lock_kernel(); |
362 | 481 | ||
363 | /* Release lockd */ | ||
364 | lockd_down(); | ||
365 | |||
366 | /* Check if this is last thread */ | ||
367 | if (serv->sv_nrthreads==1) { | ||
368 | |||
369 | printk(KERN_WARNING "nfsd: last server has exited\n"); | ||
370 | if (err != SIG_NOCLEAN) { | ||
371 | printk(KERN_WARNING "nfsd: unexporting all filesystems\n"); | ||
372 | nfsd_export_flush(); | ||
373 | } | ||
374 | nfsd_serv = NULL; | ||
375 | nfsd_racache_shutdown(); /* release read-ahead cache */ | ||
376 | nfs4_state_shutdown(); | ||
377 | } | ||
378 | list_del(&me.list); | ||
379 | nfsdstats.th_cnt --; | 482 | nfsdstats.th_cnt --; |
380 | 483 | ||
381 | out: | 484 | out: |
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index e3a0797dd56b..1135c0d14557 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nfsd/xdr.c | 2 | * linux/fs/nfsd/nfsxdr.c |
3 | * | 3 | * |
4 | * XDR support for nfsd | 4 | * XDR support for nfsd |
5 | * | 5 | * |
@@ -254,19 +254,18 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p, | |||
254 | len = args->count = ntohl(*p++); | 254 | len = args->count = ntohl(*p++); |
255 | p++; /* totalcount - unused */ | 255 | p++; /* totalcount - unused */ |
256 | 256 | ||
257 | if (len > NFSSVC_MAXBLKSIZE) | 257 | if (len > NFSSVC_MAXBLKSIZE_V2) |
258 | len = NFSSVC_MAXBLKSIZE; | 258 | len = NFSSVC_MAXBLKSIZE_V2; |
259 | 259 | ||
260 | /* set up somewhere to store response. | 260 | /* set up somewhere to store response. |
261 | * We take pages, put them on reslist and include in iovec | 261 | * We take pages, put them on reslist and include in iovec |
262 | */ | 262 | */ |
263 | v=0; | 263 | v=0; |
264 | while (len > 0) { | 264 | while (len > 0) { |
265 | pn=rqstp->rq_resused; | 265 | pn = rqstp->rq_resused++; |
266 | svc_take_page(rqstp); | 266 | rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); |
267 | args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); | 267 | rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; |
268 | args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; | 268 | len -= rqstp->rq_vec[v].iov_len; |
269 | len -= args->vec[v].iov_len; | ||
270 | v++; | 269 | v++; |
271 | } | 270 | } |
272 | args->vlen = v; | 271 | args->vlen = v; |
@@ -286,21 +285,21 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, | |||
286 | args->offset = ntohl(*p++); /* offset */ | 285 | args->offset = ntohl(*p++); /* offset */ |
287 | p++; /* totalcount */ | 286 | p++; /* totalcount */ |
288 | len = args->len = ntohl(*p++); | 287 | len = args->len = ntohl(*p++); |
289 | args->vec[0].iov_base = (void*)p; | 288 | rqstp->rq_vec[0].iov_base = (void*)p; |
290 | args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - | 289 | rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - |
291 | (((void*)p) - rqstp->rq_arg.head[0].iov_base); | 290 | (((void*)p) - rqstp->rq_arg.head[0].iov_base); |
292 | if (len > NFSSVC_MAXBLKSIZE) | 291 | if (len > NFSSVC_MAXBLKSIZE_V2) |
293 | len = NFSSVC_MAXBLKSIZE; | 292 | len = NFSSVC_MAXBLKSIZE_V2; |
294 | v = 0; | 293 | v = 0; |
295 | while (len > args->vec[v].iov_len) { | 294 | while (len > rqstp->rq_vec[v].iov_len) { |
296 | len -= args->vec[v].iov_len; | 295 | len -= rqstp->rq_vec[v].iov_len; |
297 | v++; | 296 | v++; |
298 | args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); | 297 | rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]); |
299 | args->vec[v].iov_len = PAGE_SIZE; | 298 | rqstp->rq_vec[v].iov_len = PAGE_SIZE; |
300 | } | 299 | } |
301 | args->vec[v].iov_len = len; | 300 | rqstp->rq_vec[v].iov_len = len; |
302 | args->vlen = v+1; | 301 | args->vlen = v+1; |
303 | return args->vec[0].iov_len > 0; | 302 | return rqstp->rq_vec[0].iov_len > 0; |
304 | } | 303 | } |
305 | 304 | ||
306 | int | 305 | int |
@@ -333,8 +332,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinka | |||
333 | { | 332 | { |
334 | if (!(p = decode_fh(p, &args->fh))) | 333 | if (!(p = decode_fh(p, &args->fh))) |
335 | return 0; | 334 | return 0; |
336 | svc_take_page(rqstp); | 335 | args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); |
337 | args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); | ||
338 | 336 | ||
339 | return xdr_argsize_check(rqstp, p); | 337 | return xdr_argsize_check(rqstp, p); |
340 | } | 338 | } |
@@ -375,8 +373,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, | |||
375 | if (args->count > PAGE_SIZE) | 373 | if (args->count > PAGE_SIZE) |
376 | args->count = PAGE_SIZE; | 374 | args->count = PAGE_SIZE; |
377 | 375 | ||
378 | svc_take_page(rqstp); | 376 | args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); |
379 | args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); | ||
380 | 377 | ||
381 | return xdr_argsize_check(rqstp, p); | 378 | return xdr_argsize_check(rqstp, p); |
382 | } | 379 | } |
@@ -416,7 +413,6 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, | |||
416 | rqstp->rq_res.page_len = resp->len; | 413 | rqstp->rq_res.page_len = resp->len; |
417 | if (resp->len & 3) { | 414 | if (resp->len & 3) { |
418 | /* need to pad the tail */ | 415 | /* need to pad the tail */ |
419 | rqstp->rq_restailpage = 0; | ||
420 | rqstp->rq_res.tail[0].iov_base = p; | 416 | rqstp->rq_res.tail[0].iov_base = p; |
421 | *p = 0; | 417 | *p = 0; |
422 | rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); | 418 | rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); |
@@ -436,7 +432,6 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p, | |||
436 | rqstp->rq_res.page_len = resp->count; | 432 | rqstp->rq_res.page_len = resp->count; |
437 | if (resp->count & 3) { | 433 | if (resp->count & 3) { |
438 | /* need to pad the tail */ | 434 | /* need to pad the tail */ |
439 | rqstp->rq_restailpage = 0; | ||
440 | rqstp->rq_res.tail[0].iov_base = p; | 435 | rqstp->rq_res.tail[0].iov_base = p; |
441 | *p = 0; | 436 | *p = 0; |
442 | rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); | 437 | rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); |
@@ -463,7 +458,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p, | |||
463 | { | 458 | { |
464 | struct kstatfs *stat = &resp->stats; | 459 | struct kstatfs *stat = &resp->stats; |
465 | 460 | ||
466 | *p++ = htonl(NFSSVC_MAXBLKSIZE); /* max transfer size */ | 461 | *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */ |
467 | *p++ = htonl(stat->f_bsize); | 462 | *p++ = htonl(stat->f_bsize); |
468 | *p++ = htonl(stat->f_blocks); | 463 | *p++ = htonl(stat->f_blocks); |
469 | *p++ = htonl(stat->f_bfree); | 464 | *p++ = htonl(stat->f_bfree); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c9e3b5a8fe07..1141bd29e4e3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <linux/nfsd_idmap.h> | 54 | #include <linux/nfsd_idmap.h> |
55 | #include <linux/security.h> | 55 | #include <linux/security.h> |
56 | #endif /* CONFIG_NFSD_V4 */ | 56 | #endif /* CONFIG_NFSD_V4 */ |
57 | #include <linux/jhash.h> | ||
57 | 58 | ||
58 | #include <asm/uaccess.h> | 59 | #include <asm/uaccess.h> |
59 | 60 | ||
@@ -81,10 +82,19 @@ struct raparms { | |||
81 | dev_t p_dev; | 82 | dev_t p_dev; |
82 | int p_set; | 83 | int p_set; |
83 | struct file_ra_state p_ra; | 84 | struct file_ra_state p_ra; |
85 | unsigned int p_hindex; | ||
84 | }; | 86 | }; |
85 | 87 | ||
88 | struct raparm_hbucket { | ||
89 | struct raparms *pb_head; | ||
90 | spinlock_t pb_lock; | ||
91 | } ____cacheline_aligned_in_smp; | ||
92 | |||
86 | static struct raparms * raparml; | 93 | static struct raparms * raparml; |
87 | static struct raparms * raparm_cache; | 94 | #define RAPARM_HASH_BITS 4 |
95 | #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) | ||
96 | #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) | ||
97 | static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; | ||
88 | 98 | ||
89 | /* | 99 | /* |
90 | * Called from nfsd_lookup and encode_dirent. Check if we have crossed | 100 | * Called from nfsd_lookup and encode_dirent. Check if we have crossed |
@@ -437,13 +447,11 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
437 | } else if (error < 0) | 447 | } else if (error < 0) |
438 | goto out_nfserr; | 448 | goto out_nfserr; |
439 | 449 | ||
440 | if (pacl) { | 450 | error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); |
441 | error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); | 451 | if (error < 0) |
442 | if (error < 0) | 452 | goto out_nfserr; |
443 | goto out_nfserr; | ||
444 | } | ||
445 | 453 | ||
446 | if (dpacl) { | 454 | if (S_ISDIR(inode->i_mode)) { |
447 | error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); | 455 | error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); |
448 | if (error < 0) | 456 | if (error < 0) |
449 | goto out_nfserr; | 457 | goto out_nfserr; |
@@ -743,16 +751,20 @@ nfsd_sync_dir(struct dentry *dp) | |||
743 | * Obtain the readahead parameters for the file | 751 | * Obtain the readahead parameters for the file |
744 | * specified by (dev, ino). | 752 | * specified by (dev, ino). |
745 | */ | 753 | */ |
746 | static DEFINE_SPINLOCK(ra_lock); | ||
747 | 754 | ||
748 | static inline struct raparms * | 755 | static inline struct raparms * |
749 | nfsd_get_raparms(dev_t dev, ino_t ino) | 756 | nfsd_get_raparms(dev_t dev, ino_t ino) |
750 | { | 757 | { |
751 | struct raparms *ra, **rap, **frap = NULL; | 758 | struct raparms *ra, **rap, **frap = NULL; |
752 | int depth = 0; | 759 | int depth = 0; |
760 | unsigned int hash; | ||
761 | struct raparm_hbucket *rab; | ||
753 | 762 | ||
754 | spin_lock(&ra_lock); | 763 | hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK; |
755 | for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { | 764 | rab = &raparm_hash[hash]; |
765 | |||
766 | spin_lock(&rab->pb_lock); | ||
767 | for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) { | ||
756 | if (ra->p_ino == ino && ra->p_dev == dev) | 768 | if (ra->p_ino == ino && ra->p_dev == dev) |
757 | goto found; | 769 | goto found; |
758 | depth++; | 770 | depth++; |
@@ -761,7 +773,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino) | |||
761 | } | 773 | } |
762 | depth = nfsdstats.ra_size*11/10; | 774 | depth = nfsdstats.ra_size*11/10; |
763 | if (!frap) { | 775 | if (!frap) { |
764 | spin_unlock(&ra_lock); | 776 | spin_unlock(&rab->pb_lock); |
765 | return NULL; | 777 | return NULL; |
766 | } | 778 | } |
767 | rap = frap; | 779 | rap = frap; |
@@ -769,15 +781,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino) | |||
769 | ra->p_dev = dev; | 781 | ra->p_dev = dev; |
770 | ra->p_ino = ino; | 782 | ra->p_ino = ino; |
771 | ra->p_set = 0; | 783 | ra->p_set = 0; |
784 | ra->p_hindex = hash; | ||
772 | found: | 785 | found: |
773 | if (rap != &raparm_cache) { | 786 | if (rap != &rab->pb_head) { |
774 | *rap = ra->p_next; | 787 | *rap = ra->p_next; |
775 | ra->p_next = raparm_cache; | 788 | ra->p_next = rab->pb_head; |
776 | raparm_cache = ra; | 789 | rab->pb_head = ra; |
777 | } | 790 | } |
778 | ra->p_count++; | 791 | ra->p_count++; |
779 | nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; | 792 | nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; |
780 | spin_unlock(&ra_lock); | 793 | spin_unlock(&rab->pb_lock); |
781 | return ra; | 794 | return ra; |
782 | } | 795 | } |
783 | 796 | ||
@@ -791,22 +804,26 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset | |||
791 | { | 804 | { |
792 | unsigned long count = desc->count; | 805 | unsigned long count = desc->count; |
793 | struct svc_rqst *rqstp = desc->arg.data; | 806 | struct svc_rqst *rqstp = desc->arg.data; |
807 | struct page **pp = rqstp->rq_respages + rqstp->rq_resused; | ||
794 | 808 | ||
795 | if (size > count) | 809 | if (size > count) |
796 | size = count; | 810 | size = count; |
797 | 811 | ||
798 | if (rqstp->rq_res.page_len == 0) { | 812 | if (rqstp->rq_res.page_len == 0) { |
799 | get_page(page); | 813 | get_page(page); |
800 | rqstp->rq_respages[rqstp->rq_resused++] = page; | 814 | put_page(*pp); |
815 | *pp = page; | ||
816 | rqstp->rq_resused++; | ||
801 | rqstp->rq_res.page_base = offset; | 817 | rqstp->rq_res.page_base = offset; |
802 | rqstp->rq_res.page_len = size; | 818 | rqstp->rq_res.page_len = size; |
803 | } else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) { | 819 | } else if (page != pp[-1]) { |
804 | get_page(page); | 820 | get_page(page); |
805 | rqstp->rq_respages[rqstp->rq_resused++] = page; | 821 | put_page(*pp); |
822 | *pp = page; | ||
823 | rqstp->rq_resused++; | ||
806 | rqstp->rq_res.page_len += size; | 824 | rqstp->rq_res.page_len += size; |
807 | } else { | 825 | } else |
808 | rqstp->rq_res.page_len += size; | 826 | rqstp->rq_res.page_len += size; |
809 | } | ||
810 | 827 | ||
811 | desc->count = count - size; | 828 | desc->count = count - size; |
812 | desc->written += size; | 829 | desc->written += size; |
@@ -837,7 +854,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
837 | file->f_ra = ra->p_ra; | 854 | file->f_ra = ra->p_ra; |
838 | 855 | ||
839 | if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { | 856 | if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { |
840 | svc_pushback_unused_pages(rqstp); | 857 | rqstp->rq_resused = 1; |
841 | err = file->f_op->sendfile(file, &offset, *count, | 858 | err = file->f_op->sendfile(file, &offset, *count, |
842 | nfsd_read_actor, rqstp); | 859 | nfsd_read_actor, rqstp); |
843 | } else { | 860 | } else { |
@@ -849,11 +866,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
849 | 866 | ||
850 | /* Write back readahead params */ | 867 | /* Write back readahead params */ |
851 | if (ra) { | 868 | if (ra) { |
852 | spin_lock(&ra_lock); | 869 | struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; |
870 | spin_lock(&rab->pb_lock); | ||
853 | ra->p_ra = file->f_ra; | 871 | ra->p_ra = file->f_ra; |
854 | ra->p_set = 1; | 872 | ra->p_set = 1; |
855 | ra->p_count--; | 873 | ra->p_count--; |
856 | spin_unlock(&ra_lock); | 874 | spin_unlock(&rab->pb_lock); |
857 | } | 875 | } |
858 | 876 | ||
859 | if (err >= 0) { | 877 | if (err >= 0) { |
@@ -1114,7 +1132,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1114 | */ | 1132 | */ |
1115 | if (!resfhp->fh_dentry) { | 1133 | if (!resfhp->fh_dentry) { |
1116 | /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ | 1134 | /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ |
1117 | fh_lock(fhp); | 1135 | fh_lock_nested(fhp, I_MUTEX_PARENT); |
1118 | dchild = lookup_one_len(fname, dentry, flen); | 1136 | dchild = lookup_one_len(fname, dentry, flen); |
1119 | err = PTR_ERR(dchild); | 1137 | err = PTR_ERR(dchild); |
1120 | if (IS_ERR(dchild)) | 1138 | if (IS_ERR(dchild)) |
@@ -1240,7 +1258,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1240 | err = nfserr_notdir; | 1258 | err = nfserr_notdir; |
1241 | if(!dirp->i_op || !dirp->i_op->lookup) | 1259 | if(!dirp->i_op || !dirp->i_op->lookup) |
1242 | goto out; | 1260 | goto out; |
1243 | fh_lock(fhp); | 1261 | fh_lock_nested(fhp, I_MUTEX_PARENT); |
1244 | 1262 | ||
1245 | /* | 1263 | /* |
1246 | * Compose the response file handle. | 1264 | * Compose the response file handle. |
@@ -1494,7 +1512,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1494 | if (isdotent(name, len)) | 1512 | if (isdotent(name, len)) |
1495 | goto out; | 1513 | goto out; |
1496 | 1514 | ||
1497 | fh_lock(ffhp); | 1515 | fh_lock_nested(ffhp, I_MUTEX_PARENT); |
1498 | ddir = ffhp->fh_dentry; | 1516 | ddir = ffhp->fh_dentry; |
1499 | dirp = ddir->d_inode; | 1517 | dirp = ddir->d_inode; |
1500 | 1518 | ||
@@ -1644,7 +1662,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
1644 | if (err) | 1662 | if (err) |
1645 | goto out; | 1663 | goto out; |
1646 | 1664 | ||
1647 | fh_lock(fhp); | 1665 | fh_lock_nested(fhp, I_MUTEX_PARENT); |
1648 | dentry = fhp->fh_dentry; | 1666 | dentry = fhp->fh_dentry; |
1649 | dirp = dentry->d_inode; | 1667 | dirp = dentry->d_inode; |
1650 | 1668 | ||
@@ -1829,11 +1847,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) | |||
1829 | void | 1847 | void |
1830 | nfsd_racache_shutdown(void) | 1848 | nfsd_racache_shutdown(void) |
1831 | { | 1849 | { |
1832 | if (!raparm_cache) | 1850 | if (!raparml) |
1833 | return; | 1851 | return; |
1834 | dprintk("nfsd: freeing readahead buffers.\n"); | 1852 | dprintk("nfsd: freeing readahead buffers.\n"); |
1835 | kfree(raparml); | 1853 | kfree(raparml); |
1836 | raparm_cache = raparml = NULL; | 1854 | raparml = NULL; |
1837 | } | 1855 | } |
1838 | /* | 1856 | /* |
1839 | * Initialize readahead param cache | 1857 | * Initialize readahead param cache |
@@ -1842,19 +1860,31 @@ int | |||
1842 | nfsd_racache_init(int cache_size) | 1860 | nfsd_racache_init(int cache_size) |
1843 | { | 1861 | { |
1844 | int i; | 1862 | int i; |
1863 | int j = 0; | ||
1864 | int nperbucket; | ||
1865 | |||
1845 | 1866 | ||
1846 | if (raparm_cache) | 1867 | if (raparml) |
1847 | return 0; | 1868 | return 0; |
1869 | if (cache_size < 2*RAPARM_HASH_SIZE) | ||
1870 | cache_size = 2*RAPARM_HASH_SIZE; | ||
1848 | raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL); | 1871 | raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL); |
1849 | 1872 | ||
1850 | if (raparml != NULL) { | 1873 | if (raparml != NULL) { |
1851 | dprintk("nfsd: allocating %d readahead buffers.\n", | 1874 | dprintk("nfsd: allocating %d readahead buffers.\n", |
1852 | cache_size); | 1875 | cache_size); |
1876 | for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) { | ||
1877 | raparm_hash[i].pb_head = NULL; | ||
1878 | spin_lock_init(&raparm_hash[i].pb_lock); | ||
1879 | } | ||
1880 | nperbucket = cache_size >> RAPARM_HASH_BITS; | ||
1853 | memset(raparml, 0, sizeof(struct raparms) * cache_size); | 1881 | memset(raparml, 0, sizeof(struct raparms) * cache_size); |
1854 | for (i = 0; i < cache_size - 1; i++) { | 1882 | for (i = 0; i < cache_size - 1; i++) { |
1855 | raparml[i].p_next = raparml + i + 1; | 1883 | if (i % nperbucket == 0) |
1884 | raparm_hash[j++].pb_head = raparml + i; | ||
1885 | if (i % nperbucket < nperbucket-1) | ||
1886 | raparml[i].p_next = raparml + i + 1; | ||
1856 | } | 1887 | } |
1857 | raparm_cache = raparml; | ||
1858 | } else { | 1888 | } else { |
1859 | printk(KERN_WARNING | 1889 | printk(KERN_WARNING |
1860 | "nfsd: Could not allocate memory read-ahead cache.\n"); | 1890 | "nfsd: Could not allocate memory read-ahead cache.\n"); |
diff --git a/fs/nls/nls_ascii.c b/fs/nls/nls_ascii.c index b83381c07ad6..6993faea28ac 100644 --- a/fs/nls/nls_ascii.c +++ b/fs/nls/nls_ascii.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_ascii.c | 2 | * linux/fs/nls/nls_ascii.c |
3 | * | 3 | * |
4 | * Charset ascii translation tables. | 4 | * Charset ascii translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 9de6b495f112..7dfdab98729b 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_base.c | 2 | * linux/fs/nls/nls_base.c |
3 | * | 3 | * |
4 | * Native language support--charsets and unicode translations. | 4 | * Native language support--charsets and unicode translations. |
5 | * By Gordon Chaffee 1996, 1997 | 5 | * By Gordon Chaffee 1996, 1997 |
@@ -163,8 +163,6 @@ int register_nls(struct nls_table * nls) | |||
163 | { | 163 | { |
164 | struct nls_table ** tmp = &tables; | 164 | struct nls_table ** tmp = &tables; |
165 | 165 | ||
166 | if (!nls) | ||
167 | return -EINVAL; | ||
168 | if (nls->next) | 166 | if (nls->next) |
169 | return -EBUSY; | 167 | return -EBUSY; |
170 | 168 | ||
diff --git a/fs/nls/nls_cp1250.c b/fs/nls/nls_cp1250.c index 32e78cf95180..570aa69846a0 100644 --- a/fs/nls/nls_cp1250.c +++ b/fs/nls/nls_cp1250.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp1250.c | 2 | * linux/fs/nls/nls_cp1250.c |
3 | * | 3 | * |
4 | * Charset cp1250 translation tables. | 4 | * Charset cp1250 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp1251.c b/fs/nls/nls_cp1251.c index cb41c8ae4486..f114afa069db 100644 --- a/fs/nls/nls_cp1251.c +++ b/fs/nls/nls_cp1251.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp1251.c | 2 | * linux/fs/nls/nls_cp1251.c |
3 | * | 3 | * |
4 | * Charset cp1251 translation tables. | 4 | * Charset cp1251 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp1255.c b/fs/nls/nls_cp1255.c index efdeefee5346..e57f2cbf5bc0 100644 --- a/fs/nls/nls_cp1255.c +++ b/fs/nls/nls_cp1255.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp1255.c | 2 | * linux/fs/nls/nls_cp1255.c |
3 | * | 3 | * |
4 | * Charset cp1255 translation tables. | 4 | * Charset cp1255 translation tables. |
5 | * The Unicode to charset table has only exact mappings. | 5 | * The Unicode to charset table has only exact mappings. |
diff --git a/fs/nls/nls_cp437.c b/fs/nls/nls_cp437.c index 5c4a1cd685dd..d41930ce4a44 100644 --- a/fs/nls/nls_cp437.c +++ b/fs/nls/nls_cp437.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp437.c | 2 | * linux/fs/nls/nls_cp437.c |
3 | * | 3 | * |
4 | * Charset cp437 translation tables. | 4 | * Charset cp437 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp737.c b/fs/nls/nls_cp737.c index e8b3ca8462e7..d21f8790aa19 100644 --- a/fs/nls/nls_cp737.c +++ b/fs/nls/nls_cp737.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp737.c | 2 | * linux/fs/nls/nls_cp737.c |
3 | * | 3 | * |
4 | * Charset cp737 translation tables. | 4 | * Charset cp737 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp775.c b/fs/nls/nls_cp775.c index bdb290ea523a..c97714c38a90 100644 --- a/fs/nls/nls_cp775.c +++ b/fs/nls/nls_cp775.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp775.c | 2 | * linux/fs/nls/nls_cp775.c |
3 | * | 3 | * |
4 | * Charset cp775 translation tables. | 4 | * Charset cp775 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp850.c b/fs/nls/nls_cp850.c index 25deaa4c8648..843b7d975ba2 100644 --- a/fs/nls/nls_cp850.c +++ b/fs/nls/nls_cp850.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp850.c | 2 | * linux/fs/nls/nls_cp850.c |
3 | * | 3 | * |
4 | * Charset cp850 translation tables. | 4 | * Charset cp850 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp852.c b/fs/nls/nls_cp852.c index b822a7b6b970..83cfd844d5ca 100644 --- a/fs/nls/nls_cp852.c +++ b/fs/nls/nls_cp852.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp852.c | 2 | * linux/fs/nls/nls_cp852.c |
3 | * | 3 | * |
4 | * Charset cp852 translation tables. | 4 | * Charset cp852 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp855.c b/fs/nls/nls_cp855.c index e8641b7a8b27..9190b7b574ff 100644 --- a/fs/nls/nls_cp855.c +++ b/fs/nls/nls_cp855.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp855.c | 2 | * linux/fs/nls/nls_cp855.c |
3 | * | 3 | * |
4 | * Charset cp855 translation tables. | 4 | * Charset cp855 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp857.c b/fs/nls/nls_cp857.c index 7ba589ef8cc0..ef3d36db8082 100644 --- a/fs/nls/nls_cp857.c +++ b/fs/nls/nls_cp857.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp857.c | 2 | * linux/fs/nls/nls_cp857.c |
3 | * | 3 | * |
4 | * Charset cp857 translation tables. | 4 | * Charset cp857 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp860.c b/fs/nls/nls_cp860.c index 3b9e49ce8c80..7e2fb6645893 100644 --- a/fs/nls/nls_cp860.c +++ b/fs/nls/nls_cp860.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp860.c | 2 | * linux/fs/nls/nls_cp860.c |
3 | * | 3 | * |
4 | * Charset cp860 translation tables. | 4 | * Charset cp860 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp861.c b/fs/nls/nls_cp861.c index 959ff64ee971..66d8d808ccf1 100644 --- a/fs/nls/nls_cp861.c +++ b/fs/nls/nls_cp861.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp861.c | 2 | * linux/fs/nls/nls_cp861.c |
3 | * | 3 | * |
4 | * Charset cp861 translation tables. | 4 | * Charset cp861 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp862.c b/fs/nls/nls_cp862.c index b96928f5a023..360ba388485f 100644 --- a/fs/nls/nls_cp862.c +++ b/fs/nls/nls_cp862.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp862.c | 2 | * linux/fs/nls/nls_cp862.c |
3 | * | 3 | * |
4 | * Charset cp862 translation tables. | 4 | * Charset cp862 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp863.c b/fs/nls/nls_cp863.c index baa6e0eab1d6..656a93113e37 100644 --- a/fs/nls/nls_cp863.c +++ b/fs/nls/nls_cp863.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp863.c | 2 | * linux/fs/nls/nls_cp863.c |
3 | * | 3 | * |
4 | * Charset cp863 translation tables. | 4 | * Charset cp863 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp864.c b/fs/nls/nls_cp864.c index f4dabb037dfe..01ca7309753e 100644 --- a/fs/nls/nls_cp864.c +++ b/fs/nls/nls_cp864.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp864.c | 2 | * linux/fs/nls/nls_cp864.c |
3 | * | 3 | * |
4 | * Charset cp864 translation tables. | 4 | * Charset cp864 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp865.c b/fs/nls/nls_cp865.c index 4caeafae32c2..5ba6ee13e109 100644 --- a/fs/nls/nls_cp865.c +++ b/fs/nls/nls_cp865.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp865.c | 2 | * linux/fs/nls/nls_cp865.c |
3 | * | 3 | * |
4 | * Charset cp865 translation tables. | 4 | * Charset cp865 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp866.c b/fs/nls/nls_cp866.c index f2b4a9a293fb..c5f82221c9fe 100644 --- a/fs/nls/nls_cp866.c +++ b/fs/nls/nls_cp866.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp866.c | 2 | * linux/fs/nls/nls_cp866.c |
3 | * | 3 | * |
4 | * Charset cp866 translation tables. | 4 | * Charset cp866 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp869.c b/fs/nls/nls_cp869.c index 12b436f4eca1..8d4015124d11 100644 --- a/fs/nls/nls_cp869.c +++ b/fs/nls/nls_cp869.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp869.c | 2 | * linux/fs/nls/nls_cp869.c |
3 | * | 3 | * |
4 | * Charset cp869 translation tables. | 4 | * Charset cp869 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp874.c b/fs/nls/nls_cp874.c index b5766a01703a..df042052c2db 100644 --- a/fs/nls/nls_cp874.c +++ b/fs/nls/nls_cp874.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp874.c | 2 | * linux/fs/nls/nls_cp874.c |
3 | * | 3 | * |
4 | * Charset cp874 translation tables. | 4 | * Charset cp874 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_cp932.c b/fs/nls/nls_cp932.c index 2c1a17cdcd24..2a9ccf3bc7ef 100644 --- a/fs/nls/nls_cp932.c +++ b/fs/nls/nls_cp932.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp932.c | 2 | * linux/fs/nls/nls_cp932.c |
3 | * | 3 | * |
4 | * Charset cp932 translation tables. | 4 | * Charset cp932 translation tables. |
5 | * This translation table was generated automatically, the | 5 | * This translation table was generated automatically, the |
diff --git a/fs/nls/nls_cp936.c b/fs/nls/nls_cp936.c index ef4cef464aba..046fde8170ea 100644 --- a/fs/nls/nls_cp936.c +++ b/fs/nls/nls_cp936.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp936.c | 2 | * linux/fs/nls/nls_cp936.c |
3 | * | 3 | * |
4 | * Charset cp936 translation tables. | 4 | * Charset cp936 translation tables. |
5 | * This translation table was generated automatically, the | 5 | * This translation table was generated automatically, the |
diff --git a/fs/nls/nls_cp949.c b/fs/nls/nls_cp949.c index 4351ae21d897..92ae19372f0f 100644 --- a/fs/nls/nls_cp949.c +++ b/fs/nls/nls_cp949.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp949.c | 2 | * linux/fs/nls/nls_cp949.c |
3 | * | 3 | * |
4 | * Charset cp949 translation tables. | 4 | * Charset cp949 translation tables. |
5 | * This translation table was generated automatically, the | 5 | * This translation table was generated automatically, the |
diff --git a/fs/nls/nls_cp950.c b/fs/nls/nls_cp950.c index 8167a2858879..5665945fb88c 100644 --- a/fs/nls/nls_cp950.c +++ b/fs/nls/nls_cp950.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_cp950.c | 2 | * linux/fs/nls/nls_cp950.c |
3 | * | 3 | * |
4 | * Charset cp950 translation tables. | 4 | * Charset cp950 translation tables. |
5 | * This translation table was generated automatically, the | 5 | * This translation table was generated automatically, the |
diff --git a/fs/nls/nls_euc-jp.c b/fs/nls/nls_euc-jp.c index 06640c3e4021..73293511578b 100644 --- a/fs/nls/nls_euc-jp.c +++ b/fs/nls/nls_euc-jp.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_euc-jp.c | 2 | * linux/fs/nls/nls_euc-jp.c |
3 | * | 3 | * |
4 | * Added `OSF/JVC Recommended Code Set Conversion Specification | 4 | * Added `OSF/JVC Recommended Code Set Conversion Specification |
5 | * between Japanese EUC and Shift-JIS' support: <hirofumi@mail.parknet.co.jp> | 5 | * between Japanese EUC and Shift-JIS' support: <hirofumi@mail.parknet.co.jp> |
diff --git a/fs/nls/nls_iso8859-1.c b/fs/nls/nls_iso8859-1.c index 70a2c1956723..2483c3c6c1c1 100644 --- a/fs/nls/nls_iso8859-1.c +++ b/fs/nls/nls_iso8859-1.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-1.c | 2 | * linux/fs/nls/nls_iso8859-1.c |
3 | * | 3 | * |
4 | * Charset iso8859-1 translation tables. | 4 | * Charset iso8859-1 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_iso8859-13.c b/fs/nls/nls_iso8859-13.c index 4547035f21a3..7b8721d74368 100644 --- a/fs/nls/nls_iso8859-13.c +++ b/fs/nls/nls_iso8859-13.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-13.c | 2 | * linux/fs/nls/nls_iso8859-13.c |
3 | * | 3 | * |
4 | * Charset iso8859-13 translation tables. | 4 | * Charset iso8859-13 translation tables. |
5 | * The Unicode to charset table has only exact mappings. | 5 | * The Unicode to charset table has only exact mappings. |
diff --git a/fs/nls/nls_iso8859-14.c b/fs/nls/nls_iso8859-14.c index 13628d0dd3a9..2e895e638dba 100644 --- a/fs/nls/nls_iso8859-14.c +++ b/fs/nls/nls_iso8859-14.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-14.c | 2 | * linux/fs/nls/nls_iso8859-14.c |
3 | * | 3 | * |
4 | * Charset iso8859-14 translation tables. | 4 | * Charset iso8859-14 translation tables. |
5 | * | 5 | * |
diff --git a/fs/nls/nls_iso8859-15.c b/fs/nls/nls_iso8859-15.c index 88b924bf7e18..5c91592779fe 100644 --- a/fs/nls/nls_iso8859-15.c +++ b/fs/nls/nls_iso8859-15.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-15.c | 2 | * linux/fs/nls/nls_iso8859-15.c |
3 | * | 3 | * |
4 | * Charset iso8859-15 translation tables. | 4 | * Charset iso8859-15 translation tables. |
5 | * The Unicode to charset table has only exact mappings. | 5 | * The Unicode to charset table has only exact mappings. |
diff --git a/fs/nls/nls_iso8859-2.c b/fs/nls/nls_iso8859-2.c index 372528a6c40c..892d38fe9530 100644 --- a/fs/nls/nls_iso8859-2.c +++ b/fs/nls/nls_iso8859-2.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-2.c | 2 | * linux/fs/nls/nls_iso8859-2.c |
3 | * | 3 | * |
4 | * Charset iso8859-2 translation tables. | 4 | * Charset iso8859-2 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_iso8859-3.c b/fs/nls/nls_iso8859-3.c index 81b45a234369..49317bcdb4be 100644 --- a/fs/nls/nls_iso8859-3.c +++ b/fs/nls/nls_iso8859-3.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-3.c | 2 | * linux/fs/nls/nls_iso8859-3.c |
3 | * | 3 | * |
4 | * Charset iso8859-3 translation tables. | 4 | * Charset iso8859-3 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_iso8859-4.c b/fs/nls/nls_iso8859-4.c index 101b87f5a49b..9f3b9368c2cf 100644 --- a/fs/nls/nls_iso8859-4.c +++ b/fs/nls/nls_iso8859-4.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-4.c | 2 | * linux/fs/nls/nls_iso8859-4.c |
3 | * | 3 | * |
4 | * Charset iso8859-4 translation tables. | 4 | * Charset iso8859-4 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_iso8859-5.c b/fs/nls/nls_iso8859-5.c index 83b0084de5eb..001a2bb132ce 100644 --- a/fs/nls/nls_iso8859-5.c +++ b/fs/nls/nls_iso8859-5.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-5.c | 2 | * linux/fs/nls/nls_iso8859-5.c |
3 | * | 3 | * |
4 | * Charset iso8859-5 translation tables. | 4 | * Charset iso8859-5 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_iso8859-6.c b/fs/nls/nls_iso8859-6.c index 0c519d65f55b..8cec03d66088 100644 --- a/fs/nls/nls_iso8859-6.c +++ b/fs/nls/nls_iso8859-6.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-6.c | 2 | * linux/fs/nls/nls_iso8859-6.c |
3 | * | 3 | * |
4 | * Charset iso8859-6 translation tables. | 4 | * Charset iso8859-6 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_iso8859-7.c b/fs/nls/nls_iso8859-7.c index bd0854625acf..1be707d5ac31 100644 --- a/fs/nls/nls_iso8859-7.c +++ b/fs/nls/nls_iso8859-7.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-7.c | 2 | * linux/fs/nls/nls_iso8859-7.c |
3 | * | 3 | * |
4 | * Charset iso8859-7 translation tables. | 4 | * Charset iso8859-7 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_iso8859-9.c b/fs/nls/nls_iso8859-9.c index 988eff791c06..8c0146f73834 100644 --- a/fs/nls/nls_iso8859-9.c +++ b/fs/nls/nls_iso8859-9.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_iso8859-9.c | 2 | * linux/fs/nls/nls_iso8859-9.c |
3 | * | 3 | * |
4 | * Charset iso8859-9 translation tables. | 4 | * Charset iso8859-9 translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_koi8-r.c b/fs/nls/nls_koi8-r.c index 0ad22c249796..fefbe0807265 100644 --- a/fs/nls/nls_koi8-r.c +++ b/fs/nls/nls_koi8-r.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_koi8-r.c | 2 | * linux/fs/nls/nls_koi8-r.c |
3 | * | 3 | * |
4 | * Charset koi8-r translation tables. | 4 | * Charset koi8-r translation tables. |
5 | * Generated automatically from the Unicode and charset | 5 | * Generated automatically from the Unicode and charset |
diff --git a/fs/nls/nls_koi8-ru.c b/fs/nls/nls_koi8-ru.c index 5db83efe27c6..e7bc1d75c78c 100644 --- a/fs/nls/nls_koi8-ru.c +++ b/fs/nls/nls_koi8-ru.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_koi8-ru.c | 2 | * linux/fs/nls/nls_koi8-ru.c |
3 | * | 3 | * |
4 | * Charset koi8-ru translation based on charset koi8-u. | 4 | * Charset koi8-ru translation based on charset koi8-u. |
5 | * The Unicode to charset table has only exact mappings. | 5 | * The Unicode to charset table has only exact mappings. |
diff --git a/fs/nls/nls_koi8-u.c b/fs/nls/nls_koi8-u.c index 9d30fd61cf46..015070211f22 100644 --- a/fs/nls/nls_koi8-u.c +++ b/fs/nls/nls_koi8-u.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/nls_koi8-u.c | 2 | * linux/fs/nls/nls_koi8-u.c |
3 | * | 3 | * |
4 | * Charset koi8-u translation tables. | 4 | * Charset koi8-u translation tables. |
5 | * The Unicode to charset table has only exact mappings. | 5 | * The Unicode to charset table has only exact mappings. |
diff --git a/fs/proc/array.c b/fs/proc/array.c index c0e554971df0..25e917fb4739 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -162,7 +162,7 @@ static inline char * task_state(struct task_struct *p, char *buffer) | |||
162 | int g; | 162 | int g; |
163 | struct fdtable *fdt = NULL; | 163 | struct fdtable *fdt = NULL; |
164 | 164 | ||
165 | read_lock(&tasklist_lock); | 165 | rcu_read_lock(); |
166 | buffer += sprintf(buffer, | 166 | buffer += sprintf(buffer, |
167 | "State:\t%s\n" | 167 | "State:\t%s\n" |
168 | "SleepAVG:\t%lu%%\n" | 168 | "SleepAVG:\t%lu%%\n" |
@@ -174,14 +174,13 @@ static inline char * task_state(struct task_struct *p, char *buffer) | |||
174 | "Gid:\t%d\t%d\t%d\t%d\n", | 174 | "Gid:\t%d\t%d\t%d\t%d\n", |
175 | get_task_state(p), | 175 | get_task_state(p), |
176 | (p->sleep_avg/1024)*100/(1020000000/1024), | 176 | (p->sleep_avg/1024)*100/(1020000000/1024), |
177 | p->tgid, | 177 | p->tgid, p->pid, |
178 | p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, | 178 | pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, |
179 | pid_alive(p) && p->ptrace ? p->parent->pid : 0, | 179 | pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, |
180 | p->uid, p->euid, p->suid, p->fsuid, | 180 | p->uid, p->euid, p->suid, p->fsuid, |
181 | p->gid, p->egid, p->sgid, p->fsgid); | 181 | p->gid, p->egid, p->sgid, p->fsgid); |
182 | read_unlock(&tasklist_lock); | 182 | |
183 | task_lock(p); | 183 | task_lock(p); |
184 | rcu_read_lock(); | ||
185 | if (p->files) | 184 | if (p->files) |
186 | fdt = files_fdtable(p->files); | 185 | fdt = files_fdtable(p->files); |
187 | buffer += sprintf(buffer, | 186 | buffer += sprintf(buffer, |
@@ -244,6 +243,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, | |||
244 | 243 | ||
245 | static inline char * task_sig(struct task_struct *p, char *buffer) | 244 | static inline char * task_sig(struct task_struct *p, char *buffer) |
246 | { | 245 | { |
246 | unsigned long flags; | ||
247 | sigset_t pending, shpending, blocked, ignored, caught; | 247 | sigset_t pending, shpending, blocked, ignored, caught; |
248 | int num_threads = 0; | 248 | int num_threads = 0; |
249 | unsigned long qsize = 0; | 249 | unsigned long qsize = 0; |
@@ -255,10 +255,8 @@ static inline char * task_sig(struct task_struct *p, char *buffer) | |||
255 | sigemptyset(&ignored); | 255 | sigemptyset(&ignored); |
256 | sigemptyset(&caught); | 256 | sigemptyset(&caught); |
257 | 257 | ||
258 | /* Gather all the data with the appropriate locks held */ | 258 | rcu_read_lock(); |
259 | read_lock(&tasklist_lock); | 259 | if (lock_task_sighand(p, &flags)) { |
260 | if (p->sighand) { | ||
261 | spin_lock_irq(&p->sighand->siglock); | ||
262 | pending = p->pending.signal; | 260 | pending = p->pending.signal; |
263 | shpending = p->signal->shared_pending.signal; | 261 | shpending = p->signal->shared_pending.signal; |
264 | blocked = p->blocked; | 262 | blocked = p->blocked; |
@@ -266,9 +264,9 @@ static inline char * task_sig(struct task_struct *p, char *buffer) | |||
266 | num_threads = atomic_read(&p->signal->count); | 264 | num_threads = atomic_read(&p->signal->count); |
267 | qsize = atomic_read(&p->user->sigpending); | 265 | qsize = atomic_read(&p->user->sigpending); |
268 | qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; | 266 | qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; |
269 | spin_unlock_irq(&p->sighand->siglock); | 267 | unlock_task_sighand(p, &flags); |
270 | } | 268 | } |
271 | read_unlock(&tasklist_lock); | 269 | rcu_read_unlock(); |
272 | 270 | ||
273 | buffer += sprintf(buffer, "Threads:\t%d\n", num_threads); | 271 | buffer += sprintf(buffer, "Threads:\t%d\n", num_threads); |
274 | buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim); | 272 | buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim); |
@@ -322,7 +320,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
322 | sigset_t sigign, sigcatch; | 320 | sigset_t sigign, sigcatch; |
323 | char state; | 321 | char state; |
324 | int res; | 322 | int res; |
325 | pid_t ppid, pgid = -1, sid = -1; | 323 | pid_t ppid = 0, pgid = -1, sid = -1; |
326 | int num_threads = 0; | 324 | int num_threads = 0; |
327 | struct mm_struct *mm; | 325 | struct mm_struct *mm; |
328 | unsigned long long start_time; | 326 | unsigned long long start_time; |
@@ -330,8 +328,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
330 | unsigned long min_flt = 0, maj_flt = 0; | 328 | unsigned long min_flt = 0, maj_flt = 0; |
331 | cputime_t cutime, cstime, utime, stime; | 329 | cputime_t cutime, cstime, utime, stime; |
332 | unsigned long rsslim = 0; | 330 | unsigned long rsslim = 0; |
333 | struct task_struct *t; | ||
334 | char tcomm[sizeof(task->comm)]; | 331 | char tcomm[sizeof(task->comm)]; |
332 | unsigned long flags; | ||
335 | 333 | ||
336 | state = *get_task_state(task); | 334 | state = *get_task_state(task); |
337 | vsize = eip = esp = 0; | 335 | vsize = eip = esp = 0; |
@@ -349,15 +347,33 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
349 | cutime = cstime = utime = stime = cputime_zero; | 347 | cutime = cstime = utime = stime = cputime_zero; |
350 | 348 | ||
351 | mutex_lock(&tty_mutex); | 349 | mutex_lock(&tty_mutex); |
352 | read_lock(&tasklist_lock); | 350 | rcu_read_lock(); |
353 | if (task->sighand) { | 351 | if (lock_task_sighand(task, &flags)) { |
354 | spin_lock_irq(&task->sighand->siglock); | 352 | struct signal_struct *sig = task->signal; |
355 | num_threads = atomic_read(&task->signal->count); | 353 | struct tty_struct *tty = sig->tty; |
354 | |||
355 | if (tty) { | ||
356 | /* | ||
357 | * sig->tty is not stable, but tty_mutex | ||
358 | * protects us from release_dev(tty) | ||
359 | */ | ||
360 | barrier(); | ||
361 | tty_pgrp = tty->pgrp; | ||
362 | tty_nr = new_encode_dev(tty_devnum(tty)); | ||
363 | } | ||
364 | |||
365 | num_threads = atomic_read(&sig->count); | ||
356 | collect_sigign_sigcatch(task, &sigign, &sigcatch); | 366 | collect_sigign_sigcatch(task, &sigign, &sigcatch); |
357 | 367 | ||
368 | cmin_flt = sig->cmin_flt; | ||
369 | cmaj_flt = sig->cmaj_flt; | ||
370 | cutime = sig->cutime; | ||
371 | cstime = sig->cstime; | ||
372 | rsslim = sig->rlim[RLIMIT_RSS].rlim_cur; | ||
373 | |||
358 | /* add up live thread stats at the group level */ | 374 | /* add up live thread stats at the group level */ |
359 | if (whole) { | 375 | if (whole) { |
360 | t = task; | 376 | struct task_struct *t = task; |
361 | do { | 377 | do { |
362 | min_flt += t->min_flt; | 378 | min_flt += t->min_flt; |
363 | maj_flt += t->maj_flt; | 379 | maj_flt += t->maj_flt; |
@@ -365,31 +381,20 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
365 | stime = cputime_add(stime, t->stime); | 381 | stime = cputime_add(stime, t->stime); |
366 | t = next_thread(t); | 382 | t = next_thread(t); |
367 | } while (t != task); | 383 | } while (t != task); |
368 | } | ||
369 | 384 | ||
370 | spin_unlock_irq(&task->sighand->siglock); | 385 | min_flt += sig->min_flt; |
371 | } | 386 | maj_flt += sig->maj_flt; |
372 | if (task->signal) { | 387 | utime = cputime_add(utime, sig->utime); |
373 | if (task->signal->tty) { | 388 | stime = cputime_add(stime, sig->stime); |
374 | tty_pgrp = task->signal->tty->pgrp; | ||
375 | tty_nr = new_encode_dev(tty_devnum(task->signal->tty)); | ||
376 | } | 389 | } |
390 | |||
391 | sid = sig->session; | ||
377 | pgid = process_group(task); | 392 | pgid = process_group(task); |
378 | sid = task->signal->session; | 393 | ppid = rcu_dereference(task->real_parent)->tgid; |
379 | cmin_flt = task->signal->cmin_flt; | 394 | |
380 | cmaj_flt = task->signal->cmaj_flt; | 395 | unlock_task_sighand(task, &flags); |
381 | cutime = task->signal->cutime; | ||
382 | cstime = task->signal->cstime; | ||
383 | rsslim = task->signal->rlim[RLIMIT_RSS].rlim_cur; | ||
384 | if (whole) { | ||
385 | min_flt += task->signal->min_flt; | ||
386 | maj_flt += task->signal->maj_flt; | ||
387 | utime = cputime_add(utime, task->signal->utime); | ||
388 | stime = cputime_add(stime, task->signal->stime); | ||
389 | } | ||
390 | } | 396 | } |
391 | ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; | 397 | rcu_read_unlock(); |
392 | read_unlock(&tasklist_lock); | ||
393 | mutex_unlock(&tty_mutex); | 398 | mutex_unlock(&tty_mutex); |
394 | 399 | ||
395 | if (!whole || num_threads<2) | 400 | if (!whole || num_threads<2) |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 89c20d9d50bf..82da55b5cffe 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -71,6 +71,7 @@ | |||
71 | #include <linux/cpuset.h> | 71 | #include <linux/cpuset.h> |
72 | #include <linux/audit.h> | 72 | #include <linux/audit.h> |
73 | #include <linux/poll.h> | 73 | #include <linux/poll.h> |
74 | #include <linux/nsproxy.h> | ||
74 | #include "internal.h" | 75 | #include "internal.h" |
75 | 76 | ||
76 | /* NOTE: | 77 | /* NOTE: |
@@ -83,262 +84,44 @@ | |||
83 | * in /proc for a task before it execs a suid executable. | 84 | * in /proc for a task before it execs a suid executable. |
84 | */ | 85 | */ |
85 | 86 | ||
86 | /* | ||
87 | * For hysterical raisins we keep the same inumbers as in the old procfs. | ||
88 | * Feel free to change the macro below - just keep the range distinct from | ||
89 | * inumbers of the rest of procfs (currently those are in 0x0000--0xffff). | ||
90 | * As soon as we'll get a separate superblock we will be able to forget | ||
91 | * about magical ranges too. | ||
92 | */ | ||
93 | |||
94 | #define fake_ino(pid,ino) (((pid)<<16)|(ino)) | ||
95 | |||
96 | enum pid_directory_inos { | ||
97 | PROC_TGID_INO = 2, | ||
98 | PROC_TGID_TASK, | ||
99 | PROC_TGID_STATUS, | ||
100 | PROC_TGID_MEM, | ||
101 | #ifdef CONFIG_SECCOMP | ||
102 | PROC_TGID_SECCOMP, | ||
103 | #endif | ||
104 | PROC_TGID_CWD, | ||
105 | PROC_TGID_ROOT, | ||
106 | PROC_TGID_EXE, | ||
107 | PROC_TGID_FD, | ||
108 | PROC_TGID_ENVIRON, | ||
109 | PROC_TGID_AUXV, | ||
110 | PROC_TGID_CMDLINE, | ||
111 | PROC_TGID_STAT, | ||
112 | PROC_TGID_STATM, | ||
113 | PROC_TGID_MAPS, | ||
114 | PROC_TGID_NUMA_MAPS, | ||
115 | PROC_TGID_MOUNTS, | ||
116 | PROC_TGID_MOUNTSTATS, | ||
117 | PROC_TGID_WCHAN, | ||
118 | #ifdef CONFIG_MMU | ||
119 | PROC_TGID_SMAPS, | ||
120 | #endif | ||
121 | #ifdef CONFIG_SCHEDSTATS | ||
122 | PROC_TGID_SCHEDSTAT, | ||
123 | #endif | ||
124 | #ifdef CONFIG_CPUSETS | ||
125 | PROC_TGID_CPUSET, | ||
126 | #endif | ||
127 | #ifdef CONFIG_SECURITY | ||
128 | PROC_TGID_ATTR, | ||
129 | PROC_TGID_ATTR_CURRENT, | ||
130 | PROC_TGID_ATTR_PREV, | ||
131 | PROC_TGID_ATTR_EXEC, | ||
132 | PROC_TGID_ATTR_FSCREATE, | ||
133 | PROC_TGID_ATTR_KEYCREATE, | ||
134 | PROC_TGID_ATTR_SOCKCREATE, | ||
135 | #endif | ||
136 | #ifdef CONFIG_AUDITSYSCALL | ||
137 | PROC_TGID_LOGINUID, | ||
138 | #endif | ||
139 | PROC_TGID_OOM_SCORE, | ||
140 | PROC_TGID_OOM_ADJUST, | ||
141 | PROC_TID_INO, | ||
142 | PROC_TID_STATUS, | ||
143 | PROC_TID_MEM, | ||
144 | #ifdef CONFIG_SECCOMP | ||
145 | PROC_TID_SECCOMP, | ||
146 | #endif | ||
147 | PROC_TID_CWD, | ||
148 | PROC_TID_ROOT, | ||
149 | PROC_TID_EXE, | ||
150 | PROC_TID_FD, | ||
151 | PROC_TID_ENVIRON, | ||
152 | PROC_TID_AUXV, | ||
153 | PROC_TID_CMDLINE, | ||
154 | PROC_TID_STAT, | ||
155 | PROC_TID_STATM, | ||
156 | PROC_TID_MAPS, | ||
157 | PROC_TID_NUMA_MAPS, | ||
158 | PROC_TID_MOUNTS, | ||
159 | PROC_TID_MOUNTSTATS, | ||
160 | PROC_TID_WCHAN, | ||
161 | #ifdef CONFIG_MMU | ||
162 | PROC_TID_SMAPS, | ||
163 | #endif | ||
164 | #ifdef CONFIG_SCHEDSTATS | ||
165 | PROC_TID_SCHEDSTAT, | ||
166 | #endif | ||
167 | #ifdef CONFIG_CPUSETS | ||
168 | PROC_TID_CPUSET, | ||
169 | #endif | ||
170 | #ifdef CONFIG_SECURITY | ||
171 | PROC_TID_ATTR, | ||
172 | PROC_TID_ATTR_CURRENT, | ||
173 | PROC_TID_ATTR_PREV, | ||
174 | PROC_TID_ATTR_EXEC, | ||
175 | PROC_TID_ATTR_FSCREATE, | ||
176 | PROC_TID_ATTR_KEYCREATE, | ||
177 | PROC_TID_ATTR_SOCKCREATE, | ||
178 | #endif | ||
179 | #ifdef CONFIG_AUDITSYSCALL | ||
180 | PROC_TID_LOGINUID, | ||
181 | #endif | ||
182 | PROC_TID_OOM_SCORE, | ||
183 | PROC_TID_OOM_ADJUST, | ||
184 | |||
185 | /* Add new entries before this */ | ||
186 | PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ | ||
187 | }; | ||
188 | 87 | ||
189 | /* Worst case buffer size needed for holding an integer. */ | 88 | /* Worst case buffer size needed for holding an integer. */ |
190 | #define PROC_NUMBUF 10 | 89 | #define PROC_NUMBUF 10 |
191 | 90 | ||
192 | struct pid_entry { | 91 | struct pid_entry { |
193 | int type; | ||
194 | int len; | 92 | int len; |
195 | char *name; | 93 | char *name; |
196 | mode_t mode; | 94 | mode_t mode; |
95 | struct inode_operations *iop; | ||
96 | struct file_operations *fop; | ||
97 | union proc_op op; | ||
197 | }; | 98 | }; |
198 | 99 | ||
199 | #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} | 100 | #define NOD(NAME, MODE, IOP, FOP, OP) { \ |
200 | 101 | .len = sizeof(NAME) - 1, \ | |
201 | static struct pid_entry tgid_base_stuff[] = { | 102 | .name = (NAME), \ |
202 | E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), | 103 | .mode = MODE, \ |
203 | E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), | 104 | .iop = IOP, \ |
204 | E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR), | 105 | .fop = FOP, \ |
205 | E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR), | 106 | .op = OP, \ |
206 | E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO), | ||
207 | E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), | ||
208 | E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), | ||
209 | E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), | ||
210 | E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), | ||
211 | #ifdef CONFIG_NUMA | ||
212 | E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), | ||
213 | #endif | ||
214 | E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), | ||
215 | #ifdef CONFIG_SECCOMP | ||
216 | E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), | ||
217 | #endif | ||
218 | E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), | ||
219 | E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), | ||
220 | E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), | ||
221 | E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), | ||
222 | E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR), | ||
223 | #ifdef CONFIG_MMU | ||
224 | E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), | ||
225 | #endif | ||
226 | #ifdef CONFIG_SECURITY | ||
227 | E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), | ||
228 | #endif | ||
229 | #ifdef CONFIG_KALLSYMS | ||
230 | E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), | ||
231 | #endif | ||
232 | #ifdef CONFIG_SCHEDSTATS | ||
233 | E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), | ||
234 | #endif | ||
235 | #ifdef CONFIG_CPUSETS | ||
236 | E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), | ||
237 | #endif | ||
238 | E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), | ||
239 | E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), | ||
240 | #ifdef CONFIG_AUDITSYSCALL | ||
241 | E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), | ||
242 | #endif | ||
243 | {0,0,NULL,0} | ||
244 | }; | ||
245 | static struct pid_entry tid_base_stuff[] = { | ||
246 | E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), | ||
247 | E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR), | ||
248 | E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR), | ||
249 | E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO), | ||
250 | E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), | ||
251 | E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), | ||
252 | E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), | ||
253 | E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), | ||
254 | #ifdef CONFIG_NUMA | ||
255 | E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), | ||
256 | #endif | ||
257 | E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), | ||
258 | #ifdef CONFIG_SECCOMP | ||
259 | E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), | ||
260 | #endif | ||
261 | E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), | ||
262 | E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), | ||
263 | E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), | ||
264 | E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), | ||
265 | #ifdef CONFIG_MMU | ||
266 | E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), | ||
267 | #endif | ||
268 | #ifdef CONFIG_SECURITY | ||
269 | E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), | ||
270 | #endif | ||
271 | #ifdef CONFIG_KALLSYMS | ||
272 | E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), | ||
273 | #endif | ||
274 | #ifdef CONFIG_SCHEDSTATS | ||
275 | E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), | ||
276 | #endif | ||
277 | #ifdef CONFIG_CPUSETS | ||
278 | E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), | ||
279 | #endif | ||
280 | E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), | ||
281 | E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), | ||
282 | #ifdef CONFIG_AUDITSYSCALL | ||
283 | E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), | ||
284 | #endif | ||
285 | {0,0,NULL,0} | ||
286 | }; | ||
287 | |||
288 | #ifdef CONFIG_SECURITY | ||
289 | static struct pid_entry tgid_attr_stuff[] = { | ||
290 | E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), | ||
291 | E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), | ||
292 | E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), | ||
293 | E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
294 | E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
295 | E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
296 | {0,0,NULL,0} | ||
297 | }; | ||
298 | static struct pid_entry tid_attr_stuff[] = { | ||
299 | E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), | ||
300 | E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), | ||
301 | E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), | ||
302 | E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
303 | E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
304 | E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
305 | {0,0,NULL,0} | ||
306 | }; | ||
307 | #endif | ||
308 | |||
309 | #undef E | ||
310 | |||
311 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | ||
312 | { | ||
313 | struct task_struct *task = get_proc_task(inode); | ||
314 | struct files_struct *files = NULL; | ||
315 | struct file *file; | ||
316 | int fd = proc_fd(inode); | ||
317 | |||
318 | if (task) { | ||
319 | files = get_files_struct(task); | ||
320 | put_task_struct(task); | ||
321 | } | ||
322 | if (files) { | ||
323 | /* | ||
324 | * We are not taking a ref to the file structure, so we must | ||
325 | * hold ->file_lock. | ||
326 | */ | ||
327 | spin_lock(&files->file_lock); | ||
328 | file = fcheck_files(files, fd); | ||
329 | if (file) { | ||
330 | *mnt = mntget(file->f_vfsmnt); | ||
331 | *dentry = dget(file->f_dentry); | ||
332 | spin_unlock(&files->file_lock); | ||
333 | put_files_struct(files); | ||
334 | return 0; | ||
335 | } | ||
336 | spin_unlock(&files->file_lock); | ||
337 | put_files_struct(files); | ||
338 | } | ||
339 | return -ENOENT; | ||
340 | } | 107 | } |
341 | 108 | ||
109 | #define DIR(NAME, MODE, OTYPE) \ | ||
110 | NOD(NAME, (S_IFDIR|(MODE)), \ | ||
111 | &proc_##OTYPE##_inode_operations, &proc_##OTYPE##_operations, \ | ||
112 | {} ) | ||
113 | #define LNK(NAME, OTYPE) \ | ||
114 | NOD(NAME, (S_IFLNK|S_IRWXUGO), \ | ||
115 | &proc_pid_link_inode_operations, NULL, \ | ||
116 | { .proc_get_link = &proc_##OTYPE##_link } ) | ||
117 | #define REG(NAME, MODE, OTYPE) \ | ||
118 | NOD(NAME, (S_IFREG|(MODE)), NULL, \ | ||
119 | &proc_##OTYPE##_operations, {}) | ||
120 | #define INF(NAME, MODE, OTYPE) \ | ||
121 | NOD(NAME, (S_IFREG|(MODE)), \ | ||
122 | NULL, &proc_info_file_operations, \ | ||
123 | { .proc_read = &proc_##OTYPE } ) | ||
124 | |||
342 | static struct fs_struct *get_fs_struct(struct task_struct *task) | 125 | static struct fs_struct *get_fs_struct(struct task_struct *task) |
343 | { | 126 | { |
344 | struct fs_struct *fs; | 127 | struct fs_struct *fs; |
@@ -587,7 +370,7 @@ static int mounts_open(struct inode *inode, struct file *file) | |||
587 | 370 | ||
588 | if (task) { | 371 | if (task) { |
589 | task_lock(task); | 372 | task_lock(task); |
590 | namespace = task->namespace; | 373 | namespace = task->nsproxy->namespace; |
591 | if (namespace) | 374 | if (namespace) |
592 | get_namespace(namespace); | 375 | get_namespace(namespace); |
593 | task_unlock(task); | 376 | task_unlock(task); |
@@ -658,7 +441,7 @@ static int mountstats_open(struct inode *inode, struct file *file) | |||
658 | 441 | ||
659 | if (task) { | 442 | if (task) { |
660 | task_lock(task); | 443 | task_lock(task); |
661 | namespace = task->namespace; | 444 | namespace = task->nsproxy->namespace; |
662 | if (namespace) | 445 | if (namespace) |
663 | get_namespace(namespace); | 446 | get_namespace(namespace); |
664 | task_unlock(task); | 447 | task_unlock(task); |
@@ -1137,143 +920,6 @@ static struct inode_operations proc_pid_link_inode_operations = { | |||
1137 | .setattr = proc_setattr, | 920 | .setattr = proc_setattr, |
1138 | }; | 921 | }; |
1139 | 922 | ||
1140 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | ||
1141 | { | ||
1142 | struct dentry *dentry = filp->f_dentry; | ||
1143 | struct inode *inode = dentry->d_inode; | ||
1144 | struct task_struct *p = get_proc_task(inode); | ||
1145 | unsigned int fd, tid, ino; | ||
1146 | int retval; | ||
1147 | char buf[PROC_NUMBUF]; | ||
1148 | struct files_struct * files; | ||
1149 | struct fdtable *fdt; | ||
1150 | |||
1151 | retval = -ENOENT; | ||
1152 | if (!p) | ||
1153 | goto out_no_task; | ||
1154 | retval = 0; | ||
1155 | tid = p->pid; | ||
1156 | |||
1157 | fd = filp->f_pos; | ||
1158 | switch (fd) { | ||
1159 | case 0: | ||
1160 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
1161 | goto out; | ||
1162 | filp->f_pos++; | ||
1163 | case 1: | ||
1164 | ino = parent_ino(dentry); | ||
1165 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
1166 | goto out; | ||
1167 | filp->f_pos++; | ||
1168 | default: | ||
1169 | files = get_files_struct(p); | ||
1170 | if (!files) | ||
1171 | goto out; | ||
1172 | rcu_read_lock(); | ||
1173 | fdt = files_fdtable(files); | ||
1174 | for (fd = filp->f_pos-2; | ||
1175 | fd < fdt->max_fds; | ||
1176 | fd++, filp->f_pos++) { | ||
1177 | unsigned int i,j; | ||
1178 | |||
1179 | if (!fcheck_files(files, fd)) | ||
1180 | continue; | ||
1181 | rcu_read_unlock(); | ||
1182 | |||
1183 | j = PROC_NUMBUF; | ||
1184 | i = fd; | ||
1185 | do { | ||
1186 | j--; | ||
1187 | buf[j] = '0' + (i % 10); | ||
1188 | i /= 10; | ||
1189 | } while (i); | ||
1190 | |||
1191 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); | ||
1192 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) { | ||
1193 | rcu_read_lock(); | ||
1194 | break; | ||
1195 | } | ||
1196 | rcu_read_lock(); | ||
1197 | } | ||
1198 | rcu_read_unlock(); | ||
1199 | put_files_struct(files); | ||
1200 | } | ||
1201 | out: | ||
1202 | put_task_struct(p); | ||
1203 | out_no_task: | ||
1204 | return retval; | ||
1205 | } | ||
1206 | |||
1207 | static int proc_pident_readdir(struct file *filp, | ||
1208 | void *dirent, filldir_t filldir, | ||
1209 | struct pid_entry *ents, unsigned int nents) | ||
1210 | { | ||
1211 | int i; | ||
1212 | int pid; | ||
1213 | struct dentry *dentry = filp->f_dentry; | ||
1214 | struct inode *inode = dentry->d_inode; | ||
1215 | struct task_struct *task = get_proc_task(inode); | ||
1216 | struct pid_entry *p; | ||
1217 | ino_t ino; | ||
1218 | int ret; | ||
1219 | |||
1220 | ret = -ENOENT; | ||
1221 | if (!task) | ||
1222 | goto out; | ||
1223 | |||
1224 | ret = 0; | ||
1225 | pid = task->pid; | ||
1226 | put_task_struct(task); | ||
1227 | i = filp->f_pos; | ||
1228 | switch (i) { | ||
1229 | case 0: | ||
1230 | ino = inode->i_ino; | ||
1231 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
1232 | goto out; | ||
1233 | i++; | ||
1234 | filp->f_pos++; | ||
1235 | /* fall through */ | ||
1236 | case 1: | ||
1237 | ino = parent_ino(dentry); | ||
1238 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | ||
1239 | goto out; | ||
1240 | i++; | ||
1241 | filp->f_pos++; | ||
1242 | /* fall through */ | ||
1243 | default: | ||
1244 | i -= 2; | ||
1245 | if (i >= nents) { | ||
1246 | ret = 1; | ||
1247 | goto out; | ||
1248 | } | ||
1249 | p = ents + i; | ||
1250 | while (p->name) { | ||
1251 | if (filldir(dirent, p->name, p->len, filp->f_pos, | ||
1252 | fake_ino(pid, p->type), p->mode >> 12) < 0) | ||
1253 | goto out; | ||
1254 | filp->f_pos++; | ||
1255 | p++; | ||
1256 | } | ||
1257 | } | ||
1258 | |||
1259 | ret = 1; | ||
1260 | out: | ||
1261 | return ret; | ||
1262 | } | ||
1263 | |||
1264 | static int proc_tgid_base_readdir(struct file * filp, | ||
1265 | void * dirent, filldir_t filldir) | ||
1266 | { | ||
1267 | return proc_pident_readdir(filp,dirent,filldir, | ||
1268 | tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); | ||
1269 | } | ||
1270 | |||
1271 | static int proc_tid_base_readdir(struct file * filp, | ||
1272 | void * dirent, filldir_t filldir) | ||
1273 | { | ||
1274 | return proc_pident_readdir(filp,dirent,filldir, | ||
1275 | tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); | ||
1276 | } | ||
1277 | 923 | ||
1278 | /* building an inode */ | 924 | /* building an inode */ |
1279 | 925 | ||
@@ -1293,13 +939,13 @@ static int task_dumpable(struct task_struct *task) | |||
1293 | } | 939 | } |
1294 | 940 | ||
1295 | 941 | ||
1296 | static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino) | 942 | static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) |
1297 | { | 943 | { |
1298 | struct inode * inode; | 944 | struct inode * inode; |
1299 | struct proc_inode *ei; | 945 | struct proc_inode *ei; |
1300 | 946 | ||
1301 | /* We need a new inode */ | 947 | /* We need a new inode */ |
1302 | 948 | ||
1303 | inode = new_inode(sb); | 949 | inode = new_inode(sb); |
1304 | if (!inode) | 950 | if (!inode) |
1305 | goto out; | 951 | goto out; |
@@ -1307,13 +953,12 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st | |||
1307 | /* Common stuff */ | 953 | /* Common stuff */ |
1308 | ei = PROC_I(inode); | 954 | ei = PROC_I(inode); |
1309 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 955 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
1310 | inode->i_ino = fake_ino(task->pid, ino); | ||
1311 | inode->i_op = &proc_def_inode_operations; | 956 | inode->i_op = &proc_def_inode_operations; |
1312 | 957 | ||
1313 | /* | 958 | /* |
1314 | * grab the reference to task. | 959 | * grab the reference to task. |
1315 | */ | 960 | */ |
1316 | ei->pid = get_pid(task->pids[PIDTYPE_PID].pid); | 961 | ei->pid = get_task_pid(task, PIDTYPE_PID); |
1317 | if (!ei->pid) | 962 | if (!ei->pid) |
1318 | goto out_unlock; | 963 | goto out_unlock; |
1319 | 964 | ||
@@ -1333,6 +978,27 @@ out_unlock: | |||
1333 | return NULL; | 978 | return NULL; |
1334 | } | 979 | } |
1335 | 980 | ||
981 | static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | ||
982 | { | ||
983 | struct inode *inode = dentry->d_inode; | ||
984 | struct task_struct *task; | ||
985 | generic_fillattr(inode, stat); | ||
986 | |||
987 | rcu_read_lock(); | ||
988 | stat->uid = 0; | ||
989 | stat->gid = 0; | ||
990 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
991 | if (task) { | ||
992 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | ||
993 | task_dumpable(task)) { | ||
994 | stat->uid = task->euid; | ||
995 | stat->gid = task->egid; | ||
996 | } | ||
997 | } | ||
998 | rcu_read_unlock(); | ||
999 | return 0; | ||
1000 | } | ||
1001 | |||
1336 | /* dentry stuff */ | 1002 | /* dentry stuff */ |
1337 | 1003 | ||
1338 | /* | 1004 | /* |
@@ -1372,25 +1038,130 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1372 | return 0; | 1038 | return 0; |
1373 | } | 1039 | } |
1374 | 1040 | ||
1375 | static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 1041 | static int pid_delete_dentry(struct dentry * dentry) |
1376 | { | 1042 | { |
1377 | struct inode *inode = dentry->d_inode; | 1043 | /* Is the task we represent dead? |
1378 | struct task_struct *task; | 1044 | * If so, then don't put the dentry on the lru list, |
1379 | generic_fillattr(inode, stat); | 1045 | * kill it immediately. |
1046 | */ | ||
1047 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
1048 | } | ||
1049 | |||
1050 | static struct dentry_operations pid_dentry_operations = | ||
1051 | { | ||
1052 | .d_revalidate = pid_revalidate, | ||
1053 | .d_delete = pid_delete_dentry, | ||
1054 | }; | ||
1055 | |||
1056 | /* Lookups */ | ||
1057 | |||
1058 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct task_struct *, void *); | ||
1059 | |||
1060 | /* | ||
1061 | * Fill a directory entry. | ||
1062 | * | ||
1063 | * If possible create the dcache entry and derive our inode number and | ||
1064 | * file type from dcache entry. | ||
1065 | * | ||
1066 | * Since all of the proc inode numbers are dynamically generated, the inode | ||
1067 | * numbers do not exist until the inode is cache. This means creating the | ||
1068 | * the dcache entry in readdir is necessary to keep the inode numbers | ||
1069 | * reported by readdir in sync with the inode numbers reported | ||
1070 | * by stat. | ||
1071 | */ | ||
1072 | static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
1073 | char *name, int len, | ||
1074 | instantiate_t instantiate, struct task_struct *task, void *ptr) | ||
1075 | { | ||
1076 | struct dentry *child, *dir = filp->f_dentry; | ||
1077 | struct inode *inode; | ||
1078 | struct qstr qname; | ||
1079 | ino_t ino = 0; | ||
1080 | unsigned type = DT_UNKNOWN; | ||
1081 | |||
1082 | qname.name = name; | ||
1083 | qname.len = len; | ||
1084 | qname.hash = full_name_hash(name, len); | ||
1085 | |||
1086 | child = d_lookup(dir, &qname); | ||
1087 | if (!child) { | ||
1088 | struct dentry *new; | ||
1089 | new = d_alloc(dir, &qname); | ||
1090 | if (new) { | ||
1091 | child = instantiate(dir->d_inode, new, task, ptr); | ||
1092 | if (child) | ||
1093 | dput(new); | ||
1094 | else | ||
1095 | child = new; | ||
1096 | } | ||
1097 | } | ||
1098 | if (!child || IS_ERR(child) || !child->d_inode) | ||
1099 | goto end_instantiate; | ||
1100 | inode = child->d_inode; | ||
1101 | if (inode) { | ||
1102 | ino = inode->i_ino; | ||
1103 | type = inode->i_mode >> 12; | ||
1104 | } | ||
1105 | dput(child); | ||
1106 | end_instantiate: | ||
1107 | if (!ino) | ||
1108 | ino = find_inode_number(dir, &qname); | ||
1109 | if (!ino) | ||
1110 | ino = 1; | ||
1111 | return filldir(dirent, name, len, filp->f_pos, ino, type); | ||
1112 | } | ||
1113 | |||
1114 | static unsigned name_to_int(struct dentry *dentry) | ||
1115 | { | ||
1116 | const char *name = dentry->d_name.name; | ||
1117 | int len = dentry->d_name.len; | ||
1118 | unsigned n = 0; | ||
1119 | |||
1120 | if (len > 1 && *name == '0') | ||
1121 | goto out; | ||
1122 | while (len-- > 0) { | ||
1123 | unsigned c = *name++ - '0'; | ||
1124 | if (c > 9) | ||
1125 | goto out; | ||
1126 | if (n >= (~0U-9)/10) | ||
1127 | goto out; | ||
1128 | n *= 10; | ||
1129 | n += c; | ||
1130 | } | ||
1131 | return n; | ||
1132 | out: | ||
1133 | return ~0U; | ||
1134 | } | ||
1135 | |||
1136 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | ||
1137 | { | ||
1138 | struct task_struct *task = get_proc_task(inode); | ||
1139 | struct files_struct *files = NULL; | ||
1140 | struct file *file; | ||
1141 | int fd = proc_fd(inode); | ||
1380 | 1142 | ||
1381 | rcu_read_lock(); | ||
1382 | stat->uid = 0; | ||
1383 | stat->gid = 0; | ||
1384 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
1385 | if (task) { | 1143 | if (task) { |
1386 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | 1144 | files = get_files_struct(task); |
1387 | task_dumpable(task)) { | 1145 | put_task_struct(task); |
1388 | stat->uid = task->euid; | 1146 | } |
1389 | stat->gid = task->egid; | 1147 | if (files) { |
1148 | /* | ||
1149 | * We are not taking a ref to the file structure, so we must | ||
1150 | * hold ->file_lock. | ||
1151 | */ | ||
1152 | spin_lock(&files->file_lock); | ||
1153 | file = fcheck_files(files, fd); | ||
1154 | if (file) { | ||
1155 | *mnt = mntget(file->f_vfsmnt); | ||
1156 | *dentry = dget(file->f_dentry); | ||
1157 | spin_unlock(&files->file_lock); | ||
1158 | put_files_struct(files); | ||
1159 | return 0; | ||
1390 | } | 1160 | } |
1161 | spin_unlock(&files->file_lock); | ||
1162 | put_files_struct(files); | ||
1391 | } | 1163 | } |
1392 | rcu_read_unlock(); | 1164 | return -ENOENT; |
1393 | return 0; | ||
1394 | } | 1165 | } |
1395 | 1166 | ||
1396 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | 1167 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) |
@@ -1428,75 +1199,30 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1428 | return 0; | 1199 | return 0; |
1429 | } | 1200 | } |
1430 | 1201 | ||
1431 | static int pid_delete_dentry(struct dentry * dentry) | ||
1432 | { | ||
1433 | /* Is the task we represent dead? | ||
1434 | * If so, then don't put the dentry on the lru list, | ||
1435 | * kill it immediately. | ||
1436 | */ | ||
1437 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
1438 | } | ||
1439 | |||
1440 | static struct dentry_operations tid_fd_dentry_operations = | 1202 | static struct dentry_operations tid_fd_dentry_operations = |
1441 | { | 1203 | { |
1442 | .d_revalidate = tid_fd_revalidate, | 1204 | .d_revalidate = tid_fd_revalidate, |
1443 | .d_delete = pid_delete_dentry, | 1205 | .d_delete = pid_delete_dentry, |
1444 | }; | 1206 | }; |
1445 | 1207 | ||
1446 | static struct dentry_operations pid_dentry_operations = | 1208 | static struct dentry *proc_fd_instantiate(struct inode *dir, |
1447 | { | 1209 | struct dentry *dentry, struct task_struct *task, void *ptr) |
1448 | .d_revalidate = pid_revalidate, | ||
1449 | .d_delete = pid_delete_dentry, | ||
1450 | }; | ||
1451 | |||
1452 | /* Lookups */ | ||
1453 | |||
1454 | static unsigned name_to_int(struct dentry *dentry) | ||
1455 | { | ||
1456 | const char *name = dentry->d_name.name; | ||
1457 | int len = dentry->d_name.len; | ||
1458 | unsigned n = 0; | ||
1459 | |||
1460 | if (len > 1 && *name == '0') | ||
1461 | goto out; | ||
1462 | while (len-- > 0) { | ||
1463 | unsigned c = *name++ - '0'; | ||
1464 | if (c > 9) | ||
1465 | goto out; | ||
1466 | if (n >= (~0U-9)/10) | ||
1467 | goto out; | ||
1468 | n *= 10; | ||
1469 | n += c; | ||
1470 | } | ||
1471 | return n; | ||
1472 | out: | ||
1473 | return ~0U; | ||
1474 | } | ||
1475 | |||
1476 | /* SMP-safe */ | ||
1477 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) | ||
1478 | { | 1210 | { |
1479 | struct task_struct *task = get_proc_task(dir); | 1211 | unsigned fd = *(unsigned *)ptr; |
1480 | unsigned fd = name_to_int(dentry); | 1212 | struct file *file; |
1481 | struct dentry *result = ERR_PTR(-ENOENT); | 1213 | struct files_struct *files; |
1482 | struct file * file; | 1214 | struct inode *inode; |
1483 | struct files_struct * files; | 1215 | struct proc_inode *ei; |
1484 | struct inode *inode; | 1216 | struct dentry *error = ERR_PTR(-ENOENT); |
1485 | struct proc_inode *ei; | ||
1486 | |||
1487 | if (!task) | ||
1488 | goto out_no_task; | ||
1489 | if (fd == ~0U) | ||
1490 | goto out; | ||
1491 | 1217 | ||
1492 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); | 1218 | inode = proc_pid_make_inode(dir->i_sb, task); |
1493 | if (!inode) | 1219 | if (!inode) |
1494 | goto out; | 1220 | goto out; |
1495 | ei = PROC_I(inode); | 1221 | ei = PROC_I(inode); |
1496 | ei->fd = fd; | 1222 | ei->fd = fd; |
1497 | files = get_files_struct(task); | 1223 | files = get_files_struct(task); |
1498 | if (!files) | 1224 | if (!files) |
1499 | goto out_unlock; | 1225 | goto out_iput; |
1500 | inode->i_mode = S_IFLNK; | 1226 | inode->i_mode = S_IFLNK; |
1501 | 1227 | ||
1502 | /* | 1228 | /* |
@@ -1506,13 +1232,14 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1506 | spin_lock(&files->file_lock); | 1232 | spin_lock(&files->file_lock); |
1507 | file = fcheck_files(files, fd); | 1233 | file = fcheck_files(files, fd); |
1508 | if (!file) | 1234 | if (!file) |
1509 | goto out_unlock2; | 1235 | goto out_unlock; |
1510 | if (file->f_mode & 1) | 1236 | if (file->f_mode & 1) |
1511 | inode->i_mode |= S_IRUSR | S_IXUSR; | 1237 | inode->i_mode |= S_IRUSR | S_IXUSR; |
1512 | if (file->f_mode & 2) | 1238 | if (file->f_mode & 2) |
1513 | inode->i_mode |= S_IWUSR | S_IXUSR; | 1239 | inode->i_mode |= S_IWUSR | S_IXUSR; |
1514 | spin_unlock(&files->file_lock); | 1240 | spin_unlock(&files->file_lock); |
1515 | put_files_struct(files); | 1241 | put_files_struct(files); |
1242 | |||
1516 | inode->i_op = &proc_pid_link_inode_operations; | 1243 | inode->i_op = &proc_pid_link_inode_operations; |
1517 | inode->i_size = 64; | 1244 | inode->i_size = 64; |
1518 | ei->op.proc_get_link = proc_fd_link; | 1245 | ei->op.proc_get_link = proc_fd_link; |
@@ -1520,34 +1247,106 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1520 | d_add(dentry, inode); | 1247 | d_add(dentry, inode); |
1521 | /* Close the race of the process dying before we return the dentry */ | 1248 | /* Close the race of the process dying before we return the dentry */ |
1522 | if (tid_fd_revalidate(dentry, NULL)) | 1249 | if (tid_fd_revalidate(dentry, NULL)) |
1523 | result = NULL; | 1250 | error = NULL; |
1524 | out: | ||
1525 | put_task_struct(task); | ||
1526 | out_no_task: | ||
1527 | return result; | ||
1528 | 1251 | ||
1529 | out_unlock2: | 1252 | out: |
1253 | return error; | ||
1254 | out_unlock: | ||
1530 | spin_unlock(&files->file_lock); | 1255 | spin_unlock(&files->file_lock); |
1531 | put_files_struct(files); | 1256 | put_files_struct(files); |
1532 | out_unlock: | 1257 | out_iput: |
1533 | iput(inode); | 1258 | iput(inode); |
1534 | goto out; | 1259 | goto out; |
1535 | } | 1260 | } |
1536 | 1261 | ||
1537 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); | 1262 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) |
1538 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); | 1263 | { |
1539 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); | 1264 | struct task_struct *task = get_proc_task(dir); |
1265 | unsigned fd = name_to_int(dentry); | ||
1266 | struct dentry *result = ERR_PTR(-ENOENT); | ||
1267 | |||
1268 | if (!task) | ||
1269 | goto out_no_task; | ||
1270 | if (fd == ~0U) | ||
1271 | goto out; | ||
1272 | |||
1273 | result = proc_fd_instantiate(dir, dentry, task, &fd); | ||
1274 | out: | ||
1275 | put_task_struct(task); | ||
1276 | out_no_task: | ||
1277 | return result; | ||
1278 | } | ||
1279 | |||
1280 | static int proc_fd_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
1281 | struct task_struct *task, int fd) | ||
1282 | { | ||
1283 | char name[PROC_NUMBUF]; | ||
1284 | int len = snprintf(name, sizeof(name), "%d", fd); | ||
1285 | return proc_fill_cache(filp, dirent, filldir, name, len, | ||
1286 | proc_fd_instantiate, task, &fd); | ||
1287 | } | ||
1288 | |||
1289 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | ||
1290 | { | ||
1291 | struct dentry *dentry = filp->f_dentry; | ||
1292 | struct inode *inode = dentry->d_inode; | ||
1293 | struct task_struct *p = get_proc_task(inode); | ||
1294 | unsigned int fd, tid, ino; | ||
1295 | int retval; | ||
1296 | struct files_struct * files; | ||
1297 | struct fdtable *fdt; | ||
1298 | |||
1299 | retval = -ENOENT; | ||
1300 | if (!p) | ||
1301 | goto out_no_task; | ||
1302 | retval = 0; | ||
1303 | tid = p->pid; | ||
1304 | |||
1305 | fd = filp->f_pos; | ||
1306 | switch (fd) { | ||
1307 | case 0: | ||
1308 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
1309 | goto out; | ||
1310 | filp->f_pos++; | ||
1311 | case 1: | ||
1312 | ino = parent_ino(dentry); | ||
1313 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
1314 | goto out; | ||
1315 | filp->f_pos++; | ||
1316 | default: | ||
1317 | files = get_files_struct(p); | ||
1318 | if (!files) | ||
1319 | goto out; | ||
1320 | rcu_read_lock(); | ||
1321 | fdt = files_fdtable(files); | ||
1322 | for (fd = filp->f_pos-2; | ||
1323 | fd < fdt->max_fds; | ||
1324 | fd++, filp->f_pos++) { | ||
1325 | |||
1326 | if (!fcheck_files(files, fd)) | ||
1327 | continue; | ||
1328 | rcu_read_unlock(); | ||
1329 | |||
1330 | if (proc_fd_fill_cache(filp, dirent, filldir, p, fd) < 0) { | ||
1331 | rcu_read_lock(); | ||
1332 | break; | ||
1333 | } | ||
1334 | rcu_read_lock(); | ||
1335 | } | ||
1336 | rcu_read_unlock(); | ||
1337 | put_files_struct(files); | ||
1338 | } | ||
1339 | out: | ||
1340 | put_task_struct(p); | ||
1341 | out_no_task: | ||
1342 | return retval; | ||
1343 | } | ||
1540 | 1344 | ||
1541 | static struct file_operations proc_fd_operations = { | 1345 | static struct file_operations proc_fd_operations = { |
1542 | .read = generic_read_dir, | 1346 | .read = generic_read_dir, |
1543 | .readdir = proc_readfd, | 1347 | .readdir = proc_readfd, |
1544 | }; | 1348 | }; |
1545 | 1349 | ||
1546 | static struct file_operations proc_task_operations = { | ||
1547 | .read = generic_read_dir, | ||
1548 | .readdir = proc_task_readdir, | ||
1549 | }; | ||
1550 | |||
1551 | /* | 1350 | /* |
1552 | * proc directories can do almost nothing.. | 1351 | * proc directories can do almost nothing.. |
1553 | */ | 1352 | */ |
@@ -1556,11 +1355,137 @@ static struct inode_operations proc_fd_inode_operations = { | |||
1556 | .setattr = proc_setattr, | 1355 | .setattr = proc_setattr, |
1557 | }; | 1356 | }; |
1558 | 1357 | ||
1559 | static struct inode_operations proc_task_inode_operations = { | 1358 | static struct dentry *proc_pident_instantiate(struct inode *dir, |
1560 | .lookup = proc_task_lookup, | 1359 | struct dentry *dentry, struct task_struct *task, void *ptr) |
1561 | .getattr = proc_task_getattr, | 1360 | { |
1562 | .setattr = proc_setattr, | 1361 | struct pid_entry *p = ptr; |
1563 | }; | 1362 | struct inode *inode; |
1363 | struct proc_inode *ei; | ||
1364 | struct dentry *error = ERR_PTR(-EINVAL); | ||
1365 | |||
1366 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
1367 | if (!inode) | ||
1368 | goto out; | ||
1369 | |||
1370 | ei = PROC_I(inode); | ||
1371 | inode->i_mode = p->mode; | ||
1372 | if (S_ISDIR(inode->i_mode)) | ||
1373 | inode->i_nlink = 2; /* Use getattr to fix if necessary */ | ||
1374 | if (p->iop) | ||
1375 | inode->i_op = p->iop; | ||
1376 | if (p->fop) | ||
1377 | inode->i_fop = p->fop; | ||
1378 | ei->op = p->op; | ||
1379 | dentry->d_op = &pid_dentry_operations; | ||
1380 | d_add(dentry, inode); | ||
1381 | /* Close the race of the process dying before we return the dentry */ | ||
1382 | if (pid_revalidate(dentry, NULL)) | ||
1383 | error = NULL; | ||
1384 | out: | ||
1385 | return error; | ||
1386 | } | ||
1387 | |||
1388 | static struct dentry *proc_pident_lookup(struct inode *dir, | ||
1389 | struct dentry *dentry, | ||
1390 | struct pid_entry *ents, | ||
1391 | unsigned int nents) | ||
1392 | { | ||
1393 | struct inode *inode; | ||
1394 | struct dentry *error; | ||
1395 | struct task_struct *task = get_proc_task(dir); | ||
1396 | struct pid_entry *p, *last; | ||
1397 | |||
1398 | error = ERR_PTR(-ENOENT); | ||
1399 | inode = NULL; | ||
1400 | |||
1401 | if (!task) | ||
1402 | goto out_no_task; | ||
1403 | |||
1404 | /* | ||
1405 | * Yes, it does not scale. And it should not. Don't add | ||
1406 | * new entries into /proc/<tgid>/ without very good reasons. | ||
1407 | */ | ||
1408 | last = &ents[nents - 1]; | ||
1409 | for (p = ents; p <= last; p++) { | ||
1410 | if (p->len != dentry->d_name.len) | ||
1411 | continue; | ||
1412 | if (!memcmp(dentry->d_name.name, p->name, p->len)) | ||
1413 | break; | ||
1414 | } | ||
1415 | if (p > last) | ||
1416 | goto out; | ||
1417 | |||
1418 | error = proc_pident_instantiate(dir, dentry, task, p); | ||
1419 | out: | ||
1420 | put_task_struct(task); | ||
1421 | out_no_task: | ||
1422 | return error; | ||
1423 | } | ||
1424 | |||
1425 | static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
1426 | struct task_struct *task, struct pid_entry *p) | ||
1427 | { | ||
1428 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, | ||
1429 | proc_pident_instantiate, task, p); | ||
1430 | } | ||
1431 | |||
1432 | static int proc_pident_readdir(struct file *filp, | ||
1433 | void *dirent, filldir_t filldir, | ||
1434 | struct pid_entry *ents, unsigned int nents) | ||
1435 | { | ||
1436 | int i; | ||
1437 | int pid; | ||
1438 | struct dentry *dentry = filp->f_dentry; | ||
1439 | struct inode *inode = dentry->d_inode; | ||
1440 | struct task_struct *task = get_proc_task(inode); | ||
1441 | struct pid_entry *p, *last; | ||
1442 | ino_t ino; | ||
1443 | int ret; | ||
1444 | |||
1445 | ret = -ENOENT; | ||
1446 | if (!task) | ||
1447 | goto out_no_task; | ||
1448 | |||
1449 | ret = 0; | ||
1450 | pid = task->pid; | ||
1451 | i = filp->f_pos; | ||
1452 | switch (i) { | ||
1453 | case 0: | ||
1454 | ino = inode->i_ino; | ||
1455 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
1456 | goto out; | ||
1457 | i++; | ||
1458 | filp->f_pos++; | ||
1459 | /* fall through */ | ||
1460 | case 1: | ||
1461 | ino = parent_ino(dentry); | ||
1462 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | ||
1463 | goto out; | ||
1464 | i++; | ||
1465 | filp->f_pos++; | ||
1466 | /* fall through */ | ||
1467 | default: | ||
1468 | i -= 2; | ||
1469 | if (i >= nents) { | ||
1470 | ret = 1; | ||
1471 | goto out; | ||
1472 | } | ||
1473 | p = ents + i; | ||
1474 | last = &ents[nents - 1]; | ||
1475 | while (p <= last) { | ||
1476 | if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) | ||
1477 | goto out; | ||
1478 | filp->f_pos++; | ||
1479 | p++; | ||
1480 | } | ||
1481 | } | ||
1482 | |||
1483 | ret = 1; | ||
1484 | out: | ||
1485 | put_task_struct(task); | ||
1486 | out_no_task: | ||
1487 | return ret; | ||
1488 | } | ||
1564 | 1489 | ||
1565 | #ifdef CONFIG_SECURITY | 1490 | #ifdef CONFIG_SECURITY |
1566 | static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | 1491 | static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, |
@@ -1581,8 +1506,8 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | |||
1581 | if (!(page = __get_free_page(GFP_KERNEL))) | 1506 | if (!(page = __get_free_page(GFP_KERNEL))) |
1582 | goto out; | 1507 | goto out; |
1583 | 1508 | ||
1584 | length = security_getprocattr(task, | 1509 | length = security_getprocattr(task, |
1585 | (char*)file->f_dentry->d_name.name, | 1510 | (char*)file->f_dentry->d_name.name, |
1586 | (void*)page, count); | 1511 | (void*)page, count); |
1587 | if (length >= 0) | 1512 | if (length >= 0) |
1588 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | 1513 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); |
@@ -1595,17 +1520,17 @@ out_no_task: | |||
1595 | 1520 | ||
1596 | static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, | 1521 | static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, |
1597 | size_t count, loff_t *ppos) | 1522 | size_t count, loff_t *ppos) |
1598 | { | 1523 | { |
1599 | struct inode * inode = file->f_dentry->d_inode; | 1524 | struct inode * inode = file->f_dentry->d_inode; |
1600 | char *page; | 1525 | char *page; |
1601 | ssize_t length; | 1526 | ssize_t length; |
1602 | struct task_struct *task = get_proc_task(inode); | 1527 | struct task_struct *task = get_proc_task(inode); |
1603 | 1528 | ||
1604 | length = -ESRCH; | 1529 | length = -ESRCH; |
1605 | if (!task) | 1530 | if (!task) |
1606 | goto out_no_task; | 1531 | goto out_no_task; |
1607 | if (count > PAGE_SIZE) | 1532 | if (count > PAGE_SIZE) |
1608 | count = PAGE_SIZE; | 1533 | count = PAGE_SIZE; |
1609 | 1534 | ||
1610 | /* No partial writes. */ | 1535 | /* No partial writes. */ |
1611 | length = -EINVAL; | 1536 | length = -EINVAL; |
@@ -1613,16 +1538,16 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, | |||
1613 | goto out; | 1538 | goto out; |
1614 | 1539 | ||
1615 | length = -ENOMEM; | 1540 | length = -ENOMEM; |
1616 | page = (char*)__get_free_page(GFP_USER); | 1541 | page = (char*)__get_free_page(GFP_USER); |
1617 | if (!page) | 1542 | if (!page) |
1618 | goto out; | 1543 | goto out; |
1619 | 1544 | ||
1620 | length = -EFAULT; | 1545 | length = -EFAULT; |
1621 | if (copy_from_user(page, buf, count)) | 1546 | if (copy_from_user(page, buf, count)) |
1622 | goto out_free; | 1547 | goto out_free; |
1623 | 1548 | ||
1624 | length = security_setprocattr(task, | 1549 | length = security_setprocattr(task, |
1625 | (char*)file->f_dentry->d_name.name, | 1550 | (char*)file->f_dentry->d_name.name, |
1626 | (void*)page, count); | 1551 | (void*)page, count); |
1627 | out_free: | 1552 | out_free: |
1628 | free_page((unsigned long) page); | 1553 | free_page((unsigned long) page); |
@@ -1630,330 +1555,263 @@ out: | |||
1630 | put_task_struct(task); | 1555 | put_task_struct(task); |
1631 | out_no_task: | 1556 | out_no_task: |
1632 | return length; | 1557 | return length; |
1633 | } | 1558 | } |
1634 | 1559 | ||
1635 | static struct file_operations proc_pid_attr_operations = { | 1560 | static struct file_operations proc_pid_attr_operations = { |
1636 | .read = proc_pid_attr_read, | 1561 | .read = proc_pid_attr_read, |
1637 | .write = proc_pid_attr_write, | 1562 | .write = proc_pid_attr_write, |
1638 | }; | 1563 | }; |
1639 | 1564 | ||
1640 | static struct file_operations proc_tid_attr_operations; | 1565 | static struct pid_entry attr_dir_stuff[] = { |
1641 | static struct inode_operations proc_tid_attr_inode_operations; | 1566 | REG("current", S_IRUGO|S_IWUGO, pid_attr), |
1642 | static struct file_operations proc_tgid_attr_operations; | 1567 | REG("prev", S_IRUGO, pid_attr), |
1643 | static struct inode_operations proc_tgid_attr_inode_operations; | 1568 | REG("exec", S_IRUGO|S_IWUGO, pid_attr), |
1569 | REG("fscreate", S_IRUGO|S_IWUGO, pid_attr), | ||
1570 | REG("keycreate", S_IRUGO|S_IWUGO, pid_attr), | ||
1571 | REG("sockcreate", S_IRUGO|S_IWUGO, pid_attr), | ||
1572 | }; | ||
1573 | |||
1574 | static int proc_attr_dir_readdir(struct file * filp, | ||
1575 | void * dirent, filldir_t filldir) | ||
1576 | { | ||
1577 | return proc_pident_readdir(filp,dirent,filldir, | ||
1578 | attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); | ||
1579 | } | ||
1580 | |||
1581 | static struct file_operations proc_attr_dir_operations = { | ||
1582 | .read = generic_read_dir, | ||
1583 | .readdir = proc_attr_dir_readdir, | ||
1584 | }; | ||
1585 | |||
1586 | static struct dentry *proc_attr_dir_lookup(struct inode *dir, | ||
1587 | struct dentry *dentry, struct nameidata *nd) | ||
1588 | { | ||
1589 | return proc_pident_lookup(dir, dentry, | ||
1590 | attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); | ||
1591 | } | ||
1592 | |||
1593 | static struct inode_operations proc_attr_dir_inode_operations = { | ||
1594 | .lookup = proc_attr_dir_lookup, | ||
1595 | .getattr = pid_getattr, | ||
1596 | .setattr = proc_setattr, | ||
1597 | }; | ||
1598 | |||
1644 | #endif | 1599 | #endif |
1645 | 1600 | ||
1646 | /* SMP-safe */ | 1601 | /* |
1647 | static struct dentry *proc_pident_lookup(struct inode *dir, | 1602 | * /proc/self: |
1648 | struct dentry *dentry, | 1603 | */ |
1649 | struct pid_entry *ents) | 1604 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, |
1605 | int buflen) | ||
1606 | { | ||
1607 | char tmp[PROC_NUMBUF]; | ||
1608 | sprintf(tmp, "%d", current->tgid); | ||
1609 | return vfs_readlink(dentry,buffer,buflen,tmp); | ||
1610 | } | ||
1611 | |||
1612 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
1650 | { | 1613 | { |
1614 | char tmp[PROC_NUMBUF]; | ||
1615 | sprintf(tmp, "%d", current->tgid); | ||
1616 | return ERR_PTR(vfs_follow_link(nd,tmp)); | ||
1617 | } | ||
1618 | |||
1619 | static struct inode_operations proc_self_inode_operations = { | ||
1620 | .readlink = proc_self_readlink, | ||
1621 | .follow_link = proc_self_follow_link, | ||
1622 | }; | ||
1623 | |||
1624 | /* | ||
1625 | * proc base | ||
1626 | * | ||
1627 | * These are the directory entries in the root directory of /proc | ||
1628 | * that properly belong to the /proc filesystem, as they describe | ||
1629 | * describe something that is process related. | ||
1630 | */ | ||
1631 | static struct pid_entry proc_base_stuff[] = { | ||
1632 | NOD("self", S_IFLNK|S_IRWXUGO, | ||
1633 | &proc_self_inode_operations, NULL, {}), | ||
1634 | }; | ||
1635 | |||
1636 | /* | ||
1637 | * Exceptional case: normally we are not allowed to unhash a busy | ||
1638 | * directory. In this case, however, we can do it - no aliasing problems | ||
1639 | * due to the way we treat inodes. | ||
1640 | */ | ||
1641 | static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
1642 | { | ||
1643 | struct inode *inode = dentry->d_inode; | ||
1644 | struct task_struct *task = get_proc_task(inode); | ||
1645 | if (task) { | ||
1646 | put_task_struct(task); | ||
1647 | return 1; | ||
1648 | } | ||
1649 | d_drop(dentry); | ||
1650 | return 0; | ||
1651 | } | ||
1652 | |||
1653 | static struct dentry_operations proc_base_dentry_operations = | ||
1654 | { | ||
1655 | .d_revalidate = proc_base_revalidate, | ||
1656 | .d_delete = pid_delete_dentry, | ||
1657 | }; | ||
1658 | |||
1659 | static struct dentry *proc_base_instantiate(struct inode *dir, | ||
1660 | struct dentry *dentry, struct task_struct *task, void *ptr) | ||
1661 | { | ||
1662 | struct pid_entry *p = ptr; | ||
1651 | struct inode *inode; | 1663 | struct inode *inode; |
1664 | struct proc_inode *ei; | ||
1665 | struct dentry *error = ERR_PTR(-EINVAL); | ||
1666 | |||
1667 | /* Allocate the inode */ | ||
1668 | error = ERR_PTR(-ENOMEM); | ||
1669 | inode = new_inode(dir->i_sb); | ||
1670 | if (!inode) | ||
1671 | goto out; | ||
1672 | |||
1673 | /* Initialize the inode */ | ||
1674 | ei = PROC_I(inode); | ||
1675 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
1676 | |||
1677 | /* | ||
1678 | * grab the reference to the task. | ||
1679 | */ | ||
1680 | ei->pid = get_task_pid(task, PIDTYPE_PID); | ||
1681 | if (!ei->pid) | ||
1682 | goto out_iput; | ||
1683 | |||
1684 | inode->i_uid = 0; | ||
1685 | inode->i_gid = 0; | ||
1686 | inode->i_mode = p->mode; | ||
1687 | if (S_ISDIR(inode->i_mode)) | ||
1688 | inode->i_nlink = 2; | ||
1689 | if (S_ISLNK(inode->i_mode)) | ||
1690 | inode->i_size = 64; | ||
1691 | if (p->iop) | ||
1692 | inode->i_op = p->iop; | ||
1693 | if (p->fop) | ||
1694 | inode->i_fop = p->fop; | ||
1695 | ei->op = p->op; | ||
1696 | dentry->d_op = &proc_base_dentry_operations; | ||
1697 | d_add(dentry, inode); | ||
1698 | error = NULL; | ||
1699 | out: | ||
1700 | return error; | ||
1701 | out_iput: | ||
1702 | iput(inode); | ||
1703 | goto out; | ||
1704 | } | ||
1705 | |||
1706 | static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) | ||
1707 | { | ||
1652 | struct dentry *error; | 1708 | struct dentry *error; |
1653 | struct task_struct *task = get_proc_task(dir); | 1709 | struct task_struct *task = get_proc_task(dir); |
1654 | struct pid_entry *p; | 1710 | struct pid_entry *p, *last; |
1655 | struct proc_inode *ei; | ||
1656 | 1711 | ||
1657 | error = ERR_PTR(-ENOENT); | 1712 | error = ERR_PTR(-ENOENT); |
1658 | inode = NULL; | ||
1659 | 1713 | ||
1660 | if (!task) | 1714 | if (!task) |
1661 | goto out_no_task; | 1715 | goto out_no_task; |
1662 | 1716 | ||
1663 | for (p = ents; p->name; p++) { | 1717 | /* Lookup the directory entry */ |
1718 | last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; | ||
1719 | for (p = proc_base_stuff; p <= last; p++) { | ||
1664 | if (p->len != dentry->d_name.len) | 1720 | if (p->len != dentry->d_name.len) |
1665 | continue; | 1721 | continue; |
1666 | if (!memcmp(dentry->d_name.name, p->name, p->len)) | 1722 | if (!memcmp(dentry->d_name.name, p->name, p->len)) |
1667 | break; | 1723 | break; |
1668 | } | 1724 | } |
1669 | if (!p->name) | 1725 | if (p > last) |
1670 | goto out; | 1726 | goto out; |
1671 | 1727 | ||
1672 | error = ERR_PTR(-EINVAL); | 1728 | error = proc_base_instantiate(dir, dentry, task, p); |
1673 | inode = proc_pid_make_inode(dir->i_sb, task, p->type); | ||
1674 | if (!inode) | ||
1675 | goto out; | ||
1676 | 1729 | ||
1677 | ei = PROC_I(inode); | 1730 | out: |
1678 | inode->i_mode = p->mode; | 1731 | put_task_struct(task); |
1679 | /* | 1732 | out_no_task: |
1680 | * Yes, it does not scale. And it should not. Don't add | 1733 | return error; |
1681 | * new entries into /proc/<tgid>/ without very good reasons. | 1734 | } |
1682 | */ | 1735 | |
1683 | switch(p->type) { | 1736 | static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
1684 | case PROC_TGID_TASK: | 1737 | struct task_struct *task, struct pid_entry *p) |
1685 | inode->i_nlink = 2; | 1738 | { |
1686 | inode->i_op = &proc_task_inode_operations; | 1739 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, |
1687 | inode->i_fop = &proc_task_operations; | 1740 | proc_base_instantiate, task, p); |
1688 | break; | 1741 | } |
1689 | case PROC_TID_FD: | 1742 | |
1690 | case PROC_TGID_FD: | 1743 | /* |
1691 | inode->i_nlink = 2; | 1744 | * Thread groups |
1692 | inode->i_op = &proc_fd_inode_operations; | 1745 | */ |
1693 | inode->i_fop = &proc_fd_operations; | 1746 | static struct file_operations proc_task_operations; |
1694 | break; | 1747 | static struct inode_operations proc_task_inode_operations; |
1695 | case PROC_TID_EXE: | 1748 | |
1696 | case PROC_TGID_EXE: | 1749 | static struct pid_entry tgid_base_stuff[] = { |
1697 | inode->i_op = &proc_pid_link_inode_operations; | 1750 | DIR("task", S_IRUGO|S_IXUGO, task), |
1698 | ei->op.proc_get_link = proc_exe_link; | 1751 | DIR("fd", S_IRUSR|S_IXUSR, fd), |
1699 | break; | 1752 | INF("environ", S_IRUSR, pid_environ), |
1700 | case PROC_TID_CWD: | 1753 | INF("auxv", S_IRUSR, pid_auxv), |
1701 | case PROC_TGID_CWD: | 1754 | INF("status", S_IRUGO, pid_status), |
1702 | inode->i_op = &proc_pid_link_inode_operations; | 1755 | INF("cmdline", S_IRUGO, pid_cmdline), |
1703 | ei->op.proc_get_link = proc_cwd_link; | 1756 | INF("stat", S_IRUGO, tgid_stat), |
1704 | break; | 1757 | INF("statm", S_IRUGO, pid_statm), |
1705 | case PROC_TID_ROOT: | 1758 | REG("maps", S_IRUGO, maps), |
1706 | case PROC_TGID_ROOT: | ||
1707 | inode->i_op = &proc_pid_link_inode_operations; | ||
1708 | ei->op.proc_get_link = proc_root_link; | ||
1709 | break; | ||
1710 | case PROC_TID_ENVIRON: | ||
1711 | case PROC_TGID_ENVIRON: | ||
1712 | inode->i_fop = &proc_info_file_operations; | ||
1713 | ei->op.proc_read = proc_pid_environ; | ||
1714 | break; | ||
1715 | case PROC_TID_AUXV: | ||
1716 | case PROC_TGID_AUXV: | ||
1717 | inode->i_fop = &proc_info_file_operations; | ||
1718 | ei->op.proc_read = proc_pid_auxv; | ||
1719 | break; | ||
1720 | case PROC_TID_STATUS: | ||
1721 | case PROC_TGID_STATUS: | ||
1722 | inode->i_fop = &proc_info_file_operations; | ||
1723 | ei->op.proc_read = proc_pid_status; | ||
1724 | break; | ||
1725 | case PROC_TID_STAT: | ||
1726 | inode->i_fop = &proc_info_file_operations; | ||
1727 | ei->op.proc_read = proc_tid_stat; | ||
1728 | break; | ||
1729 | case PROC_TGID_STAT: | ||
1730 | inode->i_fop = &proc_info_file_operations; | ||
1731 | ei->op.proc_read = proc_tgid_stat; | ||
1732 | break; | ||
1733 | case PROC_TID_CMDLINE: | ||
1734 | case PROC_TGID_CMDLINE: | ||
1735 | inode->i_fop = &proc_info_file_operations; | ||
1736 | ei->op.proc_read = proc_pid_cmdline; | ||
1737 | break; | ||
1738 | case PROC_TID_STATM: | ||
1739 | case PROC_TGID_STATM: | ||
1740 | inode->i_fop = &proc_info_file_operations; | ||
1741 | ei->op.proc_read = proc_pid_statm; | ||
1742 | break; | ||
1743 | case PROC_TID_MAPS: | ||
1744 | case PROC_TGID_MAPS: | ||
1745 | inode->i_fop = &proc_maps_operations; | ||
1746 | break; | ||
1747 | #ifdef CONFIG_NUMA | 1759 | #ifdef CONFIG_NUMA |
1748 | case PROC_TID_NUMA_MAPS: | 1760 | REG("numa_maps", S_IRUGO, numa_maps), |
1749 | case PROC_TGID_NUMA_MAPS: | ||
1750 | inode->i_fop = &proc_numa_maps_operations; | ||
1751 | break; | ||
1752 | #endif | 1761 | #endif |
1753 | case PROC_TID_MEM: | 1762 | REG("mem", S_IRUSR|S_IWUSR, mem), |
1754 | case PROC_TGID_MEM: | ||
1755 | inode->i_fop = &proc_mem_operations; | ||
1756 | break; | ||
1757 | #ifdef CONFIG_SECCOMP | 1763 | #ifdef CONFIG_SECCOMP |
1758 | case PROC_TID_SECCOMP: | 1764 | REG("seccomp", S_IRUSR|S_IWUSR, seccomp), |
1759 | case PROC_TGID_SECCOMP: | 1765 | #endif |
1760 | inode->i_fop = &proc_seccomp_operations; | 1766 | LNK("cwd", cwd), |
1761 | break; | 1767 | LNK("root", root), |
1762 | #endif /* CONFIG_SECCOMP */ | 1768 | LNK("exe", exe), |
1763 | case PROC_TID_MOUNTS: | 1769 | REG("mounts", S_IRUGO, mounts), |
1764 | case PROC_TGID_MOUNTS: | 1770 | REG("mountstats", S_IRUSR, mountstats), |
1765 | inode->i_fop = &proc_mounts_operations; | ||
1766 | break; | ||
1767 | #ifdef CONFIG_MMU | 1771 | #ifdef CONFIG_MMU |
1768 | case PROC_TID_SMAPS: | 1772 | REG("smaps", S_IRUGO, smaps), |
1769 | case PROC_TGID_SMAPS: | ||
1770 | inode->i_fop = &proc_smaps_operations; | ||
1771 | break; | ||
1772 | #endif | 1773 | #endif |
1773 | case PROC_TID_MOUNTSTATS: | ||
1774 | case PROC_TGID_MOUNTSTATS: | ||
1775 | inode->i_fop = &proc_mountstats_operations; | ||
1776 | break; | ||
1777 | #ifdef CONFIG_SECURITY | 1774 | #ifdef CONFIG_SECURITY |
1778 | case PROC_TID_ATTR: | 1775 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), |
1779 | inode->i_nlink = 2; | ||
1780 | inode->i_op = &proc_tid_attr_inode_operations; | ||
1781 | inode->i_fop = &proc_tid_attr_operations; | ||
1782 | break; | ||
1783 | case PROC_TGID_ATTR: | ||
1784 | inode->i_nlink = 2; | ||
1785 | inode->i_op = &proc_tgid_attr_inode_operations; | ||
1786 | inode->i_fop = &proc_tgid_attr_operations; | ||
1787 | break; | ||
1788 | case PROC_TID_ATTR_CURRENT: | ||
1789 | case PROC_TGID_ATTR_CURRENT: | ||
1790 | case PROC_TID_ATTR_PREV: | ||
1791 | case PROC_TGID_ATTR_PREV: | ||
1792 | case PROC_TID_ATTR_EXEC: | ||
1793 | case PROC_TGID_ATTR_EXEC: | ||
1794 | case PROC_TID_ATTR_FSCREATE: | ||
1795 | case PROC_TGID_ATTR_FSCREATE: | ||
1796 | case PROC_TID_ATTR_KEYCREATE: | ||
1797 | case PROC_TGID_ATTR_KEYCREATE: | ||
1798 | case PROC_TID_ATTR_SOCKCREATE: | ||
1799 | case PROC_TGID_ATTR_SOCKCREATE: | ||
1800 | inode->i_fop = &proc_pid_attr_operations; | ||
1801 | break; | ||
1802 | #endif | 1776 | #endif |
1803 | #ifdef CONFIG_KALLSYMS | 1777 | #ifdef CONFIG_KALLSYMS |
1804 | case PROC_TID_WCHAN: | 1778 | INF("wchan", S_IRUGO, pid_wchan), |
1805 | case PROC_TGID_WCHAN: | ||
1806 | inode->i_fop = &proc_info_file_operations; | ||
1807 | ei->op.proc_read = proc_pid_wchan; | ||
1808 | break; | ||
1809 | #endif | 1779 | #endif |
1810 | #ifdef CONFIG_SCHEDSTATS | 1780 | #ifdef CONFIG_SCHEDSTATS |
1811 | case PROC_TID_SCHEDSTAT: | 1781 | INF("schedstat", S_IRUGO, pid_schedstat), |
1812 | case PROC_TGID_SCHEDSTAT: | ||
1813 | inode->i_fop = &proc_info_file_operations; | ||
1814 | ei->op.proc_read = proc_pid_schedstat; | ||
1815 | break; | ||
1816 | #endif | 1782 | #endif |
1817 | #ifdef CONFIG_CPUSETS | 1783 | #ifdef CONFIG_CPUSETS |
1818 | case PROC_TID_CPUSET: | 1784 | REG("cpuset", S_IRUGO, cpuset), |
1819 | case PROC_TGID_CPUSET: | ||
1820 | inode->i_fop = &proc_cpuset_operations; | ||
1821 | break; | ||
1822 | #endif | 1785 | #endif |
1823 | case PROC_TID_OOM_SCORE: | 1786 | INF("oom_score", S_IRUGO, oom_score), |
1824 | case PROC_TGID_OOM_SCORE: | 1787 | REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), |
1825 | inode->i_fop = &proc_info_file_operations; | ||
1826 | ei->op.proc_read = proc_oom_score; | ||
1827 | break; | ||
1828 | case PROC_TID_OOM_ADJUST: | ||
1829 | case PROC_TGID_OOM_ADJUST: | ||
1830 | inode->i_fop = &proc_oom_adjust_operations; | ||
1831 | break; | ||
1832 | #ifdef CONFIG_AUDITSYSCALL | 1788 | #ifdef CONFIG_AUDITSYSCALL |
1833 | case PROC_TID_LOGINUID: | 1789 | REG("loginuid", S_IWUSR|S_IRUGO, loginuid), |
1834 | case PROC_TGID_LOGINUID: | ||
1835 | inode->i_fop = &proc_loginuid_operations; | ||
1836 | break; | ||
1837 | #endif | 1790 | #endif |
1838 | default: | ||
1839 | printk("procfs: impossible type (%d)",p->type); | ||
1840 | iput(inode); | ||
1841 | error = ERR_PTR(-EINVAL); | ||
1842 | goto out; | ||
1843 | } | ||
1844 | dentry->d_op = &pid_dentry_operations; | ||
1845 | d_add(dentry, inode); | ||
1846 | /* Close the race of the process dying before we return the dentry */ | ||
1847 | if (pid_revalidate(dentry, NULL)) | ||
1848 | error = NULL; | ||
1849 | out: | ||
1850 | put_task_struct(task); | ||
1851 | out_no_task: | ||
1852 | return error; | ||
1853 | } | ||
1854 | |||
1855 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | ||
1856 | return proc_pident_lookup(dir, dentry, tgid_base_stuff); | ||
1857 | } | ||
1858 | |||
1859 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | ||
1860 | return proc_pident_lookup(dir, dentry, tid_base_stuff); | ||
1861 | } | ||
1862 | |||
1863 | static struct file_operations proc_tgid_base_operations = { | ||
1864 | .read = generic_read_dir, | ||
1865 | .readdir = proc_tgid_base_readdir, | ||
1866 | }; | 1791 | }; |
1867 | 1792 | ||
1868 | static struct file_operations proc_tid_base_operations = { | 1793 | static int proc_tgid_base_readdir(struct file * filp, |
1869 | .read = generic_read_dir, | ||
1870 | .readdir = proc_tid_base_readdir, | ||
1871 | }; | ||
1872 | |||
1873 | static struct inode_operations proc_tgid_base_inode_operations = { | ||
1874 | .lookup = proc_tgid_base_lookup, | ||
1875 | .getattr = pid_getattr, | ||
1876 | .setattr = proc_setattr, | ||
1877 | }; | ||
1878 | |||
1879 | static struct inode_operations proc_tid_base_inode_operations = { | ||
1880 | .lookup = proc_tid_base_lookup, | ||
1881 | .getattr = pid_getattr, | ||
1882 | .setattr = proc_setattr, | ||
1883 | }; | ||
1884 | |||
1885 | #ifdef CONFIG_SECURITY | ||
1886 | static int proc_tgid_attr_readdir(struct file * filp, | ||
1887 | void * dirent, filldir_t filldir) | ||
1888 | { | ||
1889 | return proc_pident_readdir(filp,dirent,filldir, | ||
1890 | tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff)); | ||
1891 | } | ||
1892 | |||
1893 | static int proc_tid_attr_readdir(struct file * filp, | ||
1894 | void * dirent, filldir_t filldir) | 1794 | void * dirent, filldir_t filldir) |
1895 | { | 1795 | { |
1896 | return proc_pident_readdir(filp,dirent,filldir, | 1796 | return proc_pident_readdir(filp,dirent,filldir, |
1897 | tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); | 1797 | tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); |
1898 | } | 1798 | } |
1899 | 1799 | ||
1900 | static struct file_operations proc_tgid_attr_operations = { | 1800 | static struct file_operations proc_tgid_base_operations = { |
1901 | .read = generic_read_dir, | ||
1902 | .readdir = proc_tgid_attr_readdir, | ||
1903 | }; | ||
1904 | |||
1905 | static struct file_operations proc_tid_attr_operations = { | ||
1906 | .read = generic_read_dir, | 1801 | .read = generic_read_dir, |
1907 | .readdir = proc_tid_attr_readdir, | 1802 | .readdir = proc_tgid_base_readdir, |
1908 | }; | 1803 | }; |
1909 | 1804 | ||
1910 | static struct dentry *proc_tgid_attr_lookup(struct inode *dir, | 1805 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ |
1911 | struct dentry *dentry, struct nameidata *nd) | 1806 | return proc_pident_lookup(dir, dentry, |
1912 | { | 1807 | tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); |
1913 | return proc_pident_lookup(dir, dentry, tgid_attr_stuff); | ||
1914 | } | ||
1915 | |||
1916 | static struct dentry *proc_tid_attr_lookup(struct inode *dir, | ||
1917 | struct dentry *dentry, struct nameidata *nd) | ||
1918 | { | ||
1919 | return proc_pident_lookup(dir, dentry, tid_attr_stuff); | ||
1920 | } | 1808 | } |
1921 | 1809 | ||
1922 | static struct inode_operations proc_tgid_attr_inode_operations = { | 1810 | static struct inode_operations proc_tgid_base_inode_operations = { |
1923 | .lookup = proc_tgid_attr_lookup, | 1811 | .lookup = proc_tgid_base_lookup, |
1924 | .getattr = pid_getattr, | ||
1925 | .setattr = proc_setattr, | ||
1926 | }; | ||
1927 | |||
1928 | static struct inode_operations proc_tid_attr_inode_operations = { | ||
1929 | .lookup = proc_tid_attr_lookup, | ||
1930 | .getattr = pid_getattr, | 1812 | .getattr = pid_getattr, |
1931 | .setattr = proc_setattr, | 1813 | .setattr = proc_setattr, |
1932 | }; | 1814 | }; |
1933 | #endif | ||
1934 | |||
1935 | /* | ||
1936 | * /proc/self: | ||
1937 | */ | ||
1938 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, | ||
1939 | int buflen) | ||
1940 | { | ||
1941 | char tmp[PROC_NUMBUF]; | ||
1942 | sprintf(tmp, "%d", current->tgid); | ||
1943 | return vfs_readlink(dentry,buffer,buflen,tmp); | ||
1944 | } | ||
1945 | |||
1946 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
1947 | { | ||
1948 | char tmp[PROC_NUMBUF]; | ||
1949 | sprintf(tmp, "%d", current->tgid); | ||
1950 | return ERR_PTR(vfs_follow_link(nd,tmp)); | ||
1951 | } | ||
1952 | |||
1953 | static struct inode_operations proc_self_inode_operations = { | ||
1954 | .readlink = proc_self_readlink, | ||
1955 | .follow_link = proc_self_follow_link, | ||
1956 | }; | ||
1957 | 1815 | ||
1958 | /** | 1816 | /** |
1959 | * proc_flush_task - Remove dcache entries for @task from the /proc dcache. | 1817 | * proc_flush_task - Remove dcache entries for @task from the /proc dcache. |
@@ -2022,54 +1880,23 @@ out: | |||
2022 | return; | 1880 | return; |
2023 | } | 1881 | } |
2024 | 1882 | ||
2025 | /* SMP-safe */ | 1883 | struct dentry *proc_pid_instantiate(struct inode *dir, |
2026 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 1884 | struct dentry * dentry, struct task_struct *task, void *ptr) |
2027 | { | 1885 | { |
2028 | struct dentry *result = ERR_PTR(-ENOENT); | 1886 | struct dentry *error = ERR_PTR(-ENOENT); |
2029 | struct task_struct *task; | ||
2030 | struct inode *inode; | 1887 | struct inode *inode; |
2031 | struct proc_inode *ei; | ||
2032 | unsigned tgid; | ||
2033 | |||
2034 | if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { | ||
2035 | inode = new_inode(dir->i_sb); | ||
2036 | if (!inode) | ||
2037 | return ERR_PTR(-ENOMEM); | ||
2038 | ei = PROC_I(inode); | ||
2039 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
2040 | inode->i_ino = fake_ino(0, PROC_TGID_INO); | ||
2041 | ei->pde = NULL; | ||
2042 | inode->i_mode = S_IFLNK|S_IRWXUGO; | ||
2043 | inode->i_uid = inode->i_gid = 0; | ||
2044 | inode->i_size = 64; | ||
2045 | inode->i_op = &proc_self_inode_operations; | ||
2046 | d_add(dentry, inode); | ||
2047 | return NULL; | ||
2048 | } | ||
2049 | tgid = name_to_int(dentry); | ||
2050 | if (tgid == ~0U) | ||
2051 | goto out; | ||
2052 | 1888 | ||
2053 | rcu_read_lock(); | 1889 | inode = proc_pid_make_inode(dir->i_sb, task); |
2054 | task = find_task_by_pid(tgid); | ||
2055 | if (task) | ||
2056 | get_task_struct(task); | ||
2057 | rcu_read_unlock(); | ||
2058 | if (!task) | ||
2059 | goto out; | ||
2060 | |||
2061 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); | ||
2062 | if (!inode) | 1890 | if (!inode) |
2063 | goto out_put_task; | 1891 | goto out; |
2064 | 1892 | ||
2065 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; | 1893 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; |
2066 | inode->i_op = &proc_tgid_base_inode_operations; | 1894 | inode->i_op = &proc_tgid_base_inode_operations; |
2067 | inode->i_fop = &proc_tgid_base_operations; | 1895 | inode->i_fop = &proc_tgid_base_operations; |
2068 | inode->i_flags|=S_IMMUTABLE; | 1896 | inode->i_flags|=S_IMMUTABLE; |
2069 | #ifdef CONFIG_SECURITY | ||
2070 | inode->i_nlink = 5; | ||
2071 | #else | ||
2072 | inode->i_nlink = 4; | 1897 | inode->i_nlink = 4; |
1898 | #ifdef CONFIG_SECURITY | ||
1899 | inode->i_nlink += 1; | ||
2073 | #endif | 1900 | #endif |
2074 | 1901 | ||
2075 | dentry->d_op = &pid_dentry_operations; | 1902 | dentry->d_op = &pid_dentry_operations; |
@@ -2077,179 +1904,251 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||
2077 | d_add(dentry, inode); | 1904 | d_add(dentry, inode); |
2078 | /* Close the race of the process dying before we return the dentry */ | 1905 | /* Close the race of the process dying before we return the dentry */ |
2079 | if (pid_revalidate(dentry, NULL)) | 1906 | if (pid_revalidate(dentry, NULL)) |
2080 | result = NULL; | 1907 | error = NULL; |
2081 | |||
2082 | out_put_task: | ||
2083 | put_task_struct(task); | ||
2084 | out: | 1908 | out: |
2085 | return result; | 1909 | return error; |
2086 | } | 1910 | } |
2087 | 1911 | ||
2088 | /* SMP-safe */ | 1912 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
2089 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | ||
2090 | { | 1913 | { |
2091 | struct dentry *result = ERR_PTR(-ENOENT); | 1914 | struct dentry *result = ERR_PTR(-ENOENT); |
2092 | struct task_struct *task; | 1915 | struct task_struct *task; |
2093 | struct task_struct *leader = get_proc_task(dir); | 1916 | unsigned tgid; |
2094 | struct inode *inode; | ||
2095 | unsigned tid; | ||
2096 | 1917 | ||
2097 | if (!leader) | 1918 | result = proc_base_lookup(dir, dentry); |
2098 | goto out_no_task; | 1919 | if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) |
1920 | goto out; | ||
2099 | 1921 | ||
2100 | tid = name_to_int(dentry); | 1922 | tgid = name_to_int(dentry); |
2101 | if (tid == ~0U) | 1923 | if (tgid == ~0U) |
2102 | goto out; | 1924 | goto out; |
2103 | 1925 | ||
2104 | rcu_read_lock(); | 1926 | rcu_read_lock(); |
2105 | task = find_task_by_pid(tid); | 1927 | task = find_task_by_pid(tgid); |
2106 | if (task) | 1928 | if (task) |
2107 | get_task_struct(task); | 1929 | get_task_struct(task); |
2108 | rcu_read_unlock(); | 1930 | rcu_read_unlock(); |
2109 | if (!task) | 1931 | if (!task) |
2110 | goto out; | 1932 | goto out; |
2111 | if (leader->tgid != task->tgid) | ||
2112 | goto out_drop_task; | ||
2113 | |||
2114 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); | ||
2115 | |||
2116 | |||
2117 | if (!inode) | ||
2118 | goto out_drop_task; | ||
2119 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; | ||
2120 | inode->i_op = &proc_tid_base_inode_operations; | ||
2121 | inode->i_fop = &proc_tid_base_operations; | ||
2122 | inode->i_flags|=S_IMMUTABLE; | ||
2123 | #ifdef CONFIG_SECURITY | ||
2124 | inode->i_nlink = 4; | ||
2125 | #else | ||
2126 | inode->i_nlink = 3; | ||
2127 | #endif | ||
2128 | |||
2129 | dentry->d_op = &pid_dentry_operations; | ||
2130 | |||
2131 | d_add(dentry, inode); | ||
2132 | /* Close the race of the process dying before we return the dentry */ | ||
2133 | if (pid_revalidate(dentry, NULL)) | ||
2134 | result = NULL; | ||
2135 | 1933 | ||
2136 | out_drop_task: | 1934 | result = proc_pid_instantiate(dir, dentry, task, NULL); |
2137 | put_task_struct(task); | 1935 | put_task_struct(task); |
2138 | out: | 1936 | out: |
2139 | put_task_struct(leader); | ||
2140 | out_no_task: | ||
2141 | return result; | 1937 | return result; |
2142 | } | 1938 | } |
2143 | 1939 | ||
2144 | /* | 1940 | /* |
2145 | * Find the first tgid to return to user space. | 1941 | * Find the first task with tgid >= tgid |
2146 | * | ||
2147 | * Usually this is just whatever follows &init_task, but if the users | ||
2148 | * buffer was too small to hold the full list or there was a seek into | ||
2149 | * the middle of the directory we have more work to do. | ||
2150 | * | ||
2151 | * In the case of a short read we start with find_task_by_pid. | ||
2152 | * | 1942 | * |
2153 | * In the case of a seek we start with &init_task and walk nr | ||
2154 | * threads past it. | ||
2155 | */ | 1943 | */ |
2156 | static struct task_struct *first_tgid(int tgid, unsigned int nr) | 1944 | static struct task_struct *next_tgid(unsigned int tgid) |
2157 | { | 1945 | { |
2158 | struct task_struct *pos; | 1946 | struct task_struct *task; |
2159 | rcu_read_lock(); | 1947 | struct pid *pid; |
2160 | if (tgid && nr) { | ||
2161 | pos = find_task_by_pid(tgid); | ||
2162 | if (pos && thread_group_leader(pos)) | ||
2163 | goto found; | ||
2164 | } | ||
2165 | /* If nr exceeds the number of processes get out quickly */ | ||
2166 | pos = NULL; | ||
2167 | if (nr && nr >= nr_processes()) | ||
2168 | goto done; | ||
2169 | 1948 | ||
2170 | /* If we haven't found our starting place yet start with | 1949 | rcu_read_lock(); |
2171 | * the init_task and walk nr tasks forward. | 1950 | retry: |
2172 | */ | 1951 | task = NULL; |
2173 | for (pos = next_task(&init_task); nr > 0; --nr) { | 1952 | pid = find_ge_pid(tgid); |
2174 | pos = next_task(pos); | 1953 | if (pid) { |
2175 | if (pos == &init_task) { | 1954 | tgid = pid->nr + 1; |
2176 | pos = NULL; | 1955 | task = pid_task(pid, PIDTYPE_PID); |
2177 | goto done; | 1956 | /* What we to know is if the pid we have find is the |
2178 | } | 1957 | * pid of a thread_group_leader. Testing for task |
1958 | * being a thread_group_leader is the obvious thing | ||
1959 | * todo but there is a window when it fails, due to | ||
1960 | * the pid transfer logic in de_thread. | ||
1961 | * | ||
1962 | * So we perform the straight forward test of seeing | ||
1963 | * if the pid we have found is the pid of a thread | ||
1964 | * group leader, and don't worry if the task we have | ||
1965 | * found doesn't happen to be a thread group leader. | ||
1966 | * As we don't care in the case of readdir. | ||
1967 | */ | ||
1968 | if (!task || !has_group_leader_pid(task)) | ||
1969 | goto retry; | ||
1970 | get_task_struct(task); | ||
2179 | } | 1971 | } |
2180 | found: | ||
2181 | get_task_struct(pos); | ||
2182 | done: | ||
2183 | rcu_read_unlock(); | 1972 | rcu_read_unlock(); |
2184 | return pos; | 1973 | return task; |
2185 | } | 1974 | } |
2186 | 1975 | ||
2187 | /* | 1976 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) |
2188 | * Find the next task in the task list. | 1977 | |
2189 | * Return NULL if we loop or there is any error. | 1978 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
2190 | * | 1979 | struct task_struct *task, int tgid) |
2191 | * The reference to the input task_struct is released. | ||
2192 | */ | ||
2193 | static struct task_struct *next_tgid(struct task_struct *start) | ||
2194 | { | 1980 | { |
2195 | struct task_struct *pos; | 1981 | char name[PROC_NUMBUF]; |
2196 | rcu_read_lock(); | 1982 | int len = snprintf(name, sizeof(name), "%d", tgid); |
2197 | pos = start; | 1983 | return proc_fill_cache(filp, dirent, filldir, name, len, |
2198 | if (pid_alive(start)) | 1984 | proc_pid_instantiate, task, NULL); |
2199 | pos = next_task(start); | ||
2200 | if (pid_alive(pos) && (pos != &init_task)) { | ||
2201 | get_task_struct(pos); | ||
2202 | goto done; | ||
2203 | } | ||
2204 | pos = NULL; | ||
2205 | done: | ||
2206 | rcu_read_unlock(); | ||
2207 | put_task_struct(start); | ||
2208 | return pos; | ||
2209 | } | 1985 | } |
2210 | 1986 | ||
2211 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 1987 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
2212 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 1988 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) |
2213 | { | 1989 | { |
2214 | char buf[PROC_NUMBUF]; | ||
2215 | unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; | 1990 | unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; |
1991 | struct task_struct *reaper = get_proc_task(filp->f_dentry->d_inode); | ||
2216 | struct task_struct *task; | 1992 | struct task_struct *task; |
2217 | int tgid; | 1993 | int tgid; |
2218 | 1994 | ||
2219 | if (!nr) { | 1995 | if (!reaper) |
2220 | ino_t ino = fake_ino(0,PROC_TGID_INO); | 1996 | goto out_no_task; |
2221 | if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) | 1997 | |
2222 | return 0; | 1998 | for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { |
2223 | filp->f_pos++; | 1999 | struct pid_entry *p = &proc_base_stuff[nr]; |
2224 | nr++; | 2000 | if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) |
2001 | goto out; | ||
2225 | } | 2002 | } |
2226 | nr -= 1; | ||
2227 | 2003 | ||
2228 | /* f_version caches the tgid value that the last readdir call couldn't | 2004 | tgid = filp->f_pos - TGID_OFFSET; |
2229 | * return. lseek aka telldir automagically resets f_version to 0. | 2005 | for (task = next_tgid(tgid); |
2230 | */ | ||
2231 | tgid = filp->f_version; | ||
2232 | filp->f_version = 0; | ||
2233 | for (task = first_tgid(tgid, nr); | ||
2234 | task; | 2006 | task; |
2235 | task = next_tgid(task), filp->f_pos++) { | 2007 | put_task_struct(task), task = next_tgid(tgid + 1)) { |
2236 | int len; | ||
2237 | ino_t ino; | ||
2238 | tgid = task->pid; | 2008 | tgid = task->pid; |
2239 | len = snprintf(buf, sizeof(buf), "%d", tgid); | 2009 | filp->f_pos = tgid + TGID_OFFSET; |
2240 | ino = fake_ino(tgid, PROC_TGID_INO); | 2010 | if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) { |
2241 | if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) { | ||
2242 | /* returning this tgid failed, save it as the first | ||
2243 | * pid for the next readir call */ | ||
2244 | filp->f_version = tgid; | ||
2245 | put_task_struct(task); | 2011 | put_task_struct(task); |
2246 | break; | 2012 | goto out; |
2247 | } | 2013 | } |
2248 | } | 2014 | } |
2015 | filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; | ||
2016 | out: | ||
2017 | put_task_struct(reaper); | ||
2018 | out_no_task: | ||
2249 | return 0; | 2019 | return 0; |
2250 | } | 2020 | } |
2251 | 2021 | ||
2252 | /* | 2022 | /* |
2023 | * Tasks | ||
2024 | */ | ||
2025 | static struct pid_entry tid_base_stuff[] = { | ||
2026 | DIR("fd", S_IRUSR|S_IXUSR, fd), | ||
2027 | INF("environ", S_IRUSR, pid_environ), | ||
2028 | INF("auxv", S_IRUSR, pid_auxv), | ||
2029 | INF("status", S_IRUGO, pid_status), | ||
2030 | INF("cmdline", S_IRUGO, pid_cmdline), | ||
2031 | INF("stat", S_IRUGO, tid_stat), | ||
2032 | INF("statm", S_IRUGO, pid_statm), | ||
2033 | REG("maps", S_IRUGO, maps), | ||
2034 | #ifdef CONFIG_NUMA | ||
2035 | REG("numa_maps", S_IRUGO, numa_maps), | ||
2036 | #endif | ||
2037 | REG("mem", S_IRUSR|S_IWUSR, mem), | ||
2038 | #ifdef CONFIG_SECCOMP | ||
2039 | REG("seccomp", S_IRUSR|S_IWUSR, seccomp), | ||
2040 | #endif | ||
2041 | LNK("cwd", cwd), | ||
2042 | LNK("root", root), | ||
2043 | LNK("exe", exe), | ||
2044 | REG("mounts", S_IRUGO, mounts), | ||
2045 | #ifdef CONFIG_MMU | ||
2046 | REG("smaps", S_IRUGO, smaps), | ||
2047 | #endif | ||
2048 | #ifdef CONFIG_SECURITY | ||
2049 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), | ||
2050 | #endif | ||
2051 | #ifdef CONFIG_KALLSYMS | ||
2052 | INF("wchan", S_IRUGO, pid_wchan), | ||
2053 | #endif | ||
2054 | #ifdef CONFIG_SCHEDSTATS | ||
2055 | INF("schedstat", S_IRUGO, pid_schedstat), | ||
2056 | #endif | ||
2057 | #ifdef CONFIG_CPUSETS | ||
2058 | REG("cpuset", S_IRUGO, cpuset), | ||
2059 | #endif | ||
2060 | INF("oom_score", S_IRUGO, oom_score), | ||
2061 | REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), | ||
2062 | #ifdef CONFIG_AUDITSYSCALL | ||
2063 | REG("loginuid", S_IWUSR|S_IRUGO, loginuid), | ||
2064 | #endif | ||
2065 | }; | ||
2066 | |||
2067 | static int proc_tid_base_readdir(struct file * filp, | ||
2068 | void * dirent, filldir_t filldir) | ||
2069 | { | ||
2070 | return proc_pident_readdir(filp,dirent,filldir, | ||
2071 | tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); | ||
2072 | } | ||
2073 | |||
2074 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | ||
2075 | return proc_pident_lookup(dir, dentry, | ||
2076 | tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); | ||
2077 | } | ||
2078 | |||
2079 | static struct file_operations proc_tid_base_operations = { | ||
2080 | .read = generic_read_dir, | ||
2081 | .readdir = proc_tid_base_readdir, | ||
2082 | }; | ||
2083 | |||
2084 | static struct inode_operations proc_tid_base_inode_operations = { | ||
2085 | .lookup = proc_tid_base_lookup, | ||
2086 | .getattr = pid_getattr, | ||
2087 | .setattr = proc_setattr, | ||
2088 | }; | ||
2089 | |||
2090 | static struct dentry *proc_task_instantiate(struct inode *dir, | ||
2091 | struct dentry *dentry, struct task_struct *task, void *ptr) | ||
2092 | { | ||
2093 | struct dentry *error = ERR_PTR(-ENOENT); | ||
2094 | struct inode *inode; | ||
2095 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
2096 | |||
2097 | if (!inode) | ||
2098 | goto out; | ||
2099 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; | ||
2100 | inode->i_op = &proc_tid_base_inode_operations; | ||
2101 | inode->i_fop = &proc_tid_base_operations; | ||
2102 | inode->i_flags|=S_IMMUTABLE; | ||
2103 | inode->i_nlink = 3; | ||
2104 | #ifdef CONFIG_SECURITY | ||
2105 | inode->i_nlink += 1; | ||
2106 | #endif | ||
2107 | |||
2108 | dentry->d_op = &pid_dentry_operations; | ||
2109 | |||
2110 | d_add(dentry, inode); | ||
2111 | /* Close the race of the process dying before we return the dentry */ | ||
2112 | if (pid_revalidate(dentry, NULL)) | ||
2113 | error = NULL; | ||
2114 | out: | ||
2115 | return error; | ||
2116 | } | ||
2117 | |||
2118 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | ||
2119 | { | ||
2120 | struct dentry *result = ERR_PTR(-ENOENT); | ||
2121 | struct task_struct *task; | ||
2122 | struct task_struct *leader = get_proc_task(dir); | ||
2123 | unsigned tid; | ||
2124 | |||
2125 | if (!leader) | ||
2126 | goto out_no_task; | ||
2127 | |||
2128 | tid = name_to_int(dentry); | ||
2129 | if (tid == ~0U) | ||
2130 | goto out; | ||
2131 | |||
2132 | rcu_read_lock(); | ||
2133 | task = find_task_by_pid(tid); | ||
2134 | if (task) | ||
2135 | get_task_struct(task); | ||
2136 | rcu_read_unlock(); | ||
2137 | if (!task) | ||
2138 | goto out; | ||
2139 | if (leader->tgid != task->tgid) | ||
2140 | goto out_drop_task; | ||
2141 | |||
2142 | result = proc_task_instantiate(dir, dentry, task, NULL); | ||
2143 | out_drop_task: | ||
2144 | put_task_struct(task); | ||
2145 | out: | ||
2146 | put_task_struct(leader); | ||
2147 | out_no_task: | ||
2148 | return result; | ||
2149 | } | ||
2150 | |||
2151 | /* | ||
2253 | * Find the first tid of a thread group to return to user space. | 2152 | * Find the first tid of a thread group to return to user space. |
2254 | * | 2153 | * |
2255 | * Usually this is just the thread group leader, but if the users | 2154 | * Usually this is just the thread group leader, but if the users |
@@ -2318,10 +2217,18 @@ static struct task_struct *next_tid(struct task_struct *start) | |||
2318 | return pos; | 2217 | return pos; |
2319 | } | 2218 | } |
2320 | 2219 | ||
2220 | static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
2221 | struct task_struct *task, int tid) | ||
2222 | { | ||
2223 | char name[PROC_NUMBUF]; | ||
2224 | int len = snprintf(name, sizeof(name), "%d", tid); | ||
2225 | return proc_fill_cache(filp, dirent, filldir, name, len, | ||
2226 | proc_task_instantiate, task, NULL); | ||
2227 | } | ||
2228 | |||
2321 | /* for the /proc/TGID/task/ directories */ | 2229 | /* for the /proc/TGID/task/ directories */ |
2322 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2230 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) |
2323 | { | 2231 | { |
2324 | char buf[PROC_NUMBUF]; | ||
2325 | struct dentry *dentry = filp->f_dentry; | 2232 | struct dentry *dentry = filp->f_dentry; |
2326 | struct inode *inode = dentry->d_inode; | 2233 | struct inode *inode = dentry->d_inode; |
2327 | struct task_struct *leader = get_proc_task(inode); | 2234 | struct task_struct *leader = get_proc_task(inode); |
@@ -2358,11 +2265,8 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi | |||
2358 | for (task = first_tid(leader, tid, pos - 2); | 2265 | for (task = first_tid(leader, tid, pos - 2); |
2359 | task; | 2266 | task; |
2360 | task = next_tid(task), pos++) { | 2267 | task = next_tid(task), pos++) { |
2361 | int len; | ||
2362 | tid = task->pid; | 2268 | tid = task->pid; |
2363 | len = snprintf(buf, sizeof(buf), "%d", tid); | 2269 | if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { |
2364 | ino = fake_ino(tid, PROC_TID_INO); | ||
2365 | if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) { | ||
2366 | /* returning this tgid failed, save it as the first | 2270 | /* returning this tgid failed, save it as the first |
2367 | * pid for the next readir call */ | 2271 | * pid for the next readir call */ |
2368 | filp->f_version = tid; | 2272 | filp->f_version = tid; |
@@ -2392,3 +2296,14 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct | |||
2392 | 2296 | ||
2393 | return 0; | 2297 | return 0; |
2394 | } | 2298 | } |
2299 | |||
2300 | static struct inode_operations proc_task_inode_operations = { | ||
2301 | .lookup = proc_task_lookup, | ||
2302 | .getattr = proc_task_getattr, | ||
2303 | .setattr = proc_setattr, | ||
2304 | }; | ||
2305 | |||
2306 | static struct file_operations proc_task_operations = { | ||
2307 | .read = generic_read_dir, | ||
2308 | .readdir = proc_task_readdir, | ||
2309 | }; | ||
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 66bc425f2f3d..8d88e58ed5cc 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/sysrq.h> | 45 | #include <linux/sysrq.h> |
46 | #include <linux/vmalloc.h> | 46 | #include <linux/vmalloc.h> |
47 | #include <linux/crash_dump.h> | 47 | #include <linux/crash_dump.h> |
48 | #include <linux/pspace.h> | ||
48 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
49 | #include <asm/pgtable.h> | 50 | #include <asm/pgtable.h> |
50 | #include <asm/io.h> | 51 | #include <asm/io.h> |
@@ -91,7 +92,7 @@ static int loadavg_read_proc(char *page, char **start, off_t off, | |||
91 | LOAD_INT(a), LOAD_FRAC(a), | 92 | LOAD_INT(a), LOAD_FRAC(a), |
92 | LOAD_INT(b), LOAD_FRAC(b), | 93 | LOAD_INT(b), LOAD_FRAC(b), |
93 | LOAD_INT(c), LOAD_FRAC(c), | 94 | LOAD_INT(c), LOAD_FRAC(c), |
94 | nr_running(), nr_threads, last_pid); | 95 | nr_running(), nr_threads, init_pspace.last_pid); |
95 | return proc_calc_metrics(page, start, off, count, eof, len); | 96 | return proc_calc_metrics(page, start, off, count, eof, len); |
96 | } | 97 | } |
97 | 98 | ||
diff --git a/fs/proc/root.c b/fs/proc/root.c index 8901c65caca8..ffe66c38488b 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/bitops.h> | 17 | #include <linux/bitops.h> |
18 | #include <linux/smp_lock.h> | 18 | #include <linux/smp_lock.h> |
19 | #include <linux/mount.h> | ||
19 | 20 | ||
20 | #include "internal.h" | 21 | #include "internal.h" |
21 | 22 | ||
@@ -28,6 +29,17 @@ struct proc_dir_entry *proc_sys_root; | |||
28 | static int proc_get_sb(struct file_system_type *fs_type, | 29 | static int proc_get_sb(struct file_system_type *fs_type, |
29 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 30 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) |
30 | { | 31 | { |
32 | if (proc_mnt) { | ||
33 | /* Seed the root directory with a pid so it doesn't need | ||
34 | * to be special in base.c. I would do this earlier but | ||
35 | * the only task alive when /proc is mounted the first time | ||
36 | * is the init_task and it doesn't have any pids. | ||
37 | */ | ||
38 | struct proc_inode *ei; | ||
39 | ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); | ||
40 | if (!ei->pid) | ||
41 | ei->pid = find_get_pid(1); | ||
42 | } | ||
31 | return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); | 43 | return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); |
32 | } | 44 | } |
33 | 45 | ||
diff --git a/fs/readdir.c b/fs/readdir.c index b6109329b607..bff3ee58e2f8 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -69,20 +69,24 @@ struct readdir_callback { | |||
69 | }; | 69 | }; |
70 | 70 | ||
71 | static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, | 71 | static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, |
72 | ino_t ino, unsigned int d_type) | 72 | u64 ino, unsigned int d_type) |
73 | { | 73 | { |
74 | struct readdir_callback * buf = (struct readdir_callback *) __buf; | 74 | struct readdir_callback * buf = (struct readdir_callback *) __buf; |
75 | struct old_linux_dirent __user * dirent; | 75 | struct old_linux_dirent __user * dirent; |
76 | unsigned long d_ino; | ||
76 | 77 | ||
77 | if (buf->result) | 78 | if (buf->result) |
78 | return -EINVAL; | 79 | return -EINVAL; |
80 | d_ino = ino; | ||
81 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | ||
82 | return -EOVERFLOW; | ||
79 | buf->result++; | 83 | buf->result++; |
80 | dirent = buf->dirent; | 84 | dirent = buf->dirent; |
81 | if (!access_ok(VERIFY_WRITE, dirent, | 85 | if (!access_ok(VERIFY_WRITE, dirent, |
82 | (unsigned long)(dirent->d_name + namlen + 1) - | 86 | (unsigned long)(dirent->d_name + namlen + 1) - |
83 | (unsigned long)dirent)) | 87 | (unsigned long)dirent)) |
84 | goto efault; | 88 | goto efault; |
85 | if ( __put_user(ino, &dirent->d_ino) || | 89 | if ( __put_user(d_ino, &dirent->d_ino) || |
86 | __put_user(offset, &dirent->d_offset) || | 90 | __put_user(offset, &dirent->d_offset) || |
87 | __put_user(namlen, &dirent->d_namlen) || | 91 | __put_user(namlen, &dirent->d_namlen) || |
88 | __copy_to_user(dirent->d_name, name, namlen) || | 92 | __copy_to_user(dirent->d_name, name, namlen) || |
@@ -138,22 +142,26 @@ struct getdents_callback { | |||
138 | }; | 142 | }; |
139 | 143 | ||
140 | static int filldir(void * __buf, const char * name, int namlen, loff_t offset, | 144 | static int filldir(void * __buf, const char * name, int namlen, loff_t offset, |
141 | ino_t ino, unsigned int d_type) | 145 | u64 ino, unsigned int d_type) |
142 | { | 146 | { |
143 | struct linux_dirent __user * dirent; | 147 | struct linux_dirent __user * dirent; |
144 | struct getdents_callback * buf = (struct getdents_callback *) __buf; | 148 | struct getdents_callback * buf = (struct getdents_callback *) __buf; |
149 | unsigned long d_ino; | ||
145 | int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); | 150 | int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); |
146 | 151 | ||
147 | buf->error = -EINVAL; /* only used if we fail.. */ | 152 | buf->error = -EINVAL; /* only used if we fail.. */ |
148 | if (reclen > buf->count) | 153 | if (reclen > buf->count) |
149 | return -EINVAL; | 154 | return -EINVAL; |
155 | d_ino = ino; | ||
156 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | ||
157 | return -EOVERFLOW; | ||
150 | dirent = buf->previous; | 158 | dirent = buf->previous; |
151 | if (dirent) { | 159 | if (dirent) { |
152 | if (__put_user(offset, &dirent->d_off)) | 160 | if (__put_user(offset, &dirent->d_off)) |
153 | goto efault; | 161 | goto efault; |
154 | } | 162 | } |
155 | dirent = buf->current_dir; | 163 | dirent = buf->current_dir; |
156 | if (__put_user(ino, &dirent->d_ino)) | 164 | if (__put_user(d_ino, &dirent->d_ino)) |
157 | goto efault; | 165 | goto efault; |
158 | if (__put_user(reclen, &dirent->d_reclen)) | 166 | if (__put_user(reclen, &dirent->d_reclen)) |
159 | goto efault; | 167 | goto efault; |
@@ -222,7 +230,7 @@ struct getdents_callback64 { | |||
222 | }; | 230 | }; |
223 | 231 | ||
224 | static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, | 232 | static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, |
225 | ino_t ino, unsigned int d_type) | 233 | u64 ino, unsigned int d_type) |
226 | { | 234 | { |
227 | struct linux_dirent64 __user *dirent; | 235 | struct linux_dirent64 __user *dirent; |
228 | struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; | 236 | struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; |
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 41f24369e47a..c093642fb983 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -38,8 +38,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
38 | int err; | 38 | int err; |
39 | int jbegin_failure = 0; | 39 | int jbegin_failure = 0; |
40 | 40 | ||
41 | if (!S_ISREG(inode->i_mode)) | 41 | BUG_ON(!S_ISREG(inode->i_mode)); |
42 | BUG(); | ||
43 | 42 | ||
44 | /* fast out for when nothing needs to be done */ | 43 | /* fast out for when nothing needs to be done */ |
45 | if ((atomic_read(&inode->i_count) > 1 || | 44 | if ((atomic_read(&inode->i_count) > 1 || |
@@ -125,8 +124,7 @@ static int reiserfs_sync_file(struct file *p_s_filp, | |||
125 | int n_err; | 124 | int n_err; |
126 | int barrier_done; | 125 | int barrier_done; |
127 | 126 | ||
128 | if (!S_ISREG(p_s_inode->i_mode)) | 127 | BUG_ON(!S_ISREG(p_s_inode->i_mode)); |
129 | BUG(); | ||
130 | n_err = sync_mapping_buffers(p_s_inode->i_mapping); | 128 | n_err = sync_mapping_buffers(p_s_inode->i_mapping); |
131 | reiserfs_write_lock(p_s_inode->i_sb); | 129 | reiserfs_write_lock(p_s_inode->i_sb); |
132 | barrier_done = reiserfs_commit_for_inode(p_s_inode); | 130 | barrier_done = reiserfs_commit_for_inode(p_s_inode); |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 7e5a2f5ebeb0..9c69bcacad22 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -1780,7 +1780,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1780 | err = -EDQUOT; | 1780 | err = -EDQUOT; |
1781 | goto out_end_trans; | 1781 | goto out_end_trans; |
1782 | } | 1782 | } |
1783 | if (!dir || !dir->i_nlink) { | 1783 | if (!dir->i_nlink) { |
1784 | err = -EPERM; | 1784 | err = -EPERM; |
1785 | goto out_bad_inode; | 1785 | goto out_bad_inode; |
1786 | } | 1786 | } |
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index 7a88adbceef6..b9b423b22a8b 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c | |||
@@ -75,8 +75,7 @@ static int sd_create_vi(struct virtual_node *vn, | |||
75 | static int sd_check_left(struct virtual_item *vi, int free, | 75 | static int sd_check_left(struct virtual_item *vi, int free, |
76 | int start_skip, int end_skip) | 76 | int start_skip, int end_skip) |
77 | { | 77 | { |
78 | if (start_skip || end_skip) | 78 | BUG_ON(start_skip || end_skip); |
79 | BUG(); | ||
80 | return -1; | 79 | return -1; |
81 | } | 80 | } |
82 | 81 | ||
@@ -87,8 +86,7 @@ static int sd_check_right(struct virtual_item *vi, int free) | |||
87 | 86 | ||
88 | static int sd_part_size(struct virtual_item *vi, int first, int count) | 87 | static int sd_part_size(struct virtual_item *vi, int first, int count) |
89 | { | 88 | { |
90 | if (count) | 89 | BUG_ON(count); |
91 | BUG(); | ||
92 | return 0; | 90 | return 0; |
93 | } | 91 | } |
94 | 92 | ||
@@ -476,8 +474,7 @@ static int direntry_create_vi(struct virtual_node *vn, | |||
476 | 474 | ||
477 | vi->vi_index = TYPE_DIRENTRY; | 475 | vi->vi_index = TYPE_DIRENTRY; |
478 | 476 | ||
479 | if (!(vi->vi_ih) || !vi->vi_item) | 477 | BUG_ON(!(vi->vi_ih) || !vi->vi_item); |
480 | BUG(); | ||
481 | 478 | ||
482 | dir_u->flags = 0; | 479 | dir_u->flags = 0; |
483 | if (le_ih_k_offset(vi->vi_ih) == DOT_OFFSET) | 480 | if (le_ih_k_offset(vi->vi_ih) == DOT_OFFSET) |
@@ -575,8 +572,7 @@ static int direntry_check_right(struct virtual_item *vi, int free) | |||
575 | free -= dir_u->entry_sizes[i]; | 572 | free -= dir_u->entry_sizes[i]; |
576 | entries++; | 573 | entries++; |
577 | } | 574 | } |
578 | if (entries == dir_u->entry_count) | 575 | BUG_ON(entries == dir_u->entry_count); |
579 | BUG(); | ||
580 | 576 | ||
581 | /* "." and ".." can not be separated from each other */ | 577 | /* "." and ".." can not be separated from each other */ |
582 | if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) | 578 | if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index e6b5ccf23f15..ad8cbc49883a 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -718,8 +718,7 @@ static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, | |||
718 | spinlock_t * lock, void (fn) (struct buffer_chunk *)) | 718 | spinlock_t * lock, void (fn) (struct buffer_chunk *)) |
719 | { | 719 | { |
720 | int ret = 0; | 720 | int ret = 0; |
721 | if (chunk->nr >= CHUNK_SIZE) | 721 | BUG_ON(chunk->nr >= CHUNK_SIZE); |
722 | BUG(); | ||
723 | chunk->bh[chunk->nr++] = bh; | 722 | chunk->bh[chunk->nr++] = bh; |
724 | if (chunk->nr >= CHUNK_SIZE) { | 723 | if (chunk->nr >= CHUNK_SIZE) { |
725 | ret = 1; | 724 | ret = 1; |
@@ -788,8 +787,7 @@ static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, | |||
788 | /* buffer must be locked for __add_jh, should be able to have | 787 | /* buffer must be locked for __add_jh, should be able to have |
789 | * two adds at the same time | 788 | * two adds at the same time |
790 | */ | 789 | */ |
791 | if (bh->b_private) | 790 | BUG_ON(bh->b_private); |
792 | BUG(); | ||
793 | jh->bh = bh; | 791 | jh->bh = bh; |
794 | bh->b_private = jh; | 792 | bh->b_private = jh; |
795 | } | 793 | } |
@@ -2967,8 +2965,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
2967 | int retval; | 2965 | int retval; |
2968 | 2966 | ||
2969 | reiserfs_check_lock_depth(p_s_sb, "journal_begin"); | 2967 | reiserfs_check_lock_depth(p_s_sb, "journal_begin"); |
2970 | if (nblocks > journal->j_trans_max) | 2968 | BUG_ON(nblocks > journal->j_trans_max); |
2971 | BUG(); | ||
2972 | 2969 | ||
2973 | PROC_INFO_INC(p_s_sb, journal.journal_being); | 2970 | PROC_INFO_INC(p_s_sb, journal.journal_being); |
2974 | /* set here for journal_join */ | 2971 | /* set here for journal_join */ |
@@ -3084,9 +3081,8 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct | |||
3084 | if (reiserfs_transaction_running(s)) { | 3081 | if (reiserfs_transaction_running(s)) { |
3085 | th = current->journal_info; | 3082 | th = current->journal_info; |
3086 | th->t_refcount++; | 3083 | th->t_refcount++; |
3087 | if (th->t_refcount < 2) { | 3084 | BUG_ON(th->t_refcount < 2); |
3088 | BUG(); | 3085 | |
3089 | } | ||
3090 | return th; | 3086 | return th; |
3091 | } | 3087 | } |
3092 | th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); | 3088 | th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); |
@@ -3126,9 +3122,7 @@ static int journal_join(struct reiserfs_transaction_handle *th, | |||
3126 | ** pointer | 3122 | ** pointer |
3127 | */ | 3123 | */ |
3128 | th->t_handle_save = cur_th; | 3124 | th->t_handle_save = cur_th; |
3129 | if (cur_th && cur_th->t_refcount > 1) { | 3125 | BUG_ON(cur_th && cur_th->t_refcount > 1); |
3130 | BUG(); | ||
3131 | } | ||
3132 | return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); | 3126 | return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); |
3133 | } | 3127 | } |
3134 | 3128 | ||
@@ -3141,9 +3135,7 @@ int journal_join_abort(struct reiserfs_transaction_handle *th, | |||
3141 | ** pointer | 3135 | ** pointer |
3142 | */ | 3136 | */ |
3143 | th->t_handle_save = cur_th; | 3137 | th->t_handle_save = cur_th; |
3144 | if (cur_th && cur_th->t_refcount > 1) { | 3138 | BUG_ON(cur_th && cur_th->t_refcount > 1); |
3145 | BUG(); | ||
3146 | } | ||
3147 | return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); | 3139 | return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); |
3148 | } | 3140 | } |
3149 | 3141 | ||
@@ -3178,8 +3170,7 @@ int journal_begin(struct reiserfs_transaction_handle *th, | |||
3178 | current->journal_info = th; | 3170 | current->journal_info = th; |
3179 | } | 3171 | } |
3180 | ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG); | 3172 | ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG); |
3181 | if (current->journal_info != th) | 3173 | BUG_ON(current->journal_info != th); |
3182 | BUG(); | ||
3183 | 3174 | ||
3184 | /* I guess this boils down to being the reciprocal of clm-2100 above. | 3175 | /* I guess this boils down to being the reciprocal of clm-2100 above. |
3185 | * If do_journal_begin_r fails, we need to put it back, since journal_end | 3176 | * If do_journal_begin_r fails, we need to put it back, since journal_end |
@@ -3324,8 +3315,7 @@ int journal_end(struct reiserfs_transaction_handle *th, | |||
3324 | /* we aren't allowed to close a nested transaction on a different | 3315 | /* we aren't allowed to close a nested transaction on a different |
3325 | ** filesystem from the one in the task struct | 3316 | ** filesystem from the one in the task struct |
3326 | */ | 3317 | */ |
3327 | if (cur_th->t_super != th->t_super) | 3318 | BUG_ON(cur_th->t_super != th->t_super); |
3328 | BUG(); | ||
3329 | 3319 | ||
3330 | if (th != cur_th) { | 3320 | if (th != cur_th) { |
3331 | memcpy(current->journal_info, th, sizeof(*th)); | 3321 | memcpy(current->journal_info, th, sizeof(*th)); |
@@ -3444,9 +3434,7 @@ int journal_end_sync(struct reiserfs_transaction_handle *th, | |||
3444 | 3434 | ||
3445 | BUG_ON(!th->t_trans_id); | 3435 | BUG_ON(!th->t_trans_id); |
3446 | /* you can sync while nested, very, very bad */ | 3436 | /* you can sync while nested, very, very bad */ |
3447 | if (th->t_refcount > 1) { | 3437 | BUG_ON(th->t_refcount > 1); |
3448 | BUG(); | ||
3449 | } | ||
3450 | if (journal->j_len == 0) { | 3438 | if (journal->j_len == 0) { |
3451 | reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), | 3439 | reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), |
3452 | 1); | 3440 | 1); |
@@ -3556,9 +3544,8 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3556 | ** will be dealt with by next transaction that actually writes something, but should be taken | 3544 | ** will be dealt with by next transaction that actually writes something, but should be taken |
3557 | ** care of in this trans | 3545 | ** care of in this trans |
3558 | */ | 3546 | */ |
3559 | if (journal->j_len == 0) { | 3547 | BUG_ON(journal->j_len == 0); |
3560 | BUG(); | 3548 | |
3561 | } | ||
3562 | /* if wcount > 0, and we are called to with flush or commit_now, | 3549 | /* if wcount > 0, and we are called to with flush or commit_now, |
3563 | ** we wait on j_join_wait. We will wake up when the last writer has | 3550 | ** we wait on j_join_wait. We will wake up when the last writer has |
3564 | ** finished the transaction, and started it on its way to the disk. | 3551 | ** finished the transaction, and started it on its way to the disk. |
@@ -3592,9 +3579,8 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3592 | unlock_journal(p_s_sb); | 3579 | unlock_journal(p_s_sb); |
3593 | } | 3580 | } |
3594 | } | 3581 | } |
3595 | if (journal->j_trans_id == trans_id) { | 3582 | BUG_ON(journal->j_trans_id == trans_id); |
3596 | BUG(); | 3583 | |
3597 | } | ||
3598 | if (commit_now | 3584 | if (commit_now |
3599 | && journal_list_still_alive(p_s_sb, trans_id) | 3585 | && journal_list_still_alive(p_s_sb, trans_id) |
3600 | && wait_on_commit) { | 3586 | && wait_on_commit) { |
@@ -4074,9 +4060,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4074 | set_commit_trans_len(commit, journal->j_len); | 4060 | set_commit_trans_len(commit, journal->j_len); |
4075 | 4061 | ||
4076 | /* special check in case all buffers in the journal were marked for not logging */ | 4062 | /* special check in case all buffers in the journal were marked for not logging */ |
4077 | if (journal->j_len == 0) { | 4063 | BUG_ON(journal->j_len == 0); |
4078 | BUG(); | ||
4079 | } | ||
4080 | 4064 | ||
4081 | /* we're about to dirty all the log blocks, mark the description block | 4065 | /* we're about to dirty all the log blocks, mark the description block |
4082 | * dirty now too. Don't mark the commit block dirty until all the | 4066 | * dirty now too. Don't mark the commit block dirty until all the |
@@ -4173,8 +4157,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4173 | journal, jl, &jl->j_tail_bh_list); | 4157 | journal, jl, &jl->j_tail_bh_list); |
4174 | lock_kernel(); | 4158 | lock_kernel(); |
4175 | } | 4159 | } |
4176 | if (!list_empty(&jl->j_tail_bh_list)) | 4160 | BUG_ON(!list_empty(&jl->j_tail_bh_list)); |
4177 | BUG(); | ||
4178 | up(&jl->j_commit_lock); | 4161 | up(&jl->j_commit_lock); |
4179 | 4162 | ||
4180 | /* honor the flush wishes from the caller, simple commits can | 4163 | /* honor the flush wishes from the caller, simple commits can |
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 16e9cff8f15d..abde1edc2235 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -67,8 +67,7 @@ inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) | |||
67 | { | 67 | { |
68 | struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; | 68 | struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; |
69 | 69 | ||
70 | if (de->de_entry_num >= ih_entry_count(de->de_ih)) | 70 | BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); |
71 | BUG(); | ||
72 | 71 | ||
73 | de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num); | 72 | de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num); |
74 | de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0); | 73 | de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0); |
@@ -80,8 +79,7 @@ inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) | |||
80 | // what entry points to | 79 | // what entry points to |
81 | static inline void set_de_object_key(struct reiserfs_dir_entry *de) | 80 | static inline void set_de_object_key(struct reiserfs_dir_entry *de) |
82 | { | 81 | { |
83 | if (de->de_entry_num >= ih_entry_count(de->de_ih)) | 82 | BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); |
84 | BUG(); | ||
85 | de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num])); | 83 | de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num])); |
86 | de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num])); | 84 | de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num])); |
87 | } | 85 | } |
@@ -90,8 +88,7 @@ static inline void store_de_entry_key(struct reiserfs_dir_entry *de) | |||
90 | { | 88 | { |
91 | struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; | 89 | struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; |
92 | 90 | ||
93 | if (de->de_entry_num >= ih_entry_count(de->de_ih)) | 91 | BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); |
94 | BUG(); | ||
95 | 92 | ||
96 | /* store key of the found entry */ | 93 | /* store key of the found entry */ |
97 | de->de_entry_key.version = KEY_FORMAT_3_5; | 94 | de->de_entry_key.version = KEY_FORMAT_3_5; |
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 8b9b13127136..5240abe1a709 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c | |||
@@ -1476,9 +1476,7 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, | |||
1476 | int n_block_size = p_s_sb->s_blocksize; | 1476 | int n_block_size = p_s_sb->s_blocksize; |
1477 | int cut_bytes; | 1477 | int cut_bytes; |
1478 | BUG_ON(!th->t_trans_id); | 1478 | BUG_ON(!th->t_trans_id); |
1479 | 1479 | BUG_ON(n_new_file_size != p_s_inode->i_size); | |
1480 | if (n_new_file_size != p_s_inode->i_size) | ||
1481 | BUG(); | ||
1482 | 1480 | ||
1483 | /* the page being sent in could be NULL if there was an i/o error | 1481 | /* the page being sent in could be NULL if there was an i/o error |
1484 | ** reading in the last block. The user will hit problems trying to | 1482 | ** reading in the last block. The user will hit problems trying to |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index d935fb9394e3..7bdb0ed443e1 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -773,7 +773,7 @@ int reiserfs_xattr_del(struct inode *inode, const char *name) | |||
773 | 773 | ||
774 | static int | 774 | static int |
775 | reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen, | 775 | reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen, |
776 | loff_t offset, ino_t ino, unsigned int d_type) | 776 | loff_t offset, u64 ino, unsigned int d_type) |
777 | { | 777 | { |
778 | struct dentry *xadir = (struct dentry *)buf; | 778 | struct dentry *xadir = (struct dentry *)buf; |
779 | 779 | ||
@@ -851,7 +851,7 @@ struct reiserfs_chown_buf { | |||
851 | /* XXX: If there is a better way to do this, I'd love to hear about it */ | 851 | /* XXX: If there is a better way to do this, I'd love to hear about it */ |
852 | static int | 852 | static int |
853 | reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen, | 853 | reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen, |
854 | loff_t offset, ino_t ino, unsigned int d_type) | 854 | loff_t offset, u64 ino, unsigned int d_type) |
855 | { | 855 | { |
856 | struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf; | 856 | struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf; |
857 | struct dentry *xafile, *xadir = chown_buf->xadir; | 857 | struct dentry *xafile, *xadir = chown_buf->xadir; |
@@ -1036,7 +1036,7 @@ struct reiserfs_listxattr_buf { | |||
1036 | 1036 | ||
1037 | static int | 1037 | static int |
1038 | reiserfs_listxattr_filler(void *buf, const char *name, int namelen, | 1038 | reiserfs_listxattr_filler(void *buf, const char *name, int namelen, |
1039 | loff_t offset, ino_t ino, unsigned int d_type) | 1039 | loff_t offset, u64 ino, unsigned int d_type) |
1040 | { | 1040 | { |
1041 | struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf; | 1041 | struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf; |
1042 | int len = 0; | 1042 | int len = 0; |
@@ -140,6 +140,8 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta | |||
140 | memset(&tmp, 0, sizeof(struct __old_kernel_stat)); | 140 | memset(&tmp, 0, sizeof(struct __old_kernel_stat)); |
141 | tmp.st_dev = old_encode_dev(stat->dev); | 141 | tmp.st_dev = old_encode_dev(stat->dev); |
142 | tmp.st_ino = stat->ino; | 142 | tmp.st_ino = stat->ino; |
143 | if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) | ||
144 | return -EOVERFLOW; | ||
143 | tmp.st_mode = stat->mode; | 145 | tmp.st_mode = stat->mode; |
144 | tmp.st_nlink = stat->nlink; | 146 | tmp.st_nlink = stat->nlink; |
145 | if (tmp.st_nlink != stat->nlink) | 147 | if (tmp.st_nlink != stat->nlink) |
@@ -210,6 +212,8 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) | |||
210 | tmp.st_dev = new_encode_dev(stat->dev); | 212 | tmp.st_dev = new_encode_dev(stat->dev); |
211 | #endif | 213 | #endif |
212 | tmp.st_ino = stat->ino; | 214 | tmp.st_ino = stat->ino; |
215 | if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) | ||
216 | return -EOVERFLOW; | ||
213 | tmp.st_mode = stat->mode; | 217 | tmp.st_mode = stat->mode; |
214 | tmp.st_nlink = stat->nlink; | 218 | tmp.st_nlink = stat->nlink; |
215 | if (tmp.st_nlink != stat->nlink) | 219 | if (tmp.st_nlink != stat->nlink) |
@@ -347,6 +351,8 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) | |||
347 | tmp.st_rdev = huge_encode_dev(stat->rdev); | 351 | tmp.st_rdev = huge_encode_dev(stat->rdev); |
348 | #endif | 352 | #endif |
349 | tmp.st_ino = stat->ino; | 353 | tmp.st_ino = stat->ino; |
354 | if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) | ||
355 | return -EOVERFLOW; | ||
350 | #ifdef STAT64_HAS_BROKEN_ST_INO | 356 | #ifdef STAT64_HAS_BROKEN_ST_INO |
351 | tmp.__st_ino = stat->ino; | 357 | tmp.__st_ino = stat->ino; |
352 | #endif | 358 | #endif |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index cf3786625bfa..146f1dedec84 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -157,8 +157,8 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
157 | if ((retval = fill_read_buffer(file->f_dentry,buffer))) | 157 | if ((retval = fill_read_buffer(file->f_dentry,buffer))) |
158 | goto out; | 158 | goto out; |
159 | } | 159 | } |
160 | pr_debug("%s: count = %d, ppos = %lld, buf = %s\n", | 160 | pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", |
161 | __FUNCTION__,count,*ppos,buffer->page); | 161 | __FUNCTION__, count, *ppos, buffer->page); |
162 | retval = flush_read_buffer(buffer,buf,count,ppos); | 162 | retval = flush_read_buffer(buffer,buf,count,ppos); |
163 | out: | 163 | out: |
164 | up(&buffer->sem); | 164 | up(&buffer->sem); |
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 36fbeccdc722..c75f68361e33 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c | |||
@@ -53,8 +53,7 @@ cmn_err(register int level, char *fmt, ...) | |||
53 | va_end(ap); | 53 | va_end(ap); |
54 | spin_unlock_irqrestore(&xfs_err_lock,flags); | 54 | spin_unlock_irqrestore(&xfs_err_lock,flags); |
55 | 55 | ||
56 | if (level == CE_PANIC) | 56 | BUG_ON(level == CE_PANIC); |
57 | BUG(); | ||
58 | } | 57 | } |
59 | 58 | ||
60 | void | 59 | void |
@@ -72,8 +71,7 @@ icmn_err(register int level, char *fmt, va_list ap) | |||
72 | strcat(message, "\n"); | 71 | strcat(message, "\n"); |
73 | spin_unlock_irqrestore(&xfs_err_lock,flags); | 72 | spin_unlock_irqrestore(&xfs_err_lock,flags); |
74 | printk("%s%s", err_level[level], message); | 73 | printk("%s%s", err_level[level], message); |
75 | if (level == CE_PANIC) | 74 | BUG_ON(level == CE_PANIC); |
76 | BUG(); | ||
77 | } | 75 | } |
78 | 76 | ||
79 | void | 77 | void |